gcc/config/i386/i386.c
1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988-2014 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
10
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "tm.h"
24 #include "rtl.h"
25 #include "tree.h"
26 #include "stringpool.h"
27 #include "attribs.h"
28 #include "calls.h"
29 #include "stor-layout.h"
30 #include "varasm.h"
31 #include "tm_p.h"
32 #include "regs.h"
33 #include "hard-reg-set.h"
34 #include "insn-config.h"
35 #include "conditions.h"
36 #include "output.h"
37 #include "insn-codes.h"
38 #include "insn-attr.h"
39 #include "flags.h"
40 #include "except.h"
41 #include "function.h"
42 #include "recog.h"
43 #include "expr.h"
44 #include "optabs.h"
45 #include "diagnostic-core.h"
46 #include "toplev.h"
47 #include "basic-block.h"
48 #include "ggc.h"
49 #include "target.h"
50 #include "target-def.h"
51 #include "common/common-target.h"
52 #include "langhooks.h"
53 #include "reload.h"
54 #include "cgraph.h"
55 #include "hash-table.h"
56 #include "vec.h"
57 #include "basic-block.h"
58 #include "tree-ssa-alias.h"
59 #include "internal-fn.h"
60 #include "gimple-fold.h"
61 #include "tree-eh.h"
62 #include "gimple-expr.h"
63 #include "is-a.h"
64 #include "gimple.h"
65 #include "gimplify.h"
66 #include "cfgloop.h"
67 #include "dwarf2.h"
68 #include "df.h"
69 #include "tm-constrs.h"
70 #include "params.h"
71 #include "cselib.h"
72 #include "debug.h"
73 #include "sched-int.h"
74 #include "sbitmap.h"
75 #include "fibheap.h"
76 #include "opts.h"
77 #include "diagnostic.h"
78 #include "dumpfile.h"
79 #include "tree-pass.h"
80 #include "wide-int.h"
81 #include "context.h"
82 #include "pass_manager.h"
83 #include "target-globals.h"
84 #include "tree-vectorizer.h"
85 #include "shrink-wrap.h"
86 #include "builtins.h"
87
88 static rtx legitimize_dllimport_symbol (rtx, bool);
89 static rtx legitimize_pe_coff_extern_decl (rtx, bool);
90 static rtx legitimize_pe_coff_symbol (rtx, bool);
91
92 #ifndef CHECK_STACK_LIMIT
93 #define CHECK_STACK_LIMIT (-1)
94 #endif
95
96 /* Return the index of the given mode in the mult and division cost tables. */
97 #define MODE_INDEX(mode) \
98 ((mode) == QImode ? 0 \
99 : (mode) == HImode ? 1 \
100 : (mode) == SImode ? 2 \
101 : (mode) == DImode ? 3 \
102 : 4)
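/* For example, MODE_INDEX (SImode) evaluates to 2, so SImode costs sit in
   slot 2 of the five-element multiply and divide arrays in the tables below;
   slot 4 ("other") catches anything wider than DImode, e.g. TImode.  */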
103
104 /* Processor costs (relative to an add) */
105 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
106 #define COSTS_N_BYTES(N) ((N) * 2)
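/* With that assumption the two scales agree on an add: COSTS_N_INSNS (1)
   expands to 4 and COSTS_N_BYTES (2) also yields 4, so a 2-byte add is
   costed the same whether a table below is measured in insns or in bytes.  */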
107
108 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall, false}}}
109
110 static stringop_algs ix86_size_memcpy[2] = {
111 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
112 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
113 static stringop_algs ix86_size_memset[2] = {
114 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
115 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
116
117 const
118 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
119 COSTS_N_BYTES (2), /* cost of an add instruction */
120 COSTS_N_BYTES (3), /* cost of a lea instruction */
121 COSTS_N_BYTES (2), /* variable shift costs */
122 COSTS_N_BYTES (3), /* constant shift costs */
123 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
124 COSTS_N_BYTES (3), /* HI */
125 COSTS_N_BYTES (3), /* SI */
126 COSTS_N_BYTES (3), /* DI */
127 COSTS_N_BYTES (5)}, /* other */
128 0, /* cost of multiply per each bit set */
129 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
130 COSTS_N_BYTES (3), /* HI */
131 COSTS_N_BYTES (3), /* SI */
132 COSTS_N_BYTES (3), /* DI */
133 COSTS_N_BYTES (5)}, /* other */
134 COSTS_N_BYTES (3), /* cost of movsx */
135 COSTS_N_BYTES (3), /* cost of movzx */
136 0, /* "large" insn */
137 2, /* MOVE_RATIO */
138 2, /* cost for loading QImode using movzbl */
139 {2, 2, 2}, /* cost of loading integer registers
140 in QImode, HImode and SImode.
141 Relative to reg-reg move (2). */
142 {2, 2, 2}, /* cost of storing integer registers */
143 2, /* cost of reg,reg fld/fst */
144 {2, 2, 2}, /* cost of loading fp registers
145 in SFmode, DFmode and XFmode */
146 {2, 2, 2}, /* cost of storing fp registers
147 in SFmode, DFmode and XFmode */
148 3, /* cost of moving MMX register */
149 {3, 3}, /* cost of loading MMX registers
150 in SImode and DImode */
151 {3, 3}, /* cost of storing MMX registers
152 in SImode and DImode */
153 3, /* cost of moving SSE register */
154 {3, 3, 3}, /* cost of loading SSE registers
155 in SImode, DImode and TImode */
156 {3, 3, 3}, /* cost of storing SSE registers
157 in SImode, DImode and TImode */
158 3, /* MMX or SSE register to integer */
159 0, /* size of l1 cache */
160 0, /* size of l2 cache */
161 0, /* size of prefetch block */
162 0, /* number of parallel prefetches */
163 2, /* Branch cost */
164 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
165 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
166 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
167 COSTS_N_BYTES (2), /* cost of FABS instruction. */
168 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
169 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
170 ix86_size_memcpy,
171 ix86_size_memset,
172 1, /* scalar_stmt_cost. */
173 1, /* scalar load_cost. */
174 1, /* scalar_store_cost. */
175 1, /* vec_stmt_cost. */
176 1, /* vec_to_scalar_cost. */
177 1, /* scalar_to_vec_cost. */
178 1, /* vec_align_load_cost. */
179 1, /* vec_unalign_load_cost. */
180 1, /* vec_store_cost. */
181 1, /* cond_taken_branch_cost. */
182 1, /* cond_not_taken_branch_cost. */
183 };
184
185 /* Processor costs (relative to an add) */
186 static stringop_algs i386_memcpy[2] = {
187 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
188 DUMMY_STRINGOP_ALGS};
189 static stringop_algs i386_memset[2] = {
190 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
191 DUMMY_STRINGOP_ALGS};
192
193 static const
194 struct processor_costs i386_cost = { /* 386 specific costs */
195 COSTS_N_INSNS (1), /* cost of an add instruction */
196 COSTS_N_INSNS (1), /* cost of a lea instruction */
197 COSTS_N_INSNS (3), /* variable shift costs */
198 COSTS_N_INSNS (2), /* constant shift costs */
199 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
200 COSTS_N_INSNS (6), /* HI */
201 COSTS_N_INSNS (6), /* SI */
202 COSTS_N_INSNS (6), /* DI */
203 COSTS_N_INSNS (6)}, /* other */
204 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
205 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
206 COSTS_N_INSNS (23), /* HI */
207 COSTS_N_INSNS (23), /* SI */
208 COSTS_N_INSNS (23), /* DI */
209 COSTS_N_INSNS (23)}, /* other */
210 COSTS_N_INSNS (3), /* cost of movsx */
211 COSTS_N_INSNS (2), /* cost of movzx */
212 15, /* "large" insn */
213 3, /* MOVE_RATIO */
214 4, /* cost for loading QImode using movzbl */
215 {2, 4, 2}, /* cost of loading integer registers
216 in QImode, HImode and SImode.
217 Relative to reg-reg move (2). */
218 {2, 4, 2}, /* cost of storing integer registers */
219 2, /* cost of reg,reg fld/fst */
220 {8, 8, 8}, /* cost of loading fp registers
221 in SFmode, DFmode and XFmode */
222 {8, 8, 8}, /* cost of storing fp registers
223 in SFmode, DFmode and XFmode */
224 2, /* cost of moving MMX register */
225 {4, 8}, /* cost of loading MMX registers
226 in SImode and DImode */
227 {4, 8}, /* cost of storing MMX registers
228 in SImode and DImode */
229 2, /* cost of moving SSE register */
230 {4, 8, 16}, /* cost of loading SSE registers
231 in SImode, DImode and TImode */
232 {4, 8, 16}, /* cost of storing SSE registers
233 in SImode, DImode and TImode */
234 3, /* MMX or SSE register to integer */
235 0, /* size of l1 cache */
236 0, /* size of l2 cache */
237 0, /* size of prefetch block */
238 0, /* number of parallel prefetches */
239 1, /* Branch cost */
240 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
241 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
242 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
243 COSTS_N_INSNS (22), /* cost of FABS instruction. */
244 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
245 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
246 i386_memcpy,
247 i386_memset,
248 1, /* scalar_stmt_cost. */
249 1, /* scalar load_cost. */
250 1, /* scalar_store_cost. */
251 1, /* vec_stmt_cost. */
252 1, /* vec_to_scalar_cost. */
253 1, /* scalar_to_vec_cost. */
254 1, /* vec_align_load_cost. */
255 2, /* vec_unalign_load_cost. */
256 1, /* vec_store_cost. */
257 3, /* cond_taken_branch_cost. */
258 1, /* cond_not_taken_branch_cost. */
259 };
260
261 static stringop_algs i486_memcpy[2] = {
262 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
263 DUMMY_STRINGOP_ALGS};
264 static stringop_algs i486_memset[2] = {
265 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
266 DUMMY_STRINGOP_ALGS};
267
268 static const
269 struct processor_costs i486_cost = { /* 486 specific costs */
270 COSTS_N_INSNS (1), /* cost of an add instruction */
271 COSTS_N_INSNS (1), /* cost of a lea instruction */
272 COSTS_N_INSNS (3), /* variable shift costs */
273 COSTS_N_INSNS (2), /* constant shift costs */
274 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
275 COSTS_N_INSNS (12), /* HI */
276 COSTS_N_INSNS (12), /* SI */
277 COSTS_N_INSNS (12), /* DI */
278 COSTS_N_INSNS (12)}, /* other */
279 1, /* cost of multiply per each bit set */
280 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
281 COSTS_N_INSNS (40), /* HI */
282 COSTS_N_INSNS (40), /* SI */
283 COSTS_N_INSNS (40), /* DI */
284 COSTS_N_INSNS (40)}, /* other */
285 COSTS_N_INSNS (3), /* cost of movsx */
286 COSTS_N_INSNS (2), /* cost of movzx */
287 15, /* "large" insn */
288 3, /* MOVE_RATIO */
289 4, /* cost for loading QImode using movzbl */
290 {2, 4, 2}, /* cost of loading integer registers
291 in QImode, HImode and SImode.
292 Relative to reg-reg move (2). */
293 {2, 4, 2}, /* cost of storing integer registers */
294 2, /* cost of reg,reg fld/fst */
295 {8, 8, 8}, /* cost of loading fp registers
296 in SFmode, DFmode and XFmode */
297 {8, 8, 8}, /* cost of storing fp registers
298 in SFmode, DFmode and XFmode */
299 2, /* cost of moving MMX register */
300 {4, 8}, /* cost of loading MMX registers
301 in SImode and DImode */
302 {4, 8}, /* cost of storing MMX registers
303 in SImode and DImode */
304 2, /* cost of moving SSE register */
305 {4, 8, 16}, /* cost of loading SSE registers
306 in SImode, DImode and TImode */
307 {4, 8, 16}, /* cost of storing SSE registers
308 in SImode, DImode and TImode */
309 3, /* MMX or SSE register to integer */
310 4, /* size of l1 cache. The 486 has an 8kB cache
311 shared for code and data, so 4kB is
312 not really precise. */
313 4, /* size of l2 cache */
314 0, /* size of prefetch block */
315 0, /* number of parallel prefetches */
316 1, /* Branch cost */
317 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
318 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
319 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
320 COSTS_N_INSNS (3), /* cost of FABS instruction. */
321 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
322 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
323 i486_memcpy,
324 i486_memset,
325 1, /* scalar_stmt_cost. */
326 1, /* scalar load_cost. */
327 1, /* scalar_store_cost. */
328 1, /* vec_stmt_cost. */
329 1, /* vec_to_scalar_cost. */
330 1, /* scalar_to_vec_cost. */
331 1, /* vec_align_load_cost. */
332 2, /* vec_unalign_load_cost. */
333 1, /* vec_store_cost. */
334 3, /* cond_taken_branch_cost. */
335 1, /* cond_not_taken_branch_cost. */
336 };
337
338 static stringop_algs pentium_memcpy[2] = {
339 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
340 DUMMY_STRINGOP_ALGS};
341 static stringop_algs pentium_memset[2] = {
342 {libcall, {{-1, rep_prefix_4_byte, false}}},
343 DUMMY_STRINGOP_ALGS};
344
345 static const
346 struct processor_costs pentium_cost = {
347 COSTS_N_INSNS (1), /* cost of an add instruction */
348 COSTS_N_INSNS (1), /* cost of a lea instruction */
349 COSTS_N_INSNS (4), /* variable shift costs */
350 COSTS_N_INSNS (1), /* constant shift costs */
351 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
352 COSTS_N_INSNS (11), /* HI */
353 COSTS_N_INSNS (11), /* SI */
354 COSTS_N_INSNS (11), /* DI */
355 COSTS_N_INSNS (11)}, /* other */
356 0, /* cost of multiply per each bit set */
357 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
358 COSTS_N_INSNS (25), /* HI */
359 COSTS_N_INSNS (25), /* SI */
360 COSTS_N_INSNS (25), /* DI */
361 COSTS_N_INSNS (25)}, /* other */
362 COSTS_N_INSNS (3), /* cost of movsx */
363 COSTS_N_INSNS (2), /* cost of movzx */
364 8, /* "large" insn */
365 6, /* MOVE_RATIO */
366 6, /* cost for loading QImode using movzbl */
367 {2, 4, 2}, /* cost of loading integer registers
368 in QImode, HImode and SImode.
369 Relative to reg-reg move (2). */
370 {2, 4, 2}, /* cost of storing integer registers */
371 2, /* cost of reg,reg fld/fst */
372 {2, 2, 6}, /* cost of loading fp registers
373 in SFmode, DFmode and XFmode */
374 {4, 4, 6}, /* cost of storing fp registers
375 in SFmode, DFmode and XFmode */
376 8, /* cost of moving MMX register */
377 {8, 8}, /* cost of loading MMX registers
378 in SImode and DImode */
379 {8, 8}, /* cost of storing MMX registers
380 in SImode and DImode */
381 2, /* cost of moving SSE register */
382 {4, 8, 16}, /* cost of loading SSE registers
383 in SImode, DImode and TImode */
384 {4, 8, 16}, /* cost of storing SSE registers
385 in SImode, DImode and TImode */
386 3, /* MMX or SSE register to integer */
387 8, /* size of l1 cache. */
388 8, /* size of l2 cache */
389 0, /* size of prefetch block */
390 0, /* number of parallel prefetches */
391 2, /* Branch cost */
392 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
393 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
394 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
395 COSTS_N_INSNS (1), /* cost of FABS instruction. */
396 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
397 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
398 pentium_memcpy,
399 pentium_memset,
400 1, /* scalar_stmt_cost. */
401 1, /* scalar load_cost. */
402 1, /* scalar_store_cost. */
403 1, /* vec_stmt_cost. */
404 1, /* vec_to_scalar_cost. */
405 1, /* scalar_to_vec_cost. */
406 1, /* vec_align_load_cost. */
407 2, /* vec_unalign_load_cost. */
408 1, /* vec_store_cost. */
409 3, /* cond_taken_branch_cost. */
410 1, /* cond_not_taken_branch_cost. */
411 };
412
413 /* PentiumPro has rep instructions optimized for blocks aligned to 8 bytes
414 (we ensure the alignment). For small blocks an inline loop is still a
415 noticeable win; for bigger blocks either rep movsl or rep movsb is the
416 way to go. Rep movsb apparently has a more expensive startup time in the
417 CPU, but past 4K the difference is down in the noise. */
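/* Reading the stringop tables below: the first member is the algorithm used
   when the copy size is unknown at compile time; the {max, alg, noalign}
   entries are then scanned in order for known sizes, and the first entry
   whose MAX covers the size wins (MAX == -1 meaning "everything larger").
   A minimal sketch of that lookup is kept under "#if 0" purely as an
   illustration; the real selection logic is decide_alg further down in this
   file, and the field names here are assumed from the stringop_algs
   definition in i386.h.  */
#if 0
static enum stringop_alg
example_pick_alg (const struct stringop_algs *algs, HOST_WIDE_INT size)
{
  for (int i = 0; i < MAX_STRINGOP_ALGS; i++)
    if (algs->size[i].max == -1 || size <= algs->size[i].max)
      return algs->size[i].alg;       /* first entry covering SIZE wins */
  return algs->unknown_size;          /* fall back to the unknown-size choice */
}
#endif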
418 static stringop_algs pentiumpro_memcpy[2] = {
419 {rep_prefix_4_byte, {{128, loop, false}, {1024, unrolled_loop, false},
420 {8192, rep_prefix_4_byte, false},
421 {-1, rep_prefix_1_byte, false}}},
422 DUMMY_STRINGOP_ALGS};
423 static stringop_algs pentiumpro_memset[2] = {
424 {rep_prefix_4_byte, {{1024, unrolled_loop, false},
425 {8192, rep_prefix_4_byte, false},
426 {-1, libcall, false}}},
427 DUMMY_STRINGOP_ALGS};
428 static const
429 struct processor_costs pentiumpro_cost = {
430 COSTS_N_INSNS (1), /* cost of an add instruction */
431 COSTS_N_INSNS (1), /* cost of a lea instruction */
432 COSTS_N_INSNS (1), /* variable shift costs */
433 COSTS_N_INSNS (1), /* constant shift costs */
434 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
435 COSTS_N_INSNS (4), /* HI */
436 COSTS_N_INSNS (4), /* SI */
437 COSTS_N_INSNS (4), /* DI */
438 COSTS_N_INSNS (4)}, /* other */
439 0, /* cost of multiply per each bit set */
440 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
441 COSTS_N_INSNS (17), /* HI */
442 COSTS_N_INSNS (17), /* SI */
443 COSTS_N_INSNS (17), /* DI */
444 COSTS_N_INSNS (17)}, /* other */
445 COSTS_N_INSNS (1), /* cost of movsx */
446 COSTS_N_INSNS (1), /* cost of movzx */
447 8, /* "large" insn */
448 6, /* MOVE_RATIO */
449 2, /* cost for loading QImode using movzbl */
450 {4, 4, 4}, /* cost of loading integer registers
451 in QImode, HImode and SImode.
452 Relative to reg-reg move (2). */
453 {2, 2, 2}, /* cost of storing integer registers */
454 2, /* cost of reg,reg fld/fst */
455 {2, 2, 6}, /* cost of loading fp registers
456 in SFmode, DFmode and XFmode */
457 {4, 4, 6}, /* cost of storing fp registers
458 in SFmode, DFmode and XFmode */
459 2, /* cost of moving MMX register */
460 {2, 2}, /* cost of loading MMX registers
461 in SImode and DImode */
462 {2, 2}, /* cost of storing MMX registers
463 in SImode and DImode */
464 2, /* cost of moving SSE register */
465 {2, 2, 8}, /* cost of loading SSE registers
466 in SImode, DImode and TImode */
467 {2, 2, 8}, /* cost of storing SSE registers
468 in SImode, DImode and TImode */
469 3, /* MMX or SSE register to integer */
470 8, /* size of l1 cache. */
471 256, /* size of l2 cache */
472 32, /* size of prefetch block */
473 6, /* number of parallel prefetches */
474 2, /* Branch cost */
475 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
476 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
477 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
478 COSTS_N_INSNS (2), /* cost of FABS instruction. */
479 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
480 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
481 pentiumpro_memcpy,
482 pentiumpro_memset,
483 1, /* scalar_stmt_cost. */
484 1, /* scalar load_cost. */
485 1, /* scalar_store_cost. */
486 1, /* vec_stmt_cost. */
487 1, /* vec_to_scalar_cost. */
488 1, /* scalar_to_vec_cost. */
489 1, /* vec_align_load_cost. */
490 2, /* vec_unalign_load_cost. */
491 1, /* vec_store_cost. */
492 3, /* cond_taken_branch_cost. */
493 1, /* cond_not_taken_branch_cost. */
494 };
495
496 static stringop_algs geode_memcpy[2] = {
497 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
498 DUMMY_STRINGOP_ALGS};
499 static stringop_algs geode_memset[2] = {
500 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
501 DUMMY_STRINGOP_ALGS};
502 static const
503 struct processor_costs geode_cost = {
504 COSTS_N_INSNS (1), /* cost of an add instruction */
505 COSTS_N_INSNS (1), /* cost of a lea instruction */
506 COSTS_N_INSNS (2), /* variable shift costs */
507 COSTS_N_INSNS (1), /* constant shift costs */
508 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
509 COSTS_N_INSNS (4), /* HI */
510 COSTS_N_INSNS (7), /* SI */
511 COSTS_N_INSNS (7), /* DI */
512 COSTS_N_INSNS (7)}, /* other */
513 0, /* cost of multiply per each bit set */
514 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
515 COSTS_N_INSNS (23), /* HI */
516 COSTS_N_INSNS (39), /* SI */
517 COSTS_N_INSNS (39), /* DI */
518 COSTS_N_INSNS (39)}, /* other */
519 COSTS_N_INSNS (1), /* cost of movsx */
520 COSTS_N_INSNS (1), /* cost of movzx */
521 8, /* "large" insn */
522 4, /* MOVE_RATIO */
523 1, /* cost for loading QImode using movzbl */
524 {1, 1, 1}, /* cost of loading integer registers
525 in QImode, HImode and SImode.
526 Relative to reg-reg move (2). */
527 {1, 1, 1}, /* cost of storing integer registers */
528 1, /* cost of reg,reg fld/fst */
529 {1, 1, 1}, /* cost of loading fp registers
530 in SFmode, DFmode and XFmode */
531 {4, 6, 6}, /* cost of storing fp registers
532 in SFmode, DFmode and XFmode */
533
534 1, /* cost of moving MMX register */
535 {1, 1}, /* cost of loading MMX registers
536 in SImode and DImode */
537 {1, 1}, /* cost of storing MMX registers
538 in SImode and DImode */
539 1, /* cost of moving SSE register */
540 {1, 1, 1}, /* cost of loading SSE registers
541 in SImode, DImode and TImode */
542 {1, 1, 1}, /* cost of storing SSE registers
543 in SImode, DImode and TImode */
544 1, /* MMX or SSE register to integer */
545 64, /* size of l1 cache. */
546 128, /* size of l2 cache. */
547 32, /* size of prefetch block */
548 1, /* number of parallel prefetches */
549 1, /* Branch cost */
550 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
551 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
552 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
553 COSTS_N_INSNS (1), /* cost of FABS instruction. */
554 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
555 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
556 geode_memcpy,
557 geode_memset,
558 1, /* scalar_stmt_cost. */
559 1, /* scalar load_cost. */
560 1, /* scalar_store_cost. */
561 1, /* vec_stmt_cost. */
562 1, /* vec_to_scalar_cost. */
563 1, /* scalar_to_vec_cost. */
564 1, /* vec_align_load_cost. */
565 2, /* vec_unalign_load_cost. */
566 1, /* vec_store_cost. */
567 3, /* cond_taken_branch_cost. */
568 1, /* cond_not_taken_branch_cost. */
569 };
570
571 static stringop_algs k6_memcpy[2] = {
572 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
573 DUMMY_STRINGOP_ALGS};
574 static stringop_algs k6_memset[2] = {
575 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
576 DUMMY_STRINGOP_ALGS};
577 static const
578 struct processor_costs k6_cost = {
579 COSTS_N_INSNS (1), /* cost of an add instruction */
580 COSTS_N_INSNS (2), /* cost of a lea instruction */
581 COSTS_N_INSNS (1), /* variable shift costs */
582 COSTS_N_INSNS (1), /* constant shift costs */
583 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
584 COSTS_N_INSNS (3), /* HI */
585 COSTS_N_INSNS (3), /* SI */
586 COSTS_N_INSNS (3), /* DI */
587 COSTS_N_INSNS (3)}, /* other */
588 0, /* cost of multiply per each bit set */
589 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
590 COSTS_N_INSNS (18), /* HI */
591 COSTS_N_INSNS (18), /* SI */
592 COSTS_N_INSNS (18), /* DI */
593 COSTS_N_INSNS (18)}, /* other */
594 COSTS_N_INSNS (2), /* cost of movsx */
595 COSTS_N_INSNS (2), /* cost of movzx */
596 8, /* "large" insn */
597 4, /* MOVE_RATIO */
598 3, /* cost for loading QImode using movzbl */
599 {4, 5, 4}, /* cost of loading integer registers
600 in QImode, HImode and SImode.
601 Relative to reg-reg move (2). */
602 {2, 3, 2}, /* cost of storing integer registers */
603 4, /* cost of reg,reg fld/fst */
604 {6, 6, 6}, /* cost of loading fp registers
605 in SFmode, DFmode and XFmode */
606 {4, 4, 4}, /* cost of storing fp registers
607 in SFmode, DFmode and XFmode */
608 2, /* cost of moving MMX register */
609 {2, 2}, /* cost of loading MMX registers
610 in SImode and DImode */
611 {2, 2}, /* cost of storing MMX registers
612 in SImode and DImode */
613 2, /* cost of moving SSE register */
614 {2, 2, 8}, /* cost of loading SSE registers
615 in SImode, DImode and TImode */
616 {2, 2, 8}, /* cost of storing SSE registers
617 in SImode, DImode and TImode */
618 6, /* MMX or SSE register to integer */
619 32, /* size of l1 cache. */
620 32, /* size of l2 cache. Some models
621 have an integrated l2 cache, but
622 optimizing for k6 is not important
623 enough to worry about that. */
624 32, /* size of prefetch block */
625 1, /* number of parallel prefetches */
626 1, /* Branch cost */
627 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
628 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
629 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
630 COSTS_N_INSNS (2), /* cost of FABS instruction. */
631 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
632 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
633 k6_memcpy,
634 k6_memset,
635 1, /* scalar_stmt_cost. */
636 1, /* scalar load_cost. */
637 1, /* scalar_store_cost. */
638 1, /* vec_stmt_cost. */
639 1, /* vec_to_scalar_cost. */
640 1, /* scalar_to_vec_cost. */
641 1, /* vec_align_load_cost. */
642 2, /* vec_unalign_load_cost. */
643 1, /* vec_store_cost. */
644 3, /* cond_taken_branch_cost. */
645 1, /* cond_not_taken_branch_cost. */
646 };
647
648 /* For some reason, Athlon deals better with the REP prefix (relative to
649 loops) than K8 does. Alignment becomes important after 8 bytes for memcpy
650 and 128 bytes for memset. */
651 static stringop_algs athlon_memcpy[2] = {
652 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
653 DUMMY_STRINGOP_ALGS};
654 static stringop_algs athlon_memset[2] = {
655 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
656 DUMMY_STRINGOP_ALGS};
657 static const
658 struct processor_costs athlon_cost = {
659 COSTS_N_INSNS (1), /* cost of an add instruction */
660 COSTS_N_INSNS (2), /* cost of a lea instruction */
661 COSTS_N_INSNS (1), /* variable shift costs */
662 COSTS_N_INSNS (1), /* constant shift costs */
663 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
664 COSTS_N_INSNS (5), /* HI */
665 COSTS_N_INSNS (5), /* SI */
666 COSTS_N_INSNS (5), /* DI */
667 COSTS_N_INSNS (5)}, /* other */
668 0, /* cost of multiply per each bit set */
669 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
670 COSTS_N_INSNS (26), /* HI */
671 COSTS_N_INSNS (42), /* SI */
672 COSTS_N_INSNS (74), /* DI */
673 COSTS_N_INSNS (74)}, /* other */
674 COSTS_N_INSNS (1), /* cost of movsx */
675 COSTS_N_INSNS (1), /* cost of movzx */
676 8, /* "large" insn */
677 9, /* MOVE_RATIO */
678 4, /* cost for loading QImode using movzbl */
679 {3, 4, 3}, /* cost of loading integer registers
680 in QImode, HImode and SImode.
681 Relative to reg-reg move (2). */
682 {3, 4, 3}, /* cost of storing integer registers */
683 4, /* cost of reg,reg fld/fst */
684 {4, 4, 12}, /* cost of loading fp registers
685 in SFmode, DFmode and XFmode */
686 {6, 6, 8}, /* cost of storing fp registers
687 in SFmode, DFmode and XFmode */
688 2, /* cost of moving MMX register */
689 {4, 4}, /* cost of loading MMX registers
690 in SImode and DImode */
691 {4, 4}, /* cost of storing MMX registers
692 in SImode and DImode */
693 2, /* cost of moving SSE register */
694 {4, 4, 6}, /* cost of loading SSE registers
695 in SImode, DImode and TImode */
696 {4, 4, 5}, /* cost of storing SSE registers
697 in SImode, DImode and TImode */
698 5, /* MMX or SSE register to integer */
699 64, /* size of l1 cache. */
700 256, /* size of l2 cache. */
701 64, /* size of prefetch block */
702 6, /* number of parallel prefetches */
703 5, /* Branch cost */
704 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
705 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
706 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
707 COSTS_N_INSNS (2), /* cost of FABS instruction. */
708 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
709 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
710 athlon_memcpy,
711 athlon_memset,
712 1, /* scalar_stmt_cost. */
713 1, /* scalar load_cost. */
714 1, /* scalar_store_cost. */
715 1, /* vec_stmt_cost. */
716 1, /* vec_to_scalar_cost. */
717 1, /* scalar_to_vec_cost. */
718 1, /* vec_align_load_cost. */
719 2, /* vec_unalign_load_cost. */
720 1, /* vec_store_cost. */
721 3, /* cond_taken_branch_cost. */
722 1, /* cond_not_taken_branch_cost. */
723 };
724
725 /* K8 has an optimized REP instruction for medium-sized blocks, but for very
726 small blocks it is better to use a loop. For large blocks, a libcall can
727 do nontemporal accesses and beat inline code considerably. */
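/* Both elements of the K8 tables are populated: entry [0] appears to be used
   when generating 32-bit code and entry [1] for 64-bit code (note that
   rep_prefix_8_byte shows up only in the second slot, and the 32-bit-only
   processors above leave that slot as DUMMY_STRINGOP_ALGS).  */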
728 static stringop_algs k8_memcpy[2] = {
729 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
730 {-1, rep_prefix_4_byte, false}}},
731 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
732 {-1, libcall, false}}}};
733 static stringop_algs k8_memset[2] = {
734 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
735 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
736 {libcall, {{48, unrolled_loop, false},
737 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
738 static const
739 struct processor_costs k8_cost = {
740 COSTS_N_INSNS (1), /* cost of an add instruction */
741 COSTS_N_INSNS (2), /* cost of a lea instruction */
742 COSTS_N_INSNS (1), /* variable shift costs */
743 COSTS_N_INSNS (1), /* constant shift costs */
744 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
745 COSTS_N_INSNS (4), /* HI */
746 COSTS_N_INSNS (3), /* SI */
747 COSTS_N_INSNS (4), /* DI */
748 COSTS_N_INSNS (5)}, /* other */
749 0, /* cost of multiply per each bit set */
750 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
751 COSTS_N_INSNS (26), /* HI */
752 COSTS_N_INSNS (42), /* SI */
753 COSTS_N_INSNS (74), /* DI */
754 COSTS_N_INSNS (74)}, /* other */
755 COSTS_N_INSNS (1), /* cost of movsx */
756 COSTS_N_INSNS (1), /* cost of movzx */
757 8, /* "large" insn */
758 9, /* MOVE_RATIO */
759 4, /* cost for loading QImode using movzbl */
760 {3, 4, 3}, /* cost of loading integer registers
761 in QImode, HImode and SImode.
762 Relative to reg-reg move (2). */
763 {3, 4, 3}, /* cost of storing integer registers */
764 4, /* cost of reg,reg fld/fst */
765 {4, 4, 12}, /* cost of loading fp registers
766 in SFmode, DFmode and XFmode */
767 {6, 6, 8}, /* cost of storing fp registers
768 in SFmode, DFmode and XFmode */
769 2, /* cost of moving MMX register */
770 {3, 3}, /* cost of loading MMX registers
771 in SImode and DImode */
772 {4, 4}, /* cost of storing MMX registers
773 in SImode and DImode */
774 2, /* cost of moving SSE register */
775 {4, 3, 6}, /* cost of loading SSE registers
776 in SImode, DImode and TImode */
777 {4, 4, 5}, /* cost of storing SSE registers
778 in SImode, DImode and TImode */
779 5, /* MMX or SSE register to integer */
780 64, /* size of l1 cache. */
781 512, /* size of l2 cache. */
782 64, /* size of prefetch block */
783 /* New AMD processors never drop prefetches; if they cannot be performed
784 immediately, they are queued. We set the number of simultaneous prefetches
785 to a large constant to reflect this (leaving the number of prefetches
786 completely unlimited is probably not a good idea either, as their
787 execution also takes some time). */
788 100, /* number of parallel prefetches */
789 3, /* Branch cost */
790 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
791 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
792 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
793 COSTS_N_INSNS (2), /* cost of FABS instruction. */
794 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
795 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
796
797 k8_memcpy,
798 k8_memset,
799 4, /* scalar_stmt_cost. */
800 2, /* scalar load_cost. */
801 2, /* scalar_store_cost. */
802 5, /* vec_stmt_cost. */
803 0, /* vec_to_scalar_cost. */
804 2, /* scalar_to_vec_cost. */
805 2, /* vec_align_load_cost. */
806 3, /* vec_unalign_load_cost. */
807 3, /* vec_store_cost. */
808 3, /* cond_taken_branch_cost. */
809 2, /* cond_not_taken_branch_cost. */
810 };
811
812 /* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but for
813 very small blocks it is better to use a loop. For large blocks, a libcall can
814 do nontemporal accesses and beat inline code considerably. */
815 static stringop_algs amdfam10_memcpy[2] = {
816 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
817 {-1, rep_prefix_4_byte, false}}},
818 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
819 {-1, libcall, false}}}};
820 static stringop_algs amdfam10_memset[2] = {
821 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
822 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
823 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
824 {-1, libcall, false}}}};
825 struct processor_costs amdfam10_cost = {
826 COSTS_N_INSNS (1), /* cost of an add instruction */
827 COSTS_N_INSNS (2), /* cost of a lea instruction */
828 COSTS_N_INSNS (1), /* variable shift costs */
829 COSTS_N_INSNS (1), /* constant shift costs */
830 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
831 COSTS_N_INSNS (4), /* HI */
832 COSTS_N_INSNS (3), /* SI */
833 COSTS_N_INSNS (4), /* DI */
834 COSTS_N_INSNS (5)}, /* other */
835 0, /* cost of multiply per each bit set */
836 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
837 COSTS_N_INSNS (35), /* HI */
838 COSTS_N_INSNS (51), /* SI */
839 COSTS_N_INSNS (83), /* DI */
840 COSTS_N_INSNS (83)}, /* other */
841 COSTS_N_INSNS (1), /* cost of movsx */
842 COSTS_N_INSNS (1), /* cost of movzx */
843 8, /* "large" insn */
844 9, /* MOVE_RATIO */
845 4, /* cost for loading QImode using movzbl */
846 {3, 4, 3}, /* cost of loading integer registers
847 in QImode, HImode and SImode.
848 Relative to reg-reg move (2). */
849 {3, 4, 3}, /* cost of storing integer registers */
850 4, /* cost of reg,reg fld/fst */
851 {4, 4, 12}, /* cost of loading fp registers
852 in SFmode, DFmode and XFmode */
853 {6, 6, 8}, /* cost of storing fp registers
854 in SFmode, DFmode and XFmode */
855 2, /* cost of moving MMX register */
856 {3, 3}, /* cost of loading MMX registers
857 in SImode and DImode */
858 {4, 4}, /* cost of storing MMX registers
859 in SImode and DImode */
860 2, /* cost of moving SSE register */
861 {4, 4, 3}, /* cost of loading SSE registers
862 in SImode, DImode and TImode */
863 {4, 4, 5}, /* cost of storing SSE registers
864 in SImode, DImode and TImode */
865 3, /* MMX or SSE register to integer */
866 /* On K8:
867 MOVD reg64, xmmreg Double FSTORE 4
868 MOVD reg32, xmmreg Double FSTORE 4
869 On AMDFAM10:
870 MOVD reg64, xmmreg Double FADD 3
871 1/1 1/1
872 MOVD reg32, xmmreg Double FADD 3
873 1/1 1/1 */
874 64, /* size of l1 cache. */
875 512, /* size of l2 cache. */
876 64, /* size of prefetch block */
877 /* New AMD processors never drop prefetches; if they cannot be performed
878 immediately, they are queued. We set the number of simultaneous prefetches
879 to a large constant to reflect this (leaving the number of prefetches
880 completely unlimited is probably not a good idea either, as their
881 execution also takes some time). */
882 100, /* number of parallel prefetches */
883 2, /* Branch cost */
884 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
885 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
886 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
887 COSTS_N_INSNS (2), /* cost of FABS instruction. */
888 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
889 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
890
891 amdfam10_memcpy,
892 amdfam10_memset,
893 4, /* scalar_stmt_cost. */
894 2, /* scalar load_cost. */
895 2, /* scalar_store_cost. */
896 6, /* vec_stmt_cost. */
897 0, /* vec_to_scalar_cost. */
898 2, /* scalar_to_vec_cost. */
899 2, /* vec_align_load_cost. */
900 2, /* vec_unalign_load_cost. */
901 2, /* vec_store_cost. */
902 2, /* cond_taken_branch_cost. */
903 1, /* cond_not_taken_branch_cost. */
904 };
905
906 /* BDVER1 has an optimized REP instruction for medium-sized blocks, but for
907 very small blocks it is better to use a loop. For large blocks, a libcall
908 can do nontemporal accesses and beat inline code considerably. */
909 static stringop_algs bdver1_memcpy[2] = {
910 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
911 {-1, rep_prefix_4_byte, false}}},
912 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
913 {-1, libcall, false}}}};
914 static stringop_algs bdver1_memset[2] = {
915 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
916 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
917 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
918 {-1, libcall, false}}}};
919
920 const struct processor_costs bdver1_cost = {
921 COSTS_N_INSNS (1), /* cost of an add instruction */
922 COSTS_N_INSNS (1), /* cost of a lea instruction */
923 COSTS_N_INSNS (1), /* variable shift costs */
924 COSTS_N_INSNS (1), /* constant shift costs */
925 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
926 COSTS_N_INSNS (4), /* HI */
927 COSTS_N_INSNS (4), /* SI */
928 COSTS_N_INSNS (6), /* DI */
929 COSTS_N_INSNS (6)}, /* other */
930 0, /* cost of multiply per each bit set */
931 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
932 COSTS_N_INSNS (35), /* HI */
933 COSTS_N_INSNS (51), /* SI */
934 COSTS_N_INSNS (83), /* DI */
935 COSTS_N_INSNS (83)}, /* other */
936 COSTS_N_INSNS (1), /* cost of movsx */
937 COSTS_N_INSNS (1), /* cost of movzx */
938 8, /* "large" insn */
939 9, /* MOVE_RATIO */
940 4, /* cost for loading QImode using movzbl */
941 {5, 5, 4}, /* cost of loading integer registers
942 in QImode, HImode and SImode.
943 Relative to reg-reg move (2). */
944 {4, 4, 4}, /* cost of storing integer registers */
945 2, /* cost of reg,reg fld/fst */
946 {5, 5, 12}, /* cost of loading fp registers
947 in SFmode, DFmode and XFmode */
948 {4, 4, 8}, /* cost of storing fp registers
949 in SFmode, DFmode and XFmode */
950 2, /* cost of moving MMX register */
951 {4, 4}, /* cost of loading MMX registers
952 in SImode and DImode */
953 {4, 4}, /* cost of storing MMX registers
954 in SImode and DImode */
955 2, /* cost of moving SSE register */
956 {4, 4, 4}, /* cost of loading SSE registers
957 in SImode, DImode and TImode */
958 {4, 4, 4}, /* cost of storing SSE registers
959 in SImode, DImode and TImode */
960 2, /* MMX or SSE register to integer */
961 /* On K8:
962 MOVD reg64, xmmreg Double FSTORE 4
963 MOVD reg32, xmmreg Double FSTORE 4
964 On AMDFAM10:
965 MOVD reg64, xmmreg Double FADD 3
966 1/1 1/1
967 MOVD reg32, xmmreg Double FADD 3
968 1/1 1/1 */
969 16, /* size of l1 cache. */
970 2048, /* size of l2 cache. */
971 64, /* size of prefetch block */
972 /* New AMD processors never drop prefetches; if they cannot be performed
973 immediately, they are queued. We set the number of simultaneous prefetches
974 to a large constant to reflect this (leaving the number of prefetches
975 completely unlimited is probably not a good idea either, as their
976 execution also takes some time). */
977 100, /* number of parallel prefetches */
978 2, /* Branch cost */
979 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
980 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
981 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
982 COSTS_N_INSNS (2), /* cost of FABS instruction. */
983 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
984 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
985
986 bdver1_memcpy,
987 bdver1_memset,
988 6, /* scalar_stmt_cost. */
989 4, /* scalar load_cost. */
990 4, /* scalar_store_cost. */
991 6, /* vec_stmt_cost. */
992 0, /* vec_to_scalar_cost. */
993 2, /* scalar_to_vec_cost. */
994 4, /* vec_align_load_cost. */
995 4, /* vec_unalign_load_cost. */
996 4, /* vec_store_cost. */
997 2, /* cond_taken_branch_cost. */
998 1, /* cond_not_taken_branch_cost. */
999 };
1000
1001 /* BDVER2 has an optimized REP instruction for medium-sized blocks, but for
1002 very small blocks it is better to use a loop. For large blocks, a libcall
1003 can do nontemporal accesses and beat inline code considerably. */
1004
1005 static stringop_algs bdver2_memcpy[2] = {
1006 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1007 {-1, rep_prefix_4_byte, false}}},
1008 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1009 {-1, libcall, false}}}};
1010 static stringop_algs bdver2_memset[2] = {
1011 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1012 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1013 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1014 {-1, libcall, false}}}};
1015
1016 const struct processor_costs bdver2_cost = {
1017 COSTS_N_INSNS (1), /* cost of an add instruction */
1018 COSTS_N_INSNS (1), /* cost of a lea instruction */
1019 COSTS_N_INSNS (1), /* variable shift costs */
1020 COSTS_N_INSNS (1), /* constant shift costs */
1021 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1022 COSTS_N_INSNS (4), /* HI */
1023 COSTS_N_INSNS (4), /* SI */
1024 COSTS_N_INSNS (6), /* DI */
1025 COSTS_N_INSNS (6)}, /* other */
1026 0, /* cost of multiply per each bit set */
1027 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1028 COSTS_N_INSNS (35), /* HI */
1029 COSTS_N_INSNS (51), /* SI */
1030 COSTS_N_INSNS (83), /* DI */
1031 COSTS_N_INSNS (83)}, /* other */
1032 COSTS_N_INSNS (1), /* cost of movsx */
1033 COSTS_N_INSNS (1), /* cost of movzx */
1034 8, /* "large" insn */
1035 9, /* MOVE_RATIO */
1036 4, /* cost for loading QImode using movzbl */
1037 {5, 5, 4}, /* cost of loading integer registers
1038 in QImode, HImode and SImode.
1039 Relative to reg-reg move (2). */
1040 {4, 4, 4}, /* cost of storing integer registers */
1041 2, /* cost of reg,reg fld/fst */
1042 {5, 5, 12}, /* cost of loading fp registers
1043 in SFmode, DFmode and XFmode */
1044 {4, 4, 8}, /* cost of storing fp registers
1045 in SFmode, DFmode and XFmode */
1046 2, /* cost of moving MMX register */
1047 {4, 4}, /* cost of loading MMX registers
1048 in SImode and DImode */
1049 {4, 4}, /* cost of storing MMX registers
1050 in SImode and DImode */
1051 2, /* cost of moving SSE register */
1052 {4, 4, 4}, /* cost of loading SSE registers
1053 in SImode, DImode and TImode */
1054 {4, 4, 4}, /* cost of storing SSE registers
1055 in SImode, DImode and TImode */
1056 2, /* MMX or SSE register to integer */
1057 /* On K8:
1058 MOVD reg64, xmmreg Double FSTORE 4
1059 MOVD reg32, xmmreg Double FSTORE 4
1060 On AMDFAM10:
1061 MOVD reg64, xmmreg Double FADD 3
1062 1/1 1/1
1063 MOVD reg32, xmmreg Double FADD 3
1064 1/1 1/1 */
1065 16, /* size of l1 cache. */
1066 2048, /* size of l2 cache. */
1067 64, /* size of prefetch block */
1068 /* New AMD processors never drop prefetches; if they cannot be performed
1069 immediately, they are queued. We set the number of simultaneous prefetches
1070 to a large constant to reflect this (leaving the number of prefetches
1071 completely unlimited is probably not a good idea either, as their
1072 execution also takes some time). */
1073 100, /* number of parallel prefetches */
1074 2, /* Branch cost */
1075 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1076 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1077 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1078 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1079 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1080 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1081
1082 bdver2_memcpy,
1083 bdver2_memset,
1084 6, /* scalar_stmt_cost. */
1085 4, /* scalar load_cost. */
1086 4, /* scalar_store_cost. */
1087 6, /* vec_stmt_cost. */
1088 0, /* vec_to_scalar_cost. */
1089 2, /* scalar_to_vec_cost. */
1090 4, /* vec_align_load_cost. */
1091 4, /* vec_unalign_load_cost. */
1092 4, /* vec_store_cost. */
1093 2, /* cond_taken_branch_cost. */
1094 1, /* cond_not_taken_branch_cost. */
1095 };
1096
1097
1098 /* BDVER3 has an optimized REP instruction for medium-sized blocks, but for
1099 very small blocks it is better to use a loop. For large blocks, a libcall
1100 can do nontemporal accesses and beat inline code considerably. */
1101 static stringop_algs bdver3_memcpy[2] = {
1102 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1103 {-1, rep_prefix_4_byte, false}}},
1104 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1105 {-1, libcall, false}}}};
1106 static stringop_algs bdver3_memset[2] = {
1107 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1108 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1109 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1110 {-1, libcall, false}}}};
1111 struct processor_costs bdver3_cost = {
1112 COSTS_N_INSNS (1), /* cost of an add instruction */
1113 COSTS_N_INSNS (1), /* cost of a lea instruction */
1114 COSTS_N_INSNS (1), /* variable shift costs */
1115 COSTS_N_INSNS (1), /* constant shift costs */
1116 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1117 COSTS_N_INSNS (4), /* HI */
1118 COSTS_N_INSNS (4), /* SI */
1119 COSTS_N_INSNS (6), /* DI */
1120 COSTS_N_INSNS (6)}, /* other */
1121 0, /* cost of multiply per each bit set */
1122 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1123 COSTS_N_INSNS (35), /* HI */
1124 COSTS_N_INSNS (51), /* SI */
1125 COSTS_N_INSNS (83), /* DI */
1126 COSTS_N_INSNS (83)}, /* other */
1127 COSTS_N_INSNS (1), /* cost of movsx */
1128 COSTS_N_INSNS (1), /* cost of movzx */
1129 8, /* "large" insn */
1130 9, /* MOVE_RATIO */
1131 4, /* cost for loading QImode using movzbl */
1132 {5, 5, 4}, /* cost of loading integer registers
1133 in QImode, HImode and SImode.
1134 Relative to reg-reg move (2). */
1135 {4, 4, 4}, /* cost of storing integer registers */
1136 2, /* cost of reg,reg fld/fst */
1137 {5, 5, 12}, /* cost of loading fp registers
1138 in SFmode, DFmode and XFmode */
1139 {4, 4, 8}, /* cost of storing fp registers
1140 in SFmode, DFmode and XFmode */
1141 2, /* cost of moving MMX register */
1142 {4, 4}, /* cost of loading MMX registers
1143 in SImode and DImode */
1144 {4, 4}, /* cost of storing MMX registers
1145 in SImode and DImode */
1146 2, /* cost of moving SSE register */
1147 {4, 4, 4}, /* cost of loading SSE registers
1148 in SImode, DImode and TImode */
1149 {4, 4, 4}, /* cost of storing SSE registers
1150 in SImode, DImode and TImode */
1151 2, /* MMX or SSE register to integer */
1152 16, /* size of l1 cache. */
1153 2048, /* size of l2 cache. */
1154 64, /* size of prefetch block */
1155 /* New AMD processors never drop prefetches; if they cannot be performed
1156 immediately, they are queued. We set the number of simultaneous prefetches
1157 to a large constant to reflect this (leaving the number of prefetches
1158 completely unlimited is probably not a good idea either, as their
1159 execution also takes some time). */
1160 100, /* number of parallel prefetches */
1161 2, /* Branch cost */
1162 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1163 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1164 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1165 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1166 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1167 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1168
1169 bdver3_memcpy,
1170 bdver3_memset,
1171 6, /* scalar_stmt_cost. */
1172 4, /* scalar load_cost. */
1173 4, /* scalar_store_cost. */
1174 6, /* vec_stmt_cost. */
1175 0, /* vec_to_scalar_cost. */
1176 2, /* scalar_to_vec_cost. */
1177 4, /* vec_align_load_cost. */
1178 4, /* vec_unalign_load_cost. */
1179 4, /* vec_store_cost. */
1180 2, /* cond_taken_branch_cost. */
1181 1, /* cond_not_taken_branch_cost. */
1182 };
1183
1184 /* BDVER4 has an optimized REP instruction for medium-sized blocks, but for
1185 very small blocks it is better to use a loop. For large blocks, a libcall
1186 can do nontemporal accesses and beat inline code considerably. */
1187 static stringop_algs bdver4_memcpy[2] = {
1188 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1189 {-1, rep_prefix_4_byte, false}}},
1190 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1191 {-1, libcall, false}}}};
1192 static stringop_algs bdver4_memset[2] = {
1193 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1194 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1195 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1196 {-1, libcall, false}}}};
1197 struct processor_costs bdver4_cost = {
1198 COSTS_N_INSNS (1), /* cost of an add instruction */
1199 COSTS_N_INSNS (1), /* cost of a lea instruction */
1200 COSTS_N_INSNS (1), /* variable shift costs */
1201 COSTS_N_INSNS (1), /* constant shift costs */
1202 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1203 COSTS_N_INSNS (4), /* HI */
1204 COSTS_N_INSNS (4), /* SI */
1205 COSTS_N_INSNS (6), /* DI */
1206 COSTS_N_INSNS (6)}, /* other */
1207 0, /* cost of multiply per each bit set */
1208 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1209 COSTS_N_INSNS (35), /* HI */
1210 COSTS_N_INSNS (51), /* SI */
1211 COSTS_N_INSNS (83), /* DI */
1212 COSTS_N_INSNS (83)}, /* other */
1213 COSTS_N_INSNS (1), /* cost of movsx */
1214 COSTS_N_INSNS (1), /* cost of movzx */
1215 8, /* "large" insn */
1216 9, /* MOVE_RATIO */
1217 4, /* cost for loading QImode using movzbl */
1218 {5, 5, 4}, /* cost of loading integer registers
1219 in QImode, HImode and SImode.
1220 Relative to reg-reg move (2). */
1221 {4, 4, 4}, /* cost of storing integer registers */
1222 2, /* cost of reg,reg fld/fst */
1223 {5, 5, 12}, /* cost of loading fp registers
1224 in SFmode, DFmode and XFmode */
1225 {4, 4, 8}, /* cost of storing fp registers
1226 in SFmode, DFmode and XFmode */
1227 2, /* cost of moving MMX register */
1228 {4, 4}, /* cost of loading MMX registers
1229 in SImode and DImode */
1230 {4, 4}, /* cost of storing MMX registers
1231 in SImode and DImode */
1232 2, /* cost of moving SSE register */
1233 {4, 4, 4}, /* cost of loading SSE registers
1234 in SImode, DImode and TImode */
1235 {4, 4, 4}, /* cost of storing SSE registers
1236 in SImode, DImode and TImode */
1237 2, /* MMX or SSE register to integer */
1238 16, /* size of l1 cache. */
1239 2048, /* size of l2 cache. */
1240 64, /* size of prefetch block */
1241 /* New AMD processors never drop prefetches; if they cannot be performed
1242 immediately, they are queued. We set the number of simultaneous prefetches
1243 to a large constant to reflect this (leaving the number of prefetches
1244 completely unlimited is probably not a good idea either, as their
1245 execution also takes some time). */
1246 100, /* number of parallel prefetches */
1247 2, /* Branch cost */
1248 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1249 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1250 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1251 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1252 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1253 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1254
1255 bdver4_memcpy,
1256 bdver4_memset,
1257 6, /* scalar_stmt_cost. */
1258 4, /* scalar load_cost. */
1259 4, /* scalar_store_cost. */
1260 6, /* vec_stmt_cost. */
1261 0, /* vec_to_scalar_cost. */
1262 2, /* scalar_to_vec_cost. */
1263 4, /* vec_align_load_cost. */
1264 4, /* vec_unalign_load_cost. */
1265 4, /* vec_store_cost. */
1266 2, /* cond_taken_branch_cost. */
1267 1, /* cond_not_taken_branch_cost. */
1268 };
1269
1270 /* BTVER1 has an optimized REP instruction for medium-sized blocks, but for
1271 very small blocks it is better to use a loop. For large blocks, a libcall can
1272 do nontemporal accesses and beat inline code considerably. */
1273 static stringop_algs btver1_memcpy[2] = {
1274 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1275 {-1, rep_prefix_4_byte, false}}},
1276 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1277 {-1, libcall, false}}}};
1278 static stringop_algs btver1_memset[2] = {
1279 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1280 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1281 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1282 {-1, libcall, false}}}};
1283 const struct processor_costs btver1_cost = {
1284 COSTS_N_INSNS (1), /* cost of an add instruction */
1285 COSTS_N_INSNS (2), /* cost of a lea instruction */
1286 COSTS_N_INSNS (1), /* variable shift costs */
1287 COSTS_N_INSNS (1), /* constant shift costs */
1288 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1289 COSTS_N_INSNS (4), /* HI */
1290 COSTS_N_INSNS (3), /* SI */
1291 COSTS_N_INSNS (4), /* DI */
1292 COSTS_N_INSNS (5)}, /* other */
1293 0, /* cost of multiply per each bit set */
1294 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1295 COSTS_N_INSNS (35), /* HI */
1296 COSTS_N_INSNS (51), /* SI */
1297 COSTS_N_INSNS (83), /* DI */
1298 COSTS_N_INSNS (83)}, /* other */
1299 COSTS_N_INSNS (1), /* cost of movsx */
1300 COSTS_N_INSNS (1), /* cost of movzx */
1301 8, /* "large" insn */
1302 9, /* MOVE_RATIO */
1303 4, /* cost for loading QImode using movzbl */
1304 {3, 4, 3}, /* cost of loading integer registers
1305 in QImode, HImode and SImode.
1306 Relative to reg-reg move (2). */
1307 {3, 4, 3}, /* cost of storing integer registers */
1308 4, /* cost of reg,reg fld/fst */
1309 {4, 4, 12}, /* cost of loading fp registers
1310 in SFmode, DFmode and XFmode */
1311 {6, 6, 8}, /* cost of storing fp registers
1312 in SFmode, DFmode and XFmode */
1313 2, /* cost of moving MMX register */
1314 {3, 3}, /* cost of loading MMX registers
1315 in SImode and DImode */
1316 {4, 4}, /* cost of storing MMX registers
1317 in SImode and DImode */
1318 2, /* cost of moving SSE register */
1319 {4, 4, 3}, /* cost of loading SSE registers
1320 in SImode, DImode and TImode */
1321 {4, 4, 5}, /* cost of storing SSE registers
1322 in SImode, DImode and TImode */
1323 3, /* MMX or SSE register to integer */
1324 /* On K8:
1325 MOVD reg64, xmmreg Double FSTORE 4
1326 MOVD reg32, xmmreg Double FSTORE 4
1327 On AMDFAM10:
1328 MOVD reg64, xmmreg Double FADD 3
1329 1/1 1/1
1330 MOVD reg32, xmmreg Double FADD 3
1331 1/1 1/1 */
1332 32, /* size of l1 cache. */
1333 512, /* size of l2 cache. */
1334 64, /* size of prefetch block */
1335 100, /* number of parallel prefetches */
1336 2, /* Branch cost */
1337 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1338 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1339 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1340 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1341 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1342 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1343
1344 btver1_memcpy,
1345 btver1_memset,
1346 4, /* scalar_stmt_cost. */
1347 2, /* scalar load_cost. */
1348 2, /* scalar_store_cost. */
1349 6, /* vec_stmt_cost. */
1350 0, /* vec_to_scalar_cost. */
1351 2, /* scalar_to_vec_cost. */
1352 2, /* vec_align_load_cost. */
1353 2, /* vec_unalign_load_cost. */
1354 2, /* vec_store_cost. */
1355 2, /* cond_taken_branch_cost. */
1356 1, /* cond_not_taken_branch_cost. */
1357 };
1358
1359 static stringop_algs btver2_memcpy[2] = {
1360 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1361 {-1, rep_prefix_4_byte, false}}},
1362 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1363 {-1, libcall, false}}}};
1364 static stringop_algs btver2_memset[2] = {
1365 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1366 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1367 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1368 {-1, libcall, false}}}};
1369 const struct processor_costs btver2_cost = {
1370 COSTS_N_INSNS (1), /* cost of an add instruction */
1371 COSTS_N_INSNS (2), /* cost of a lea instruction */
1372 COSTS_N_INSNS (1), /* variable shift costs */
1373 COSTS_N_INSNS (1), /* constant shift costs */
1374 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1375 COSTS_N_INSNS (4), /* HI */
1376 COSTS_N_INSNS (3), /* SI */
1377 COSTS_N_INSNS (4), /* DI */
1378 COSTS_N_INSNS (5)}, /* other */
1379 0, /* cost of multiply per each bit set */
1380 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1381 COSTS_N_INSNS (35), /* HI */
1382 COSTS_N_INSNS (51), /* SI */
1383 COSTS_N_INSNS (83), /* DI */
1384 COSTS_N_INSNS (83)}, /* other */
1385 COSTS_N_INSNS (1), /* cost of movsx */
1386 COSTS_N_INSNS (1), /* cost of movzx */
1387 8, /* "large" insn */
1388 9, /* MOVE_RATIO */
1389 4, /* cost for loading QImode using movzbl */
1390 {3, 4, 3}, /* cost of loading integer registers
1391 in QImode, HImode and SImode.
1392 Relative to reg-reg move (2). */
1393 {3, 4, 3}, /* cost of storing integer registers */
1394 4, /* cost of reg,reg fld/fst */
1395 {4, 4, 12}, /* cost of loading fp registers
1396 in SFmode, DFmode and XFmode */
1397 {6, 6, 8}, /* cost of storing fp registers
1398 in SFmode, DFmode and XFmode */
1399 2, /* cost of moving MMX register */
1400 {3, 3}, /* cost of loading MMX registers
1401 in SImode and DImode */
1402 {4, 4}, /* cost of storing MMX registers
1403 in SImode and DImode */
1404 2, /* cost of moving SSE register */
1405 {4, 4, 3}, /* cost of loading SSE registers
1406 in SImode, DImode and TImode */
1407 {4, 4, 5}, /* cost of storing SSE registers
1408 in SImode, DImode and TImode */
1409 3, /* MMX or SSE register to integer */
1410 /* On K8:
1411 MOVD reg64, xmmreg Double FSTORE 4
1412 MOVD reg32, xmmreg Double FSTORE 4
1413 On AMDFAM10:
1414 MOVD reg64, xmmreg Double FADD 3
1415 1/1 1/1
1416 MOVD reg32, xmmreg Double FADD 3
1417 1/1 1/1 */
1418 32, /* size of l1 cache. */
1419 2048, /* size of l2 cache. */
1420 64, /* size of prefetch block */
1421 100, /* number of parallel prefetches */
1422 2, /* Branch cost */
1423 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1424 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1425 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1426 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1427 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1428 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1429 btver2_memcpy,
1430 btver2_memset,
1431 4, /* scalar_stmt_cost. */
1432 2, /* scalar_load_cost. */
1433 2, /* scalar_store_cost. */
1434 6, /* vec_stmt_cost. */
1435 0, /* vec_to_scalar_cost. */
1436 2, /* scalar_to_vec_cost. */
1437 2, /* vec_align_load_cost. */
1438 2, /* vec_unalign_load_cost. */
1439 2, /* vec_store_cost. */
1440 2, /* cond_taken_branch_cost. */
1441 1, /* cond_not_taken_branch_cost. */
1442 };
1443
1444 static stringop_algs pentium4_memcpy[2] = {
1445 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1446 DUMMY_STRINGOP_ALGS};
1447 static stringop_algs pentium4_memset[2] = {
1448 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1449 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1450 DUMMY_STRINGOP_ALGS};
1451
1452 static const
1453 struct processor_costs pentium4_cost = {
1454 COSTS_N_INSNS (1), /* cost of an add instruction */
1455 COSTS_N_INSNS (3), /* cost of a lea instruction */
1456 COSTS_N_INSNS (4), /* variable shift costs */
1457 COSTS_N_INSNS (4), /* constant shift costs */
1458 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
1459 COSTS_N_INSNS (15), /* HI */
1460 COSTS_N_INSNS (15), /* SI */
1461 COSTS_N_INSNS (15), /* DI */
1462 COSTS_N_INSNS (15)}, /* other */
1463 0, /* cost of multiply per each bit set */
1464 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
1465 COSTS_N_INSNS (56), /* HI */
1466 COSTS_N_INSNS (56), /* SI */
1467 COSTS_N_INSNS (56), /* DI */
1468 COSTS_N_INSNS (56)}, /* other */
1469 COSTS_N_INSNS (1), /* cost of movsx */
1470 COSTS_N_INSNS (1), /* cost of movzx */
1471 16, /* "large" insn */
1472 6, /* MOVE_RATIO */
1473 2, /* cost for loading QImode using movzbl */
1474 {4, 5, 4}, /* cost of loading integer registers
1475 in QImode, HImode and SImode.
1476 Relative to reg-reg move (2). */
1477 {2, 3, 2}, /* cost of storing integer registers */
1478 2, /* cost of reg,reg fld/fst */
1479 {2, 2, 6}, /* cost of loading fp registers
1480 in SFmode, DFmode and XFmode */
1481 {4, 4, 6}, /* cost of storing fp registers
1482 in SFmode, DFmode and XFmode */
1483 2, /* cost of moving MMX register */
1484 {2, 2}, /* cost of loading MMX registers
1485 in SImode and DImode */
1486 {2, 2}, /* cost of storing MMX registers
1487 in SImode and DImode */
1488 12, /* cost of moving SSE register */
1489 {12, 12, 12}, /* cost of loading SSE registers
1490 in SImode, DImode and TImode */
1491 {2, 2, 8}, /* cost of storing SSE registers
1492 in SImode, DImode and TImode */
1493 10, /* MMX or SSE register to integer */
1494 8, /* size of l1 cache. */
1495 256, /* size of l2 cache. */
1496 64, /* size of prefetch block */
1497 6, /* number of parallel prefetches */
1498 2, /* Branch cost */
1499 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1500 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
1501 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
1502 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1503 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1504 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
1505 pentium4_memcpy,
1506 pentium4_memset,
1507 1, /* scalar_stmt_cost. */
1508 1, /* scalar_load_cost. */
1509 1, /* scalar_store_cost. */
1510 1, /* vec_stmt_cost. */
1511 1, /* vec_to_scalar_cost. */
1512 1, /* scalar_to_vec_cost. */
1513 1, /* vec_align_load_cost. */
1514 2, /* vec_unalign_load_cost. */
1515 1, /* vec_store_cost. */
1516 3, /* cond_taken_branch_cost. */
1517 1, /* cond_not_taken_branch_cost. */
1518 };
1519
1520 static stringop_algs nocona_memcpy[2] = {
1521 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1522 {libcall, {{32, loop, false}, {20000, rep_prefix_8_byte, false},
1523 {100000, unrolled_loop, false}, {-1, libcall, false}}}};
1524
1525 static stringop_algs nocona_memset[2] = {
1526 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1527 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1528 {libcall, {{24, loop, false}, {64, unrolled_loop, false},
1529 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1530
1531 static const
1532 struct processor_costs nocona_cost = {
1533 COSTS_N_INSNS (1), /* cost of an add instruction */
1534 COSTS_N_INSNS (1), /* cost of a lea instruction */
1535 COSTS_N_INSNS (1), /* variable shift costs */
1536 COSTS_N_INSNS (1), /* constant shift costs */
1537 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
1538 COSTS_N_INSNS (10), /* HI */
1539 COSTS_N_INSNS (10), /* SI */
1540 COSTS_N_INSNS (10), /* DI */
1541 COSTS_N_INSNS (10)}, /* other */
1542 0, /* cost of multiply per each bit set */
1543 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
1544 COSTS_N_INSNS (66), /* HI */
1545 COSTS_N_INSNS (66), /* SI */
1546 COSTS_N_INSNS (66), /* DI */
1547 COSTS_N_INSNS (66)}, /* other */
1548 COSTS_N_INSNS (1), /* cost of movsx */
1549 COSTS_N_INSNS (1), /* cost of movzx */
1550 16, /* "large" insn */
1551 17, /* MOVE_RATIO */
1552 4, /* cost for loading QImode using movzbl */
1553 {4, 4, 4}, /* cost of loading integer registers
1554 in QImode, HImode and SImode.
1555 Relative to reg-reg move (2). */
1556 {4, 4, 4}, /* cost of storing integer registers */
1557 3, /* cost of reg,reg fld/fst */
1558 {12, 12, 12}, /* cost of loading fp registers
1559 in SFmode, DFmode and XFmode */
1560 {4, 4, 4}, /* cost of storing fp registers
1561 in SFmode, DFmode and XFmode */
1562 6, /* cost of moving MMX register */
1563 {12, 12}, /* cost of loading MMX registers
1564 in SImode and DImode */
1565 {12, 12}, /* cost of storing MMX registers
1566 in SImode and DImode */
1567 6, /* cost of moving SSE register */
1568 {12, 12, 12}, /* cost of loading SSE registers
1569 in SImode, DImode and TImode */
1570 {12, 12, 12}, /* cost of storing SSE registers
1571 in SImode, DImode and TImode */
1572 8, /* MMX or SSE register to integer */
1573 8, /* size of l1 cache. */
1574 1024, /* size of l2 cache. */
1575 64, /* size of prefetch block */
1576 8, /* number of parallel prefetches */
1577 1, /* Branch cost */
1578 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1579 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1580 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1581 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1582 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1583 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
1584 nocona_memcpy,
1585 nocona_memset,
1586 1, /* scalar_stmt_cost. */
1587 1, /* scalar_load_cost. */
1588 1, /* scalar_store_cost. */
1589 1, /* vec_stmt_cost. */
1590 1, /* vec_to_scalar_cost. */
1591 1, /* scalar_to_vec_cost. */
1592 1, /* vec_align_load_cost. */
1593 2, /* vec_unalign_load_cost. */
1594 1, /* vec_store_cost. */
1595 3, /* cond_taken_branch_cost. */
1596 1, /* cond_not_taken_branch_cost. */
1597 };
1598
1599 static stringop_algs atom_memcpy[2] = {
1600 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1601 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1602 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1603 static stringop_algs atom_memset[2] = {
1604 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1605 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1606 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1607 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1608 static const
1609 struct processor_costs atom_cost = {
1610 COSTS_N_INSNS (1), /* cost of an add instruction */
1611 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1612 COSTS_N_INSNS (1), /* variable shift costs */
1613 COSTS_N_INSNS (1), /* constant shift costs */
1614 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1615 COSTS_N_INSNS (4), /* HI */
1616 COSTS_N_INSNS (3), /* SI */
1617 COSTS_N_INSNS (4), /* DI */
1618 COSTS_N_INSNS (2)}, /* other */
1619 0, /* cost of multiply per each bit set */
1620 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1621 COSTS_N_INSNS (26), /* HI */
1622 COSTS_N_INSNS (42), /* SI */
1623 COSTS_N_INSNS (74), /* DI */
1624 COSTS_N_INSNS (74)}, /* other */
1625 COSTS_N_INSNS (1), /* cost of movsx */
1626 COSTS_N_INSNS (1), /* cost of movzx */
1627 8, /* "large" insn */
1628 17, /* MOVE_RATIO */
1629 4, /* cost for loading QImode using movzbl */
1630 {4, 4, 4}, /* cost of loading integer registers
1631 in QImode, HImode and SImode.
1632 Relative to reg-reg move (2). */
1633 {4, 4, 4}, /* cost of storing integer registers */
1634 4, /* cost of reg,reg fld/fst */
1635 {12, 12, 12}, /* cost of loading fp registers
1636 in SFmode, DFmode and XFmode */
1637 {6, 6, 8}, /* cost of storing fp registers
1638 in SFmode, DFmode and XFmode */
1639 2, /* cost of moving MMX register */
1640 {8, 8}, /* cost of loading MMX registers
1641 in SImode and DImode */
1642 {8, 8}, /* cost of storing MMX registers
1643 in SImode and DImode */
1644 2, /* cost of moving SSE register */
1645 {8, 8, 8}, /* cost of loading SSE registers
1646 in SImode, DImode and TImode */
1647 {8, 8, 8}, /* cost of storing SSE registers
1648 in SImode, DImode and TImode */
1649 5, /* MMX or SSE register to integer */
1650 32, /* size of l1 cache. */
1651 256, /* size of l2 cache. */
1652 64, /* size of prefetch block */
1653 6, /* number of parallel prefetches */
1654 3, /* Branch cost */
1655 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1656 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1657 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1658 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1659 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1660 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1661 atom_memcpy,
1662 atom_memset,
1663 1, /* scalar_stmt_cost. */
1664 1, /* scalar_load_cost. */
1665 1, /* scalar_store_cost. */
1666 1, /* vec_stmt_cost. */
1667 1, /* vec_to_scalar_cost. */
1668 1, /* scalar_to_vec_cost. */
1669 1, /* vec_align_load_cost. */
1670 2, /* vec_unalign_load_cost. */
1671 1, /* vec_store_cost. */
1672 3, /* cond_taken_branch_cost. */
1673 1, /* cond_not_taken_branch_cost. */
1674 };
1675
1676 static stringop_algs slm_memcpy[2] = {
1677 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1678 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1679 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1680 static stringop_algs slm_memset[2] = {
1681 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1682 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1683 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1684 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1685 static const
1686 struct processor_costs slm_cost = {
1687 COSTS_N_INSNS (1), /* cost of an add instruction */
1688 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1689 COSTS_N_INSNS (1), /* variable shift costs */
1690 COSTS_N_INSNS (1), /* constant shift costs */
1691 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1692 COSTS_N_INSNS (3), /* HI */
1693 COSTS_N_INSNS (3), /* SI */
1694 COSTS_N_INSNS (4), /* DI */
1695 COSTS_N_INSNS (2)}, /* other */
1696 0, /* cost of multiply per each bit set */
1697 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1698 COSTS_N_INSNS (26), /* HI */
1699 COSTS_N_INSNS (42), /* SI */
1700 COSTS_N_INSNS (74), /* DI */
1701 COSTS_N_INSNS (74)}, /* other */
1702 COSTS_N_INSNS (1), /* cost of movsx */
1703 COSTS_N_INSNS (1), /* cost of movzx */
1704 8, /* "large" insn */
1705 17, /* MOVE_RATIO */
1706 4, /* cost for loading QImode using movzbl */
1707 {4, 4, 4}, /* cost of loading integer registers
1708 in QImode, HImode and SImode.
1709 Relative to reg-reg move (2). */
1710 {4, 4, 4}, /* cost of storing integer registers */
1711 4, /* cost of reg,reg fld/fst */
1712 {12, 12, 12}, /* cost of loading fp registers
1713 in SFmode, DFmode and XFmode */
1714 {6, 6, 8}, /* cost of storing fp registers
1715 in SFmode, DFmode and XFmode */
1716 2, /* cost of moving MMX register */
1717 {8, 8}, /* cost of loading MMX registers
1718 in SImode and DImode */
1719 {8, 8}, /* cost of storing MMX registers
1720 in SImode and DImode */
1721 2, /* cost of moving SSE register */
1722 {8, 8, 8}, /* cost of loading SSE registers
1723 in SImode, DImode and TImode */
1724 {8, 8, 8}, /* cost of storing SSE registers
1725 in SImode, DImode and TImode */
1726 5, /* MMX or SSE register to integer */
1727 32, /* size of l1 cache. */
1728 256, /* size of l2 cache. */
1729 64, /* size of prefetch block */
1730 6, /* number of parallel prefetches */
1731 3, /* Branch cost */
1732 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1733 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1734 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1735 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1736 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1737 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1738 slm_memcpy,
1739 slm_memset,
1740 1, /* scalar_stmt_cost. */
1741 1, /* scalar_load_cost. */
1742 1, /* scalar_store_cost. */
1743 1, /* vec_stmt_cost. */
1744 4, /* vec_to_scalar_cost. */
1745 1, /* scalar_to_vec_cost. */
1746 1, /* vec_align_load_cost. */
1747 2, /* vec_unalign_load_cost. */
1748 1, /* vec_store_cost. */
1749 3, /* cond_taken_branch_cost. */
1750 1, /* cond_not_taken_branch_cost. */
1751 };
1752
1753 static stringop_algs intel_memcpy[2] = {
1754 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1755 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1756 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1757 static stringop_algs intel_memset[2] = {
1758 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1759 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1760 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1761 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1762 static const
1763 struct processor_costs intel_cost = {
1764 COSTS_N_INSNS (1), /* cost of an add instruction */
1765 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1766 COSTS_N_INSNS (1), /* variable shift costs */
1767 COSTS_N_INSNS (1), /* constant shift costs */
1768 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1769 COSTS_N_INSNS (3), /* HI */
1770 COSTS_N_INSNS (3), /* SI */
1771 COSTS_N_INSNS (4), /* DI */
1772 COSTS_N_INSNS (2)}, /* other */
1773 0, /* cost of multiply per each bit set */
1774 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1775 COSTS_N_INSNS (26), /* HI */
1776 COSTS_N_INSNS (42), /* SI */
1777 COSTS_N_INSNS (74), /* DI */
1778 COSTS_N_INSNS (74)}, /* other */
1779 COSTS_N_INSNS (1), /* cost of movsx */
1780 COSTS_N_INSNS (1), /* cost of movzx */
1781 8, /* "large" insn */
1782 17, /* MOVE_RATIO */
1783 4, /* cost for loading QImode using movzbl */
1784 {4, 4, 4}, /* cost of loading integer registers
1785 in QImode, HImode and SImode.
1786 Relative to reg-reg move (2). */
1787 {4, 4, 4}, /* cost of storing integer registers */
1788 4, /* cost of reg,reg fld/fst */
1789 {12, 12, 12}, /* cost of loading fp registers
1790 in SFmode, DFmode and XFmode */
1791 {6, 6, 8}, /* cost of storing fp registers
1792 in SFmode, DFmode and XFmode */
1793 2, /* cost of moving MMX register */
1794 {8, 8}, /* cost of loading MMX registers
1795 in SImode and DImode */
1796 {8, 8}, /* cost of storing MMX registers
1797 in SImode and DImode */
1798 2, /* cost of moving SSE register */
1799 {8, 8, 8}, /* cost of loading SSE registers
1800 in SImode, DImode and TImode */
1801 {8, 8, 8}, /* cost of storing SSE registers
1802 in SImode, DImode and TImode */
1803 5, /* MMX or SSE register to integer */
1804 32, /* size of l1 cache. */
1805 256, /* size of l2 cache. */
1806 64, /* size of prefetch block */
1807 6, /* number of parallel prefetches */
1808 3, /* Branch cost */
1809 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1810 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1811 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1812 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1813 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1814 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1815 intel_memcpy,
1816 intel_memset,
1817 1, /* scalar_stmt_cost. */
1818 1, /* scalar_load_cost. */
1819 1, /* scalar_store_cost. */
1820 1, /* vec_stmt_cost. */
1821 4, /* vec_to_scalar_cost. */
1822 1, /* scalar_to_vec_cost. */
1823 1, /* vec_align_load_cost. */
1824 2, /* vec_unalign_load_cost. */
1825 1, /* vec_store_cost. */
1826 3, /* cond_taken_branch_cost. */
1827 1, /* cond_not_taken_branch_cost. */
1828 };
1829
1830 /* Generic should produce code tuned for Core-i7 (and newer chips)
1831 and btver1 (and newer chips). */
1832
1833 static stringop_algs generic_memcpy[2] = {
1834 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1835 {-1, libcall, false}}},
1836 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1837 {-1, libcall, false}}}};
1838 static stringop_algs generic_memset[2] = {
1839 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1840 {-1, libcall, false}}},
1841 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1842 {-1, libcall, false}}}};
1843 static const
1844 struct processor_costs generic_cost = {
1845 COSTS_N_INSNS (1), /* cost of an add instruction */
1846 /* On all chips taken into consideration, lea is 2 cycles or more. With
1847 this cost, however, our current implementation of synth_mult results in
1848 the use of unnecessary temporary registers, causing regressions on several
1849 SPECfp benchmarks. */
1850 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1851 COSTS_N_INSNS (1), /* variable shift costs */
1852 COSTS_N_INSNS (1), /* constant shift costs */
1853 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1854 COSTS_N_INSNS (4), /* HI */
1855 COSTS_N_INSNS (3), /* SI */
1856 COSTS_N_INSNS (4), /* DI */
1857 COSTS_N_INSNS (2)}, /* other */
1858 0, /* cost of multiply per each bit set */
1859 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1860 COSTS_N_INSNS (26), /* HI */
1861 COSTS_N_INSNS (42), /* SI */
1862 COSTS_N_INSNS (74), /* DI */
1863 COSTS_N_INSNS (74)}, /* other */
1864 COSTS_N_INSNS (1), /* cost of movsx */
1865 COSTS_N_INSNS (1), /* cost of movzx */
1866 8, /* "large" insn */
1867 17, /* MOVE_RATIO */
1868 4, /* cost for loading QImode using movzbl */
1869 {4, 4, 4}, /* cost of loading integer registers
1870 in QImode, HImode and SImode.
1871 Relative to reg-reg move (2). */
1872 {4, 4, 4}, /* cost of storing integer registers */
1873 4, /* cost of reg,reg fld/fst */
1874 {12, 12, 12}, /* cost of loading fp registers
1875 in SFmode, DFmode and XFmode */
1876 {6, 6, 8}, /* cost of storing fp registers
1877 in SFmode, DFmode and XFmode */
1878 2, /* cost of moving MMX register */
1879 {8, 8}, /* cost of loading MMX registers
1880 in SImode and DImode */
1881 {8, 8}, /* cost of storing MMX registers
1882 in SImode and DImode */
1883 2, /* cost of moving SSE register */
1884 {8, 8, 8}, /* cost of loading SSE registers
1885 in SImode, DImode and TImode */
1886 {8, 8, 8}, /* cost of storing SSE registers
1887 in SImode, DImode and TImode */
1888 5, /* MMX or SSE register to integer */
1889 32, /* size of l1 cache. */
1890 512, /* size of l2 cache. */
1891 64, /* size of prefetch block */
1892 6, /* number of parallel prefetches */
1893 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
1894 value is increased to the perhaps more appropriate value of 5. */
1895 3, /* Branch cost */
1896 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1897 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1898 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1899 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1900 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1901 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1902 generic_memcpy,
1903 generic_memset,
1904 1, /* scalar_stmt_cost. */
1905 1, /* scalar_load_cost. */
1906 1, /* scalar_store_cost. */
1907 1, /* vec_stmt_cost. */
1908 1, /* vec_to_scalar_cost. */
1909 1, /* scalar_to_vec_cost. */
1910 1, /* vec_align_load_cost. */
1911 2, /* vec_unalign_load_cost. */
1912 1, /* vec_store_cost. */
1913 3, /* cond_taken_branch_cost. */
1914 1, /* cond_not_taken_branch_cost. */
1915 };
1916
1917 /* core_cost should produce code tuned for the Core family of CPUs. */
1918 static stringop_algs core_memcpy[2] = {
1919 {libcall, {{1024, rep_prefix_4_byte, true}, {-1, libcall, false}}},
1920 {libcall, {{24, loop, true}, {128, rep_prefix_8_byte, true},
1921 {-1, libcall, false}}}};
1922 static stringop_algs core_memset[2] = {
1923 {libcall, {{6, loop_1_byte, true},
1924 {24, loop, true},
1925 {8192, rep_prefix_4_byte, true},
1926 {-1, libcall, false}}},
1927 {libcall, {{24, loop, true}, {512, rep_prefix_8_byte, true},
1928 {-1, libcall, false}}}};
1929
1930 static const
1931 struct processor_costs core_cost = {
1932 COSTS_N_INSNS (1), /* cost of an add instruction */
1933 /* On all chips taken into consideration, lea is 2 cycles or more. With
1934 this cost, however, our current implementation of synth_mult results in
1935 the use of unnecessary temporary registers, causing regressions on several
1936 SPECfp benchmarks. */
1937 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1938 COSTS_N_INSNS (1), /* variable shift costs */
1939 COSTS_N_INSNS (1), /* constant shift costs */
1940 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1941 COSTS_N_INSNS (4), /* HI */
1942 COSTS_N_INSNS (3), /* SI */
1943 COSTS_N_INSNS (4), /* DI */
1944 COSTS_N_INSNS (2)}, /* other */
1945 0, /* cost of multiply per each bit set */
1946 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1947 COSTS_N_INSNS (26), /* HI */
1948 COSTS_N_INSNS (42), /* SI */
1949 COSTS_N_INSNS (74), /* DI */
1950 COSTS_N_INSNS (74)}, /* other */
1951 COSTS_N_INSNS (1), /* cost of movsx */
1952 COSTS_N_INSNS (1), /* cost of movzx */
1953 8, /* "large" insn */
1954 17, /* MOVE_RATIO */
1955 4, /* cost for loading QImode using movzbl */
1956 {4, 4, 4}, /* cost of loading integer registers
1957 in QImode, HImode and SImode.
1958 Relative to reg-reg move (2). */
1959 {4, 4, 4}, /* cost of storing integer registers */
1960 4, /* cost of reg,reg fld/fst */
1961 {12, 12, 12}, /* cost of loading fp registers
1962 in SFmode, DFmode and XFmode */
1963 {6, 6, 8}, /* cost of storing fp registers
1964 in SFmode, DFmode and XFmode */
1965 2, /* cost of moving MMX register */
1966 {8, 8}, /* cost of loading MMX registers
1967 in SImode and DImode */
1968 {8, 8}, /* cost of storing MMX registers
1969 in SImode and DImode */
1970 2, /* cost of moving SSE register */
1971 {8, 8, 8}, /* cost of loading SSE registers
1972 in SImode, DImode and TImode */
1973 {8, 8, 8}, /* cost of storing SSE registers
1974 in SImode, DImode and TImode */
1975 5, /* MMX or SSE register to integer */
1976 64, /* size of l1 cache. */
1977 512, /* size of l2 cache. */
1978 64, /* size of prefetch block */
1979 6, /* number of parallel prefetches */
1980 /* FIXME perhaps more appropriate value is 5. */
1981 3, /* Branch cost */
1982 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1983 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1984 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1985 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1986 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1987 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1988 core_memcpy,
1989 core_memset,
1990 1, /* scalar_stmt_cost. */
1991 1, /* scalar_load_cost. */
1992 1, /* scalar_store_cost. */
1993 1, /* vec_stmt_cost. */
1994 1, /* vec_to_scalar_cost. */
1995 1, /* scalar_to_vec_cost. */
1996 1, /* vec_align_load_cost. */
1997 2, /* vec_unalign_load_cost. */
1998 1, /* vec_store_cost. */
1999 3, /* cond_taken_branch_cost. */
2000 1, /* cond_not_taken_branch_cost. */
2001 };
2002
2003
2004 /* Set by -mtune. */
2005 const struct processor_costs *ix86_tune_cost = &pentium_cost;
2006
2007 /* Set by -mtune or -Os. */
2008 const struct processor_costs *ix86_cost = &pentium_cost;
2009
2010 /* Processor feature/optimization bitmasks. */
2011 #define m_386 (1<<PROCESSOR_I386)
2012 #define m_486 (1<<PROCESSOR_I486)
2013 #define m_PENT (1<<PROCESSOR_PENTIUM)
2014 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
2015 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
2016 #define m_NOCONA (1<<PROCESSOR_NOCONA)
2017 #define m_P4_NOCONA (m_PENT4 | m_NOCONA)
2018 #define m_CORE2 (1<<PROCESSOR_CORE2)
2019 #define m_NEHALEM (1<<PROCESSOR_NEHALEM)
2020 #define m_SANDYBRIDGE (1<<PROCESSOR_SANDYBRIDGE)
2021 #define m_HASWELL (1<<PROCESSOR_HASWELL)
2022 #define m_CORE_ALL (m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_HASWELL)
2023 #define m_BONNELL (1<<PROCESSOR_BONNELL)
2024 #define m_SILVERMONT (1<<PROCESSOR_SILVERMONT)
2025 #define m_INTEL (1<<PROCESSOR_INTEL)
2026
2027 #define m_GEODE (1<<PROCESSOR_GEODE)
2028 #define m_K6 (1<<PROCESSOR_K6)
2029 #define m_K6_GEODE (m_K6 | m_GEODE)
2030 #define m_K8 (1<<PROCESSOR_K8)
2031 #define m_ATHLON (1<<PROCESSOR_ATHLON)
2032 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
2033 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
2034 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
2035 #define m_BDVER2 (1<<PROCESSOR_BDVER2)
2036 #define m_BDVER3 (1<<PROCESSOR_BDVER3)
2037 #define m_BDVER4 (1<<PROCESSOR_BDVER4)
2038 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
2039 #define m_BTVER2 (1<<PROCESSOR_BTVER2)
2040 #define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3 | m_BDVER4)
2041 #define m_BTVER (m_BTVER1 | m_BTVER2)
2042 #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER)
2043
2044 #define m_GENERIC (1<<PROCESSOR_GENERIC)
2045
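/* Illustration of how these masks are used (editorial note): each m_*
   value is just 1 shifted by the matching PROCESSOR_* enumerator, so a
   selector such as (m_CORE_ALL | m_GENERIC) in x86-tune.def is a plain
   bitmask of processors.  When tuning for PROCESSOR_HASWELL, for
   example, set_ix86_tune_features below computes
   ix86_tune_mask = 1u << ix86_tune (== m_HASWELL) and enables exactly
   those X86_TUNE_* flags whose selector contains that bit.  */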
2046 const char* ix86_tune_feature_names[X86_TUNE_LAST] = {
2047 #undef DEF_TUNE
2048 #define DEF_TUNE(tune, name, selector) name,
2049 #include "x86-tune.def"
2050 #undef DEF_TUNE
2051 };
2052
2053 /* Feature tests against the various tunings. */
2054 unsigned char ix86_tune_features[X86_TUNE_LAST];
2055
2056 /* Feature tests against the various tunings used to create ix86_tune_features
2057 based on the processor mask. */
2058 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
2059 #undef DEF_TUNE
2060 #define DEF_TUNE(tune, name, selector) selector,
2061 #include "x86-tune.def"
2062 #undef DEF_TUNE
2063 };
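/* Sketch of the DEF_TUNE expansion behind the two tables above; the
   entry shown here is hypothetical and only illustrates the pattern:

     DEF_TUNE (X86_TUNE_EXAMPLE, "example_feature", m_CORE_ALL | m_GENERIC)

   contributes the string "example_feature" to ix86_tune_feature_names
   and the selector mask (m_CORE_ALL | m_GENERIC) to
   initial_ix86_tune_features, because DEF_TUNE is redefined to pick a
   different field before each #include of x86-tune.def.  */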
2064
2065 /* Feature tests against the various architecture variations. */
2066 unsigned char ix86_arch_features[X86_ARCH_LAST];
2067
2068 /* Feature tests against the various architecture variations, used to create
2069 ix86_arch_features based on the processor mask. */
2070 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
2071 /* X86_ARCH_CMOV: Conditional move was added for pentiumpro. */
2072 ~(m_386 | m_486 | m_PENT | m_K6),
2073
2074 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2075 ~m_386,
2076
2077 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2078 ~(m_386 | m_486),
2079
2080 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2081 ~m_386,
2082
2083 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
2084 ~m_386,
2085 };
2086
2087 /* If the average insn count for a single function invocation is
2088 lower than this constant, emit fast (but longer) prologue and
2089 epilogue code. */
2090 #define FAST_PROLOGUE_INSN_COUNT 20
2091
2092 /* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
2093 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
2094 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
2095 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
2096
2097 /* Array of the smallest class containing reg number REGNO, indexed by
2098 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2099
2100 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
2101 {
2102 /* ax, dx, cx, bx */
2103 AREG, DREG, CREG, BREG,
2104 /* si, di, bp, sp */
2105 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
2106 /* FP registers */
2107 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
2108 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
2109 /* arg pointer */
2110 NON_Q_REGS,
2111 /* flags, fpsr, fpcr, frame */
2112 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
2113 /* SSE registers */
2114 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2115 SSE_REGS, SSE_REGS,
2116 /* MMX registers */
2117 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
2118 MMX_REGS, MMX_REGS,
2119 /* REX registers */
2120 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2121 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2122 /* SSE REX registers */
2123 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2124 SSE_REGS, SSE_REGS,
2125 /* AVX-512 SSE registers */
2126 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2127 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2128 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2129 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2130 /* Mask registers. */
2131 MASK_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2132 MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2133 };
2134
2135 /* The "default" register map used in 32bit mode. */
2136
2137 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2138 {
2139 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2140 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2141 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2142 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2143 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2144 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2145 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2146 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23 */
2147 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31 */
2148 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2149 };
2150
2151 /* The "default" register map used in 64bit mode. */
2152
2153 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2154 {
2155 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2156 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2157 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2158 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2159 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2160 8,9,10,11,12,13,14,15, /* extended integer registers */
2161 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2162 67, 68, 69, 70, 71, 72, 73, 74, /* AVX-512 registers 16-23 */
2163 75, 76, 77, 78, 79, 80, 81, 82, /* AVX-512 registers 24-31 */
2164 118, 119, 120, 121, 122, 123, 124, 125, /* Mask registers */
2165 };
2166
2167 /* Define the register numbers to be used in Dwarf debugging information.
2168 The SVR4 reference port C compiler uses the following register numbers
2169 in its Dwarf output code:
2170 0 for %eax (gcc regno = 0)
2171 1 for %ecx (gcc regno = 2)
2172 2 for %edx (gcc regno = 1)
2173 3 for %ebx (gcc regno = 3)
2174 4 for %esp (gcc regno = 7)
2175 5 for %ebp (gcc regno = 6)
2176 6 for %esi (gcc regno = 4)
2177 7 for %edi (gcc regno = 5)
2178 The following three DWARF register numbers are never generated by
2179 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2180 believes these numbers have these meanings.
2181 8 for %eip (no gcc equivalent)
2182 9 for %eflags (gcc regno = 17)
2183 10 for %trapno (no gcc equivalent)
2184 It is not at all clear how we should number the FP stack registers
2185 for the x86 architecture. If the version of SDB on x86/svr4 were
2186 a bit less brain dead with respect to floating-point then we would
2187 have a precedent to follow with respect to DWARF register numbers
2188 for x86 FP registers, but the SDB on x86/svr4 is so completely
2189 broken with respect to FP registers that it is hardly worth thinking
2190 of it as something to strive for compatibility with.
2191 The version of x86/svr4 SDB I have at the moment does (partially)
2192 seem to believe that DWARF register number 11 is associated with
2193 the x86 register %st(0), but that's about all. Higher DWARF
2194 register numbers don't seem to be associated with anything in
2195 particular, and even for DWARF regno 11, SDB only seems to under-
2196 stand that it should say that a variable lives in %st(0) (when
2197 asked via an `=' command) if we said it was in DWARF regno 11,
2198 but SDB still prints garbage when asked for the value of the
2199 variable in question (via a `/' command).
2200 (Also note that the labels SDB prints for various FP stack regs
2201 when doing an `x' command are all wrong.)
2202 Note that these problems generally don't affect the native SVR4
2203 C compiler because it doesn't allow the use of -O with -g and
2204 because when it is *not* optimizing, it allocates a memory
2205 location for each floating-point variable, and the memory
2206 location is what gets described in the DWARF AT_location
2207 attribute for the variable in question.
2208 Regardless of the severe mental illness of the x86/svr4 SDB, we
2209 do something sensible here and we use the following DWARF
2210 register numbers. Note that these are all stack-top-relative
2211 numbers.
2212 11 for %st(0) (gcc regno = 8)
2213 12 for %st(1) (gcc regno = 9)
2214 13 for %st(2) (gcc regno = 10)
2215 14 for %st(3) (gcc regno = 11)
2216 15 for %st(4) (gcc regno = 12)
2217 16 for %st(5) (gcc regno = 13)
2218 17 for %st(6) (gcc regno = 14)
2219 18 for %st(7) (gcc regno = 15)
2220 */
2221 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2222 {
2223 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2224 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2225 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2226 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2227 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2228 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2229 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2230 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23 */
2231 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31 */
2232 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2233 };
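/* Worked example, derived from the tables and the comment above: %esp is
   gcc regno 7, so svr4_dbx_register_map[7] == 4 (the "4 for %esp" case
   listed above), while the "default" 32-bit map uses
   dbx_register_map[7] == 5 for the same register.  */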
2234
2235 /* Define parameter passing and return registers. */
2236
2237 static int const x86_64_int_parameter_registers[6] =
2238 {
2239 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2240 };
2241
2242 static int const x86_64_ms_abi_int_parameter_registers[4] =
2243 {
2244 CX_REG, DX_REG, R8_REG, R9_REG
2245 };
2246
2247 static int const x86_64_int_return_registers[4] =
2248 {
2249 AX_REG, DX_REG, DI_REG, SI_REG
2250 };
2251
2252 /* Additional registers that are clobbered by SYSV calls. */
2253
2254 int const x86_64_ms_sysv_extra_clobbered_registers[12] =
2255 {
2256 SI_REG, DI_REG,
2257 XMM6_REG, XMM7_REG,
2258 XMM8_REG, XMM9_REG, XMM10_REG, XMM11_REG,
2259 XMM12_REG, XMM13_REG, XMM14_REG, XMM15_REG
2260 };
2261
2262 /* Define the structure for the machine field in struct function. */
2263
2264 struct GTY(()) stack_local_entry {
2265 unsigned short mode;
2266 unsigned short n;
2267 rtx rtl;
2268 struct stack_local_entry *next;
2269 };
2270
2271 /* Structure describing stack frame layout.
2272 Stack grows downward:
2273
2274 [arguments]
2275 <- ARG_POINTER
2276 saved pc
2277
2278 saved static chain if ix86_static_chain_on_stack
2279
2280 saved frame pointer if frame_pointer_needed
2281 <- HARD_FRAME_POINTER
2282 [saved regs]
2283 <- regs_save_offset
2284 [padding0]
2285
2286 [saved SSE regs]
2287 <- sse_regs_save_offset
2288 [padding1] |
2289 | <- FRAME_POINTER
2290 [va_arg registers] |
2291 |
2292 [frame] |
2293 |
2294 [padding2] | = to_allocate
2295 <- STACK_POINTER
2296 */
2297 struct ix86_frame
2298 {
2299 int nsseregs;
2300 int nregs;
2301 int va_arg_size;
2302 int red_zone_size;
2303 int outgoing_arguments_size;
2304
2305 /* The offsets relative to ARG_POINTER. */
2306 HOST_WIDE_INT frame_pointer_offset;
2307 HOST_WIDE_INT hard_frame_pointer_offset;
2308 HOST_WIDE_INT stack_pointer_offset;
2309 HOST_WIDE_INT hfp_save_offset;
2310 HOST_WIDE_INT reg_save_offset;
2311 HOST_WIDE_INT sse_reg_save_offset;
2312
2313 /* When save_regs_using_mov is set, emit prologue using
2314 move instead of push instructions. */
2315 bool save_regs_using_mov;
2316 };
2317
2318 /* Which cpu are we scheduling for. */
2319 enum attr_cpu ix86_schedule;
2320
2321 /* Which cpu are we optimizing for. */
2322 enum processor_type ix86_tune;
2323
2324 /* Which instruction set architecture to use. */
2325 enum processor_type ix86_arch;
2326
2327 /* True if processor has SSE prefetch instruction. */
2328 unsigned char x86_prefetch_sse;
2329
2330 /* -mstackrealign option */
2331 static const char ix86_force_align_arg_pointer_string[]
2332 = "force_align_arg_pointer";
2333
2334 static rtx (*ix86_gen_leave) (void);
2335 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2336 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2337 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2338 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2339 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2340 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2341 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2342 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2343 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
2344 static rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx);
2345 static rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx);
2346
2347 /* Preferred alignment for stack boundary in bits. */
2348 unsigned int ix86_preferred_stack_boundary;
2349
2350 /* Alignment for incoming stack boundary in bits specified at
2351 command line. */
2352 static unsigned int ix86_user_incoming_stack_boundary;
2353
2354 /* Default alignment for incoming stack boundary in bits. */
2355 static unsigned int ix86_default_incoming_stack_boundary;
2356
2357 /* Alignment for incoming stack boundary in bits. */
2358 unsigned int ix86_incoming_stack_boundary;
2359
2360 /* Calling abi specific va_list type nodes. */
2361 static GTY(()) tree sysv_va_list_type_node;
2362 static GTY(()) tree ms_va_list_type_node;
2363
2364 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2365 char internal_label_prefix[16];
2366 int internal_label_prefix_len;
2367
2368 /* Fence to use after loop using movnt. */
2369 tree x86_mfence;
2370
2371 /* Register class used for passing a given 64-bit part of the argument.
2372 These represent classes as documented by the psABI, with the exception of
2373 the SSESF and SSEDF classes, which are basically the SSE class, except that
2374 gcc uses an SFmode or DFmode move instead of DImode to avoid reformatting penalties.
2375 
2376 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
2377 whenever possible (i.e. when the upper half is only padding). */
2378 enum x86_64_reg_class
2379 {
2380 X86_64_NO_CLASS,
2381 X86_64_INTEGER_CLASS,
2382 X86_64_INTEGERSI_CLASS,
2383 X86_64_SSE_CLASS,
2384 X86_64_SSESF_CLASS,
2385 X86_64_SSEDF_CLASS,
2386 X86_64_SSEUP_CLASS,
2387 X86_64_X87_CLASS,
2388 X86_64_X87UP_CLASS,
2389 X86_64_COMPLEX_X87_CLASS,
2390 X86_64_MEMORY_CLASS
2391 };
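/* Illustrative classification example (a sketch following the SysV psABI
   rules these classes model, not taken from the surrounding code): a
   16-byte argument such as struct { double d; int i; } is split into two
   eightbytes; the first, holding D, is given X86_64_SSEDF_CLASS and the
   second, holding I plus four bytes of padding, X86_64_INTEGERSI_CLASS,
   so the struct is passed in one SSE register and one integer register.  */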
2392
2393 #define MAX_CLASSES 8
2394
2395 /* Table of constants used by fldpi, fldln2, etc.... */
2396 static REAL_VALUE_TYPE ext_80387_constants_table [5];
2397 static bool ext_80387_constants_init = 0;
2398
2399 \f
2400 static struct machine_function * ix86_init_machine_status (void);
2401 static rtx ix86_function_value (const_tree, const_tree, bool);
2402 static bool ix86_function_value_regno_p (const unsigned int);
2403 static unsigned int ix86_function_arg_boundary (enum machine_mode,
2404 const_tree);
2405 static rtx ix86_static_chain (const_tree, bool);
2406 static int ix86_function_regparm (const_tree, const_tree);
2407 static void ix86_compute_frame_layout (struct ix86_frame *);
2408 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
2409 rtx, rtx, int);
2410 static void ix86_add_new_builtins (HOST_WIDE_INT);
2411 static tree ix86_canonical_va_list_type (tree);
2412 static void predict_jump (int);
2413 static unsigned int split_stack_prologue_scratch_regno (void);
2414 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2415
2416 enum ix86_function_specific_strings
2417 {
2418 IX86_FUNCTION_SPECIFIC_ARCH,
2419 IX86_FUNCTION_SPECIFIC_TUNE,
2420 IX86_FUNCTION_SPECIFIC_MAX
2421 };
2422
2423 static char *ix86_target_string (HOST_WIDE_INT, int, const char *,
2424 const char *, enum fpmath_unit, bool);
2425 static void ix86_function_specific_save (struct cl_target_option *,
2426 struct gcc_options *opts);
2427 static void ix86_function_specific_restore (struct gcc_options *opts,
2428 struct cl_target_option *);
2429 static void ix86_function_specific_print (FILE *, int,
2430 struct cl_target_option *);
2431 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2432 static bool ix86_valid_target_attribute_inner_p (tree, char *[],
2433 struct gcc_options *,
2434 struct gcc_options *,
2435 struct gcc_options *);
2436 static bool ix86_can_inline_p (tree, tree);
2437 static void ix86_set_current_function (tree);
2438 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2439
2440 static enum calling_abi ix86_function_abi (const_tree);
2441
2442 \f
2443 #ifndef SUBTARGET32_DEFAULT_CPU
2444 #define SUBTARGET32_DEFAULT_CPU "i386"
2445 #endif
2446
2447 /* Whether -mtune= or -march= were specified */
2448 static int ix86_tune_defaulted;
2449 static int ix86_arch_specified;
2450
2451 /* Vectorization library interface and handlers. */
2452 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2453
2454 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2455 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2456
2457 /* Processor target table, indexed by processor number */
2458 struct ptt
2459 {
2460 const char *const name; /* processor name */
2461 const struct processor_costs *cost; /* Processor costs */
2462 const int align_loop; /* Default alignments. */
2463 const int align_loop_max_skip;
2464 const int align_jump;
2465 const int align_jump_max_skip;
2466 const int align_func;
2467 };
2468
2469 /* This table must be in sync with enum processor_type in i386.h. */
2470 static const struct ptt processor_target_table[PROCESSOR_max] =
2471 {
2472 {"generic", &generic_cost, 16, 10, 16, 10, 16},
2473 {"i386", &i386_cost, 4, 3, 4, 3, 4},
2474 {"i486", &i486_cost, 16, 15, 16, 15, 16},
2475 {"pentium", &pentium_cost, 16, 7, 16, 7, 16},
2476 {"pentiumpro", &pentiumpro_cost, 16, 15, 16, 10, 16},
2477 {"pentium4", &pentium4_cost, 0, 0, 0, 0, 0},
2478 {"nocona", &nocona_cost, 0, 0, 0, 0, 0},
2479 {"core2", &core_cost, 16, 10, 16, 10, 16},
2480 {"nehalem", &core_cost, 16, 10, 16, 10, 16},
2481 {"sandybridge", &core_cost, 16, 10, 16, 10, 16},
2482 {"haswell", &core_cost, 16, 10, 16, 10, 16},
2483 {"bonnell", &atom_cost, 16, 15, 16, 7, 16},
2484 {"silvermont", &slm_cost, 16, 15, 16, 7, 16},
2485 {"intel", &intel_cost, 16, 15, 16, 7, 16},
2486 {"geode", &geode_cost, 0, 0, 0, 0, 0},
2487 {"k6", &k6_cost, 32, 7, 32, 7, 32},
2488 {"athlon", &athlon_cost, 16, 7, 16, 7, 16},
2489 {"k8", &k8_cost, 16, 7, 16, 7, 16},
2490 {"amdfam10", &amdfam10_cost, 32, 24, 32, 7, 32},
2491 {"bdver1", &bdver1_cost, 16, 10, 16, 7, 11},
2492 {"bdver2", &bdver2_cost, 16, 10, 16, 7, 11},
2493 {"bdver3", &bdver3_cost, 16, 10, 16, 7, 11},
2494 {"bdver4", &bdver4_cost, 16, 10, 16, 7, 11},
2495 {"btver1", &btver1_cost, 16, 10, 16, 7, 11},
2496 {"btver2", &btver2_cost, 16, 10, 16, 7, 11}
2497 };
2498 \f
2499 static unsigned int
2500 rest_of_handle_insert_vzeroupper (void)
2501 {
2502 int i;
2503
2504 /* vzeroupper instructions are inserted immediately after reload to
2505 account for possible spills from 256bit registers. The pass
2506 reuses the mode switching infrastructure by re-running the mode
2507 insertion pass, so disable entities that have already been processed. */
2508 for (i = 0; i < MAX_386_ENTITIES; i++)
2509 ix86_optimize_mode_switching[i] = 0;
2510
2511 ix86_optimize_mode_switching[AVX_U128] = 1;
2512
2513 /* Call optimize_mode_switching. */
2514 g->get_passes ()->execute_pass_mode_switching ();
2515 return 0;
2516 }
2517
2518 namespace {
2519
2520 const pass_data pass_data_insert_vzeroupper =
2521 {
2522 RTL_PASS, /* type */
2523 "vzeroupper", /* name */
2524 OPTGROUP_NONE, /* optinfo_flags */
2525 TV_NONE, /* tv_id */
2526 0, /* properties_required */
2527 0, /* properties_provided */
2528 0, /* properties_destroyed */
2529 0, /* todo_flags_start */
2530 TODO_df_finish, /* todo_flags_finish */
2531 };
2532
2533 class pass_insert_vzeroupper : public rtl_opt_pass
2534 {
2535 public:
2536 pass_insert_vzeroupper(gcc::context *ctxt)
2537 : rtl_opt_pass(pass_data_insert_vzeroupper, ctxt)
2538 {}
2539
2540 /* opt_pass methods: */
2541 virtual bool gate (function *)
2542 {
2543 return TARGET_AVX && !TARGET_AVX512F && TARGET_VZEROUPPER;
2544 }
2545
2546 virtual unsigned int execute (function *)
2547 {
2548 return rest_of_handle_insert_vzeroupper ();
2549 }
2550
2551 }; // class pass_insert_vzeroupper
2552
2553 } // anon namespace
2554
2555 rtl_opt_pass *
2556 make_pass_insert_vzeroupper (gcc::context *ctxt)
2557 {
2558 return new pass_insert_vzeroupper (ctxt);
2559 }
2560
2561 /* Return true if a red-zone is in use. */
2562
2563 static inline bool
2564 ix86_using_red_zone (void)
2565 {
2566 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
2567 }
2568 \f
2569 /* Return a string that documents the current -m options. The caller is
2570 responsible for freeing the string. */
2571
2572 static char *
2573 ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
2574 const char *tune, enum fpmath_unit fpmath,
2575 bool add_nl_p)
2576 {
2577 struct ix86_target_opts
2578 {
2579 const char *option; /* option string */
2580 HOST_WIDE_INT mask; /* isa mask options */
2581 };
2582
2583 /* This table is ordered so that options like -msse4.2, which imply
2584 other options, are matched first. */
2585 static struct ix86_target_opts isa_opts[] =
2586 {
2587 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2588 { "-mfma", OPTION_MASK_ISA_FMA },
2589 { "-mxop", OPTION_MASK_ISA_XOP },
2590 { "-mlwp", OPTION_MASK_ISA_LWP },
2591 { "-mavx512f", OPTION_MASK_ISA_AVX512F },
2592 { "-mavx512er", OPTION_MASK_ISA_AVX512ER },
2593 { "-mavx512cd", OPTION_MASK_ISA_AVX512CD },
2594 { "-mavx512pf", OPTION_MASK_ISA_AVX512PF },
2595 { "-mavx512dq", OPTION_MASK_ISA_AVX512DQ },
2596 { "-mavx512bw", OPTION_MASK_ISA_AVX512BW },
2597 { "-mavx512vl", OPTION_MASK_ISA_AVX512VL },
2598 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2599 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2600 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2601 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2602 { "-msse3", OPTION_MASK_ISA_SSE3 },
2603 { "-msse2", OPTION_MASK_ISA_SSE2 },
2604 { "-msse", OPTION_MASK_ISA_SSE },
2605 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2606 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2607 { "-mmmx", OPTION_MASK_ISA_MMX },
2608 { "-mabm", OPTION_MASK_ISA_ABM },
2609 { "-mbmi", OPTION_MASK_ISA_BMI },
2610 { "-mbmi2", OPTION_MASK_ISA_BMI2 },
2611 { "-mlzcnt", OPTION_MASK_ISA_LZCNT },
2612 { "-mhle", OPTION_MASK_ISA_HLE },
2613 { "-mfxsr", OPTION_MASK_ISA_FXSR },
2614 { "-mrdseed", OPTION_MASK_ISA_RDSEED },
2615 { "-mprfchw", OPTION_MASK_ISA_PRFCHW },
2616 { "-madx", OPTION_MASK_ISA_ADX },
2617 { "-mtbm", OPTION_MASK_ISA_TBM },
2618 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2619 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2620 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2621 { "-maes", OPTION_MASK_ISA_AES },
2622 { "-msha", OPTION_MASK_ISA_SHA },
2623 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2624 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2625 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2626 { "-mf16c", OPTION_MASK_ISA_F16C },
2627 { "-mrtm", OPTION_MASK_ISA_RTM },
2628 { "-mxsave", OPTION_MASK_ISA_XSAVE },
2629 { "-mxsaveopt", OPTION_MASK_ISA_XSAVEOPT },
2630 { "-mprefetchwt1", OPTION_MASK_ISA_PREFETCHWT1 },
2631 { "-mclflushopt", OPTION_MASK_ISA_CLFLUSHOPT },
2632 { "-mxsavec", OPTION_MASK_ISA_XSAVEC },
2633 { "-mxsaves", OPTION_MASK_ISA_XSAVES },
2634 };
2635
2636 /* Flag options. */
2637 static struct ix86_target_opts flag_opts[] =
2638 {
2639 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2640 { "-mlong-double-128", MASK_LONG_DOUBLE_128 },
2641 { "-mlong-double-64", MASK_LONG_DOUBLE_64 },
2642 { "-m80387", MASK_80387 },
2643 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2644 { "-malign-double", MASK_ALIGN_DOUBLE },
2645 { "-mcld", MASK_CLD },
2646 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2647 { "-mieee-fp", MASK_IEEE_FP },
2648 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2649 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2650 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2651 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2652 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2653 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2654 { "-mno-red-zone", MASK_NO_RED_ZONE },
2655 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2656 { "-mrecip", MASK_RECIP },
2657 { "-mrtd", MASK_RTD },
2658 { "-msseregparm", MASK_SSEREGPARM },
2659 { "-mstack-arg-probe", MASK_STACK_PROBE },
2660 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2661 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
2662 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2663 { "-mvzeroupper", MASK_VZEROUPPER },
2664 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
2665 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE},
2666 { "-mprefer-avx128", MASK_PREFER_AVX128},
2667 };
2668
2669 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2670
2671 char isa_other[40];
2672 char target_other[40];
2673 unsigned num = 0;
2674 unsigned i, j;
2675 char *ret;
2676 char *ptr;
2677 size_t len;
2678 size_t line_len;
2679 size_t sep_len;
2680 const char *abi;
2681
2682 memset (opts, '\0', sizeof (opts));
2683
2684 /* Add -march= option. */
2685 if (arch)
2686 {
2687 opts[num][0] = "-march=";
2688 opts[num++][1] = arch;
2689 }
2690
2691 /* Add -mtune= option. */
2692 if (tune)
2693 {
2694 opts[num][0] = "-mtune=";
2695 opts[num++][1] = tune;
2696 }
2697
2698 /* Add -m32/-m64/-mx32. */
2699 if ((isa & OPTION_MASK_ISA_64BIT) != 0)
2700 {
2701 if ((isa & OPTION_MASK_ABI_64) != 0)
2702 abi = "-m64";
2703 else
2704 abi = "-mx32";
2705 isa &= ~ (OPTION_MASK_ISA_64BIT
2706 | OPTION_MASK_ABI_64
2707 | OPTION_MASK_ABI_X32);
2708 }
2709 else
2710 abi = "-m32";
2711 opts[num++][0] = abi;
2712
2713 /* Pick out the options in isa options. */
2714 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2715 {
2716 if ((isa & isa_opts[i].mask) != 0)
2717 {
2718 opts[num++][0] = isa_opts[i].option;
2719 isa &= ~ isa_opts[i].mask;
2720 }
2721 }
2722
2723 if (isa && add_nl_p)
2724 {
2725 opts[num++][0] = isa_other;
2726 sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)",
2727 isa);
2728 }
2729
2730 /* Add flag options. */
2731 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2732 {
2733 if ((flags & flag_opts[i].mask) != 0)
2734 {
2735 opts[num++][0] = flag_opts[i].option;
2736 flags &= ~ flag_opts[i].mask;
2737 }
2738 }
2739
2740 if (flags && add_nl_p)
2741 {
2742 opts[num++][0] = target_other;
2743 sprintf (target_other, "(other flags: %#x)", flags);
2744 }
2745
2746 /* Add -fpmath= option. */
2747 if (fpmath)
2748 {
2749 opts[num][0] = "-mfpmath=";
2750 switch ((int) fpmath)
2751 {
2752 case FPMATH_387:
2753 opts[num++][1] = "387";
2754 break;
2755
2756 case FPMATH_SSE:
2757 opts[num++][1] = "sse";
2758 break;
2759
2760 case FPMATH_387 | FPMATH_SSE:
2761 opts[num++][1] = "sse+387";
2762 break;
2763
2764 default:
2765 gcc_unreachable ();
2766 }
2767 }
2768
2769 /* Any options? */
2770 if (num == 0)
2771 return NULL;
2772
2773 gcc_assert (num < ARRAY_SIZE (opts));
2774
2775 /* Size the string. */
2776 len = 0;
2777 sep_len = (add_nl_p) ? 3 : 1;
2778 for (i = 0; i < num; i++)
2779 {
2780 len += sep_len;
2781 for (j = 0; j < 2; j++)
2782 if (opts[i][j])
2783 len += strlen (opts[i][j]);
2784 }
2785
2786 /* Build the string. */
2787 ret = ptr = (char *) xmalloc (len);
2788 line_len = 0;
2789
2790 for (i = 0; i < num; i++)
2791 {
2792 size_t len2[2];
2793
2794 for (j = 0; j < 2; j++)
2795 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2796
2797 if (i != 0)
2798 {
2799 *ptr++ = ' ';
2800 line_len++;
2801
2802 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2803 {
2804 *ptr++ = '\\';
2805 *ptr++ = '\n';
2806 line_len = 0;
2807 }
2808 }
2809
2810 for (j = 0; j < 2; j++)
2811 if (opts[i][j])
2812 {
2813 memcpy (ptr, opts[i][j], len2[j]);
2814 ptr += len2[j];
2815 line_len += len2[j];
2816 }
2817 }
2818
2819 *ptr = '\0';
2820 gcc_assert (ret + len >= ptr);
2821
2822 return ret;
2823 }
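/* Example of the resulting string (illustrative; the "foo" architecture
   name is made up): a call such as

     ix86_target_string (OPTION_MASK_ISA_64BIT | OPTION_MASK_ABI_64,
                         0, "foo", NULL, FPMATH_SSE, false)

   returns the malloc'd string "-march=foo -m64 -mfpmath=sse".  With
   ADD_NL_P true, leftover ISA/flag bits are reported as "(other ...)"
   entries and long lines are broken with a backslash-newline at roughly
   70 columns.  */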
2824
2825 /* Return true if profiling code should be emitted before the
2826 prologue, and false otherwise.
2827 Note: for x86 with "hotfix" this is unfortunately not supported. */
2828 static bool
2829 ix86_profile_before_prologue (void)
2830 {
2831 return flag_fentry != 0;
2832 }
2833
2834 /* Function that is callable from the debugger to print the current
2835 options. */
2836 void ATTRIBUTE_UNUSED
2837 ix86_debug_options (void)
2838 {
2839 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2840 ix86_arch_string, ix86_tune_string,
2841 ix86_fpmath, true);
2842
2843 if (opts)
2844 {
2845 fprintf (stderr, "%s\n\n", opts);
2846 free (opts);
2847 }
2848 else
2849 fputs ("<no options>\n\n", stderr);
2850
2851 return;
2852 }
2853
2854 static const char *stringop_alg_names[] = {
2855 #define DEF_ENUM
2856 #define DEF_ALG(alg, name) #name,
2857 #include "stringop.def"
2858 #undef DEF_ENUM
2859 #undef DEF_ALG
2860 };
2861
2862 /* Parse parameter string passed to -mmemcpy-strategy= or -mmemset-strategy=.
2863 The string has the following form (or is a comma-separated list of such forms):
2864
2865 strategy_alg:max_size:[align|noalign]
2866
2867 where the full size range for the strategy is either [0, max_size] or
2868 [min_size, max_size], with min_size being one more than the max_size of the
2869 preceding range. The last size range must have max_size == -1.
2870
2871 Examples:
2872
2873 1.
2874 -mmemcpy-strategy=libcall:-1:noalign
2875
2876 this is equivalent to (for known size memcpy) -mstringop-strategy=libcall
2877
2878
2879 2.
2880 -mmemset-strategy=rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign
2881
2882 This tells the compiler to use the following strategy for memset:
2883 1) when the expected size is between [1, 16], use rep_8byte strategy;
2884 2) when the size is between [17, 2048], use vector_loop;
2885 3) when the size is > 2048, use libcall. */
2886
2887 struct stringop_size_range
2888 {
2889 int max;
2890 stringop_alg alg;
2891 bool noalign;
2892 };
2893
2894 static void
2895 ix86_parse_stringop_strategy_string (char *strategy_str, bool is_memset)
2896 {
2897 const struct stringop_algs *default_algs;
2898 stringop_size_range input_ranges[MAX_STRINGOP_ALGS];
2899 char *curr_range_str, *next_range_str;
2900 int i = 0, n = 0;
2901
2902 if (is_memset)
2903 default_algs = &ix86_cost->memset[TARGET_64BIT != 0];
2904 else
2905 default_algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
2906
2907 curr_range_str = strategy_str;
2908
2909 do
2910 {
2911 int maxs;
2912 char alg_name[128];
2913 char align[16];
2914 next_range_str = strchr (curr_range_str, ',');
2915 if (next_range_str)
2916 *next_range_str++ = '\0';
2917
2918 if (3 != sscanf (curr_range_str, "%20[^:]:%d:%10s",
2919 alg_name, &maxs, align))
2920 {
2921 error ("wrong arg %s to option %s", curr_range_str,
2922 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2923 return;
2924 }
2925
2926 if (n > 0 && (maxs < (input_ranges[n - 1].max + 1) && maxs != -1))
2927 {
2928 error ("size ranges of option %s should be increasing",
2929 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2930 return;
2931 }
2932
2933 for (i = 0; i < last_alg; i++)
2934 if (!strcmp (alg_name, stringop_alg_names[i]))
2935 break;
2936
2937 if (i == last_alg)
2938 {
2939 error ("wrong stringop strategy name %s specified for option %s",
2940 alg_name,
2941 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2942 return;
2943 }
2944
2945 input_ranges[n].max = maxs;
2946 input_ranges[n].alg = (stringop_alg) i;
2947 if (!strcmp (align, "align"))
2948 input_ranges[n].noalign = false;
2949 else if (!strcmp (align, "noalign"))
2950 input_ranges[n].noalign = true;
2951 else
2952 {
2953 error ("unknown alignment %s specified for option %s",
2954 align, is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2955 return;
2956 }
2957 n++;
2958 curr_range_str = next_range_str;
2959 }
2960 while (curr_range_str);
2961
2962 if (input_ranges[n - 1].max != -1)
2963 {
2964 error ("the max value for the last size range should be -1"
2965 " for option %s",
2966 	     is_memset ? "-mmemset-strategy=" : "-mmemcpy-strategy=");
2967 return;
2968 }
2969
2970 if (n > MAX_STRINGOP_ALGS)
2971 {
2972 error ("too many size ranges specified in option %s",
2973 	     is_memset ? "-mmemset-strategy=" : "-mmemcpy-strategy=");
2974 return;
2975 }
2976
2977 /* Now override the default algs array. */
2978 for (i = 0; i < n; i++)
2979 {
2980 *const_cast<int *>(&default_algs->size[i].max) = input_ranges[i].max;
2981 *const_cast<stringop_alg *>(&default_algs->size[i].alg)
2982 = input_ranges[i].alg;
2983 *const_cast<int *>(&default_algs->size[i].noalign)
2984 = input_ranges[i].noalign;
2985 }
2986 }
2987
2988 \f
2989 /* Parse the -mtune-ctrl= option.  When DUMP is true,
2990    print the features that are explicitly set.  */
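/* The string is a comma-separated list of feature names taken from
   ix86_tune_feature_names[]; a name prefixed with '^' clears the feature
   instead of setting it.  A hypothetical example (the feature names here
   are illustrative only):

     -mtune-ctrl=use_leave,^accumulate_outgoing_args

   would set the first feature and clear the second.  */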
2991
2992 static void
2993 parse_mtune_ctrl_str (bool dump)
2994 {
2995 if (!ix86_tune_ctrl_string)
2996 return;
2997
2998 char *next_feature_string = NULL;
2999 char *curr_feature_string = xstrdup (ix86_tune_ctrl_string);
3000 char *orig = curr_feature_string;
3001 int i;
3002 do
3003 {
3004 bool clear = false;
3005
3006 next_feature_string = strchr (curr_feature_string, ',');
3007 if (next_feature_string)
3008 *next_feature_string++ = '\0';
3009 if (*curr_feature_string == '^')
3010 {
3011 curr_feature_string++;
3012 clear = true;
3013 }
3014 for (i = 0; i < X86_TUNE_LAST; i++)
3015 {
3016 if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
3017 {
3018 ix86_tune_features[i] = !clear;
3019 if (dump)
3020 fprintf (stderr, "Explicitly %s feature %s\n",
3021 clear ? "clear" : "set", ix86_tune_feature_names[i]);
3022 break;
3023 }
3024 }
3025 if (i == X86_TUNE_LAST)
3026 	error ("unknown parameter to option -mtune-ctrl: %s",
3027 clear ? curr_feature_string - 1 : curr_feature_string);
3028 curr_feature_string = next_feature_string;
3029 }
3030 while (curr_feature_string);
3031 free (orig);
3032 }
3033
3034 /* Helper function to set ix86_tune_features. IX86_TUNE is the
3035 processor type. */
3036
3037 static void
3038 set_ix86_tune_features (enum processor_type ix86_tune, bool dump)
3039 {
3040 unsigned int ix86_tune_mask = 1u << ix86_tune;
3041 int i;
3042
3043 for (i = 0; i < X86_TUNE_LAST; ++i)
3044 {
3045 if (ix86_tune_no_default)
3046 ix86_tune_features[i] = 0;
3047 else
3048 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3049 }
3050
3051 if (dump)
3052 {
3053 fprintf (stderr, "List of x86 specific tuning parameter names:\n");
3054 for (i = 0; i < X86_TUNE_LAST; i++)
3055 fprintf (stderr, "%s : %s\n", ix86_tune_feature_names[i],
3056 ix86_tune_features[i] ? "on" : "off");
3057 }
3058
3059 parse_mtune_ctrl_str (dump);
3060 }
3061
3062
3063 /* Override various settings based on options. If MAIN_ARGS_P, the
3064 options are from the command line, otherwise they are from
3065 attributes. */
3066
3067 static void
3068 ix86_option_override_internal (bool main_args_p,
3069 struct gcc_options *opts,
3070 struct gcc_options *opts_set)
3071 {
3072 int i;
3073 unsigned int ix86_arch_mask;
3074 const bool ix86_tune_specified = (opts->x_ix86_tune_string != NULL);
3075 const char *prefix;
3076 const char *suffix;
3077 const char *sw;
3078
3079 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
3080 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
3081 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
3082 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
3083 #define PTA_AES (HOST_WIDE_INT_1 << 4)
3084 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
3085 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
3086 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
3087 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
3088 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
3089 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
3090 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
3091 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
3092 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
3093 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
3094 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
3095 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
3096 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
3097 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
3098 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
3099 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
3100 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
3101 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
3102 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
3103 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
3104 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
3105 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
3106 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
3107 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
3108 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
3109 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
3110 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
3111 #define PTA_RTM (HOST_WIDE_INT_1 << 32)
3112 #define PTA_HLE (HOST_WIDE_INT_1 << 33)
3113 #define PTA_PRFCHW (HOST_WIDE_INT_1 << 34)
3114 #define PTA_RDSEED (HOST_WIDE_INT_1 << 35)
3115 #define PTA_ADX (HOST_WIDE_INT_1 << 36)
3116 #define PTA_FXSR (HOST_WIDE_INT_1 << 37)
3117 #define PTA_XSAVE (HOST_WIDE_INT_1 << 38)
3118 #define PTA_XSAVEOPT (HOST_WIDE_INT_1 << 39)
3119 #define PTA_AVX512F (HOST_WIDE_INT_1 << 40)
3120 #define PTA_AVX512ER (HOST_WIDE_INT_1 << 41)
3121 #define PTA_AVX512PF (HOST_WIDE_INT_1 << 42)
3122 #define PTA_AVX512CD (HOST_WIDE_INT_1 << 43)
3123 #define PTA_SHA (HOST_WIDE_INT_1 << 45)
3124 #define PTA_PREFETCHWT1 (HOST_WIDE_INT_1 << 46)
3125 #define PTA_CLFLUSHOPT (HOST_WIDE_INT_1 << 47)
3126 #define PTA_XSAVEC (HOST_WIDE_INT_1 << 48)
3127 #define PTA_XSAVES (HOST_WIDE_INT_1 << 49)
3128 #define PTA_AVX512DQ (HOST_WIDE_INT_1 << 50)
3129 #define PTA_AVX512BW (HOST_WIDE_INT_1 << 51)
3130 #define PTA_AVX512VL (HOST_WIDE_INT_1 << 52)
3131
3132 #define PTA_CORE2 \
3133 (PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 \
3134 | PTA_CX16 | PTA_FXSR)
3135 #define PTA_NEHALEM \
3136 (PTA_CORE2 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_POPCNT)
3137 #define PTA_WESTMERE \
3138 (PTA_NEHALEM | PTA_AES | PTA_PCLMUL)
3139 #define PTA_SANDYBRIDGE \
3140 (PTA_WESTMERE | PTA_AVX | PTA_XSAVE | PTA_XSAVEOPT)
3141 #define PTA_IVYBRIDGE \
3142 (PTA_SANDYBRIDGE | PTA_FSGSBASE | PTA_RDRND | PTA_F16C)
3143 #define PTA_HASWELL \
3144 (PTA_IVYBRIDGE | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_LZCNT \
3145 | PTA_FMA | PTA_MOVBE | PTA_HLE)
3146 #define PTA_BROADWELL \
3147 (PTA_HASWELL | PTA_ADX | PTA_PRFCHW | PTA_RDSEED)
3148 #define PTA_BONNELL \
3149 (PTA_CORE2 | PTA_MOVBE)
3150 #define PTA_SILVERMONT \
3151 (PTA_WESTMERE | PTA_MOVBE)
3152
3153 /* If this reaches 64, the flags field of struct pta below needs to be widened.  */
3154
3155 static struct pta
3156 {
3157 const char *const name; /* processor name or nickname. */
3158 const enum processor_type processor;
3159 const enum attr_cpu schedule;
3160 const unsigned HOST_WIDE_INT flags;
3161 }
3162 const processor_alias_table[] =
3163 {
3164 {"i386", PROCESSOR_I386, CPU_NONE, 0},
3165 {"i486", PROCESSOR_I486, CPU_NONE, 0},
3166 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3167 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3168 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
3169 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
3170 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3171 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3172 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3173 PTA_MMX | PTA_SSE | PTA_FXSR},
3174 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3175 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3176 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_FXSR},
3177 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3178 PTA_MMX | PTA_SSE | PTA_FXSR},
3179 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3180 PTA_MMX | PTA_SSE | PTA_FXSR},
3181 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3182 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3183 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
3184 PTA_MMX |PTA_SSE | PTA_SSE2 | PTA_FXSR},
3185 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
3186 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3187 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
3188 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
3189 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
3190 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3191 | PTA_CX16 | PTA_NO_SAHF | PTA_FXSR},
3192 {"core2", PROCESSOR_CORE2, CPU_CORE2, PTA_CORE2},
3193 {"nehalem", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3194 {"corei7", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3195 {"westmere", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_WESTMERE},
3196 {"sandybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3197 PTA_SANDYBRIDGE},
3198 {"corei7-avx", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3199 PTA_SANDYBRIDGE},
3200 {"ivybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3201 PTA_IVYBRIDGE},
3202 {"core-avx-i", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3203 PTA_IVYBRIDGE},
3204 {"haswell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3205 {"core-avx2", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3206 {"broadwell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_BROADWELL},
3207 {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3208 {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3209 {"silvermont", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3210 {"slm", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3211 {"intel", PROCESSOR_INTEL, CPU_SLM, PTA_NEHALEM},
3212 {"geode", PROCESSOR_GEODE, CPU_GEODE,
3213 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3214 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
3215 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3216 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3217 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
3218 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3219 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
3220 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3221 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
3222 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3223 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
3224 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3225 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
3226 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3227 {"x86-64", PROCESSOR_K8, CPU_K8,
3228 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
3229 {"k8", PROCESSOR_K8, CPU_K8,
3230 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3231 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3232 {"k8-sse3", PROCESSOR_K8, CPU_K8,
3233 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3234 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3235 {"opteron", PROCESSOR_K8, CPU_K8,
3236 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3237 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3238 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3239 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3240 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3241 {"athlon64", PROCESSOR_K8, CPU_K8,
3242 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3243 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3244 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3245 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3246 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3247 {"athlon-fx", PROCESSOR_K8, CPU_K8,
3248 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3249 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3250 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3251 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3252 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3253 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3254 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3255 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3256 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3257 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3258 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3259 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3260 | PTA_XOP | PTA_LWP | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3261 {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
3262 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3263 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3264 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3265 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3266 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3267 {"bdver3", PROCESSOR_BDVER3, CPU_BDVER3,
3268 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3269 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3270 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3271 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3272 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE
3273 | PTA_XSAVEOPT | PTA_FSGSBASE},
3274 {"bdver4", PROCESSOR_BDVER4, CPU_BDVER4,
3275 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3276 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3277 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
3278 | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2
3279 | PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR
3280 | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE | PTA_RDRND
3281 | PTA_MOVBE},
3282 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC,
3283 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3284 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_PRFCHW
3285 | PTA_FXSR | PTA_XSAVE},
3286 {"btver2", PROCESSOR_BTVER2, CPU_BTVER2,
3287 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3288 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_SSE4_1
3289 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
3290 | PTA_BMI | PTA_F16C | PTA_MOVBE | PTA_PRFCHW
3291 | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT},
3292
3293 {"generic", PROCESSOR_GENERIC, CPU_GENERIC,
3294 PTA_64BIT
3295 | PTA_HLE /* flags are only used for -march switch. */ },
3296 };
3297
3298 /* -mrecip options. */
3299 static struct
3300 {
3301 const char *string; /* option name */
3302 unsigned int mask; /* mask bits to set */
3303 }
3304 const recip_options[] =
3305 {
3306 { "all", RECIP_MASK_ALL },
3307 { "none", RECIP_MASK_NONE },
3308 { "div", RECIP_MASK_DIV },
3309 { "sqrt", RECIP_MASK_SQRT },
3310 { "vec-div", RECIP_MASK_VEC_DIV },
3311 { "vec-sqrt", RECIP_MASK_VEC_SQRT },
3312 };
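  /* A sketch of the accepted syntax, based on the parsing loop further
     below: -mrecip= takes a comma-separated list of the names in this
     table, or "default" (treated like "all"), each optionally prefixed
     with '!' to disable it.  For instance (illustrative),
     -mrecip=all,!sqrt enables every RECIP_MASK_* bit except
     RECIP_MASK_SQRT.  */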
3313
3314 int const pta_size = ARRAY_SIZE (processor_alias_table);
3315
3316 /* Set up prefix/suffix so the error messages refer to either the command
3317 line argument, or the attribute(target). */
3318 if (main_args_p)
3319 {
3320 prefix = "-m";
3321 suffix = "";
3322 sw = "switch";
3323 }
3324 else
3325 {
3326 prefix = "option(\"";
3327 suffix = "\")";
3328 sw = "attribute";
3329 }
3330
3331 /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
3332 TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */
3333 if (TARGET_64BIT_DEFAULT && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3334 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ABI_64 | OPTION_MASK_ABI_X32);
3335 #ifdef TARGET_BI_ARCH
3336 else
3337 {
3338 #if TARGET_BI_ARCH == 1
3339 /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
3340 is on and OPTION_MASK_ABI_X32 is off. We turn off
3341 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
3342 -mx32. */
3343 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3344 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3345 #else
3346 /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
3347 on and OPTION_MASK_ABI_64 is off. We turn off
3348 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
3349 -m64. */
3350 if (TARGET_LP64_P (opts->x_ix86_isa_flags))
3351 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3352 #endif
3353 }
3354 #endif
3355
3356 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3357 {
3358 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3359 OPTION_MASK_ABI_64 for TARGET_X32. */
3360 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3361 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3362 }
3363 else if (TARGET_16BIT_P (opts->x_ix86_isa_flags))
3364 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ISA_64BIT
3365 | OPTION_MASK_ABI_X32
3366 | OPTION_MASK_ABI_64);
3367 else if (TARGET_LP64_P (opts->x_ix86_isa_flags))
3368 {
3369 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3370 OPTION_MASK_ABI_X32 for TARGET_LP64. */
3371 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3372 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3373 }
3374
3375 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3376 SUBTARGET_OVERRIDE_OPTIONS;
3377 #endif
3378
3379 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3380 SUBSUBTARGET_OVERRIDE_OPTIONS;
3381 #endif
3382
3383 /* -fPIC is the default for x86_64. */
3384 if (TARGET_MACHO && TARGET_64BIT_P (opts->x_ix86_isa_flags))
3385 opts->x_flag_pic = 2;
3386
3387 /* Need to check -mtune=generic first. */
3388 if (opts->x_ix86_tune_string)
3389 {
3390 /* As special support for cross compilers we read -mtune=native
3391 as -mtune=generic. With native compilers we won't see the
3392 -mtune=native, as it was changed by the driver. */
3393 if (!strcmp (opts->x_ix86_tune_string, "native"))
3394 {
3395 opts->x_ix86_tune_string = "generic";
3396 }
3397 else if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3398 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
3399 "%stune=k8%s or %stune=generic%s instead as appropriate",
3400 prefix, suffix, prefix, suffix, prefix, suffix);
3401 }
3402 else
3403 {
3404 if (opts->x_ix86_arch_string)
3405 opts->x_ix86_tune_string = opts->x_ix86_arch_string;
3406 if (!opts->x_ix86_tune_string)
3407 {
3408 opts->x_ix86_tune_string
3409 = processor_target_table[TARGET_CPU_DEFAULT].name;
3410 ix86_tune_defaulted = 1;
3411 }
3412
3413 /* opts->x_ix86_tune_string is set to opts->x_ix86_arch_string
3414 or defaulted. We need to use a sensible tune option. */
3415 if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3416 {
3417 opts->x_ix86_tune_string = "generic";
3418 }
3419 }
3420
3421 if (opts->x_ix86_stringop_alg == rep_prefix_8_byte
3422 && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3423 {
3424 /* rep; movq isn't available in 32-bit code. */
3425 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3426 opts->x_ix86_stringop_alg = no_stringop;
3427 }
3428
3429 if (!opts->x_ix86_arch_string)
3430 opts->x_ix86_arch_string
3431 = TARGET_64BIT_P (opts->x_ix86_isa_flags)
3432 ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3433 else
3434 ix86_arch_specified = 1;
3435
3436 if (opts_set->x_ix86_pmode)
3437 {
3438 if ((TARGET_LP64_P (opts->x_ix86_isa_flags)
3439 && opts->x_ix86_pmode == PMODE_SI)
3440 || (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
3441 && opts->x_ix86_pmode == PMODE_DI))
3442 error ("address mode %qs not supported in the %s bit mode",
3443 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "short" : "long",
3444 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "64" : "32");
3445 }
3446 else
3447 opts->x_ix86_pmode = TARGET_LP64_P (opts->x_ix86_isa_flags)
3448 ? PMODE_DI : PMODE_SI;
3449
3450 if (!opts_set->x_ix86_abi)
3451 opts->x_ix86_abi = DEFAULT_ABI;
3452
3453   /* For targets using the MS ABI, enable ms-extensions unless it has
3454      been explicitly turned off.  For non-MS ABIs we turn this
3455      option off.  */
3456 if (!opts_set->x_flag_ms_extensions)
3457 opts->x_flag_ms_extensions = (MS_ABI == DEFAULT_ABI);
3458
3459 if (opts_set->x_ix86_cmodel)
3460 {
3461 switch (opts->x_ix86_cmodel)
3462 {
3463 case CM_SMALL:
3464 case CM_SMALL_PIC:
3465 if (opts->x_flag_pic)
3466 opts->x_ix86_cmodel = CM_SMALL_PIC;
3467 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3468 error ("code model %qs not supported in the %s bit mode",
3469 "small", "32");
3470 break;
3471
3472 case CM_MEDIUM:
3473 case CM_MEDIUM_PIC:
3474 if (opts->x_flag_pic)
3475 opts->x_ix86_cmodel = CM_MEDIUM_PIC;
3476 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3477 error ("code model %qs not supported in the %s bit mode",
3478 "medium", "32");
3479 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3480 error ("code model %qs not supported in x32 mode",
3481 "medium");
3482 break;
3483
3484 case CM_LARGE:
3485 case CM_LARGE_PIC:
3486 if (opts->x_flag_pic)
3487 opts->x_ix86_cmodel = CM_LARGE_PIC;
3488 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3489 error ("code model %qs not supported in the %s bit mode",
3490 "large", "32");
3491 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3492 error ("code model %qs not supported in x32 mode",
3493 "large");
3494 break;
3495
3496 case CM_32:
3497 if (opts->x_flag_pic)
3498 error ("code model %s does not support PIC mode", "32");
3499 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3500 error ("code model %qs not supported in the %s bit mode",
3501 "32", "64");
3502 break;
3503
3504 case CM_KERNEL:
3505 if (opts->x_flag_pic)
3506 {
3507 error ("code model %s does not support PIC mode", "kernel");
3508 opts->x_ix86_cmodel = CM_32;
3509 }
3510 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3511 error ("code model %qs not supported in the %s bit mode",
3512 "kernel", "32");
3513 break;
3514
3515 default:
3516 gcc_unreachable ();
3517 }
3518 }
3519 else
3520 {
3521 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3522 use of rip-relative addressing. This eliminates fixups that
3523 would otherwise be needed if this object is to be placed in a
3524 DLL, and is essentially just as efficient as direct addressing. */
3525 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3526 && (TARGET_RDOS || TARGET_PECOFF))
3527 opts->x_ix86_cmodel = CM_MEDIUM_PIC, opts->x_flag_pic = 1;
3528 else if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3529 opts->x_ix86_cmodel = opts->x_flag_pic ? CM_SMALL_PIC : CM_SMALL;
3530 else
3531 opts->x_ix86_cmodel = CM_32;
3532 }
3533 if (TARGET_MACHO && opts->x_ix86_asm_dialect == ASM_INTEL)
3534 {
3535 error ("-masm=intel not supported in this configuration");
3536 opts->x_ix86_asm_dialect = ASM_ATT;
3537 }
3538 if ((TARGET_64BIT_P (opts->x_ix86_isa_flags) != 0)
3539 != ((opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3540 sorry ("%i-bit mode not compiled in",
3541 (opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3542
3543 for (i = 0; i < pta_size; i++)
3544 if (! strcmp (opts->x_ix86_arch_string, processor_alias_table[i].name))
3545 {
3546 ix86_schedule = processor_alias_table[i].schedule;
3547 ix86_arch = processor_alias_table[i].processor;
3548 /* Default cpu tuning to the architecture. */
3549 ix86_tune = ix86_arch;
3550
3551 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3552 && !(processor_alias_table[i].flags & PTA_64BIT))
3553 error ("CPU you selected does not support x86-64 "
3554 "instruction set");
3555
3556 if (processor_alias_table[i].flags & PTA_MMX
3557 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3558 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3559 if (processor_alias_table[i].flags & PTA_3DNOW
3560 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3561 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3562 if (processor_alias_table[i].flags & PTA_3DNOW_A
3563 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3564 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3565 if (processor_alias_table[i].flags & PTA_SSE
3566 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3567 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3568 if (processor_alias_table[i].flags & PTA_SSE2
3569 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3570 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3571 if (processor_alias_table[i].flags & PTA_SSE3
3572 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3573 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3574 if (processor_alias_table[i].flags & PTA_SSSE3
3575 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3576 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3577 if (processor_alias_table[i].flags & PTA_SSE4_1
3578 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3579 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3580 if (processor_alias_table[i].flags & PTA_SSE4_2
3581 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3582 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3583 if (processor_alias_table[i].flags & PTA_AVX
3584 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3585 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3586 if (processor_alias_table[i].flags & PTA_AVX2
3587 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
3588 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
3589 if (processor_alias_table[i].flags & PTA_FMA
3590 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3591 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3592 if (processor_alias_table[i].flags & PTA_SSE4A
3593 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3594 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3595 if (processor_alias_table[i].flags & PTA_FMA4
3596 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3597 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3598 if (processor_alias_table[i].flags & PTA_XOP
3599 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3600 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3601 if (processor_alias_table[i].flags & PTA_LWP
3602 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3603 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3604 if (processor_alias_table[i].flags & PTA_ABM
3605 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3606 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3607 if (processor_alias_table[i].flags & PTA_BMI
3608 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
3609 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI;
3610 if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
3611 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
3612 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
3613 if (processor_alias_table[i].flags & PTA_TBM
3614 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
3615 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_TBM;
3616 if (processor_alias_table[i].flags & PTA_BMI2
3617 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
3618 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
3619 if (processor_alias_table[i].flags & PTA_CX16
3620 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3621 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3622 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3623 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3624 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3625 if (!(TARGET_64BIT_P (opts->x_ix86_isa_flags)
3626 && (processor_alias_table[i].flags & PTA_NO_SAHF))
3627 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3628 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3629 if (processor_alias_table[i].flags & PTA_MOVBE
3630 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3631 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3632 	if (processor_alias_table[i].flags & PTA_AES
3633 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3634 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AES;
3635 	if (processor_alias_table[i].flags & PTA_SHA
3636 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SHA))
3637 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SHA;
3638 if (processor_alias_table[i].flags & PTA_PCLMUL
3639 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3640 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3641 if (processor_alias_table[i].flags & PTA_FSGSBASE
3642 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3643 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3644 if (processor_alias_table[i].flags & PTA_RDRND
3645 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3646 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3647 if (processor_alias_table[i].flags & PTA_F16C
3648 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3649 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3650 if (processor_alias_table[i].flags & PTA_RTM
3651 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RTM))
3652 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RTM;
3653 if (processor_alias_table[i].flags & PTA_HLE
3654 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_HLE))
3655 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_HLE;
3656 if (processor_alias_table[i].flags & PTA_PRFCHW
3657 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PRFCHW))
3658 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PRFCHW;
3659 if (processor_alias_table[i].flags & PTA_RDSEED
3660 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDSEED))
3661 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDSEED;
3662 if (processor_alias_table[i].flags & PTA_ADX
3663 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ADX))
3664 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ADX;
3665 if (processor_alias_table[i].flags & PTA_FXSR
3666 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FXSR))
3667 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FXSR;
3668 if (processor_alias_table[i].flags & PTA_XSAVE
3669 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVE))
3670 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVE;
3671 if (processor_alias_table[i].flags & PTA_XSAVEOPT
3672 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEOPT))
3673 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEOPT;
3674 if (processor_alias_table[i].flags & PTA_AVX512F
3675 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F))
3676 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512F;
3677 if (processor_alias_table[i].flags & PTA_AVX512ER
3678 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512ER))
3679 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512ER;
3680 if (processor_alias_table[i].flags & PTA_AVX512PF
3681 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512PF))
3682 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512PF;
3683 if (processor_alias_table[i].flags & PTA_AVX512CD
3684 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512CD))
3685 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512CD;
3686 if (processor_alias_table[i].flags & PTA_PREFETCHWT1
3687 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PREFETCHWT1))
3688 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PREFETCHWT1;
3689 if (processor_alias_table[i].flags & PTA_CLFLUSHOPT
3690 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLFLUSHOPT))
3691 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLFLUSHOPT;
3692 if (processor_alias_table[i].flags & PTA_XSAVEC
3693 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEC))
3694 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEC;
3695 if (processor_alias_table[i].flags & PTA_XSAVES
3696 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVES))
3697 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVES;
3698 if (processor_alias_table[i].flags & PTA_AVX512DQ
3699 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512DQ))
3700 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512DQ;
3701 if (processor_alias_table[i].flags & PTA_AVX512BW
3702 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512BW))
3703 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512BW;
3704 if (processor_alias_table[i].flags & PTA_AVX512VL
3705 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VL))
3706 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VL;
3707 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3708 x86_prefetch_sse = true;
3709
3710 break;
3711 }
3712
3713 if (!strcmp (opts->x_ix86_arch_string, "generic"))
3714 error ("generic CPU can be used only for %stune=%s %s",
3715 prefix, suffix, sw);
3716 else if (!strcmp (opts->x_ix86_arch_string, "intel"))
3717 error ("intel CPU can be used only for %stune=%s %s",
3718 prefix, suffix, sw);
3719 else if (i == pta_size)
3720 error ("bad value (%s) for %sarch=%s %s",
3721 opts->x_ix86_arch_string, prefix, suffix, sw);
3722
3723 ix86_arch_mask = 1u << ix86_arch;
3724 for (i = 0; i < X86_ARCH_LAST; ++i)
3725 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
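  /* For instance, with -march=core2 the mask is 1u << PROCESSOR_CORE2, and
     each initial_ix86_arch_features[] entry is tested against that bit.  */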
3726
3727 for (i = 0; i < pta_size; i++)
3728 if (! strcmp (opts->x_ix86_tune_string, processor_alias_table[i].name))
3729 {
3730 ix86_schedule = processor_alias_table[i].schedule;
3731 ix86_tune = processor_alias_table[i].processor;
3732 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3733 {
3734 if (!(processor_alias_table[i].flags & PTA_64BIT))
3735 {
3736 if (ix86_tune_defaulted)
3737 {
3738 opts->x_ix86_tune_string = "x86-64";
3739 for (i = 0; i < pta_size; i++)
3740 if (! strcmp (opts->x_ix86_tune_string,
3741 processor_alias_table[i].name))
3742 break;
3743 ix86_schedule = processor_alias_table[i].schedule;
3744 ix86_tune = processor_alias_table[i].processor;
3745 }
3746 else
3747 error ("CPU you selected does not support x86-64 "
3748 "instruction set");
3749 }
3750 }
3751 /* Intel CPUs have always interpreted SSE prefetch instructions as
3752 NOPs; so, we can enable SSE prefetch instructions even when
3753 -mtune (rather than -march) points us to a processor that has them.
3754 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3755 higher processors. */
3756 if (TARGET_CMOV
3757 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3758 x86_prefetch_sse = true;
3759 break;
3760 }
3761
3762 if (ix86_tune_specified && i == pta_size)
3763 error ("bad value (%s) for %stune=%s %s",
3764 opts->x_ix86_tune_string, prefix, suffix, sw);
3765
3766 set_ix86_tune_features (ix86_tune, opts->x_ix86_dump_tunes);
3767
3768 #ifndef USE_IX86_FRAME_POINTER
3769 #define USE_IX86_FRAME_POINTER 0
3770 #endif
3771
3772 #ifndef USE_X86_64_FRAME_POINTER
3773 #define USE_X86_64_FRAME_POINTER 0
3774 #endif
3775
3776 /* Set the default values for switches whose default depends on TARGET_64BIT
3777 in case they weren't overwritten by command line options. */
3778 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3779 {
3780 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3781 opts->x_flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
3782 if (opts->x_flag_asynchronous_unwind_tables
3783 && !opts_set->x_flag_unwind_tables
3784 && TARGET_64BIT_MS_ABI)
3785 opts->x_flag_unwind_tables = 1;
3786 if (opts->x_flag_asynchronous_unwind_tables == 2)
3787 opts->x_flag_unwind_tables
3788 = opts->x_flag_asynchronous_unwind_tables = 1;
3789 if (opts->x_flag_pcc_struct_return == 2)
3790 opts->x_flag_pcc_struct_return = 0;
3791 }
3792 else
3793 {
3794 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3795 opts->x_flag_omit_frame_pointer
3796 = !(USE_IX86_FRAME_POINTER || opts->x_optimize_size);
3797 if (opts->x_flag_asynchronous_unwind_tables == 2)
3798 opts->x_flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3799 if (opts->x_flag_pcc_struct_return == 2)
3800 opts->x_flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3801 }
3802
3803 ix86_tune_cost = processor_target_table[ix86_tune].cost;
3804 if (opts->x_optimize_size)
3805 ix86_cost = &ix86_size_cost;
3806 else
3807 ix86_cost = ix86_tune_cost;
3808
3809 /* Arrange to set up i386_stack_locals for all functions. */
3810 init_machine_status = ix86_init_machine_status;
3811
3812 /* Validate -mregparm= value. */
3813 if (opts_set->x_ix86_regparm)
3814 {
3815 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3816 warning (0, "-mregparm is ignored in 64-bit mode");
3817 if (opts->x_ix86_regparm > REGPARM_MAX)
3818 {
3819 error ("-mregparm=%d is not between 0 and %d",
3820 opts->x_ix86_regparm, REGPARM_MAX);
3821 opts->x_ix86_regparm = 0;
3822 }
3823 }
3824 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3825 opts->x_ix86_regparm = REGPARM_MAX;
3826
3827 /* Default align_* from the processor table. */
3828 if (opts->x_align_loops == 0)
3829 {
3830 opts->x_align_loops = processor_target_table[ix86_tune].align_loop;
3831 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3832 }
3833 if (opts->x_align_jumps == 0)
3834 {
3835 opts->x_align_jumps = processor_target_table[ix86_tune].align_jump;
3836 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3837 }
3838 if (opts->x_align_functions == 0)
3839 {
3840 opts->x_align_functions = processor_target_table[ix86_tune].align_func;
3841 }
3842
3843 /* Provide default for -mbranch-cost= value. */
3844 if (!opts_set->x_ix86_branch_cost)
3845 opts->x_ix86_branch_cost = ix86_cost->branch_cost;
3846
3847 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3848 {
3849 opts->x_target_flags
3850 |= TARGET_SUBTARGET64_DEFAULT & ~opts_set->x_target_flags;
3851
3852 /* Enable by default the SSE and MMX builtins. Do allow the user to
3853 explicitly disable any of these. In particular, disabling SSE and
3854 MMX for kernel code is extremely useful. */
3855 if (!ix86_arch_specified)
3856 opts->x_ix86_isa_flags
3857 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3858 | TARGET_SUBTARGET64_ISA_DEFAULT)
3859 & ~opts->x_ix86_isa_flags_explicit);
3860
3861 if (TARGET_RTD_P (opts->x_target_flags))
3862 	warning (0, "%srtd%s is ignored in 64-bit mode", prefix, suffix);
3863 }
3864 else
3865 {
3866 opts->x_target_flags
3867 |= TARGET_SUBTARGET32_DEFAULT & ~opts_set->x_target_flags;
3868
3869 if (!ix86_arch_specified)
3870 opts->x_ix86_isa_flags
3871 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit;
3872
3873       /* The i386 ABI does not specify a red zone.  It still makes sense to use
3874 	 one when the programmer takes care to keep the stack from being destroyed.  */
3875 if (!(opts_set->x_target_flags & MASK_NO_RED_ZONE))
3876 opts->x_target_flags |= MASK_NO_RED_ZONE;
3877 }
3878
3879 /* Keep nonleaf frame pointers. */
3880 if (opts->x_flag_omit_frame_pointer)
3881 opts->x_target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3882 else if (TARGET_OMIT_LEAF_FRAME_POINTER_P (opts->x_target_flags))
3883 opts->x_flag_omit_frame_pointer = 1;
3884
3885 /* If we're doing fast math, we don't care about comparison order
3886 wrt NaNs. This lets us use a shorter comparison sequence. */
3887 if (opts->x_flag_finite_math_only)
3888 opts->x_target_flags &= ~MASK_IEEE_FP;
3889
3890 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3891 since the insns won't need emulation. */
3892 if (ix86_tune_features [X86_TUNE_ALWAYS_FANCY_MATH_387])
3893 opts->x_target_flags &= ~MASK_NO_FANCY_MATH_387;
3894
3895 /* Likewise, if the target doesn't have a 387, or we've specified
3896 software floating point, don't use 387 inline intrinsics. */
3897 if (!TARGET_80387_P (opts->x_target_flags))
3898 opts->x_target_flags |= MASK_NO_FANCY_MATH_387;
3899
3900 /* Turn on MMX builtins for -msse. */
3901 if (TARGET_SSE_P (opts->x_ix86_isa_flags))
3902 opts->x_ix86_isa_flags
3903 |= OPTION_MASK_ISA_MMX & ~opts->x_ix86_isa_flags_explicit;
3904
3905 /* Enable SSE prefetch. */
3906 if (TARGET_SSE_P (opts->x_ix86_isa_flags)
3907 || (TARGET_PRFCHW && !TARGET_3DNOW_P (opts->x_ix86_isa_flags)))
3908 x86_prefetch_sse = true;
3909
3910 /* Enable prefetch{,w} instructions for -m3dnow and -mprefetchwt1. */
3911 if (TARGET_3DNOW_P (opts->x_ix86_isa_flags)
3912 || TARGET_PREFETCHWT1_P (opts->x_ix86_isa_flags))
3913 opts->x_ix86_isa_flags
3914 |= OPTION_MASK_ISA_PRFCHW & ~opts->x_ix86_isa_flags_explicit;
3915
3916 /* Enable popcnt instruction for -msse4.2 or -mabm. */
3917 if (TARGET_SSE4_2_P (opts->x_ix86_isa_flags)
3918 || TARGET_ABM_P (opts->x_ix86_isa_flags))
3919 opts->x_ix86_isa_flags
3920 |= OPTION_MASK_ISA_POPCNT & ~opts->x_ix86_isa_flags_explicit;
3921
3922 /* Enable lzcnt instruction for -mabm. */
3923 if (TARGET_ABM_P(opts->x_ix86_isa_flags))
3924 opts->x_ix86_isa_flags
3925 |= OPTION_MASK_ISA_LZCNT & ~opts->x_ix86_isa_flags_explicit;
3926
3927 /* Validate -mpreferred-stack-boundary= value or default it to
3928 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3929 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3930 if (opts_set->x_ix86_preferred_stack_boundary_arg)
3931 {
3932 int min = (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3933 ? (TARGET_SSE_P (opts->x_ix86_isa_flags) ? 4 : 3) : 2);
3934 int max = (TARGET_SEH ? 4 : 12);
3935
3936 if (opts->x_ix86_preferred_stack_boundary_arg < min
3937 || opts->x_ix86_preferred_stack_boundary_arg > max)
3938 {
3939 if (min == max)
3940 error ("-mpreferred-stack-boundary is not supported "
3941 "for this target");
3942 else
3943 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
3944 opts->x_ix86_preferred_stack_boundary_arg, min, max);
3945 }
3946 else
3947 ix86_preferred_stack_boundary
3948 = (1 << opts->x_ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
3949 }
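  /* For example, -mpreferred-stack-boundary=4 requests 2^4 = 16-byte
     alignment, i.e. (1 << 4) * BITS_PER_UNIT = 128 bits with the usual
     BITS_PER_UNIT of 8.  */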
3950
3951 /* Set the default value for -mstackrealign. */
3952 if (opts->x_ix86_force_align_arg_pointer == -1)
3953 opts->x_ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3954
3955 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3956
3957 /* Validate -mincoming-stack-boundary= value or default it to
3958 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3959 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3960 if (opts_set->x_ix86_incoming_stack_boundary_arg)
3961 {
3962 if (opts->x_ix86_incoming_stack_boundary_arg
3963 < (TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2)
3964 || opts->x_ix86_incoming_stack_boundary_arg > 12)
3965 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3966 opts->x_ix86_incoming_stack_boundary_arg,
3967 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2);
3968 else
3969 {
3970 ix86_user_incoming_stack_boundary
3971 = (1 << opts->x_ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
3972 ix86_incoming_stack_boundary
3973 = ix86_user_incoming_stack_boundary;
3974 }
3975 }
3976
3977 /* Accept -msseregparm only if at least SSE support is enabled. */
3978 if (TARGET_SSEREGPARM_P (opts->x_target_flags)
3979 && ! TARGET_SSE_P (opts->x_ix86_isa_flags))
3980 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3981
3982 if (opts_set->x_ix86_fpmath)
3983 {
3984 if (opts->x_ix86_fpmath & FPMATH_SSE)
3985 {
3986 if (!TARGET_SSE_P (opts->x_ix86_isa_flags))
3987 {
3988 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3989 opts->x_ix86_fpmath = FPMATH_387;
3990 }
3991 else if ((opts->x_ix86_fpmath & FPMATH_387)
3992 && !TARGET_80387_P (opts->x_target_flags))
3993 {
3994 warning (0, "387 instruction set disabled, using SSE arithmetics");
3995 opts->x_ix86_fpmath = FPMATH_SSE;
3996 }
3997 }
3998 }
3999   /* For all chips supporting SSE2, -mfpmath=sse performs better than
4000      -mfpmath=387.  The latter is nevertheless the default on many targets,
4001      since the extra 80-bit precision of temporaries is considered to be
4002      part of the ABI.  Overwrite the default at least for -ffast-math.
4003      TODO: -mfpmath=both seems to produce similarly performing code with
4004      slightly smaller binaries.  It is however not clear if register
4005      allocation is ready for this setting.
4006      Also -mfpmath=387 is overall a lot more compact (about 4-5%) than SSE
4007      codegen.  We may switch to 387 with -ffast-math for size-optimized
4008      functions.  */
4009 else if (fast_math_flags_set_p (&global_options)
4010 && TARGET_SSE2_P (opts->x_ix86_isa_flags))
4011 opts->x_ix86_fpmath = FPMATH_SSE;
4012 else
4013 opts->x_ix86_fpmath = TARGET_FPMATH_DEFAULT_P (opts->x_ix86_isa_flags);
4014
4015 /* If the i387 is disabled, then do not return values in it. */
4016 if (!TARGET_80387_P (opts->x_target_flags))
4017 opts->x_target_flags &= ~MASK_FLOAT_RETURNS;
4018
4019 /* Use external vectorized library in vectorizing intrinsics. */
4020 if (opts_set->x_ix86_veclibabi_type)
4021 switch (opts->x_ix86_veclibabi_type)
4022 {
4023 case ix86_veclibabi_type_svml:
4024 ix86_veclib_handler = ix86_veclibabi_svml;
4025 break;
4026
4027 case ix86_veclibabi_type_acml:
4028 ix86_veclib_handler = ix86_veclibabi_acml;
4029 break;
4030
4031 default:
4032 gcc_unreachable ();
4033 }
4034
4035 if (ix86_tune_features [X86_TUNE_ACCUMULATE_OUTGOING_ARGS]
4036 && !(opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4037 && !opts->x_optimize_size)
4038 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4039
4040 /* If stack probes are required, the space used for large function
4041 arguments on the stack must also be probed, so enable
4042 -maccumulate-outgoing-args so this happens in the prologue. */
4043 if (TARGET_STACK_PROBE_P (opts->x_target_flags)
4044 && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4045 {
4046 if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4047 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
4048 "for correctness", prefix, suffix);
4049 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4050 }
4051
4052 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
4053 {
4054 char *p;
4055 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
4056 p = strchr (internal_label_prefix, 'X');
4057 internal_label_prefix_len = p - internal_label_prefix;
4058 *p = '\0';
4059 }
4060
4061   /* When the scheduling description is not available, disable the scheduler
4062      pass so it won't slow down compilation and make x87 code slower.  */
4063 if (!TARGET_SCHEDULE)
4064 opts->x_flag_schedule_insns_after_reload = opts->x_flag_schedule_insns = 0;
4065
4066 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4067 ix86_tune_cost->simultaneous_prefetches,
4068 opts->x_param_values,
4069 opts_set->x_param_values);
4070 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
4071 ix86_tune_cost->prefetch_block,
4072 opts->x_param_values,
4073 opts_set->x_param_values);
4074 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
4075 ix86_tune_cost->l1_cache_size,
4076 opts->x_param_values,
4077 opts_set->x_param_values);
4078 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
4079 ix86_tune_cost->l2_cache_size,
4080 opts->x_param_values,
4081 opts_set->x_param_values);
4082
4083   /* Enable software prefetching at -O3 for CPUs where prefetching is helpful.  */
4084 if (opts->x_flag_prefetch_loop_arrays < 0
4085 && HAVE_prefetch
4086 && (opts->x_optimize >= 3 || opts->x_flag_profile_use)
4087 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
4088 opts->x_flag_prefetch_loop_arrays = 1;
4089
4090   /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
4091      can be optimized to ap = __builtin_next_arg (0).  */
4092 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && !opts->x_flag_split_stack)
4093 targetm.expand_builtin_va_start = NULL;
4094
4095 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4096 {
4097 ix86_gen_leave = gen_leave_rex64;
4098 if (Pmode == DImode)
4099 {
4100 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_di;
4101 ix86_gen_tls_local_dynamic_base_64
4102 = gen_tls_local_dynamic_base_64_di;
4103 }
4104 else
4105 {
4106 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_si;
4107 ix86_gen_tls_local_dynamic_base_64
4108 = gen_tls_local_dynamic_base_64_si;
4109 }
4110 }
4111 else
4112 ix86_gen_leave = gen_leave;
4113
4114 if (Pmode == DImode)
4115 {
4116 ix86_gen_add3 = gen_adddi3;
4117 ix86_gen_sub3 = gen_subdi3;
4118 ix86_gen_sub3_carry = gen_subdi3_carry;
4119 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
4120 ix86_gen_andsp = gen_anddi3;
4121 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
4122 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
4123 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
4124 ix86_gen_monitor = gen_sse3_monitor_di;
4125 }
4126 else
4127 {
4128 ix86_gen_add3 = gen_addsi3;
4129 ix86_gen_sub3 = gen_subsi3;
4130 ix86_gen_sub3_carry = gen_subsi3_carry;
4131 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
4132 ix86_gen_andsp = gen_andsi3;
4133 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
4134 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
4135 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
4136 ix86_gen_monitor = gen_sse3_monitor_si;
4137 }
4138
4139 #ifdef USE_IX86_CLD
4140 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
4141 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
4142 opts->x_target_flags |= MASK_CLD & ~opts_set->x_target_flags;
4143 #endif
4144
4145 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && opts->x_flag_pic)
4146 {
4147 if (opts->x_flag_fentry > 0)
4148 sorry ("-mfentry isn%'t supported for 32-bit in combination "
4149 "with -fpic");
4150 opts->x_flag_fentry = 0;
4151 }
4152 else if (TARGET_SEH)
4153 {
4154 if (opts->x_flag_fentry == 0)
4155 sorry ("-mno-fentry isn%'t compatible with SEH");
4156 opts->x_flag_fentry = 1;
4157 }
4158 else if (opts->x_flag_fentry < 0)
4159 {
4160 #if defined(PROFILE_BEFORE_PROLOGUE)
4161 opts->x_flag_fentry = 1;
4162 #else
4163 opts->x_flag_fentry = 0;
4164 #endif
4165 }
4166
4167   /* When not optimizing for size, enable the vzeroupper optimization for
4168      TARGET_AVX with -fexpensive-optimizations and split 32-byte
4169      AVX unaligned loads/stores.  */
4170 if (!opts->x_optimize_size)
4171 {
4172 if (flag_expensive_optimizations
4173 && !(opts_set->x_target_flags & MASK_VZEROUPPER))
4174 opts->x_target_flags |= MASK_VZEROUPPER;
4175 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL]
4176 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
4177 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
4178 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL]
4179 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_STORE))
4180 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
4181 /* Enable 128-bit AVX instruction generation
4182 for the auto-vectorizer. */
4183 if (TARGET_AVX128_OPTIMAL
4184 && !(opts_set->x_target_flags & MASK_PREFER_AVX128))
4185 opts->x_target_flags |= MASK_PREFER_AVX128;
4186 }
4187
4188 if (opts->x_ix86_recip_name)
4189 {
4190 char *p = ASTRDUP (opts->x_ix86_recip_name);
4191 char *q;
4192 unsigned int mask, i;
4193 bool invert;
4194
4195 while ((q = strtok (p, ",")) != NULL)
4196 {
4197 p = NULL;
4198 if (*q == '!')
4199 {
4200 invert = true;
4201 q++;
4202 }
4203 else
4204 invert = false;
4205
4206 if (!strcmp (q, "default"))
4207 mask = RECIP_MASK_ALL;
4208 else
4209 {
4210 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4211 if (!strcmp (q, recip_options[i].string))
4212 {
4213 mask = recip_options[i].mask;
4214 break;
4215 }
4216
4217 if (i == ARRAY_SIZE (recip_options))
4218 {
4219 error ("unknown option for -mrecip=%s", q);
4220 invert = false;
4221 mask = RECIP_MASK_NONE;
4222 }
4223 }
4224
4225 opts->x_recip_mask_explicit |= mask;
4226 if (invert)
4227 opts->x_recip_mask &= ~mask;
4228 else
4229 opts->x_recip_mask |= mask;
4230 }
4231 }
4232
4233 if (TARGET_RECIP_P (opts->x_target_flags))
4234 opts->x_recip_mask |= RECIP_MASK_ALL & ~opts->x_recip_mask_explicit;
4235 else if (opts_set->x_target_flags & MASK_RECIP)
4236 opts->x_recip_mask &= ~(RECIP_MASK_ALL & ~opts->x_recip_mask_explicit);
4237
4238 /* Default long double to 64-bit for 32-bit Bionic and to __float128
4239 for 64-bit Bionic. */
4240 if (TARGET_HAS_BIONIC
4241 && !(opts_set->x_target_flags
4242 & (MASK_LONG_DOUBLE_64 | MASK_LONG_DOUBLE_128)))
4243 opts->x_target_flags |= (TARGET_64BIT
4244 ? MASK_LONG_DOUBLE_128
4245 : MASK_LONG_DOUBLE_64);
4246
4247 /* Only one of them can be active. */
4248 gcc_assert ((opts->x_target_flags & MASK_LONG_DOUBLE_64) == 0
4249 || (opts->x_target_flags & MASK_LONG_DOUBLE_128) == 0);
4250
4251 /* Save the initial options in case the user does function specific
4252 options. */
4253 if (main_args_p)
4254 target_option_default_node = target_option_current_node
4255 = build_target_option_node (opts);
4256
4257 /* Handle stack protector */
4258 if (!opts_set->x_ix86_stack_protector_guard)
4259 opts->x_ix86_stack_protector_guard
4260 = TARGET_HAS_BIONIC ? SSP_GLOBAL : SSP_TLS;
4261
4262 /* Handle -mmemcpy-strategy= and -mmemset-strategy= */
4263 if (opts->x_ix86_tune_memcpy_strategy)
4264 {
4265 char *str = xstrdup (opts->x_ix86_tune_memcpy_strategy);
4266 ix86_parse_stringop_strategy_string (str, false);
4267 free (str);
4268 }
4269
4270 if (opts->x_ix86_tune_memset_strategy)
4271 {
4272 char *str = xstrdup (opts->x_ix86_tune_memset_strategy);
4273 ix86_parse_stringop_strategy_string (str, true);
4274 free (str);
4275 }
4276 }
4277
4278 /* Implement the TARGET_OPTION_OVERRIDE hook. */
4279
4280 static void
4281 ix86_option_override (void)
4282 {
4283 opt_pass *pass_insert_vzeroupper = make_pass_insert_vzeroupper (g);
4284 static struct register_pass_info insert_vzeroupper_info
4285 = { pass_insert_vzeroupper, "reload",
4286 1, PASS_POS_INSERT_AFTER
4287 };
4288
4289 ix86_option_override_internal (true, &global_options, &global_options_set);
4290
4291
4292 /* This needs to be done at start up. It's convenient to do it here. */
4293 register_pass (&insert_vzeroupper_info);
4294 }
4295
4296 /* Update register usage after having seen the compiler flags. */
4297
4298 static void
4299 ix86_conditional_register_usage (void)
4300 {
4301 int i, c_mask;
4302 unsigned int j;
4303
4304 /* The PIC register, if it exists, is fixed. */
4305 j = PIC_OFFSET_TABLE_REGNUM;
4306 if (j != INVALID_REGNUM)
4307 fixed_regs[j] = call_used_regs[j] = 1;
4308
4309 /* For 32-bit targets, squash the REX registers. */
4310 if (! TARGET_64BIT)
4311 {
4312 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
4313 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4314 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
4315 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4316 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4317 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4318 }
4319
4320 /* See the definition of CALL_USED_REGISTERS in i386.h. */
4321 c_mask = (TARGET_64BIT_MS_ABI ? (1 << 3)
4322 : TARGET_64BIT ? (1 << 2)
4323 : (1 << 1));
4324
4325 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
4326
4327 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4328 {
4329 /* Set/reset conditionally defined registers from
4330 CALL_USED_REGISTERS initializer. */
4331 if (call_used_regs[i] > 1)
4332 call_used_regs[i] = !!(call_used_regs[i] & c_mask);
4333
4334 /* Calculate registers of CLOBBERED_REGS register set
4335 as call used registers from GENERAL_REGS register set. */
4336 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
4337 && call_used_regs[i])
4338 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
4339 }
4340
4341 /* If MMX is disabled, squash the registers. */
4342 if (! TARGET_MMX)
4343 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4344 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
4345 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4346
4347 /* If SSE is disabled, squash the registers. */
4348 if (! TARGET_SSE)
4349 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4350 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
4351 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4352
4353 /* If the FPU is disabled, squash the registers. */
4354 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
4355 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4356 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
4357 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4358
4359 /* If AVX512F is disabled, squash the registers. */
4360 if (! TARGET_AVX512F)
4361 {
4362 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4363 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4364
4365 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
4366 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4367 }
4368 }
4369
4370 \f
4371 /* Save the current options */
4372
4373 static void
4374 ix86_function_specific_save (struct cl_target_option *ptr,
4375 struct gcc_options *opts)
4376 {
4377 ptr->arch = ix86_arch;
4378 ptr->schedule = ix86_schedule;
4379 ptr->tune = ix86_tune;
4380 ptr->branch_cost = ix86_branch_cost;
4381 ptr->tune_defaulted = ix86_tune_defaulted;
4382 ptr->arch_specified = ix86_arch_specified;
4383 ptr->x_ix86_isa_flags_explicit = opts->x_ix86_isa_flags_explicit;
4384 ptr->x_ix86_target_flags_explicit = opts->x_ix86_target_flags_explicit;
4385 ptr->x_recip_mask_explicit = opts->x_recip_mask_explicit;
4386 ptr->x_ix86_arch_string = opts->x_ix86_arch_string;
4387 ptr->x_ix86_tune_string = opts->x_ix86_tune_string;
4388 ptr->x_ix86_cmodel = opts->x_ix86_cmodel;
4389 ptr->x_ix86_abi = opts->x_ix86_abi;
4390 ptr->x_ix86_asm_dialect = opts->x_ix86_asm_dialect;
4391 ptr->x_ix86_branch_cost = opts->x_ix86_branch_cost;
4392 ptr->x_ix86_dump_tunes = opts->x_ix86_dump_tunes;
4393 ptr->x_ix86_force_align_arg_pointer = opts->x_ix86_force_align_arg_pointer;
4394 ptr->x_ix86_force_drap = opts->x_ix86_force_drap;
4395 ptr->x_ix86_incoming_stack_boundary_arg = opts->x_ix86_incoming_stack_boundary_arg;
4396 ptr->x_ix86_pmode = opts->x_ix86_pmode;
4397 ptr->x_ix86_preferred_stack_boundary_arg = opts->x_ix86_preferred_stack_boundary_arg;
4398 ptr->x_ix86_recip_name = opts->x_ix86_recip_name;
4399 ptr->x_ix86_regparm = opts->x_ix86_regparm;
4400 ptr->x_ix86_section_threshold = opts->x_ix86_section_threshold;
4401 ptr->x_ix86_sse2avx = opts->x_ix86_sse2avx;
4402 ptr->x_ix86_stack_protector_guard = opts->x_ix86_stack_protector_guard;
4403 ptr->x_ix86_stringop_alg = opts->x_ix86_stringop_alg;
4404 ptr->x_ix86_tls_dialect = opts->x_ix86_tls_dialect;
4405 ptr->x_ix86_tune_ctrl_string = opts->x_ix86_tune_ctrl_string;
4406 ptr->x_ix86_tune_memcpy_strategy = opts->x_ix86_tune_memcpy_strategy;
4407 ptr->x_ix86_tune_memset_strategy = opts->x_ix86_tune_memset_strategy;
4408 ptr->x_ix86_tune_no_default = opts->x_ix86_tune_no_default;
4409 ptr->x_ix86_veclibabi_type = opts->x_ix86_veclibabi_type;
4410
4411 /* The fields are char but the variables are not; make sure the
4412 values fit in the fields. */
4413 gcc_assert (ptr->arch == ix86_arch);
4414 gcc_assert (ptr->schedule == ix86_schedule);
4415 gcc_assert (ptr->tune == ix86_tune);
4416 gcc_assert (ptr->branch_cost == ix86_branch_cost);
4417 }
4418
4419 /* Restore the current options */
4420
4421 static void
4422 ix86_function_specific_restore (struct gcc_options *opts,
4423 struct cl_target_option *ptr)
4424 {
4425 enum processor_type old_tune = ix86_tune;
4426 enum processor_type old_arch = ix86_arch;
4427 unsigned int ix86_arch_mask;
4428 int i;
4429
4430 /* We don't change -fPIC. */
4431 opts->x_flag_pic = flag_pic;
4432
4433 ix86_arch = (enum processor_type) ptr->arch;
4434 ix86_schedule = (enum attr_cpu) ptr->schedule;
4435 ix86_tune = (enum processor_type) ptr->tune;
4436 opts->x_ix86_branch_cost = ptr->branch_cost;
4437 ix86_tune_defaulted = ptr->tune_defaulted;
4438 ix86_arch_specified = ptr->arch_specified;
4439 opts->x_ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
4440 opts->x_ix86_target_flags_explicit = ptr->x_ix86_target_flags_explicit;
4441 opts->x_recip_mask_explicit = ptr->x_recip_mask_explicit;
4442 opts->x_ix86_arch_string = ptr->x_ix86_arch_string;
4443 opts->x_ix86_tune_string = ptr->x_ix86_tune_string;
4444 opts->x_ix86_cmodel = ptr->x_ix86_cmodel;
4445 opts->x_ix86_abi = ptr->x_ix86_abi;
4446 opts->x_ix86_asm_dialect = ptr->x_ix86_asm_dialect;
4447 opts->x_ix86_branch_cost = ptr->x_ix86_branch_cost;
4448 opts->x_ix86_dump_tunes = ptr->x_ix86_dump_tunes;
4449 opts->x_ix86_force_align_arg_pointer = ptr->x_ix86_force_align_arg_pointer;
4450 opts->x_ix86_force_drap = ptr->x_ix86_force_drap;
4451 opts->x_ix86_incoming_stack_boundary_arg = ptr->x_ix86_incoming_stack_boundary_arg;
4452 opts->x_ix86_pmode = ptr->x_ix86_pmode;
4453 opts->x_ix86_preferred_stack_boundary_arg = ptr->x_ix86_preferred_stack_boundary_arg;
4454 opts->x_ix86_recip_name = ptr->x_ix86_recip_name;
4455 opts->x_ix86_regparm = ptr->x_ix86_regparm;
4456 opts->x_ix86_section_threshold = ptr->x_ix86_section_threshold;
4457 opts->x_ix86_sse2avx = ptr->x_ix86_sse2avx;
4458 opts->x_ix86_stack_protector_guard = ptr->x_ix86_stack_protector_guard;
4459 opts->x_ix86_stringop_alg = ptr->x_ix86_stringop_alg;
4460 opts->x_ix86_tls_dialect = ptr->x_ix86_tls_dialect;
4461 opts->x_ix86_tune_ctrl_string = ptr->x_ix86_tune_ctrl_string;
4462 opts->x_ix86_tune_memcpy_strategy = ptr->x_ix86_tune_memcpy_strategy;
4463 opts->x_ix86_tune_memset_strategy = ptr->x_ix86_tune_memset_strategy;
4464 opts->x_ix86_tune_no_default = ptr->x_ix86_tune_no_default;
4465 opts->x_ix86_veclibabi_type = ptr->x_ix86_veclibabi_type;
4466
4467 /* Recreate the arch feature tests if the arch changed */
4468 if (old_arch != ix86_arch)
4469 {
4470 ix86_arch_mask = 1u << ix86_arch;
4471 for (i = 0; i < X86_ARCH_LAST; ++i)
4472 ix86_arch_features[i]
4473 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
4474 }
4475
4476 /* Recreate the tune optimization tests */
4477 if (old_tune != ix86_tune)
4478 set_ix86_tune_features (ix86_tune, false);
4479 }
4480
4481 /* Print the current options */
4482
4483 static void
4484 ix86_function_specific_print (FILE *file, int indent,
4485 struct cl_target_option *ptr)
4486 {
4487 char *target_string
4488 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
4489 NULL, NULL, ptr->x_ix86_fpmath, false);
4490
4491 gcc_assert (ptr->arch < PROCESSOR_max);
4492 fprintf (file, "%*sarch = %d (%s)\n",
4493 indent, "",
4494 ptr->arch, processor_target_table[ptr->arch].name);
4495
4496 gcc_assert (ptr->tune < PROCESSOR_max);
4497 fprintf (file, "%*stune = %d (%s)\n",
4498 indent, "",
4499 ptr->tune, processor_target_table[ptr->tune].name);
4500
4501 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
4502
4503 if (target_string)
4504 {
4505 fprintf (file, "%*s%s\n", indent, "", target_string);
4506 free (target_string);
4507 }
4508 }
4509
4510 \f
4511 /* Inner function to process the attribute((target(...))), take an argument and
4512 set the current options from the argument. If we have a list, recursively go
4513 over the list. */
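/* A hedged usage sketch (the function name below is hypothetical, not part
   of GCC): a declaration handled by this routine might look like

     int hot_loop (int *) __attribute__ ((target ("avx2,no-sse4a,arch=core2")));

   Each comma-separated entry, optionally prefixed with "no-", is matched
   against the attrs[] table below and applied as the corresponding -m
   option for this function only.  */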
4514
4515 static bool
4516 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
4517 struct gcc_options *opts,
4518 struct gcc_options *opts_set,
4519 struct gcc_options *enum_opts_set)
4520 {
4521 char *next_optstr;
4522 bool ret = true;
4523
4524 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4525 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4526 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
4527 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4528 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
4529
4530 enum ix86_opt_type
4531 {
4532 ix86_opt_unknown,
4533 ix86_opt_yes,
4534 ix86_opt_no,
4535 ix86_opt_str,
4536 ix86_opt_enum,
4537 ix86_opt_isa
4538 };
4539
4540 static const struct
4541 {
4542 const char *string;
4543 size_t len;
4544 enum ix86_opt_type type;
4545 int opt;
4546 int mask;
4547 } attrs[] = {
4548 /* isa options */
4549 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
4550 IX86_ATTR_ISA ("abm", OPT_mabm),
4551 IX86_ATTR_ISA ("bmi", OPT_mbmi),
4552 IX86_ATTR_ISA ("bmi2", OPT_mbmi2),
4553 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt),
4554 IX86_ATTR_ISA ("tbm", OPT_mtbm),
4555 IX86_ATTR_ISA ("aes", OPT_maes),
4556 IX86_ATTR_ISA ("sha", OPT_msha),
4557 IX86_ATTR_ISA ("avx", OPT_mavx),
4558 IX86_ATTR_ISA ("avx2", OPT_mavx2),
4559 IX86_ATTR_ISA ("avx512f", OPT_mavx512f),
4560 IX86_ATTR_ISA ("avx512pf", OPT_mavx512pf),
4561 IX86_ATTR_ISA ("avx512er", OPT_mavx512er),
4562 IX86_ATTR_ISA ("avx512cd", OPT_mavx512cd),
4563 IX86_ATTR_ISA ("avx512dq", OPT_mavx512dq),
4564 IX86_ATTR_ISA ("avx512bw", OPT_mavx512bw),
4565 IX86_ATTR_ISA ("avx512vl", OPT_mavx512vl),
4566 IX86_ATTR_ISA ("mmx", OPT_mmmx),
4567 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
4568 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
4569 IX86_ATTR_ISA ("sse", OPT_msse),
4570 IX86_ATTR_ISA ("sse2", OPT_msse2),
4571 IX86_ATTR_ISA ("sse3", OPT_msse3),
4572 IX86_ATTR_ISA ("sse4", OPT_msse4),
4573 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
4574 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
4575 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
4576 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
4577 IX86_ATTR_ISA ("fma4", OPT_mfma4),
4578 IX86_ATTR_ISA ("fma", OPT_mfma),
4579 IX86_ATTR_ISA ("xop", OPT_mxop),
4580 IX86_ATTR_ISA ("lwp", OPT_mlwp),
4581 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
4582 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
4583 IX86_ATTR_ISA ("f16c", OPT_mf16c),
4584 IX86_ATTR_ISA ("rtm", OPT_mrtm),
4585 IX86_ATTR_ISA ("hle", OPT_mhle),
4586 IX86_ATTR_ISA ("prfchw", OPT_mprfchw),
4587 IX86_ATTR_ISA ("rdseed", OPT_mrdseed),
4588 IX86_ATTR_ISA ("adx", OPT_madx),
4589 IX86_ATTR_ISA ("fxsr", OPT_mfxsr),
4590 IX86_ATTR_ISA ("xsave", OPT_mxsave),
4591 IX86_ATTR_ISA ("xsaveopt", OPT_mxsaveopt),
4592 IX86_ATTR_ISA ("prefetchwt1", OPT_mprefetchwt1),
4593 IX86_ATTR_ISA ("clflushopt", OPT_mclflushopt),
4594 IX86_ATTR_ISA ("xsavec", OPT_mxsavec),
4595 IX86_ATTR_ISA ("xsaves", OPT_mxsaves),
4596
4597 /* enum options */
4598 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
4599
4600 /* string options */
4601 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
4602 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
4603
4604 /* flag options */
4605 IX86_ATTR_YES ("cld",
4606 OPT_mcld,
4607 MASK_CLD),
4608
4609 IX86_ATTR_NO ("fancy-math-387",
4610 OPT_mfancy_math_387,
4611 MASK_NO_FANCY_MATH_387),
4612
4613 IX86_ATTR_YES ("ieee-fp",
4614 OPT_mieee_fp,
4615 MASK_IEEE_FP),
4616
4617 IX86_ATTR_YES ("inline-all-stringops",
4618 OPT_minline_all_stringops,
4619 MASK_INLINE_ALL_STRINGOPS),
4620
4621 IX86_ATTR_YES ("inline-stringops-dynamically",
4622 OPT_minline_stringops_dynamically,
4623 MASK_INLINE_STRINGOPS_DYNAMICALLY),
4624
4625 IX86_ATTR_NO ("align-stringops",
4626 OPT_mno_align_stringops,
4627 MASK_NO_ALIGN_STRINGOPS),
4628
4629 IX86_ATTR_YES ("recip",
4630 OPT_mrecip,
4631 MASK_RECIP),
4632
4633 };
4634
4635 /* If this is a list, recurse to get the options. */
4636 if (TREE_CODE (args) == TREE_LIST)
4637 {
4638 bool ret = true;
4639
4640 for (; args; args = TREE_CHAIN (args))
4641 if (TREE_VALUE (args)
4642 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
4643 p_strings, opts, opts_set,
4644 enum_opts_set))
4645 ret = false;
4646
4647 return ret;
4648 }
4649
4650 else if (TREE_CODE (args) != STRING_CST)
4651 {
4652 error ("attribute %<target%> argument not a string");
4653 return false;
4654 }
4655
4656 /* Handle multiple arguments separated by commas. */
4657 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
4658
4659 while (next_optstr && *next_optstr != '\0')
4660 {
4661 char *p = next_optstr;
4662 char *orig_p = p;
4663 char *comma = strchr (next_optstr, ',');
4664 const char *opt_string;
4665 size_t len, opt_len;
4666 int opt;
4667 bool opt_set_p;
4668 char ch;
4669 unsigned i;
4670 enum ix86_opt_type type = ix86_opt_unknown;
4671 int mask = 0;
4672
4673 if (comma)
4674 {
4675 *comma = '\0';
4676 len = comma - next_optstr;
4677 next_optstr = comma + 1;
4678 }
4679 else
4680 {
4681 len = strlen (p);
4682 next_optstr = NULL;
4683 }
4684
4685 /* Recognize no-xxx. */
4686 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4687 {
4688 opt_set_p = false;
4689 p += 3;
4690 len -= 3;
4691 }
4692 else
4693 opt_set_p = true;
4694
4695 /* Find the option. */
4696 ch = *p;
4697 opt = N_OPTS;
4698 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4699 {
4700 type = attrs[i].type;
4701 opt_len = attrs[i].len;
4702 if (ch == attrs[i].string[0]
4703 && ((type != ix86_opt_str && type != ix86_opt_enum)
4704 ? len == opt_len
4705 : len > opt_len)
4706 && memcmp (p, attrs[i].string, opt_len) == 0)
4707 {
4708 opt = attrs[i].opt;
4709 mask = attrs[i].mask;
4710 opt_string = attrs[i].string;
4711 break;
4712 }
4713 }
4714
4715 /* Process the option. */
4716 if (opt == N_OPTS)
4717 {
4718 error ("attribute(target(\"%s\")) is unknown", orig_p);
4719 ret = false;
4720 }
4721
4722 else if (type == ix86_opt_isa)
4723 {
4724 struct cl_decoded_option decoded;
4725
4726 generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
4727 ix86_handle_option (opts, opts_set,
4728 &decoded, input_location);
4729 }
4730
4731 else if (type == ix86_opt_yes || type == ix86_opt_no)
4732 {
4733 if (type == ix86_opt_no)
4734 opt_set_p = !opt_set_p;
4735
4736 if (opt_set_p)
4737 opts->x_target_flags |= mask;
4738 else
4739 opts->x_target_flags &= ~mask;
4740 }
4741
4742 else if (type == ix86_opt_str)
4743 {
4744 if (p_strings[opt])
4745 {
4746 error ("option(\"%s\") was already specified", opt_string);
4747 ret = false;
4748 }
4749 else
4750 p_strings[opt] = xstrdup (p + opt_len);
4751 }
4752
4753 else if (type == ix86_opt_enum)
4754 {
4755 bool arg_ok;
4756 int value;
4757
4758 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
4759 if (arg_ok)
4760 set_option (opts, enum_opts_set, opt, value,
4761 p + opt_len, DK_UNSPECIFIED, input_location,
4762 global_dc);
4763 else
4764 {
4765 error ("attribute(target(\"%s\")) is unknown", orig_p);
4766 ret = false;
4767 }
4768 }
4769
4770 else
4771 gcc_unreachable ();
4772 }
4773
4774 return ret;
4775 }
4776
4777 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4778
4779 tree
4780 ix86_valid_target_attribute_tree (tree args,
4781 struct gcc_options *opts,
4782 struct gcc_options *opts_set)
4783 {
4784 const char *orig_arch_string = opts->x_ix86_arch_string;
4785 const char *orig_tune_string = opts->x_ix86_tune_string;
4786 enum fpmath_unit orig_fpmath_set = opts_set->x_ix86_fpmath;
4787 int orig_tune_defaulted = ix86_tune_defaulted;
4788 int orig_arch_specified = ix86_arch_specified;
4789 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
4790 tree t = NULL_TREE;
4791 int i;
4792 struct cl_target_option *def
4793 = TREE_TARGET_OPTION (target_option_default_node);
4794 struct gcc_options enum_opts_set;
4795
4796 memset (&enum_opts_set, 0, sizeof (enum_opts_set));
4797
4798 /* Process each of the options on the chain. */
4799 if (! ix86_valid_target_attribute_inner_p (args, option_strings, opts,
4800 opts_set, &enum_opts_set))
4801 return error_mark_node;
4802
4803 /* If the changed options are different from the default, rerun
4804 ix86_option_override_internal, and then save the options away.
4805 The string options are attribute options, and will be undone
4806 when we copy the save structure. */
4807 if (opts->x_ix86_isa_flags != def->x_ix86_isa_flags
4808 || opts->x_target_flags != def->x_target_flags
4809 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4810 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4811 || enum_opts_set.x_ix86_fpmath)
4812 {
4813 /* If we are using the default tune= or arch=, undo the string assigned,
4814 and use the default. */
4815 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4816 opts->x_ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4817 else if (!orig_arch_specified)
4818 opts->x_ix86_arch_string = NULL;
4819
4820 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4821 opts->x_ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4822 else if (orig_tune_defaulted)
4823 opts->x_ix86_tune_string = NULL;
4824
4825 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4826 if (enum_opts_set.x_ix86_fpmath)
4827 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
4828 else if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
4829 && TARGET_SSE_P (opts->x_ix86_isa_flags))
4830 {
4831 opts->x_ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
4832 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
4833 }
4834
4835 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4836 ix86_option_override_internal (false, opts, opts_set);
4837
4838 /* Add any builtin functions with the new isa if any. */
4839 ix86_add_new_builtins (opts->x_ix86_isa_flags);
4840
4841 /* Save the current options unless we are validating options for
4842 #pragma. */
4843 t = build_target_option_node (opts);
4844
4845 opts->x_ix86_arch_string = orig_arch_string;
4846 opts->x_ix86_tune_string = orig_tune_string;
4847 opts_set->x_ix86_fpmath = orig_fpmath_set;
4848
4849 /* Free up memory allocated to hold the strings */
4850 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
4851 free (option_strings[i]);
4852 }
4853
4854 return t;
4855 }
4856
4857 /* Hook to validate attribute((target("string"))). */
4858
4859 static bool
4860 ix86_valid_target_attribute_p (tree fndecl,
4861 tree ARG_UNUSED (name),
4862 tree args,
4863 int ARG_UNUSED (flags))
4864 {
4865 struct gcc_options func_options;
4866 tree new_target, new_optimize;
4867 bool ret = true;
4868
4869 /* attribute((target("default"))) does nothing, beyond
4870 affecting multi-versioning. */
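     /* Illustrative sketch (hypothetical function name): with function
	multi-versioning (in C++) one writes

	  int dispatch (void) __attribute__ ((target ("default")));
	  int dispatch (void) __attribute__ ((target ("avx2")));

	and the "default" variant is deliberately left untouched here.  */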
4871 if (TREE_VALUE (args)
4872 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
4873 && TREE_CHAIN (args) == NULL_TREE
4874 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
4875 return true;
4876
4877 tree old_optimize = build_optimization_node (&global_options);
4878
4879 /* Get the optimization options of the current function. */
4880 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
4881
4882 if (!func_optimize)
4883 func_optimize = old_optimize;
4884
4885 /* Init func_options. */
4886 memset (&func_options, 0, sizeof (func_options));
4887 init_options_struct (&func_options, NULL);
4888 lang_hooks.init_options_struct (&func_options);
4889
4890 cl_optimization_restore (&func_options,
4891 TREE_OPTIMIZATION (func_optimize));
4892
4893 /* Initialize func_options to the default before its target options can
4894 be set. */
4895 cl_target_option_restore (&func_options,
4896 TREE_TARGET_OPTION (target_option_default_node));
4897
4898 new_target = ix86_valid_target_attribute_tree (args, &func_options,
4899 &global_options_set);
4900
4901 new_optimize = build_optimization_node (&func_options);
4902
4903 if (new_target == error_mark_node)
4904 ret = false;
4905
4906 else if (fndecl && new_target)
4907 {
4908 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
4909
4910 if (old_optimize != new_optimize)
4911 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
4912 }
4913
4914 return ret;
4915 }
4916
4917 \f
4918 /* Hook to determine if one function can safely inline another. */
4919
4920 static bool
4921 ix86_can_inline_p (tree caller, tree callee)
4922 {
4923 bool ret = false;
4924 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
4925 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
4926
4927 /* If callee has no option attributes, then it is ok to inline. */
4928 if (!callee_tree)
4929 ret = true;
4930
4931 /* If caller has no option attributes, but callee does then it is not ok to
4932 inline. */
4933 else if (!caller_tree)
4934 ret = false;
4935
4936 else
4937 {
4938 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
4939 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
4940
4941 /* Callee's isa options should be a subset of the caller's, i.e. an SSE4
4942 function can inline an SSE2 function but an SSE2 function can't inline
4943 an SSE4 function. */
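      /* For instance (hypothetical declarations): a caller compiled with
	 "avx2" enabled may inline a callee marked
	 __attribute__ ((target ("sse2"))), since the callee's ISA bits are a
	 subset of the caller's, but not the other way around.  */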
4944 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
4945 != callee_opts->x_ix86_isa_flags)
4946 ret = false;
4947
4948 /* See if we have the same non-isa options. */
4949 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
4950 ret = false;
4951
4952 /* See if arch, tune, etc. are the same. */
4953 else if (caller_opts->arch != callee_opts->arch)
4954 ret = false;
4955
4956 else if (caller_opts->tune != callee_opts->tune)
4957 ret = false;
4958
4959 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
4960 ret = false;
4961
4962 else if (caller_opts->branch_cost != callee_opts->branch_cost)
4963 ret = false;
4964
4965 else
4966 ret = true;
4967 }
4968
4969 return ret;
4970 }
4971
4972 \f
4973 /* Remember the last target of ix86_set_current_function. */
4974 static GTY(()) tree ix86_previous_fndecl;
4975
4976 /* Invalidate ix86_previous_fndecl cache. */
4977 void
4978 ix86_reset_previous_fndecl (void)
4979 {
4980 ix86_previous_fndecl = NULL_TREE;
4981 }
4982
4983 /* Establish appropriate back-end context for processing the function
4984 FNDECL. The argument might be NULL to indicate processing at top
4985 level, outside of any function scope. */
4986 static void
4987 ix86_set_current_function (tree fndecl)
4988 {
4989 /* Only change the context if the function changes. This hook is called
4990 several times in the course of compiling a function, and we don't want to
4991 slow things down too much or call target_reinit when it isn't safe. */
4992 if (fndecl && fndecl != ix86_previous_fndecl)
4993 {
4994 tree old_tree = (ix86_previous_fndecl
4995 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
4996 : NULL_TREE);
4997
4998 tree new_tree = (fndecl
4999 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
5000 : NULL_TREE);
5001
5002 ix86_previous_fndecl = fndecl;
5003 if (old_tree == new_tree)
5004 ;
5005
5006 else if (new_tree)
5007 {
5008 cl_target_option_restore (&global_options,
5009 TREE_TARGET_OPTION (new_tree));
5010 if (TREE_TARGET_GLOBALS (new_tree))
5011 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5012 else
5013 TREE_TARGET_GLOBALS (new_tree)
5014 = save_target_globals_default_opts ();
5015 }
5016
5017 else if (old_tree)
5018 {
5019 new_tree = target_option_current_node;
5020 cl_target_option_restore (&global_options,
5021 TREE_TARGET_OPTION (new_tree));
5022 if (TREE_TARGET_GLOBALS (new_tree))
5023 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5024 else if (new_tree == target_option_default_node)
5025 restore_target_globals (&default_target_globals);
5026 else
5027 TREE_TARGET_GLOBALS (new_tree)
5028 = save_target_globals_default_opts ();
5029 }
5030 }
5031 }
5032
5033 \f
5034 /* Return true if this goes in large data/bss. */
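/* Illustrative sketch: with -mcmodel=medium -mlarge-data-threshold=65536,
   a (hypothetical) definition such as

     static char big_buffer[1 << 20];

   exceeds ix86_section_threshold and so goes into the large .ldata/.lbss
   sections, while smaller objects stay in the normal .data/.bss.  */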
5035
5036 static bool
5037 ix86_in_large_data_p (tree exp)
5038 {
5039 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
5040 return false;
5041
5042 /* Functions are never large data. */
5043 if (TREE_CODE (exp) == FUNCTION_DECL)
5044 return false;
5045
5046 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
5047 {
5048 const char *section = DECL_SECTION_NAME (exp);
5049 if (strcmp (section, ".ldata") == 0
5050 || strcmp (section, ".lbss") == 0)
5051 return true;
5052 return false;
5053 }
5054 else
5055 {
5056 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
5057
5058 /* If this is an incomplete type with size 0, then we can't put it
5059 in data because it might be too big when completed. Also,
5060 int_size_in_bytes returns -1 if the size can vary or is larger than
5061 an integer, in which case it is also safer to assume that it goes in
5062 large data. */
5063 if (size <= 0 || size > ix86_section_threshold)
5064 return true;
5065 }
5066
5067 return false;
5068 }
5069
5070 /* Switch to the appropriate section for output of DECL.
5071 DECL is either a `VAR_DECL' node or a constant of some sort.
5072 RELOC indicates whether forming the initial value of DECL requires
5073 link-time relocations. */
5074
5075 ATTRIBUTE_UNUSED static section *
5076 x86_64_elf_select_section (tree decl, int reloc,
5077 unsigned HOST_WIDE_INT align)
5078 {
5079 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5080 && ix86_in_large_data_p (decl))
5081 {
5082 const char *sname = NULL;
5083 unsigned int flags = SECTION_WRITE;
5084 switch (categorize_decl_for_section (decl, reloc))
5085 {
5086 case SECCAT_DATA:
5087 sname = ".ldata";
5088 break;
5089 case SECCAT_DATA_REL:
5090 sname = ".ldata.rel";
5091 break;
5092 case SECCAT_DATA_REL_LOCAL:
5093 sname = ".ldata.rel.local";
5094 break;
5095 case SECCAT_DATA_REL_RO:
5096 sname = ".ldata.rel.ro";
5097 break;
5098 case SECCAT_DATA_REL_RO_LOCAL:
5099 sname = ".ldata.rel.ro.local";
5100 break;
5101 case SECCAT_BSS:
5102 sname = ".lbss";
5103 flags |= SECTION_BSS;
5104 break;
5105 case SECCAT_RODATA:
5106 case SECCAT_RODATA_MERGE_STR:
5107 case SECCAT_RODATA_MERGE_STR_INIT:
5108 case SECCAT_RODATA_MERGE_CONST:
5109 sname = ".lrodata";
5110 flags = 0;
5111 break;
5112 case SECCAT_SRODATA:
5113 case SECCAT_SDATA:
5114 case SECCAT_SBSS:
5115 gcc_unreachable ();
5116 case SECCAT_TEXT:
5117 case SECCAT_TDATA:
5118 case SECCAT_TBSS:
5119 /* We don't split these for the medium model. Place them into
5120 default sections and hope for the best. */
5121 break;
5122 }
5123 if (sname)
5124 {
5125 /* We might get called with string constants, but get_named_section
5126 doesn't like them as they are not DECLs. Also, we need to set
5127 flags in that case. */
5128 if (!DECL_P (decl))
5129 return get_section (sname, flags, NULL);
5130 return get_named_section (decl, sname, reloc);
5131 }
5132 }
5133 return default_elf_select_section (decl, reloc, align);
5134 }
5135
5136 /* Select a set of attributes for section NAME based on the properties
5137 of DECL and whether or not RELOC indicates that DECL's initializer
5138 might contain runtime relocations. */
5139
5140 static unsigned int ATTRIBUTE_UNUSED
5141 x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
5142 {
5143 unsigned int flags = default_section_type_flags (decl, name, reloc);
5144
5145 if (decl == NULL_TREE
5146 && (strcmp (name, ".ldata.rel.ro") == 0
5147 || strcmp (name, ".ldata.rel.ro.local") == 0))
5148 flags |= SECTION_RELRO;
5149
5150 if (strcmp (name, ".lbss") == 0
5151 || strncmp (name, ".lbss.", 6) == 0
5152 || strncmp (name, ".gnu.linkonce.lb.", 17) == 0)
5153 flags |= SECTION_BSS;
5154
5155 return flags;
5156 }
5157
5158 /* Build up a unique section name, expressed as a
5159 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
5160 RELOC indicates whether the initial value of EXP requires
5161 link-time relocations. */
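/* A sketch of the resulting names (the variable name is hypothetical): for
   a medium-model variable "big_table" this yields section names such as
   ".ldata.big_table", or ".gnu.linkonce.ld.big_table" when one_only
   sections are needed.  */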
5162
5163 static void ATTRIBUTE_UNUSED
5164 x86_64_elf_unique_section (tree decl, int reloc)
5165 {
5166 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5167 && ix86_in_large_data_p (decl))
5168 {
5169 const char *prefix = NULL;
5170 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
5171 bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP;
5172
5173 switch (categorize_decl_for_section (decl, reloc))
5174 {
5175 case SECCAT_DATA:
5176 case SECCAT_DATA_REL:
5177 case SECCAT_DATA_REL_LOCAL:
5178 case SECCAT_DATA_REL_RO:
5179 case SECCAT_DATA_REL_RO_LOCAL:
5180 prefix = one_only ? ".ld" : ".ldata";
5181 break;
5182 case SECCAT_BSS:
5183 prefix = one_only ? ".lb" : ".lbss";
5184 break;
5185 case SECCAT_RODATA:
5186 case SECCAT_RODATA_MERGE_STR:
5187 case SECCAT_RODATA_MERGE_STR_INIT:
5188 case SECCAT_RODATA_MERGE_CONST:
5189 prefix = one_only ? ".lr" : ".lrodata";
5190 break;
5191 case SECCAT_SRODATA:
5192 case SECCAT_SDATA:
5193 case SECCAT_SBSS:
5194 gcc_unreachable ();
5195 case SECCAT_TEXT:
5196 case SECCAT_TDATA:
5197 case SECCAT_TBSS:
5198 /* We don't split these for the medium model. Place them into
5199 default sections and hope for the best. */
5200 break;
5201 }
5202 if (prefix)
5203 {
5204 const char *name, *linkonce;
5205 char *string;
5206
5207 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
5208 name = targetm.strip_name_encoding (name);
5209
5210 /* If we're using one_only, then there needs to be a .gnu.linkonce
5211 prefix to the section name. */
5212 linkonce = one_only ? ".gnu.linkonce" : "";
5213
5214 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
5215
5216 set_decl_section_name (decl, string);
5217 return;
5218 }
5219 }
5220 default_unique_section (decl, reloc);
5221 }
5222
5223 #ifdef COMMON_ASM_OP
5224 /* This says how to output assembler code to declare an
5225 uninitialized external linkage data object.
5226
5227 For medium model x86-64 we need to use the .largecomm directive for
5228 large objects. */
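/* Sketch of the emitted assembly (hypothetical symbol name and sizes):

     .largecomm	big_common,1048576,32

   versus the usual COMMON_ASM_OP (".comm") line for objects at or below
   the threshold.  */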
5229 void
5230 x86_elf_aligned_common (FILE *file,
5231 const char *name, unsigned HOST_WIDE_INT size,
5232 int align)
5233 {
5234 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5235 && size > (unsigned int)ix86_section_threshold)
5236 fputs (".largecomm\t", file);
5237 else
5238 fputs (COMMON_ASM_OP, file);
5239 assemble_name (file, name);
5240 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
5241 size, align / BITS_PER_UNIT);
5242 }
5243 #endif
5244
5245 /* Utility function for targets to use in implementing
5246 ASM_OUTPUT_ALIGNED_BSS. */
5247
5248 void
5249 x86_output_aligned_bss (FILE *file, tree decl, const char *name,
5250 unsigned HOST_WIDE_INT size, int align)
5251 {
5252 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5253 && size > (unsigned int)ix86_section_threshold)
5254 switch_to_section (get_named_section (decl, ".lbss", 0));
5255 else
5256 switch_to_section (bss_section);
5257 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
5258 #ifdef ASM_DECLARE_OBJECT_NAME
5259 last_assemble_variable_decl = decl;
5260 ASM_DECLARE_OBJECT_NAME (file, name, decl);
5261 #else
5262 /* Standard thing is just output label for the object. */
5263 ASM_OUTPUT_LABEL (file, name);
5264 #endif /* ASM_DECLARE_OBJECT_NAME */
5265 ASM_OUTPUT_SKIP (file, size ? size : 1);
5266 }
5267 \f
5268 /* Decide whether we must probe the stack before any space allocation
5269 on this target. It's essentially TARGET_STACK_PROBE except when
5270 -fstack-check causes the stack to be already probed differently. */
5271
5272 bool
5273 ix86_target_stack_probe (void)
5274 {
5275 /* Do not probe the stack twice if static stack checking is enabled. */
5276 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5277 return false;
5278
5279 return TARGET_STACK_PROBE;
5280 }
5281 \f
5282 /* Decide whether we can make a sibling call to a function. DECL is the
5283 declaration of the function being targeted by the call and EXP is the
5284 CALL_EXPR representing the call. */
5285
5286 static bool
5287 ix86_function_ok_for_sibcall (tree decl, tree exp)
5288 {
5289 tree type, decl_or_type;
5290 rtx a, b;
5291
5292 /* If we are generating position-independent code, we cannot sibcall
5293 optimize any indirect call, or a direct call to a global function,
5294 as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
5295 if (!TARGET_MACHO
5296 && !TARGET_64BIT
5297 && flag_pic
5298 && (!decl || !targetm.binds_local_p (decl)))
5299 return false;
5300
5301 /* If we need to align the outgoing stack, then sibcalling would
5302 unalign the stack, which may break the called function. */
5303 if (ix86_minimum_incoming_stack_boundary (true)
5304 < PREFERRED_STACK_BOUNDARY)
5305 return false;
5306
5307 if (decl)
5308 {
5309 decl_or_type = decl;
5310 type = TREE_TYPE (decl);
5311 }
5312 else
5313 {
5314 /* We're looking at the CALL_EXPR, we need the type of the function. */
5315 type = CALL_EXPR_FN (exp); /* pointer expression */
5316 type = TREE_TYPE (type); /* pointer type */
5317 type = TREE_TYPE (type); /* function type */
5318 decl_or_type = type;
5319 }
5320
5321 /* Check that the return value locations are the same. For example,
5322 if we are returning floats on the 80387 register stack, we cannot
5323 make a sibcall from a function that doesn't return a float to a
5324 function that does or, conversely, from a function that does return
5325 a float to a function that doesn't; the necessary stack adjustment
5326 would not be executed. This is also the place we notice
5327 differences in the return value ABI. Note that it is ok for one
5328 of the functions to have void return type as long as the return
5329 value of the other is passed in a register. */
5330 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
5331 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5332 cfun->decl, false);
5333 if (STACK_REG_P (a) || STACK_REG_P (b))
5334 {
5335 if (!rtx_equal_p (a, b))
5336 return false;
5337 }
5338 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5339 ;
5340 else if (!rtx_equal_p (a, b))
5341 return false;
5342
5343 if (TARGET_64BIT)
5344 {
5345 /* The SYSV ABI has more call-clobbered registers;
5346 disallow sibcalls from MS to SYSV. */
5347 if (cfun->machine->call_abi == MS_ABI
5348 && ix86_function_type_abi (type) == SYSV_ABI)
5349 return false;
5350 }
5351 else
5352 {
5353 /* If this call is indirect, we'll need to be able to use a
5354 call-clobbered register for the address of the target function.
5355 Make sure that all such registers are not used for passing
5356 parameters. Note that DLLIMPORT functions are indirect. */
5357 if (!decl
5358 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
5359 {
5360 if (ix86_function_regparm (type, NULL) >= 3)
5361 {
5362 /* ??? Need to count the actual number of registers to be used,
5363 not the possible number of registers. Fix later. */
5364 return false;
5365 }
5366 }
5367 }
5368
5369 /* Otherwise okay. That also includes certain types of indirect calls. */
5370 return true;
5371 }
5372
5373 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
5374 and "sseregparm" calling convention attributes;
5375 arguments as in struct attribute_spec.handler. */
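/* Hedged usage sketch (hypothetical prototypes) of the attributes handled
   here:

     int __attribute__ ((fastcall)) f1 (int, int);       first args in %ecx/%edx
     int __attribute__ ((regparm (3))) f2 (int, int, int);
     int __attribute__ ((stdcall)) f3 (int);             callee pops its args

   Incompatible combinations, e.g. fastcall together with regparm, are
   diagnosed below.  */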
5376
5377 static tree
5378 ix86_handle_cconv_attribute (tree *node, tree name,
5379 tree args,
5380 int,
5381 bool *no_add_attrs)
5382 {
5383 if (TREE_CODE (*node) != FUNCTION_TYPE
5384 && TREE_CODE (*node) != METHOD_TYPE
5385 && TREE_CODE (*node) != FIELD_DECL
5386 && TREE_CODE (*node) != TYPE_DECL)
5387 {
5388 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5389 name);
5390 *no_add_attrs = true;
5391 return NULL_TREE;
5392 }
5393
5394 /* Can combine regparm with all attributes but fastcall and thiscall. */
5395 if (is_attribute_p ("regparm", name))
5396 {
5397 tree cst;
5398
5399 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5400 {
5401 error ("fastcall and regparm attributes are not compatible");
5402 }
5403
5404 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5405 {
5406 error ("regparam and thiscall attributes are not compatible");
5407 }
5408
5409 cst = TREE_VALUE (args);
5410 if (TREE_CODE (cst) != INTEGER_CST)
5411 {
5412 warning (OPT_Wattributes,
5413 "%qE attribute requires an integer constant argument",
5414 name);
5415 *no_add_attrs = true;
5416 }
5417 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
5418 {
5419 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
5420 name, REGPARM_MAX);
5421 *no_add_attrs = true;
5422 }
5423
5424 return NULL_TREE;
5425 }
5426
5427 if (TARGET_64BIT)
5428 {
5429 /* Do not warn when emulating the MS ABI. */
5430 if ((TREE_CODE (*node) != FUNCTION_TYPE
5431 && TREE_CODE (*node) != METHOD_TYPE)
5432 || ix86_function_type_abi (*node) != MS_ABI)
5433 warning (OPT_Wattributes, "%qE attribute ignored",
5434 name);
5435 *no_add_attrs = true;
5436 return NULL_TREE;
5437 }
5438
5439 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
5440 if (is_attribute_p ("fastcall", name))
5441 {
5442 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5443 {
5444 error ("fastcall and cdecl attributes are not compatible");
5445 }
5446 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5447 {
5448 error ("fastcall and stdcall attributes are not compatible");
5449 }
5450 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
5451 {
5452 error ("fastcall and regparm attributes are not compatible");
5453 }
5454 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5455 {
5456 error ("fastcall and thiscall attributes are not compatible");
5457 }
5458 }
5459
5460 /* Can combine stdcall with fastcall (redundant), regparm and
5461 sseregparm. */
5462 else if (is_attribute_p ("stdcall", name))
5463 {
5464 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5465 {
5466 error ("stdcall and cdecl attributes are not compatible");
5467 }
5468 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5469 {
5470 error ("stdcall and fastcall attributes are not compatible");
5471 }
5472 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5473 {
5474 error ("stdcall and thiscall attributes are not compatible");
5475 }
5476 }
5477
5478 /* Can combine cdecl with regparm and sseregparm. */
5479 else if (is_attribute_p ("cdecl", name))
5480 {
5481 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5482 {
5483 error ("stdcall and cdecl attributes are not compatible");
5484 }
5485 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5486 {
5487 error ("fastcall and cdecl attributes are not compatible");
5488 }
5489 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5490 {
5491 error ("cdecl and thiscall attributes are not compatible");
5492 }
5493 }
5494 else if (is_attribute_p ("thiscall", name))
5495 {
5496 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
5497 warning (OPT_Wattributes, "%qE attribute is used for non-class method",
5498 name);
5499 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5500 {
5501 error ("stdcall and thiscall attributes are not compatible");
5502 }
5503 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5504 {
5505 error ("fastcall and thiscall attributes are not compatible");
5506 }
5507 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5508 {
5509 error ("cdecl and thiscall attributes are not compatible");
5510 }
5511 }
5512
5513 /* Can combine sseregparm with all attributes. */
5514
5515 return NULL_TREE;
5516 }
5517
5518 /* The transactional memory builtins are implicitly regparm or fastcall
5519 depending on the ABI. Override the generic do-nothing attribute that
5520 these builtins were declared with, and replace it with one of the two
5521 attributes that we expect elsewhere. */
5522
5523 static tree
5524 ix86_handle_tm_regparm_attribute (tree *node, tree, tree,
5525 int flags, bool *no_add_attrs)
5526 {
5527 tree alt;
5528
5529 /* In no case do we want to add the placeholder attribute. */
5530 *no_add_attrs = true;
5531
5532 /* The 64-bit ABI is unchanged for transactional memory. */
5533 if (TARGET_64BIT)
5534 return NULL_TREE;
5535
5536 /* ??? Is there a better way to validate 32-bit windows? We have
5537 cfun->machine->call_abi, but that seems to be set only for 64-bit. */
5538 if (CHECK_STACK_LIMIT > 0)
5539 alt = tree_cons (get_identifier ("fastcall"), NULL, NULL);
5540 else
5541 {
5542 alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL);
5543 alt = tree_cons (get_identifier ("regparm"), alt, NULL);
5544 }
5545 decl_attributes (node, alt, flags);
5546
5547 return NULL_TREE;
5548 }
5549
5550 /* This function determines from TYPE the calling-convention. */
5551
5552 unsigned int
5553 ix86_get_callcvt (const_tree type)
5554 {
5555 unsigned int ret = 0;
5556 bool is_stdarg;
5557 tree attrs;
5558
5559 if (TARGET_64BIT)
5560 return IX86_CALLCVT_CDECL;
5561
5562 attrs = TYPE_ATTRIBUTES (type);
5563 if (attrs != NULL_TREE)
5564 {
5565 if (lookup_attribute ("cdecl", attrs))
5566 ret |= IX86_CALLCVT_CDECL;
5567 else if (lookup_attribute ("stdcall", attrs))
5568 ret |= IX86_CALLCVT_STDCALL;
5569 else if (lookup_attribute ("fastcall", attrs))
5570 ret |= IX86_CALLCVT_FASTCALL;
5571 else if (lookup_attribute ("thiscall", attrs))
5572 ret |= IX86_CALLCVT_THISCALL;
5573
5574 /* Regparm isn't allowed for thiscall and fastcall. */
5575 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
5576 {
5577 if (lookup_attribute ("regparm", attrs))
5578 ret |= IX86_CALLCVT_REGPARM;
5579 if (lookup_attribute ("sseregparm", attrs))
5580 ret |= IX86_CALLCVT_SSEREGPARM;
5581 }
5582
5583 if (IX86_BASE_CALLCVT(ret) != 0)
5584 return ret;
5585 }
5586
5587 is_stdarg = stdarg_p (type);
5588 if (TARGET_RTD && !is_stdarg)
5589 return IX86_CALLCVT_STDCALL | ret;
5590
5591 if (ret != 0
5592 || is_stdarg
5593 || TREE_CODE (type) != METHOD_TYPE
5594 || ix86_function_type_abi (type) != MS_ABI)
5595 return IX86_CALLCVT_CDECL | ret;
5596
5597 return IX86_CALLCVT_THISCALL;
5598 }
5599
5600 /* Return 0 if the attributes for two types are incompatible, 1 if they
5601 are compatible, and 2 if they are nearly compatible (which causes a
5602 warning to be generated). */
5603
5604 static int
5605 ix86_comp_type_attributes (const_tree type1, const_tree type2)
5606 {
5607 unsigned int ccvt1, ccvt2;
5608
5609 if (TREE_CODE (type1) != FUNCTION_TYPE
5610 && TREE_CODE (type1) != METHOD_TYPE)
5611 return 1;
5612
5613 ccvt1 = ix86_get_callcvt (type1);
5614 ccvt2 = ix86_get_callcvt (type2);
5615 if (ccvt1 != ccvt2)
5616 return 0;
5617 if (ix86_function_regparm (type1, NULL)
5618 != ix86_function_regparm (type2, NULL))
5619 return 0;
5620
5621 return 1;
5622 }
5623 \f
5624 /* Return the regparm value for a function with the indicated TYPE and DECL.
5625 DECL may be NULL when calling function indirectly
5626 or considering a libcall. */
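/* For example (hypothetical prototype), for

     int __attribute__ ((regparm (3))) f (int a, int b, int c);

   this returns 3, so A, B and C are passed in %eax, %edx and %ecx instead
   of on the stack.  */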
5627
5628 static int
5629 ix86_function_regparm (const_tree type, const_tree decl)
5630 {
5631 tree attr;
5632 int regparm;
5633 unsigned int ccvt;
5634
5635 if (TARGET_64BIT)
5636 return (ix86_function_type_abi (type) == SYSV_ABI
5637 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
5638 ccvt = ix86_get_callcvt (type);
5639 regparm = ix86_regparm;
5640
5641 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
5642 {
5643 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
5644 if (attr)
5645 {
5646 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
5647 return regparm;
5648 }
5649 }
5650 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5651 return 2;
5652 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5653 return 1;
5654
5655 /* Use register calling convention for local functions when possible. */
5656 if (decl
5657 && TREE_CODE (decl) == FUNCTION_DECL
5658 /* Caller and callee must agree on the calling convention, so
5659 checking just "optimize" here would mean that with
5660 __attribute__((optimize (...))) the caller could use the regparm
5661 convention and the callee not, or vice versa. Instead look at whether the callee
5662 is optimized or not. */
5663 && opt_for_fn (decl, optimize)
5664 && !(profile_flag && !flag_fentry))
5665 {
5666 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5667 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
5668 if (i && i->local && i->can_change_signature)
5669 {
5670 int local_regparm, globals = 0, regno;
5671
5672 /* Make sure no regparm register is taken by a
5673 fixed register variable. */
5674 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
5675 if (fixed_regs[local_regparm])
5676 break;
5677
5678 /* We don't want to use regparm(3) for nested functions as
5679 these use a static chain pointer in the third argument. */
5680 if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
5681 local_regparm = 2;
5682
5683 /* In 32-bit mode save a register for the split stack. */
5684 if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
5685 local_regparm = 2;
5686
5687 /* Each fixed register usage increases register pressure,
5688 so fewer registers should be used for argument passing.
5689 This functionality can be overridden by an explicit
5690 regparm value. */
5691 for (regno = AX_REG; regno <= DI_REG; regno++)
5692 if (fixed_regs[regno])
5693 globals++;
5694
5695 local_regparm
5696 = globals < local_regparm ? local_regparm - globals : 0;
5697
5698 if (local_regparm > regparm)
5699 regparm = local_regparm;
5700 }
5701 }
5702
5703 return regparm;
5704 }
5705
5706 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5707 DFmode (2) arguments in SSE registers for a function with the
5708 indicated TYPE and DECL. DECL may be NULL when calling function
5709 indirectly or considering a libcall. Otherwise return 0. */
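/* Sketch (hypothetical prototype): with

     double __attribute__ ((sseregparm)) f (float x, double y);

   and SSE2 enabled this returns 2, so X and Y are passed in %xmm registers
   rather than on the stack.  */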
5710
5711 static int
5712 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
5713 {
5714 gcc_assert (!TARGET_64BIT);
5715
5716 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5717 by the sseregparm attribute. */
5718 if (TARGET_SSEREGPARM
5719 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
5720 {
5721 if (!TARGET_SSE)
5722 {
5723 if (warn)
5724 {
5725 if (decl)
5726 error ("calling %qD with attribute sseregparm without "
5727 "SSE/SSE2 enabled", decl);
5728 else
5729 error ("calling %qT with attribute sseregparm without "
5730 "SSE/SSE2 enabled", type);
5731 }
5732 return 0;
5733 }
5734
5735 return 2;
5736 }
5737
5738 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5739 (and DFmode for SSE2) arguments in SSE registers. */
5740 if (decl && TARGET_SSE_MATH && optimize
5741 && !(profile_flag && !flag_fentry))
5742 {
5743 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5744 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
5745 if (i && i->local && i->can_change_signature)
5746 return TARGET_SSE2 ? 2 : 1;
5747 }
5748
5749 return 0;
5750 }
5751
5752 /* Return true if EAX is live at the start of the function. Used by
5753 ix86_expand_prologue to determine if we need special help before
5754 calling allocate_stack_worker. */
5755
5756 static bool
5757 ix86_eax_live_at_start_p (void)
5758 {
5759 /* Cheat. Don't bother working forward from ix86_function_regparm
5760 to the function type to whether an actual argument is located in
5761 eax. Instead just look at cfg info, which is still close enough
5762 to correct at this point. This gives false positives for broken
5763 functions that might use uninitialized data that happens to be
5764 allocated in eax, but who cares? */
5765 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
5766 }
5767
5768 static bool
5769 ix86_keep_aggregate_return_pointer (tree fntype)
5770 {
5771 tree attr;
5772
5773 if (!TARGET_64BIT)
5774 {
5775 attr = lookup_attribute ("callee_pop_aggregate_return",
5776 TYPE_ATTRIBUTES (fntype));
5777 if (attr)
5778 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
5779
5780 /* For 32-bit MS-ABI the default is to keep aggregate
5781 return pointer. */
5782 if (ix86_function_type_abi (fntype) == MS_ABI)
5783 return true;
5784 }
5785 return KEEP_AGGREGATE_RETURN_POINTER != 0;
5786 }
5787
5788 /* Value is the number of bytes of arguments automatically
5789 popped when returning from a subroutine call.
5790 FUNDECL is the declaration node of the function (as a tree),
5791 FUNTYPE is the data type of the function (as a tree),
5792 or for a library call it is an identifier node for the subroutine name.
5793 SIZE is the number of bytes of arguments passed on the stack.
5794
5795 On the 80386, the RTD insn may be used to pop them if the number
5796 of args is fixed, but if the number is variable then the caller
5797 must pop them all. RTD can't be used for library calls now
5798 because the library is compiled with the Unix compiler.
5799 Use of RTD is a selectable option, since it is incompatible with
5800 standard Unix calling sequences. If the option is not selected,
5801 the caller must always pop the args.
5802
5803 The attribute stdcall is equivalent to RTD on a per module basis. */
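/* For example (hypothetical prototype), a 32-bit function declared

     void __attribute__ ((stdcall)) f (int, int, int);

   has SIZE == 12 here, so it returns with "ret $12" and the caller does
   not pop the arguments itself.  */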
5804
5805 static int
5806 ix86_return_pops_args (tree fundecl, tree funtype, int size)
5807 {
5808 unsigned int ccvt;
5809
5810 /* None of the 64-bit ABIs pop arguments. */
5811 if (TARGET_64BIT)
5812 return 0;
5813
5814 ccvt = ix86_get_callcvt (funtype);
5815
5816 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
5817 | IX86_CALLCVT_THISCALL)) != 0
5818 && ! stdarg_p (funtype))
5819 return size;
5820
5821 /* Lose any fake structure return argument if it is passed on the stack. */
5822 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
5823 && !ix86_keep_aggregate_return_pointer (funtype))
5824 {
5825 int nregs = ix86_function_regparm (funtype, fundecl);
5826 if (nregs == 0)
5827 return GET_MODE_SIZE (Pmode);
5828 }
5829
5830 return 0;
5831 }
5832
5833 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
5834
5835 static bool
5836 ix86_legitimate_combined_insn (rtx insn)
5837 {
5838 /* Check operand constraints in case hard registers were propagated
5839 into insn pattern. This check prevents combine pass from
5840 generating insn patterns with invalid hard register operands.
5841 These invalid insns can eventually confuse reload to error out
5842 with a spill failure. See also PRs 46829 and 46843. */
5843 if ((INSN_CODE (insn) = recog (PATTERN (insn), insn, 0)) >= 0)
5844 {
5845 int i;
5846
5847 extract_insn (insn);
5848 preprocess_constraints (insn);
5849
5850 int n_operands = recog_data.n_operands;
5851 int n_alternatives = recog_data.n_alternatives;
5852 for (i = 0; i < n_operands; i++)
5853 {
5854 rtx op = recog_data.operand[i];
5855 enum machine_mode mode = GET_MODE (op);
5856 const operand_alternative *op_alt;
5857 int offset = 0;
5858 bool win;
5859 int j;
5860
5861 /* For pre-AVX disallow unaligned loads/stores where the
5862 instructions don't support it. */
5863 if (!TARGET_AVX
5864 && VECTOR_MODE_P (GET_MODE (op))
5865 && misaligned_operand (op, GET_MODE (op)))
5866 {
5867 int min_align = get_attr_ssememalign (insn);
5868 if (min_align == 0)
5869 return false;
5870 }
5871
5872 /* A unary operator may be accepted by the predicate, but it
5873 is irrelevant for matching constraints. */
5874 if (UNARY_P (op))
5875 op = XEXP (op, 0);
5876
5877 if (GET_CODE (op) == SUBREG)
5878 {
5879 if (REG_P (SUBREG_REG (op))
5880 && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
5881 offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
5882 GET_MODE (SUBREG_REG (op)),
5883 SUBREG_BYTE (op),
5884 GET_MODE (op));
5885 op = SUBREG_REG (op);
5886 }
5887
5888 if (!(REG_P (op) && HARD_REGISTER_P (op)))
5889 continue;
5890
5891 op_alt = recog_op_alt;
5892
5893 /* Operand has no constraints, anything is OK. */
5894 win = !n_alternatives;
5895
5896 alternative_mask enabled = recog_data.enabled_alternatives;
5897 for (j = 0; j < n_alternatives; j++, op_alt += n_operands)
5898 {
5899 if (!TEST_BIT (enabled, j))
5900 continue;
5901 if (op_alt[i].anything_ok
5902 || (op_alt[i].matches != -1
5903 && operands_match_p
5904 (recog_data.operand[i],
5905 recog_data.operand[op_alt[i].matches]))
5906 || reg_fits_class_p (op, op_alt[i].cl, offset, mode))
5907 {
5908 win = true;
5909 break;
5910 }
5911 }
5912
5913 if (!win)
5914 return false;
5915 }
5916 }
5917
5918 return true;
5919 }
5920 \f
5921 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
5922
5923 static unsigned HOST_WIDE_INT
5924 ix86_asan_shadow_offset (void)
5925 {
5926 return TARGET_LP64 ? (TARGET_MACHO ? (HOST_WIDE_INT_1 << 44)
5927 : HOST_WIDE_INT_C (0x7fff8000))
5928 : (HOST_WIDE_INT_1 << 29);
5929 }
5930 \f
5931 /* Argument support functions. */
5932
5933 /* Return true when register may be used to pass function parameters. */
5934 bool
5935 ix86_function_arg_regno_p (int regno)
5936 {
5937 int i;
5938 const int *parm_regs;
5939
5940 if (!TARGET_64BIT)
5941 {
5942 if (TARGET_MACHO)
5943 return (regno < REGPARM_MAX
5944 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
5945 else
5946 return (regno < REGPARM_MAX
5947 || (TARGET_MMX && MMX_REGNO_P (regno)
5948 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
5949 || (TARGET_SSE && SSE_REGNO_P (regno)
5950 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
5951 }
5952
5953 if (TARGET_SSE && SSE_REGNO_P (regno)
5954 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
5955 return true;
5956
5957 /* TODO: The function should depend on current function ABI but
5958 builtins.c would need updating then. Therefore we use the
5959 default ABI. */
5960
5961 /* RAX is used as hidden argument to va_arg functions. */
5962 if (ix86_abi == SYSV_ABI && regno == AX_REG)
5963 return true;
5964
5965 if (ix86_abi == MS_ABI)
5966 parm_regs = x86_64_ms_abi_int_parameter_registers;
5967 else
5968 parm_regs = x86_64_int_parameter_registers;
5969 for (i = 0; i < (ix86_abi == MS_ABI
5970 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
5971 if (regno == parm_regs[i])
5972 return true;
5973 return false;
5974 }
5975
5976 /* Return true if we do not know how to pass TYPE solely in registers. */
5977
5978 static bool
5979 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
5980 {
5981 if (must_pass_in_stack_var_size_or_pad (mode, type))
5982 return true;
5983
5984 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
5985 The layout_type routine is crafty and tries to trick us into passing
5986 currently unsupported vector types on the stack by using TImode. */
5987 return (!TARGET_64BIT && mode == TImode
5988 && type && TREE_CODE (type) != VECTOR_TYPE);
5989 }
5990
5991 /* Return the size, in bytes, of the area reserved for arguments passed
5992 in registers for the function represented by FNDECL, depending on the
5993 ABI format used. */
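/* For instance, for a (hypothetical) function compiled for the 64-bit MS
   ABI this returns 32: the caller reserves a 32-byte "home" area for the
   four register arguments (%rcx, %rdx, %r8, %r9); for the SYSV ABI it
   returns 0.  */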
5994 int
5995 ix86_reg_parm_stack_space (const_tree fndecl)
5996 {
5997 enum calling_abi call_abi = SYSV_ABI;
5998 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
5999 call_abi = ix86_function_abi (fndecl);
6000 else
6001 call_abi = ix86_function_type_abi (fndecl);
6002 if (TARGET_64BIT && call_abi == MS_ABI)
6003 return 32;
6004 return 0;
6005 }
6006
6007 /* Return SYSV_ABI or MS_ABI, depending on FNTYPE, specifying the
6008 call ABI used. */
6009 enum calling_abi
6010 ix86_function_type_abi (const_tree fntype)
6011 {
6012 if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
6013 {
6014 enum calling_abi abi = ix86_abi;
6015 if (abi == SYSV_ABI)
6016 {
6017 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
6018 abi = MS_ABI;
6019 }
6020 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
6021 abi = SYSV_ABI;
6022 return abi;
6023 }
6024 return ix86_abi;
6025 }
6026
6027 /* We add this as a workaround in order to use libc_has_function
6028 hook in i386.md. */
6029 bool
6030 ix86_libc_has_function (enum function_class fn_class)
6031 {
6032 return targetm.libc_has_function (fn_class);
6033 }
6034
6035 static bool
6036 ix86_function_ms_hook_prologue (const_tree fn)
6037 {
6038 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
6039 {
6040 if (decl_function_context (fn) != NULL_TREE)
6041 error_at (DECL_SOURCE_LOCATION (fn),
6042 "ms_hook_prologue is not compatible with nested function");
6043 else
6044 return true;
6045 }
6046 return false;
6047 }
6048
6049 static enum calling_abi
6050 ix86_function_abi (const_tree fndecl)
6051 {
6052 if (! fndecl)
6053 return ix86_abi;
6054 return ix86_function_type_abi (TREE_TYPE (fndecl));
6055 }
6056
6057 /* Return SYSV_ABI or MS_ABI, depending on cfun, specifying the
6058 call ABI used. */
6059 enum calling_abi
6060 ix86_cfun_abi (void)
6061 {
6062 if (! cfun)
6063 return ix86_abi;
6064 return cfun->machine->call_abi;
6065 }
6066
6067 /* Write the extra assembler code needed to declare a function properly. */
6068
6069 void
6070 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
6071 tree decl)
6072 {
6073 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
6074
6075 if (is_ms_hook)
6076 {
6077 int i, filler_count = (TARGET_64BIT ? 32 : 16);
6078 unsigned int filler_cc = 0xcccccccc;
6079
6080 for (i = 0; i < filler_count; i += 4)
6081 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
6082 }
6083
6084 #ifdef SUBTARGET_ASM_UNWIND_INIT
6085 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
6086 #endif
6087
6088 ASM_OUTPUT_LABEL (asm_out_file, fname);
6089
6090 /* Output magic byte marker, if hot-patch attribute is set. */
6091 if (is_ms_hook)
6092 {
6093 if (TARGET_64BIT)
6094 {
6095 /* leaq [%rsp + 0], %rsp */
6096 asm_fprintf (asm_out_file, ASM_BYTE
6097 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
6098 }
6099 else
6100 {
6101 /* movl.s %edi, %edi
6102 push %ebp
6103 movl.s %esp, %ebp */
6104 asm_fprintf (asm_out_file, ASM_BYTE
6105 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
6106 }
6107 }
6108 }
6109
6110 /* regclass.c */
6111 extern void init_regs (void);
6112
6113 /* Implementation of the call ABI switching target hook. The call
6114 register sets specific to FNDECL are set up here. See also
6115 ix86_conditional_register_usage for more details. */
6116 void
6117 ix86_call_abi_override (const_tree fndecl)
6118 {
6119 if (fndecl == NULL_TREE)
6120 cfun->machine->call_abi = ix86_abi;
6121 else
6122 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
6123 }
6124
6125 /* The 64-bit MS and SYSV ABIs have different sets of call-used registers. Avoid
6126 expensive re-initialization of init_regs each time we switch function context
6127 since this is needed only during RTL expansion. */
6128 static void
6129 ix86_maybe_switch_abi (void)
6130 {
6131 if (TARGET_64BIT &&
6132 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
6133 reinit_regs ();
6134 }
6135
6136 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6137 for a call to a function whose data type is FNTYPE.
6138 For a library call, FNTYPE is 0. */
6139
6140 void
6141 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
6142 tree fntype, /* tree ptr for function decl */
6143 rtx libname, /* SYMBOL_REF of library name or 0 */
6144 tree fndecl,
6145 int caller)
6146 {
6147 struct cgraph_local_info *i;
6148
6149 memset (cum, 0, sizeof (*cum));
6150
6151 if (fndecl)
6152 {
6153 i = cgraph_local_info (fndecl);
6154 cum->call_abi = ix86_function_abi (fndecl);
6155 }
6156 else
6157 {
6158 i = NULL;
6159 cum->call_abi = ix86_function_type_abi (fntype);
6160 }
6161
6162 cum->caller = caller;
6163
6164 /* Set up the number of registers to use for passing arguments. */
6165 cum->nregs = ix86_regparm;
6166 if (TARGET_64BIT)
6167 {
6168 cum->nregs = (cum->call_abi == SYSV_ABI
6169 ? X86_64_REGPARM_MAX
6170 : X86_64_MS_REGPARM_MAX);
6171 }
6172 if (TARGET_SSE)
6173 {
6174 cum->sse_nregs = SSE_REGPARM_MAX;
6175 if (TARGET_64BIT)
6176 {
6177 cum->sse_nregs = (cum->call_abi == SYSV_ABI
6178 ? X86_64_SSE_REGPARM_MAX
6179 : X86_64_MS_SSE_REGPARM_MAX);
6180 }
6181 }
6182 if (TARGET_MMX)
6183 cum->mmx_nregs = MMX_REGPARM_MAX;
6184 cum->warn_avx512f = true;
6185 cum->warn_avx = true;
6186 cum->warn_sse = true;
6187 cum->warn_mmx = true;
6188
6189 /* Because the type might mismatch between caller and callee, we need to
6190 use the actual type of the function for local calls.
6191 FIXME: cgraph_analyze can be told to actually record if a function uses
6192 va_start, so for local functions maybe_vaarg can be made more aggressive,
6193 helping K&R code.
6194 FIXME: once the type system is fixed, we won't need this code anymore. */
6195 if (i && i->local && i->can_change_signature)
6196 fntype = TREE_TYPE (fndecl);
6197 cum->maybe_vaarg = (fntype
6198 ? (!prototype_p (fntype) || stdarg_p (fntype))
6199 : !libname);
6200
6201 if (!TARGET_64BIT)
6202 {
6203 /* If there are variable arguments, then we won't pass anything
6204 in registers in 32-bit mode. */
6205 if (stdarg_p (fntype))
6206 {
6207 cum->nregs = 0;
6208 cum->sse_nregs = 0;
6209 cum->mmx_nregs = 0;
6210 cum->warn_avx512f = false;
6211 cum->warn_avx = false;
6212 cum->warn_sse = false;
6213 cum->warn_mmx = false;
6214 return;
6215 }
6216
6217 /* Use ecx and edx registers if function has fastcall attribute,
6218 else look for regparm information. */
6219 if (fntype)
6220 {
6221 unsigned int ccvt = ix86_get_callcvt (fntype);
6222 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
6223 {
6224 cum->nregs = 1;
6225 cum->fastcall = 1; /* Same first register as in fastcall. */
6226 }
6227 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
6228 {
6229 cum->nregs = 2;
6230 cum->fastcall = 1;
6231 }
6232 else
6233 cum->nregs = ix86_function_regparm (fntype, fndecl);
6234 }
6235
6236 /* Set up the number of SSE registers used for passing SFmode
6237 and DFmode arguments. Warn for mismatching ABI. */
6238 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
6239 }
6240 }
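/* As an informal illustration of the 32-bit logic above (the declaration
   below is a hypothetical example):

     void __attribute__ ((fastcall)) f (int a, int b, int c);

   ix86_get_callcvt reports IX86_CALLCVT_FASTCALL, so cum->nregs is set to 2
   and cum->fastcall to 1, meaning A and B travel in ECX and EDX while C goes
   on the stack.  A thiscall function instead gets cum->nregs == 1, so only
   the first (this pointer) argument uses ECX.  */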
6241
6242 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
6243 But in the case of vector types, it is some vector mode.
6244
6245 When we have only some of our vector isa extensions enabled, then there
6246 are some modes for which vector_mode_supported_p is false. For these
6247 modes, the generic vector support in gcc will choose some non-vector mode
6248 in order to implement the type. By computing the natural mode, we'll
6249 select the proper ABI location for the operand and not depend on whatever
6250 the middle-end decides to do with these vector types.
6251
6252 The middle-end can't deal with vector types larger than 16 bytes.  In
6253 this case, we return the original mode and warn about the ABI change if
6254 CUM isn't NULL.
6255 
6256 If IN_RETURN is true, warn about the ABI change if the vector mode isn't
6257 available for the function return value. */
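/* As an informal illustration (the typedef below is a hypothetical
   example): given

     typedef float v8sf __attribute__ ((vector_size (32)));

   compiled without AVX, TYPE_MODE is not a vector mode, so the loop below
   recomputes V8SFmode as the natural mode; since TARGET_AVX is off, a
   one-time -Wpsabi warning is emitted and the original TYPE_MODE is
   returned instead.  */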
6258
6259 static enum machine_mode
6260 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
6261 bool in_return)
6262 {
6263 enum machine_mode mode = TYPE_MODE (type);
6264
6265 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
6266 {
6267 HOST_WIDE_INT size = int_size_in_bytes (type);
6268 if ((size == 8 || size == 16 || size == 32 || size == 64)
6269 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
6270 && TYPE_VECTOR_SUBPARTS (type) > 1)
6271 {
6272 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
6273
6274 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6275 mode = MIN_MODE_VECTOR_FLOAT;
6276 else
6277 mode = MIN_MODE_VECTOR_INT;
6278
6279 /* Get the mode which has this inner mode and number of units. */
6280 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
6281 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
6282 && GET_MODE_INNER (mode) == innermode)
6283 {
6284 if (size == 64 && !TARGET_AVX512F)
6285 {
6286 static bool warnedavx512f;
6287 static bool warnedavx512f_ret;
6288
6289 if (cum && cum->warn_avx512f && !warnedavx512f)
6290 {
6291 if (warning (OPT_Wpsabi, "AVX512F vector argument "
6292 "without AVX512F enabled changes the ABI"))
6293 warnedavx512f = true;
6294 }
6295 else if (in_return && !warnedavx512f_ret)
6296 {
6297 if (warning (OPT_Wpsabi, "AVX512F vector return "
6298 "without AVX512F enabled changes the ABI"))
6299 warnedavx512f_ret = true;
6300 }
6301
6302 return TYPE_MODE (type);
6303 }
6304 else if (size == 32 && !TARGET_AVX)
6305 {
6306 static bool warnedavx;
6307 static bool warnedavx_ret;
6308
6309 if (cum && cum->warn_avx && !warnedavx)
6310 {
6311 if (warning (OPT_Wpsabi, "AVX vector argument "
6312 "without AVX enabled changes the ABI"))
6313 warnedavx = true;
6314 }
6315 else if (in_return && !warnedavx_ret)
6316 {
6317 if (warning (OPT_Wpsabi, "AVX vector return "
6318 "without AVX enabled changes the ABI"))
6319 warnedavx_ret = true;
6320 }
6321
6322 return TYPE_MODE (type);
6323 }
6324 else if (((size == 8 && TARGET_64BIT) || size == 16)
6325 && !TARGET_SSE)
6326 {
6327 static bool warnedsse;
6328 static bool warnedsse_ret;
6329
6330 if (cum && cum->warn_sse && !warnedsse)
6331 {
6332 if (warning (OPT_Wpsabi, "SSE vector argument "
6333 "without SSE enabled changes the ABI"))
6334 warnedsse = true;
6335 }
6336 else if (!TARGET_64BIT && in_return && !warnedsse_ret)
6337 {
6338 if (warning (OPT_Wpsabi, "SSE vector return "
6339 "without SSE enabled changes the ABI"))
6340 warnedsse_ret = true;
6341 }
6342 }
6343 else if ((size == 8 && !TARGET_64BIT) && !TARGET_MMX)
6344 {
6345 static bool warnedmmx;
6346 static bool warnedmmx_ret;
6347
6348 if (cum && cum->warn_mmx && !warnedmmx)
6349 {
6350 if (warning (OPT_Wpsabi, "MMX vector argument "
6351 "without MMX enabled changes the ABI"))
6352 warnedmmx = true;
6353 }
6354 else if (in_return && !warnedmmx_ret)
6355 {
6356 if (warning (OPT_Wpsabi, "MMX vector return "
6357 "without MMX enabled changes the ABI"))
6358 warnedmmx_ret = true;
6359 }
6360 }
6361 return mode;
6362 }
6363
6364 gcc_unreachable ();
6365 }
6366 }
6367
6368 return mode;
6369 }
6370
6371 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
6372 this may not agree with the mode that the type system has chosen for the
6373 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
6374 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
6375
6376 static rtx
6377 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
6378 unsigned int regno)
6379 {
6380 rtx tmp;
6381
6382 if (orig_mode != BLKmode)
6383 tmp = gen_rtx_REG (orig_mode, regno);
6384 else
6385 {
6386 tmp = gen_rtx_REG (mode, regno);
6387 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
6388 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
6389 }
6390
6391 return tmp;
6392 }
6393
6394 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
6395 of this code is to classify each 8bytes of incoming argument by the register
6396 class and assign registers accordingly. */
6397
6398 /* Return the union class of CLASS1 and CLASS2.
6399 See the x86-64 PS ABI for details. */
6400
6401 static enum x86_64_reg_class
6402 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
6403 {
6404 /* Rule #1: If both classes are equal, this is the resulting class. */
6405 if (class1 == class2)
6406 return class1;
6407
6408 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
6409 the other class. */
6410 if (class1 == X86_64_NO_CLASS)
6411 return class2;
6412 if (class2 == X86_64_NO_CLASS)
6413 return class1;
6414
6415 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
6416 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
6417 return X86_64_MEMORY_CLASS;
6418
6419 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
6420 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
6421 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
6422 return X86_64_INTEGERSI_CLASS;
6423 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
6424 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
6425 return X86_64_INTEGER_CLASS;
6426
6427 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
6428 MEMORY is used. */
6429 if (class1 == X86_64_X87_CLASS
6430 || class1 == X86_64_X87UP_CLASS
6431 || class1 == X86_64_COMPLEX_X87_CLASS
6432 || class2 == X86_64_X87_CLASS
6433 || class2 == X86_64_X87UP_CLASS
6434 || class2 == X86_64_COMPLEX_X87_CLASS)
6435 return X86_64_MEMORY_CLASS;
6436
6437 /* Rule #6: Otherwise class SSE is used. */
6438 return X86_64_SSE_CLASS;
6439 }
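/* Informal example of the rules above (the struct is hypothetical): for
   struct { int i; float f; } the single eightbyte is classified field by
   field; the int half yields INTEGERSI and the float half yields SSE (its
   offset is not 64-bit aligned), and merging them by rule #4 gives
   INTEGER, so the whole struct is passed in one general purpose register.  */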
6440
6441 /* Classify the argument of type TYPE and mode MODE.
6442 CLASSES will be filled by the register class used to pass each word
6443 of the operand. The number of words is returned. In case the parameter
6444 should be passed in memory, 0 is returned. As a special case for zero
6445 sized containers, classes[0] will be NO_CLASS and 1 is returned.
6446
6447 BIT_OFFSET is used internally for handling records and specifies the
6448 offset in bits modulo 512, to avoid overflow cases.
6449
6450 See the x86-64 PS ABI for details.
6451 */
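/* Informal example (the struct is hypothetical): struct { double x; double y; }
   occupies two eightbytes, both of which classify as SSEDF, so
   classify_argument returns 2 and the struct ends up in two SSE registers.  */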
6452
6453 static int
6454 classify_argument (enum machine_mode mode, const_tree type,
6455 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
6456 {
6457 HOST_WIDE_INT bytes =
6458 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6459 int words
6460 = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6461
6462 /* Variable sized entities are always passed/returned in memory. */
6463 if (bytes < 0)
6464 return 0;
6465
6466 if (mode != VOIDmode
6467 && targetm.calls.must_pass_in_stack (mode, type))
6468 return 0;
6469
6470 if (type && AGGREGATE_TYPE_P (type))
6471 {
6472 int i;
6473 tree field;
6474 enum x86_64_reg_class subclasses[MAX_CLASSES];
6475
6476 /* On x86-64 we pass structures larger than 64 bytes on the stack. */
6477 if (bytes > 64)
6478 return 0;
6479
6480 for (i = 0; i < words; i++)
6481 classes[i] = X86_64_NO_CLASS;
6482
6483 /* Zero-sized arrays or structures are NO_CLASS.  We return 0 to
6484 signal the memory class, so handle it as a special case. */
6485 if (!words)
6486 {
6487 classes[0] = X86_64_NO_CLASS;
6488 return 1;
6489 }
6490
6491 /* Classify each field of record and merge classes. */
6492 switch (TREE_CODE (type))
6493 {
6494 case RECORD_TYPE:
6495 /* And now merge the fields of structure. */
6496 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6497 {
6498 if (TREE_CODE (field) == FIELD_DECL)
6499 {
6500 int num;
6501
6502 if (TREE_TYPE (field) == error_mark_node)
6503 continue;
6504
6505 /* Bitfields are always classified as integer. Handle them
6506 early, since later code would consider them to be
6507 misaligned integers. */
6508 if (DECL_BIT_FIELD (field))
6509 {
6510 for (i = (int_bit_position (field)
6511 + (bit_offset % 64)) / 8 / 8;
6512 i < ((int_bit_position (field) + (bit_offset % 64))
6513 + tree_to_shwi (DECL_SIZE (field))
6514 + 63) / 8 / 8; i++)
6515 classes[i] =
6516 merge_classes (X86_64_INTEGER_CLASS,
6517 classes[i]);
6518 }
6519 else
6520 {
6521 int pos;
6522
6523 type = TREE_TYPE (field);
6524
6525 /* Flexible array member is ignored. */
6526 if (TYPE_MODE (type) == BLKmode
6527 && TREE_CODE (type) == ARRAY_TYPE
6528 && TYPE_SIZE (type) == NULL_TREE
6529 && TYPE_DOMAIN (type) != NULL_TREE
6530 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
6531 == NULL_TREE))
6532 {
6533 static bool warned;
6534
6535 if (!warned && warn_psabi)
6536 {
6537 warned = true;
6538 inform (input_location,
6539 "the ABI of passing struct with"
6540 " a flexible array member has"
6541 " changed in GCC 4.4");
6542 }
6543 continue;
6544 }
6545 num = classify_argument (TYPE_MODE (type), type,
6546 subclasses,
6547 (int_bit_position (field)
6548 + bit_offset) % 512);
6549 if (!num)
6550 return 0;
6551 pos = (int_bit_position (field)
6552 + (bit_offset % 64)) / 8 / 8;
6553 for (i = 0; i < num && (i + pos) < words; i++)
6554 classes[i + pos] =
6555 merge_classes (subclasses[i], classes[i + pos]);
6556 }
6557 }
6558 }
6559 break;
6560
6561 case ARRAY_TYPE:
6562 /* Arrays are handled as small records. */
6563 {
6564 int num;
6565 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
6566 TREE_TYPE (type), subclasses, bit_offset);
6567 if (!num)
6568 return 0;
6569
6570 /* The partial classes are now full classes. */
6571 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
6572 subclasses[0] = X86_64_SSE_CLASS;
6573 if (subclasses[0] == X86_64_INTEGERSI_CLASS
6574 && !((bit_offset % 64) == 0 && bytes == 4))
6575 subclasses[0] = X86_64_INTEGER_CLASS;
6576
6577 for (i = 0; i < words; i++)
6578 classes[i] = subclasses[i % num];
6579
6580 break;
6581 }
6582 case UNION_TYPE:
6583 case QUAL_UNION_TYPE:
6584 /* Unions are similar to RECORD_TYPE but the offset is always 0. */
6586 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6587 {
6588 if (TREE_CODE (field) == FIELD_DECL)
6589 {
6590 int num;
6591
6592 if (TREE_TYPE (field) == error_mark_node)
6593 continue;
6594
6595 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
6596 TREE_TYPE (field), subclasses,
6597 bit_offset);
6598 if (!num)
6599 return 0;
6600 for (i = 0; i < num && i < words; i++)
6601 classes[i] = merge_classes (subclasses[i], classes[i]);
6602 }
6603 }
6604 break;
6605
6606 default:
6607 gcc_unreachable ();
6608 }
6609
6610 if (words > 2)
6611 {
6612 /* When the size is > 16 bytes, if the first class isn't
6613 X86_64_SSE_CLASS or any of the other classes aren't
6614 X86_64_SSEUP_CLASS, everything should be passed in
6615 memory. */
6616 if (classes[0] != X86_64_SSE_CLASS)
6617 return 0;
6618
6619 for (i = 1; i < words; i++)
6620 if (classes[i] != X86_64_SSEUP_CLASS)
6621 return 0;
6622 }
6623
6624 /* Final merger cleanup. */
6625 for (i = 0; i < words; i++)
6626 {
6627 /* If one class is MEMORY, everything should be passed in
6628 memory. */
6629 if (classes[i] == X86_64_MEMORY_CLASS)
6630 return 0;
6631
6632 /* X86_64_SSEUP_CLASS should always be preceded by
6633 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
6634 if (classes[i] == X86_64_SSEUP_CLASS
6635 && classes[i - 1] != X86_64_SSE_CLASS
6636 && classes[i - 1] != X86_64_SSEUP_CLASS)
6637 {
6638 /* The first one should never be X86_64_SSEUP_CLASS. */
6639 gcc_assert (i != 0);
6640 classes[i] = X86_64_SSE_CLASS;
6641 }
6642
6643 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
6644 everything should be passed in memory. */
6645 if (classes[i] == X86_64_X87UP_CLASS
6646 && (classes[i - 1] != X86_64_X87_CLASS))
6647 {
6648 static bool warned;
6649
6650 /* The first one should never be X86_64_X87UP_CLASS. */
6651 gcc_assert (i != 0);
6652 if (!warned && warn_psabi)
6653 {
6654 warned = true;
6655 inform (input_location,
6656 "the ABI of passing union with long double"
6657 " has changed in GCC 4.4");
6658 }
6659 return 0;
6660 }
6661 }
6662 return words;
6663 }
6664
6665 /* Compute the alignment needed.  We align all types to their natural
6666 boundaries, with the exception of XFmode, which is aligned to 64 bits. */
6667 if (mode != VOIDmode && mode != BLKmode)
6668 {
6669 int mode_alignment = GET_MODE_BITSIZE (mode);
6670
6671 if (mode == XFmode)
6672 mode_alignment = 128;
6673 else if (mode == XCmode)
6674 mode_alignment = 256;
6675 if (COMPLEX_MODE_P (mode))
6676 mode_alignment /= 2;
6677 /* Misaligned fields are always returned in memory. */
6678 if (bit_offset % mode_alignment)
6679 return 0;
6680 }
6681
6682 /* For V1xx modes, just use the base mode.  */
6683 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
6684 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
6685 mode = GET_MODE_INNER (mode);
6686
6687 /* Classification of atomic types. */
6688 switch (mode)
6689 {
6690 case SDmode:
6691 case DDmode:
6692 classes[0] = X86_64_SSE_CLASS;
6693 return 1;
6694 case TDmode:
6695 classes[0] = X86_64_SSE_CLASS;
6696 classes[1] = X86_64_SSEUP_CLASS;
6697 return 2;
6698 case DImode:
6699 case SImode:
6700 case HImode:
6701 case QImode:
6702 case CSImode:
6703 case CHImode:
6704 case CQImode:
6705 {
6706 int size = bit_offset + (int) GET_MODE_BITSIZE (mode);
6707
6708 /* Analyze last 128 bits only. */
6709 size = (size - 1) & 0x7f;
6710
6711 if (size < 32)
6712 {
6713 classes[0] = X86_64_INTEGERSI_CLASS;
6714 return 1;
6715 }
6716 else if (size < 64)
6717 {
6718 classes[0] = X86_64_INTEGER_CLASS;
6719 return 1;
6720 }
6721 else if (size < 64+32)
6722 {
6723 classes[0] = X86_64_INTEGER_CLASS;
6724 classes[1] = X86_64_INTEGERSI_CLASS;
6725 return 2;
6726 }
6727 else if (size < 64+64)
6728 {
6729 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6730 return 2;
6731 }
6732 else
6733 gcc_unreachable ();
6734 }
6735 case CDImode:
6736 case TImode:
6737 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6738 return 2;
6739 case COImode:
6740 case OImode:
6741 /* OImode shouldn't be used directly. */
6742 gcc_unreachable ();
6743 case CTImode:
6744 return 0;
6745 case SFmode:
6746 if (!(bit_offset % 64))
6747 classes[0] = X86_64_SSESF_CLASS;
6748 else
6749 classes[0] = X86_64_SSE_CLASS;
6750 return 1;
6751 case DFmode:
6752 classes[0] = X86_64_SSEDF_CLASS;
6753 return 1;
6754 case XFmode:
6755 classes[0] = X86_64_X87_CLASS;
6756 classes[1] = X86_64_X87UP_CLASS;
6757 return 2;
6758 case TFmode:
6759 classes[0] = X86_64_SSE_CLASS;
6760 classes[1] = X86_64_SSEUP_CLASS;
6761 return 2;
6762 case SCmode:
6763 classes[0] = X86_64_SSE_CLASS;
6764 if (!(bit_offset % 64))
6765 return 1;
6766 else
6767 {
6768 static bool warned;
6769
6770 if (!warned && warn_psabi)
6771 {
6772 warned = true;
6773 inform (input_location,
6774 "the ABI of passing structure with complex float"
6775 " member has changed in GCC 4.4");
6776 }
6777 classes[1] = X86_64_SSESF_CLASS;
6778 return 2;
6779 }
6780 case DCmode:
6781 classes[0] = X86_64_SSEDF_CLASS;
6782 classes[1] = X86_64_SSEDF_CLASS;
6783 return 2;
6784 case XCmode:
6785 classes[0] = X86_64_COMPLEX_X87_CLASS;
6786 return 1;
6787 case TCmode:
6788 /* This mode is larger than 16 bytes. */
6789 return 0;
6790 case V8SFmode:
6791 case V8SImode:
6792 case V32QImode:
6793 case V16HImode:
6794 case V4DFmode:
6795 case V4DImode:
6796 classes[0] = X86_64_SSE_CLASS;
6797 classes[1] = X86_64_SSEUP_CLASS;
6798 classes[2] = X86_64_SSEUP_CLASS;
6799 classes[3] = X86_64_SSEUP_CLASS;
6800 return 4;
6801 case V8DFmode:
6802 case V16SFmode:
6803 case V8DImode:
6804 case V16SImode:
6805 case V32HImode:
6806 case V64QImode:
6807 classes[0] = X86_64_SSE_CLASS;
6808 classes[1] = X86_64_SSEUP_CLASS;
6809 classes[2] = X86_64_SSEUP_CLASS;
6810 classes[3] = X86_64_SSEUP_CLASS;
6811 classes[4] = X86_64_SSEUP_CLASS;
6812 classes[5] = X86_64_SSEUP_CLASS;
6813 classes[6] = X86_64_SSEUP_CLASS;
6814 classes[7] = X86_64_SSEUP_CLASS;
6815 return 8;
6816 case V4SFmode:
6817 case V4SImode:
6818 case V16QImode:
6819 case V8HImode:
6820 case V2DFmode:
6821 case V2DImode:
6822 classes[0] = X86_64_SSE_CLASS;
6823 classes[1] = X86_64_SSEUP_CLASS;
6824 return 2;
6825 case V1TImode:
6826 case V1DImode:
6827 case V2SFmode:
6828 case V2SImode:
6829 case V4HImode:
6830 case V8QImode:
6831 classes[0] = X86_64_SSE_CLASS;
6832 return 1;
6833 case BLKmode:
6834 case VOIDmode:
6835 return 0;
6836 default:
6837 gcc_assert (VECTOR_MODE_P (mode));
6838
6839 if (bytes > 16)
6840 return 0;
6841
6842 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
6843
6844 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
6845 classes[0] = X86_64_INTEGERSI_CLASS;
6846 else
6847 classes[0] = X86_64_INTEGER_CLASS;
6848 classes[1] = X86_64_INTEGER_CLASS;
6849 return 1 + (bytes > 8);
6850 }
6851 }
6852
6853 /* Examine the argument and set the number of registers required in each
6854 class.  Return true iff the parameter should be passed in memory. */
6855
6856 static bool
6857 examine_argument (enum machine_mode mode, const_tree type, int in_return,
6858 int *int_nregs, int *sse_nregs)
6859 {
6860 enum x86_64_reg_class regclass[MAX_CLASSES];
6861 int n = classify_argument (mode, type, regclass, 0);
6862
6863 *int_nregs = 0;
6864 *sse_nregs = 0;
6865
6866 if (!n)
6867 return true;
6868 for (n--; n >= 0; n--)
6869 switch (regclass[n])
6870 {
6871 case X86_64_INTEGER_CLASS:
6872 case X86_64_INTEGERSI_CLASS:
6873 (*int_nregs)++;
6874 break;
6875 case X86_64_SSE_CLASS:
6876 case X86_64_SSESF_CLASS:
6877 case X86_64_SSEDF_CLASS:
6878 (*sse_nregs)++;
6879 break;
6880 case X86_64_NO_CLASS:
6881 case X86_64_SSEUP_CLASS:
6882 break;
6883 case X86_64_X87_CLASS:
6884 case X86_64_X87UP_CLASS:
6885 case X86_64_COMPLEX_X87_CLASS:
6886 if (!in_return)
6887 return true;
6888 break;
6889 case X86_64_MEMORY_CLASS:
6890 gcc_unreachable ();
6891 }
6892
6893 return false;
6894 }
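/* Informal usage sketch (the struct is hypothetical): for
   struct { double d; long l; } examine_argument sets *sse_nregs to 1 and
   *int_nregs to 1 and returns false, i.e. the argument is passed in one
   SSE register plus one integer register rather than in memory.  */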
6895
6896 /* Construct container for the argument used by GCC interface. See
6897 FUNCTION_ARG for the detailed description. */
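/* Informal sketch of the result (the struct is hypothetical; register
   names assume it is the first argument): for struct { double d; long l; }
   the container is roughly

     (parallel:BLK [(expr_list (reg:DF xmm0) (const_int 0))
                    (expr_list (reg:DI di) (const_int 8))])

   i.e. the first eightbyte in an SSE register and the second in an
   integer register.  */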
6898
6899 static rtx
6900 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
6901 const_tree type, int in_return, int nintregs, int nsseregs,
6902 const int *intreg, int sse_regno)
6903 {
6904 /* The following variables hold the static issued_error state. */
6905 static bool issued_sse_arg_error;
6906 static bool issued_sse_ret_error;
6907 static bool issued_x87_ret_error;
6908
6909 enum machine_mode tmpmode;
6910 int bytes =
6911 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6912 enum x86_64_reg_class regclass[MAX_CLASSES];
6913 int n;
6914 int i;
6915 int nexps = 0;
6916 int needed_sseregs, needed_intregs;
6917 rtx exp[MAX_CLASSES];
6918 rtx ret;
6919
6920 n = classify_argument (mode, type, regclass, 0);
6921 if (!n)
6922 return NULL;
6923 if (examine_argument (mode, type, in_return, &needed_intregs,
6924 &needed_sseregs))
6925 return NULL;
6926 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
6927 return NULL;
6928
6929 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
6930 some less clueful developer tries to use floating-point anyway. */
6931 if (needed_sseregs && !TARGET_SSE)
6932 {
6933 if (in_return)
6934 {
6935 if (!issued_sse_ret_error)
6936 {
6937 error ("SSE register return with SSE disabled");
6938 issued_sse_ret_error = true;
6939 }
6940 }
6941 else if (!issued_sse_arg_error)
6942 {
6943 error ("SSE register argument with SSE disabled");
6944 issued_sse_arg_error = true;
6945 }
6946 return NULL;
6947 }
6948
6949 /* Likewise, error if the ABI requires us to return values in the
6950 x87 registers and the user specified -mno-80387. */
6951 if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
6952 for (i = 0; i < n; i++)
6953 if (regclass[i] == X86_64_X87_CLASS
6954 || regclass[i] == X86_64_X87UP_CLASS
6955 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
6956 {
6957 if (!issued_x87_ret_error)
6958 {
6959 error ("x87 register return with x87 disabled");
6960 issued_x87_ret_error = true;
6961 }
6962 return NULL;
6963 }
6964
6965 /* First construct the simple cases.  Avoid SCmode, since we want to use
6966 a single register to pass this type. */
6967 if (n == 1 && mode != SCmode)
6968 switch (regclass[0])
6969 {
6970 case X86_64_INTEGER_CLASS:
6971 case X86_64_INTEGERSI_CLASS:
6972 return gen_rtx_REG (mode, intreg[0]);
6973 case X86_64_SSE_CLASS:
6974 case X86_64_SSESF_CLASS:
6975 case X86_64_SSEDF_CLASS:
6976 if (mode != BLKmode)
6977 return gen_reg_or_parallel (mode, orig_mode,
6978 SSE_REGNO (sse_regno));
6979 break;
6980 case X86_64_X87_CLASS:
6981 case X86_64_COMPLEX_X87_CLASS:
6982 return gen_rtx_REG (mode, FIRST_STACK_REG);
6983 case X86_64_NO_CLASS:
6984 /* Zero sized array, struct or class. */
6985 return NULL;
6986 default:
6987 gcc_unreachable ();
6988 }
6989 if (n == 2
6990 && regclass[0] == X86_64_SSE_CLASS
6991 && regclass[1] == X86_64_SSEUP_CLASS
6992 && mode != BLKmode)
6993 return gen_reg_or_parallel (mode, orig_mode,
6994 SSE_REGNO (sse_regno));
6995 if (n == 4
6996 && regclass[0] == X86_64_SSE_CLASS
6997 && regclass[1] == X86_64_SSEUP_CLASS
6998 && regclass[2] == X86_64_SSEUP_CLASS
6999 && regclass[3] == X86_64_SSEUP_CLASS
7000 && mode != BLKmode)
7001 return gen_reg_or_parallel (mode, orig_mode,
7002 SSE_REGNO (sse_regno));
7003 if (n == 8
7004 && regclass[0] == X86_64_SSE_CLASS
7005 && regclass[1] == X86_64_SSEUP_CLASS
7006 && regclass[2] == X86_64_SSEUP_CLASS
7007 && regclass[3] == X86_64_SSEUP_CLASS
7008 && regclass[4] == X86_64_SSEUP_CLASS
7009 && regclass[5] == X86_64_SSEUP_CLASS
7010 && regclass[6] == X86_64_SSEUP_CLASS
7011 && regclass[7] == X86_64_SSEUP_CLASS
7012 && mode != BLKmode)
7013 return gen_reg_or_parallel (mode, orig_mode,
7014 SSE_REGNO (sse_regno));
7015 if (n == 2
7016 && regclass[0] == X86_64_X87_CLASS
7017 && regclass[1] == X86_64_X87UP_CLASS)
7018 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
7019
7020 if (n == 2
7021 && regclass[0] == X86_64_INTEGER_CLASS
7022 && regclass[1] == X86_64_INTEGER_CLASS
7023 && (mode == CDImode || mode == TImode)
7024 && intreg[0] + 1 == intreg[1])
7025 return gen_rtx_REG (mode, intreg[0]);
7026
7027 /* Otherwise figure out the entries of the PARALLEL. */
7028 for (i = 0; i < n; i++)
7029 {
7030 int pos;
7031
7032 switch (regclass[i])
7033 {
7034 case X86_64_NO_CLASS:
7035 break;
7036 case X86_64_INTEGER_CLASS:
7037 case X86_64_INTEGERSI_CLASS:
7038 /* Merge TImodes on aligned occasions here too. */
7039 if (i * 8 + 8 > bytes)
7040 tmpmode
7041 = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
7042 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
7043 tmpmode = SImode;
7044 else
7045 tmpmode = DImode;
7046 /* We've requested 24 bytes that we
7047 don't have a mode for.  Use DImode. */
7048 if (tmpmode == BLKmode)
7049 tmpmode = DImode;
7050 exp [nexps++]
7051 = gen_rtx_EXPR_LIST (VOIDmode,
7052 gen_rtx_REG (tmpmode, *intreg),
7053 GEN_INT (i*8));
7054 intreg++;
7055 break;
7056 case X86_64_SSESF_CLASS:
7057 exp [nexps++]
7058 = gen_rtx_EXPR_LIST (VOIDmode,
7059 gen_rtx_REG (SFmode,
7060 SSE_REGNO (sse_regno)),
7061 GEN_INT (i*8));
7062 sse_regno++;
7063 break;
7064 case X86_64_SSEDF_CLASS:
7065 exp [nexps++]
7066 = gen_rtx_EXPR_LIST (VOIDmode,
7067 gen_rtx_REG (DFmode,
7068 SSE_REGNO (sse_regno)),
7069 GEN_INT (i*8));
7070 sse_regno++;
7071 break;
7072 case X86_64_SSE_CLASS:
7073 pos = i;
7074 switch (n)
7075 {
7076 case 1:
7077 tmpmode = DImode;
7078 break;
7079 case 2:
7080 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
7081 {
7082 tmpmode = TImode;
7083 i++;
7084 }
7085 else
7086 tmpmode = DImode;
7087 break;
7088 case 4:
7089 gcc_assert (i == 0
7090 && regclass[1] == X86_64_SSEUP_CLASS
7091 && regclass[2] == X86_64_SSEUP_CLASS
7092 && regclass[3] == X86_64_SSEUP_CLASS);
7093 tmpmode = OImode;
7094 i += 3;
7095 break;
7096 case 8:
7097 gcc_assert (i == 0
7098 && regclass[1] == X86_64_SSEUP_CLASS
7099 && regclass[2] == X86_64_SSEUP_CLASS
7100 && regclass[3] == X86_64_SSEUP_CLASS
7101 && regclass[4] == X86_64_SSEUP_CLASS
7102 && regclass[5] == X86_64_SSEUP_CLASS
7103 && regclass[6] == X86_64_SSEUP_CLASS
7104 && regclass[7] == X86_64_SSEUP_CLASS);
7105 tmpmode = XImode;
7106 i += 7;
7107 break;
7108 default:
7109 gcc_unreachable ();
7110 }
7111 exp [nexps++]
7112 = gen_rtx_EXPR_LIST (VOIDmode,
7113 gen_rtx_REG (tmpmode,
7114 SSE_REGNO (sse_regno)),
7115 GEN_INT (pos*8));
7116 sse_regno++;
7117 break;
7118 default:
7119 gcc_unreachable ();
7120 }
7121 }
7122
7123 /* Empty aligned struct, union or class. */
7124 if (nexps == 0)
7125 return NULL;
7126
7127 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
7128 for (i = 0; i < nexps; i++)
7129 XVECEXP (ret, 0, i) = exp [i];
7130 return ret;
7131 }
7132
7133 /* Update the data in CUM to advance over an argument of mode MODE
7134 and data type TYPE. (TYPE is null for libcalls where that information
7135 may not be available.) */
7136
7137 static void
7138 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7139 const_tree type, HOST_WIDE_INT bytes,
7140 HOST_WIDE_INT words)
7141 {
7142 switch (mode)
7143 {
7144 default:
7145 break;
7146
7147 case BLKmode:
7148 if (bytes < 0)
7149 break;
7150 /* FALLTHRU */
7151
7152 case DImode:
7153 case SImode:
7154 case HImode:
7155 case QImode:
7156 cum->words += words;
7157 cum->nregs -= words;
7158 cum->regno += words;
7159
7160 if (cum->nregs <= 0)
7161 {
7162 cum->nregs = 0;
7163 cum->regno = 0;
7164 }
7165 break;
7166
7167 case OImode:
7168 /* OImode shouldn't be used directly. */
7169 gcc_unreachable ();
7170
7171 case DFmode:
7172 if (cum->float_in_sse < 2)
7173 break;
7174 case SFmode:
7175 if (cum->float_in_sse < 1)
7176 break;
7177 /* FALLTHRU */
7178
7179 case V8SFmode:
7180 case V8SImode:
7181 case V64QImode:
7182 case V32HImode:
7183 case V16SImode:
7184 case V8DImode:
7185 case V16SFmode:
7186 case V8DFmode:
7187 case V32QImode:
7188 case V16HImode:
7189 case V4DFmode:
7190 case V4DImode:
7191 case TImode:
7192 case V16QImode:
7193 case V8HImode:
7194 case V4SImode:
7195 case V2DImode:
7196 case V4SFmode:
7197 case V2DFmode:
7198 if (!type || !AGGREGATE_TYPE_P (type))
7199 {
7200 cum->sse_words += words;
7201 cum->sse_nregs -= 1;
7202 cum->sse_regno += 1;
7203 if (cum->sse_nregs <= 0)
7204 {
7205 cum->sse_nregs = 0;
7206 cum->sse_regno = 0;
7207 }
7208 }
7209 break;
7210
7211 case V8QImode:
7212 case V4HImode:
7213 case V2SImode:
7214 case V2SFmode:
7215 case V1TImode:
7216 case V1DImode:
7217 if (!type || !AGGREGATE_TYPE_P (type))
7218 {
7219 cum->mmx_words += words;
7220 cum->mmx_nregs -= 1;
7221 cum->mmx_regno += 1;
7222 if (cum->mmx_nregs <= 0)
7223 {
7224 cum->mmx_nregs = 0;
7225 cum->mmx_regno = 0;
7226 }
7227 }
7228 break;
7229 }
7230 }
7231
7232 static void
7233 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7234 const_tree type, HOST_WIDE_INT words, bool named)
7235 {
7236 int int_nregs, sse_nregs;
7237
7238 /* Unnamed 512-bit and 256-bit vector mode parameters are passed on the stack. */
7239 if (!named && (VALID_AVX512F_REG_MODE (mode)
7240 || VALID_AVX256_REG_MODE (mode)))
7241 return;
7242
7243 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
7244 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
7245 {
7246 cum->nregs -= int_nregs;
7247 cum->sse_nregs -= sse_nregs;
7248 cum->regno += int_nregs;
7249 cum->sse_regno += sse_nregs;
7250 }
7251 else
7252 {
7253 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
7254 cum->words = (cum->words + align - 1) & ~(align - 1);
7255 cum->words += words;
7256 }
7257 }
7258
7259 static void
7260 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
7261 HOST_WIDE_INT words)
7262 {
7263 /* Anything of another size should have been passed indirectly (by reference). */
7264 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
7265
7266 cum->words += words;
7267 if (cum->nregs > 0)
7268 {
7269 cum->nregs -= 1;
7270 cum->regno += 1;
7271 }
7272 }
7273
7274 /* Update the data in CUM to advance over an argument of mode MODE and
7275 data type TYPE. (TYPE is null for libcalls where that information
7276 may not be available.) */
7277
7278 static void
7279 ix86_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
7280 const_tree type, bool named)
7281 {
7282 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7283 HOST_WIDE_INT bytes, words;
7284
7285 if (mode == BLKmode)
7286 bytes = int_size_in_bytes (type);
7287 else
7288 bytes = GET_MODE_SIZE (mode);
7289 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7290
7291 if (type)
7292 mode = type_natural_mode (type, NULL, false);
7293
7294 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7295 function_arg_advance_ms_64 (cum, bytes, words);
7296 else if (TARGET_64BIT)
7297 function_arg_advance_64 (cum, mode, type, words, named);
7298 else
7299 function_arg_advance_32 (cum, mode, type, bytes, words);
7300 }
7301
7302 /* Define where to put the arguments to a function.
7303 Value is zero to push the argument on the stack,
7304 or a hard register in which to store the argument.
7305
7306 MODE is the argument's machine mode.
7307 TYPE is the data type of the argument (as a tree).
7308 This is null for libcalls where that information may
7309 not be available.
7310 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7311 the preceding args and about the function being called.
7312 NAMED is nonzero if this argument is a named parameter
7313 (otherwise it is an extra parameter matching an ellipsis). */
7314
7315 static rtx
7316 function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7317 enum machine_mode orig_mode, const_tree type,
7318 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
7319 {
7320 /* Avoid the AL settings for the Unix64 ABI. */
7321 if (mode == VOIDmode)
7322 return constm1_rtx;
7323
7324 switch (mode)
7325 {
7326 default:
7327 break;
7328
7329 case BLKmode:
7330 if (bytes < 0)
7331 break;
7332 /* FALLTHRU */
7333 case DImode:
7334 case SImode:
7335 case HImode:
7336 case QImode:
7337 if (words <= cum->nregs)
7338 {
7339 int regno = cum->regno;
7340
7341 /* Fastcall allocates the first two DWORD (SImode) or
7342 smaller arguments to ECX and EDX if they aren't
7343 aggregate types. */
7344 if (cum->fastcall)
7345 {
7346 if (mode == BLKmode
7347 || mode == DImode
7348 || (type && AGGREGATE_TYPE_P (type)))
7349 break;
7350
7351 /* ECX, not EAX, is the first allocated register. */
7352 if (regno == AX_REG)
7353 regno = CX_REG;
7354 }
7355 return gen_rtx_REG (mode, regno);
7356 }
7357 break;
7358
7359 case DFmode:
7360 if (cum->float_in_sse < 2)
7361 break;
7362 case SFmode:
7363 if (cum->float_in_sse < 1)
7364 break;
7365 /* FALLTHRU */
7366 case TImode:
7367 /* In 32bit, we pass TImode in xmm registers. */
7368 case V16QImode:
7369 case V8HImode:
7370 case V4SImode:
7371 case V2DImode:
7372 case V4SFmode:
7373 case V2DFmode:
7374 if (!type || !AGGREGATE_TYPE_P (type))
7375 {
7376 if (cum->sse_nregs)
7377 return gen_reg_or_parallel (mode, orig_mode,
7378 cum->sse_regno + FIRST_SSE_REG);
7379 }
7380 break;
7381
7382 case OImode:
7383 case XImode:
7384 /* OImode and XImode shouldn't be used directly. */
7385 gcc_unreachable ();
7386
7387 case V64QImode:
7388 case V32HImode:
7389 case V16SImode:
7390 case V8DImode:
7391 case V16SFmode:
7392 case V8DFmode:
7393 case V8SFmode:
7394 case V8SImode:
7395 case V32QImode:
7396 case V16HImode:
7397 case V4DFmode:
7398 case V4DImode:
7399 if (!type || !AGGREGATE_TYPE_P (type))
7400 {
7401 if (cum->sse_nregs)
7402 return gen_reg_or_parallel (mode, orig_mode,
7403 cum->sse_regno + FIRST_SSE_REG);
7404 }
7405 break;
7406
7407 case V8QImode:
7408 case V4HImode:
7409 case V2SImode:
7410 case V2SFmode:
7411 case V1TImode:
7412 case V1DImode:
7413 if (!type || !AGGREGATE_TYPE_P (type))
7414 {
7415 if (cum->mmx_nregs)
7416 return gen_reg_or_parallel (mode, orig_mode,
7417 cum->mmx_regno + FIRST_MMX_REG);
7418 }
7419 break;
7420 }
7421
7422 return NULL_RTX;
7423 }
7424
7425 static rtx
7426 function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7427 enum machine_mode orig_mode, const_tree type, bool named)
7428 {
7429 /* Handle a hidden AL argument containing number of registers
7430 for varargs x86-64 functions. */
7431 if (mode == VOIDmode)
7432 return GEN_INT (cum->maybe_vaarg
7433 ? (cum->sse_nregs < 0
7434 ? X86_64_SSE_REGPARM_MAX
7435 : cum->sse_regno)
7436 : -1);
7437
7438 switch (mode)
7439 {
7440 default:
7441 break;
7442
7443 case V8SFmode:
7444 case V8SImode:
7445 case V32QImode:
7446 case V16HImode:
7447 case V4DFmode:
7448 case V4DImode:
7449 case V16SFmode:
7450 case V16SImode:
7451 case V64QImode:
7452 case V32HImode:
7453 case V8DFmode:
7454 case V8DImode:
7455 /* Unnamed 256-bit and 512-bit vector mode parameters are passed on the stack. */
7456 if (!named)
7457 return NULL;
7458 break;
7459 }
7460
7461 return construct_container (mode, orig_mode, type, 0, cum->nregs,
7462 cum->sse_nregs,
7463 &x86_64_int_parameter_registers [cum->regno],
7464 cum->sse_regno);
7465 }
7466
7467 static rtx
7468 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7469 enum machine_mode orig_mode, bool named,
7470 HOST_WIDE_INT bytes)
7471 {
7472 unsigned int regno;
7473
7474 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
7475 We use the value -2 to specify that the current function call is MSABI. */
7476 if (mode == VOIDmode)
7477 return GEN_INT (-2);
7478
7479 /* If we've run out of registers, it goes on the stack. */
7480 if (cum->nregs == 0)
7481 return NULL_RTX;
7482
7483 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
7484
7485 /* Only floating point modes are passed in anything but integer regs. */
7486 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
7487 {
7488 if (named)
7489 regno = cum->regno + FIRST_SSE_REG;
7490 else
7491 {
7492 rtx t1, t2;
7493
7494 /* Unnamed floating parameters are passed in both the
7495 SSE and integer registers. */
7496 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
7497 t2 = gen_rtx_REG (mode, regno);
7498 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
7499 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
7500 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
7501 }
7502 }
7503 /* Handle aggregate types passed in registers. */
7504 if (orig_mode == BLKmode)
7505 {
7506 if (bytes > 0 && bytes <= 8)
7507 mode = (bytes > 4 ? DImode : SImode);
7508 if (mode == BLKmode)
7509 mode = DImode;
7510 }
7511
7512 return gen_reg_or_parallel (mode, orig_mode, regno);
7513 }
7514
7515 /* Return where to put the arguments to a function.
7516 Return zero to push the argument on the stack, or a hard register in which to store the argument.
7517
7518 MODE is the argument's machine mode. TYPE is the data type of the
7519 argument. It is null for libcalls where that information may not be
7520 available. CUM gives information about the preceding args and about
7521 the function being called. NAMED is nonzero if this argument is a
7522 named parameter (otherwise it is an extra parameter matching an
7523 ellipsis). */
7524
7525 static rtx
7526 ix86_function_arg (cumulative_args_t cum_v, enum machine_mode omode,
7527 const_tree type, bool named)
7528 {
7529 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7530 enum machine_mode mode = omode;
7531 HOST_WIDE_INT bytes, words;
7532 rtx arg;
7533
7534 if (mode == BLKmode)
7535 bytes = int_size_in_bytes (type);
7536 else
7537 bytes = GET_MODE_SIZE (mode);
7538 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7539
7540 /* To simplify the code below, represent vector types with a vector mode
7541 even if MMX/SSE are not active. */
7542 if (type && TREE_CODE (type) == VECTOR_TYPE)
7543 mode = type_natural_mode (type, cum, false);
7544
7545 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7546 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
7547 else if (TARGET_64BIT)
7548 arg = function_arg_64 (cum, mode, omode, type, named);
7549 else
7550 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
7551
7552 return arg;
7553 }
7554
7555 /* A C expression that indicates when an argument must be passed by
7556 reference. If nonzero for an argument, a copy of that argument is
7557 made in memory and a pointer to the argument is passed instead of
7558 the argument itself. The pointer is passed in whatever way is
7559 appropriate for passing a pointer to that type. */
7560
7561 static bool
7562 ix86_pass_by_reference (cumulative_args_t cum_v, enum machine_mode mode,
7563 const_tree type, bool)
7564 {
7565 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7566
7567 /* See Windows x64 Software Convention. */
7568 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7569 {
7570 int msize = (int) GET_MODE_SIZE (mode);
7571 if (type)
7572 {
7573 /* Arrays are passed by reference. */
7574 if (TREE_CODE (type) == ARRAY_TYPE)
7575 return true;
7576
7577 if (AGGREGATE_TYPE_P (type))
7578 {
7579 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
7580 are passed by reference. */
7581 msize = int_size_in_bytes (type);
7582 }
7583 }
7584
7585 /* __m128 is passed by reference. */
7586 switch (msize) {
7587 case 1: case 2: case 4: case 8:
7588 break;
7589 default:
7590 return true;
7591 }
7592 }
7593 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
7594 return 1;
7595
7596 return 0;
7597 }
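/* Informal summary of the checks above: under the MS x64 convention only
   arguments whose size is exactly 1, 2, 4 or 8 bytes are passed by value;
   a 16-byte __m128 or, say, a hypothetical 12-byte struct is passed by
   reference (the caller passes the address of a copy).  On 64-bit SysV
   targets only variable-sized types take the by-reference path here.  */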
7598
7599 /* Return true when TYPE should be 128bit aligned for 32bit argument
7600 passing ABI. XXX: This function is obsolete and is only used for
7601 checking psABI compatibility with previous versions of GCC. */
7602
7603 static bool
7604 ix86_compat_aligned_value_p (const_tree type)
7605 {
7606 enum machine_mode mode = TYPE_MODE (type);
7607 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
7608 || mode == TDmode
7609 || mode == TFmode
7610 || mode == TCmode)
7611 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
7612 return true;
7613 if (TYPE_ALIGN (type) < 128)
7614 return false;
7615
7616 if (AGGREGATE_TYPE_P (type))
7617 {
7618 /* Walk the aggregates recursively. */
7619 switch (TREE_CODE (type))
7620 {
7621 case RECORD_TYPE:
7622 case UNION_TYPE:
7623 case QUAL_UNION_TYPE:
7624 {
7625 tree field;
7626
7627 /* Walk all the structure fields. */
7628 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7629 {
7630 if (TREE_CODE (field) == FIELD_DECL
7631 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
7632 return true;
7633 }
7634 break;
7635 }
7636
7637 case ARRAY_TYPE:
7638 /* Just in case some languages pass arrays by value. */
7639 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
7640 return true;
7641 break;
7642
7643 default:
7644 gcc_unreachable ();
7645 }
7646 }
7647 return false;
7648 }
7649
7650 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
7651 XXX: This function is obsolete and is only used for checking psABI
7652 compatibility with previous versions of GCC. */
7653
7654 static unsigned int
7655 ix86_compat_function_arg_boundary (enum machine_mode mode,
7656 const_tree type, unsigned int align)
7657 {
7658 /* In 32bit, only _Decimal128 and __float128 are aligned to their
7659 natural boundaries. */
7660 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
7661 {
7662 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
7663 make an exception for SSE modes since these require 128bit
7664 alignment.
7665
7666 The handling here differs from field_alignment. ICC aligns MMX
7667 arguments to 4 byte boundaries, while structure fields are aligned
7668 to 8 byte boundaries. */
7669 if (!type)
7670 {
7671 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
7672 align = PARM_BOUNDARY;
7673 }
7674 else
7675 {
7676 if (!ix86_compat_aligned_value_p (type))
7677 align = PARM_BOUNDARY;
7678 }
7679 }
7680 if (align > BIGGEST_ALIGNMENT)
7681 align = BIGGEST_ALIGNMENT;
7682 return align;
7683 }
7684
7685 /* Return true when TYPE should be 128bit aligned for 32bit argument
7686 passing ABI. */
7687
7688 static bool
7689 ix86_contains_aligned_value_p (const_tree type)
7690 {
7691 enum machine_mode mode = TYPE_MODE (type);
7692
7693 if (mode == XFmode || mode == XCmode)
7694 return false;
7695
7696 if (TYPE_ALIGN (type) < 128)
7697 return false;
7698
7699 if (AGGREGATE_TYPE_P (type))
7700 {
7701 /* Walk the aggregates recursively. */
7702 switch (TREE_CODE (type))
7703 {
7704 case RECORD_TYPE:
7705 case UNION_TYPE:
7706 case QUAL_UNION_TYPE:
7707 {
7708 tree field;
7709
7710 /* Walk all the structure fields. */
7711 for (field = TYPE_FIELDS (type);
7712 field;
7713 field = DECL_CHAIN (field))
7714 {
7715 if (TREE_CODE (field) == FIELD_DECL
7716 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
7717 return true;
7718 }
7719 break;
7720 }
7721
7722 case ARRAY_TYPE:
7723 /* Just in case some languages pass arrays by value. */
7724 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
7725 return true;
7726 break;
7727
7728 default:
7729 gcc_unreachable ();
7730 }
7731 }
7732 else
7733 return TYPE_ALIGN (type) >= 128;
7734
7735 return false;
7736 }
7737
7738 /* Gives the alignment boundary, in bits, of an argument with the
7739 specified mode and type. */
7740
7741 static unsigned int
7742 ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
7743 {
7744 unsigned int align;
7745 if (type)
7746 {
7747 /* Since the main variant type is used for the call, convert the
7748 type to its main variant. */
7749 type = TYPE_MAIN_VARIANT (type);
7750 align = TYPE_ALIGN (type);
7751 }
7752 else
7753 align = GET_MODE_ALIGNMENT (mode);
7754 if (align < PARM_BOUNDARY)
7755 align = PARM_BOUNDARY;
7756 else
7757 {
7758 static bool warned;
7759 unsigned int saved_align = align;
7760
7761 if (!TARGET_64BIT)
7762 {
7763 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
7764 if (!type)
7765 {
7766 if (mode == XFmode || mode == XCmode)
7767 align = PARM_BOUNDARY;
7768 }
7769 else if (!ix86_contains_aligned_value_p (type))
7770 align = PARM_BOUNDARY;
7771
7772 if (align < 128)
7773 align = PARM_BOUNDARY;
7774 }
7775
7776 if (warn_psabi
7777 && !warned
7778 && align != ix86_compat_function_arg_boundary (mode, type,
7779 saved_align))
7780 {
7781 warned = true;
7782 inform (input_location,
7783 "The ABI for passing parameters with %d-byte"
7784 " alignment has changed in GCC 4.6",
7785 align / BITS_PER_UNIT);
7786 }
7787 }
7788
7789 return align;
7790 }
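/* Informal examples for the 32-bit case above: a plain int argument gets
   PARM_BOUNDARY (32 bits), while an argument whose type contains a
   128-bit-aligned member such as __m128 keeps its 128-bit boundary; when
   the result differs from the pre-GCC-4.6 computation, a one-time
   -Wpsabi note is emitted.  */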
7791
7792 /* Return true if N is a possible register number of function value. */
7793
7794 static bool
7795 ix86_function_value_regno_p (const unsigned int regno)
7796 {
7797 switch (regno)
7798 {
7799 case AX_REG:
7800 return true;
7801 case DX_REG:
7802 return (!TARGET_64BIT || ix86_abi != MS_ABI);
7803 case DI_REG:
7804 case SI_REG:
7805 return TARGET_64BIT && ix86_abi != MS_ABI;
7806
7807 /* Complex values are returned in %st(0)/%st(1) pair. */
7808 case ST0_REG:
7809 case ST1_REG:
7810 /* TODO: The function should depend on current function ABI but
7811 builtins.c would need updating then. Therefore we use the
7812 default ABI. */
7813 if (TARGET_64BIT && ix86_abi == MS_ABI)
7814 return false;
7815 return TARGET_FLOAT_RETURNS_IN_80387;
7816
7817 /* Complex values are returned in %xmm0/%xmm1 pair. */
7818 case XMM0_REG:
7819 case XMM1_REG:
7820 return TARGET_SSE;
7821
7822 case MM0_REG:
7823 if (TARGET_MACHO || TARGET_64BIT)
7824 return false;
7825 return TARGET_MMX;
7826 }
7827
7828 return false;
7829 }
7830
7831 /* Define how to find the value returned by a function.
7832 VALTYPE is the data type of the value (as a tree).
7833 If the precise function being called is known, FUNC is its FUNCTION_DECL;
7834 otherwise, FUNC is 0. */
7835
7836 static rtx
7837 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
7838 const_tree fntype, const_tree fn)
7839 {
7840 unsigned int regno;
7841
7842 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
7843 we normally prevent this case when mmx is not available. However
7844 some ABIs may require the result to be returned like DImode. */
7845 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7846 regno = FIRST_MMX_REG;
7847
7848 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
7849 we prevent this case when sse is not available. However some ABIs
7850 may require the result to be returned like integer TImode. */
7851 else if (mode == TImode
7852 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7853 regno = FIRST_SSE_REG;
7854
7855 /* 32-byte vector modes in %ymm0. */
7856 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
7857 regno = FIRST_SSE_REG;
7858
7859 /* 64-byte vector modes in %zmm0. */
7860 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
7861 regno = FIRST_SSE_REG;
7862
7863 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
7864 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
7865 regno = FIRST_FLOAT_REG;
7866 else
7867 /* Most things go in %eax. */
7868 regno = AX_REG;
7869
7870 /* Override FP return register with %xmm0 for local functions when
7871 SSE math is enabled or for functions with sseregparm attribute. */
7872 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
7873 {
7874 int sse_level = ix86_function_sseregparm (fntype, fn, false);
7875 if ((sse_level >= 1 && mode == SFmode)
7876 || (sse_level == 2 && mode == DFmode))
7877 regno = FIRST_SSE_REG;
7878 }
7879
7880 /* OImode shouldn't be used directly. */
7881 gcc_assert (mode != OImode);
7882
7883 return gen_rtx_REG (orig_mode, regno);
7884 }
7885
7886 static rtx
7887 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
7888 const_tree valtype)
7889 {
7890 rtx ret;
7891
7892 /* Handle libcalls, which don't provide a type node. */
7893 if (valtype == NULL)
7894 {
7895 unsigned int regno;
7896
7897 switch (mode)
7898 {
7899 case SFmode:
7900 case SCmode:
7901 case DFmode:
7902 case DCmode:
7903 case TFmode:
7904 case SDmode:
7905 case DDmode:
7906 case TDmode:
7907 regno = FIRST_SSE_REG;
7908 break;
7909 case XFmode:
7910 case XCmode:
7911 regno = FIRST_FLOAT_REG;
7912 break;
7913 case TCmode:
7914 return NULL;
7915 default:
7916 regno = AX_REG;
7917 }
7918
7919 return gen_rtx_REG (mode, regno);
7920 }
7921 else if (POINTER_TYPE_P (valtype))
7922 {
7923 /* Pointers are always returned in word_mode. */
7924 mode = word_mode;
7925 }
7926
7927 ret = construct_container (mode, orig_mode, valtype, 1,
7928 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
7929 x86_64_int_return_registers, 0);
7930
7931 /* For zero-sized structures, construct_container returns NULL, but we need
7932 to keep the rest of the compiler happy by returning a meaningful value. */
7933 if (!ret)
7934 ret = gen_rtx_REG (orig_mode, AX_REG);
7935
7936 return ret;
7937 }
7938
7939 static rtx
7940 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode,
7941 const_tree valtype)
7942 {
7943 unsigned int regno = AX_REG;
7944
7945 if (TARGET_SSE)
7946 {
7947 switch (GET_MODE_SIZE (mode))
7948 {
7949 case 16:
7950 if (valtype != NULL_TREE
7951 && !VECTOR_INTEGER_TYPE_P (valtype)
7953 && !INTEGRAL_TYPE_P (valtype)
7954 && !VECTOR_FLOAT_TYPE_P (valtype))
7955 break;
7956 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
7957 && !COMPLEX_MODE_P (mode))
7958 regno = FIRST_SSE_REG;
7959 break;
7960 case 8:
7961 case 4:
7962 if (mode == SFmode || mode == DFmode)
7963 regno = FIRST_SSE_REG;
7964 break;
7965 default:
7966 break;
7967 }
7968 }
7969 return gen_rtx_REG (orig_mode, regno);
7970 }
7971
7972 static rtx
7973 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
7974 enum machine_mode orig_mode, enum machine_mode mode)
7975 {
7976 const_tree fn, fntype;
7977
7978 fn = NULL_TREE;
7979 if (fntype_or_decl && DECL_P (fntype_or_decl))
7980 fn = fntype_or_decl;
7981 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
7982
7983 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
7984 return function_value_ms_64 (orig_mode, mode, valtype);
7985 else if (TARGET_64BIT)
7986 return function_value_64 (orig_mode, mode, valtype);
7987 else
7988 return function_value_32 (orig_mode, mode, fntype, fn);
7989 }
7990
7991 static rtx
7992 ix86_function_value (const_tree valtype, const_tree fntype_or_decl, bool)
7993 {
7994 enum machine_mode mode, orig_mode;
7995
7996 orig_mode = TYPE_MODE (valtype);
7997 mode = type_natural_mode (valtype, NULL, true);
7998 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
7999 }
8000
8001 /* Pointer function arguments and return values are promoted to
8002 word_mode. */
8003
8004 static enum machine_mode
8005 ix86_promote_function_mode (const_tree type, enum machine_mode mode,
8006 int *punsignedp, const_tree fntype,
8007 int for_return)
8008 {
8009 if (type != NULL_TREE && POINTER_TYPE_P (type))
8010 {
8011 *punsignedp = POINTERS_EXTEND_UNSIGNED;
8012 return word_mode;
8013 }
8014 return default_promote_function_mode (type, mode, punsignedp, fntype,
8015 for_return);
8016 }
8017
8018 /* Return true if a structure, union or array with MODE containing FIELD
8019 should be accessed using BLKmode. */
8020
8021 static bool
8022 ix86_member_type_forces_blk (const_tree field, enum machine_mode mode)
8023 {
8024 /* Union with XFmode must be in BLKmode. */
8025 return (mode == XFmode
8026 && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
8027 || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
8028 }
8029
8030 rtx
8031 ix86_libcall_value (enum machine_mode mode)
8032 {
8033 return ix86_function_value_1 (NULL, NULL, mode, mode);
8034 }
8035
8036 /* Return true iff type is returned in memory. */
8037
8038 static bool
8039 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
8040 {
8041 #ifdef SUBTARGET_RETURN_IN_MEMORY
8042 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
8043 #else
8044 const enum machine_mode mode = type_natural_mode (type, NULL, true);
8045 HOST_WIDE_INT size;
8046
8047 if (TARGET_64BIT)
8048 {
8049 if (ix86_function_type_abi (fntype) == MS_ABI)
8050 {
8051 size = int_size_in_bytes (type);
8052
8053 /* __m128 is returned in xmm0. */
8054 if ((!type || VECTOR_INTEGER_TYPE_P (type)
8055 || INTEGRAL_TYPE_P (type)
8056 || VECTOR_FLOAT_TYPE_P (type))
8057 && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
8058 && !COMPLEX_MODE_P (mode)
8059 && (GET_MODE_SIZE (mode) == 16 || size == 16))
8060 return false;
8061
8062 /* Otherwise, the size must be exactly 1, 2, 4, or 8 bytes. */
8063 return size != 1 && size != 2 && size != 4 && size != 8;
8064 }
8065 else
8066 {
8067 int needed_intregs, needed_sseregs;
8068
8069 return examine_argument (mode, type, 1,
8070 &needed_intregs, &needed_sseregs);
8071 }
8072 }
8073 else
8074 {
8075 if (mode == BLKmode)
8076 return true;
8077
8078 size = int_size_in_bytes (type);
8079
8080 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
8081 return false;
8082
8083 if (VECTOR_MODE_P (mode) || mode == TImode)
8084 {
8085 /* User-created vectors small enough to fit in EAX. */
8086 if (size < 8)
8087 return false;
8088
8089 /* Unless the ABI prescribes otherwise,
8090 MMX/3dNow values are returned in MM0 if available. */
8091
8092 if (size == 8)
8093 return TARGET_VECT8_RETURNS || !TARGET_MMX;
8094
8095 /* SSE values are returned in XMM0 if available. */
8096 if (size == 16)
8097 return !TARGET_SSE;
8098
8099 /* AVX values are returned in YMM0 if available. */
8100 if (size == 32)
8101 return !TARGET_AVX;
8102
8103 /* AVX512F values are returned in ZMM0 if available. */
8104 if (size == 64)
8105 return !TARGET_AVX512F;
8106 }
8107
8108 if (mode == XFmode)
8109 return false;
8110
8111 if (size > 12)
8112 return true;
8113
8114 /* OImode shouldn't be used directly. */
8115 gcc_assert (mode != OImode);
8116
8117 return false;
8118 }
8119 #endif
8120 }
8121
8122 \f
8123 /* Create the va_list data type. */
8124
8125 /* Returns the calling-convention-specific va_list data type.
8126 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
8127
8128 static tree
8129 ix86_build_builtin_va_list_abi (enum calling_abi abi)
8130 {
8131 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
8132
8133 /* For i386 we use a plain pointer to the argument area. */
8134 if (!TARGET_64BIT || abi == MS_ABI)
8135 return build_pointer_type (char_type_node);
8136
8137 record = lang_hooks.types.make_type (RECORD_TYPE);
8138 type_decl = build_decl (BUILTINS_LOCATION,
8139 TYPE_DECL, get_identifier ("__va_list_tag"), record);
8140
8141 f_gpr = build_decl (BUILTINS_LOCATION,
8142 FIELD_DECL, get_identifier ("gp_offset"),
8143 unsigned_type_node);
8144 f_fpr = build_decl (BUILTINS_LOCATION,
8145 FIELD_DECL, get_identifier ("fp_offset"),
8146 unsigned_type_node);
8147 f_ovf = build_decl (BUILTINS_LOCATION,
8148 FIELD_DECL, get_identifier ("overflow_arg_area"),
8149 ptr_type_node);
8150 f_sav = build_decl (BUILTINS_LOCATION,
8151 FIELD_DECL, get_identifier ("reg_save_area"),
8152 ptr_type_node);
8153
8154 va_list_gpr_counter_field = f_gpr;
8155 va_list_fpr_counter_field = f_fpr;
8156
8157 DECL_FIELD_CONTEXT (f_gpr) = record;
8158 DECL_FIELD_CONTEXT (f_fpr) = record;
8159 DECL_FIELD_CONTEXT (f_ovf) = record;
8160 DECL_FIELD_CONTEXT (f_sav) = record;
8161
8162 TYPE_STUB_DECL (record) = type_decl;
8163 TYPE_NAME (record) = type_decl;
8164 TYPE_FIELDS (record) = f_gpr;
8165 DECL_CHAIN (f_gpr) = f_fpr;
8166 DECL_CHAIN (f_fpr) = f_ovf;
8167 DECL_CHAIN (f_ovf) = f_sav;
8168
8169 layout_type (record);
8170
8171 /* The correct type is an array type of one element. */
8172 return build_array_type (record, build_index_type (size_zero_node));
8173 }
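/* The record built above corresponds to the SysV x86-64 va_list layout,
   roughly equivalent to the C declaration

     typedef struct __va_list_tag {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } __builtin_va_list[1];
*/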
8174
8175 /* Set up the builtin va_list data type and, for 64-bit, the additional
8176 calling-convention-specific va_list data types. */
8177
8178 static tree
8179 ix86_build_builtin_va_list (void)
8180 {
8181 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
8182
8183 /* Initialize abi specific va_list builtin types. */
8184 if (TARGET_64BIT)
8185 {
8186 tree t;
8187 if (ix86_abi == MS_ABI)
8188 {
8189 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
8190 if (TREE_CODE (t) != RECORD_TYPE)
8191 t = build_variant_type_copy (t);
8192 sysv_va_list_type_node = t;
8193 }
8194 else
8195 {
8196 t = ret;
8197 if (TREE_CODE (t) != RECORD_TYPE)
8198 t = build_variant_type_copy (t);
8199 sysv_va_list_type_node = t;
8200 }
8201 if (ix86_abi != MS_ABI)
8202 {
8203 t = ix86_build_builtin_va_list_abi (MS_ABI);
8204 if (TREE_CODE (t) != RECORD_TYPE)
8205 t = build_variant_type_copy (t);
8206 ms_va_list_type_node = t;
8207 }
8208 else
8209 {
8210 t = ret;
8211 if (TREE_CODE (t) != RECORD_TYPE)
8212 t = build_variant_type_copy (t);
8213 ms_va_list_type_node = t;
8214 }
8215 }
8216
8217 return ret;
8218 }
8219
8220 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
8221
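/* A rough sketch (assuming the SysV AMD64 ABI values used below,
   X86_64_REGPARM_MAX == 6 and X86_64_SSE_REGPARM_MAX == 8) of the
   register save area laid out by this function:

     offset   0 ..  47   rdi, rsi, rdx, rcx, r8, r9   (6 * 8 bytes)
     offset  48 .. 175   xmm0 .. xmm7                 (8 * 16 bytes)

   Only the registers from cum->regno / cum->sse_regno upwards are
   actually stored, and the SSE half is skipped when AL is zero.  */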
8222 static void
8223 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
8224 {
8225 rtx save_area, mem;
8226 alias_set_type set;
8227 int i, max;
8228
8229 /* GPR size of varargs save area. */
8230 if (cfun->va_list_gpr_size)
8231 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
8232 else
8233 ix86_varargs_gpr_size = 0;
8234
8235 /* FPR size of varargs save area. We don't need it if we don't pass
8236 anything in SSE registers. */
8237 if (TARGET_SSE && cfun->va_list_fpr_size)
8238 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
8239 else
8240 ix86_varargs_fpr_size = 0;
8241
8242 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
8243 return;
8244
8245 save_area = frame_pointer_rtx;
8246 set = get_varargs_alias_set ();
8247
8248 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
8249 if (max > X86_64_REGPARM_MAX)
8250 max = X86_64_REGPARM_MAX;
8251
8252 for (i = cum->regno; i < max; i++)
8253 {
8254 mem = gen_rtx_MEM (word_mode,
8255 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
8256 MEM_NOTRAP_P (mem) = 1;
8257 set_mem_alias_set (mem, set);
8258 emit_move_insn (mem,
8259 gen_rtx_REG (word_mode,
8260 x86_64_int_parameter_registers[i]));
8261 }
8262
8263 if (ix86_varargs_fpr_size)
8264 {
8265 enum machine_mode smode;
8266 rtx label, test;
8267
8268 /* Now emit code to save SSE registers. The AX parameter contains the
8269 number of SSE parameter registers used to call this function, though
8270 all we actually check here is the zero/non-zero status. */
8271
8272 label = gen_label_rtx ();
8273 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
8274 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
8275 label));
8276
8277 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
8278 we used movdqa (i.e. TImode) instead? Perhaps even better would
8279 be if we could determine the real mode of the data, via a hook
8280 into pass_stdarg. Ignore all that for now. */
8281 smode = V4SFmode;
8282 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
8283 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
8284
8285 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
8286 if (max > X86_64_SSE_REGPARM_MAX)
8287 max = X86_64_SSE_REGPARM_MAX;
8288
8289 for (i = cum->sse_regno; i < max; ++i)
8290 {
8291 mem = plus_constant (Pmode, save_area,
8292 i * 16 + ix86_varargs_gpr_size);
8293 mem = gen_rtx_MEM (smode, mem);
8294 MEM_NOTRAP_P (mem) = 1;
8295 set_mem_alias_set (mem, set);
8296 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
8297
8298 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
8299 }
8300
8301 emit_label (label);
8302 }
8303 }
8304
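/* Worker for TARGET_SETUP_INCOMING_VARARGS on the 64-bit MS ABI: spill the
   integer parameter registers into the caller-allocated home area (the
   "shadow space" above the return address), so that va_arg can walk all
   arguments as one contiguous stack block.  */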
8305 static void
8306 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
8307 {
8308 alias_set_type set = get_varargs_alias_set ();
8309 int i;
8310
8311 /* Reset to zero, as there might have been a sysv va_arg used
8312 before. */
8313 ix86_varargs_gpr_size = 0;
8314 ix86_varargs_fpr_size = 0;
8315
8316 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
8317 {
8318 rtx reg, mem;
8319
8320 mem = gen_rtx_MEM (Pmode,
8321 plus_constant (Pmode, virtual_incoming_args_rtx,
8322 i * UNITS_PER_WORD));
8323 MEM_NOTRAP_P (mem) = 1;
8324 set_mem_alias_set (mem, set);
8325
8326 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
8327 emit_move_insn (mem, reg);
8328 }
8329 }
8330
8331 static void
8332 ix86_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
8333 tree type, int *, int no_rtl)
8334 {
8335 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8336 CUMULATIVE_ARGS next_cum;
8337 tree fntype;
8338
8339 /* This argument doesn't appear to be used anymore, which is good,
8340 because the old code here didn't suppress rtl generation. */
8341 gcc_assert (!no_rtl);
8342
8343 if (!TARGET_64BIT)
8344 return;
8345
8346 fntype = TREE_TYPE (current_function_decl);
8347
8348 /* For varargs, we do not want to skip the dummy va_dcl argument.
8349 For stdargs, we do want to skip the last named argument. */
8350 next_cum = *cum;
8351 if (stdarg_p (fntype))
8352 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
8353 true);
8354
8355 if (cum->call_abi == MS_ABI)
8356 setup_incoming_varargs_ms_64 (&next_cum);
8357 else
8358 setup_incoming_varargs_64 (&next_cum);
8359 }
8360
8361 /* Checks whether TYPE is a va_list that is a plain char pointer. */
8362
8363 static bool
8364 is_va_list_char_pointer (tree type)
8365 {
8366 tree canonic;
8367
8368 /* For 32-bit it is always true. */
8369 if (!TARGET_64BIT)
8370 return true;
8371 canonic = ix86_canonical_va_list_type (type);
8372 return (canonic == ms_va_list_type_node
8373 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
8374 }
8375
8376 /* Implement va_start. */
8377
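/* An illustrative (non-normative) example of the values established below
   for the SysV AMD64 ABI: for

     void f (int a, double b, ...);

   one GPR (a) and one SSE register (b) are consumed by named arguments,
   so va_start sets

     gp_offset         = 1 * 8  =  8
     fp_offset         = 1 * 16 + 8 * 6 = 64
     overflow_arg_area = address of the first stack-passed argument
     reg_save_area     = start of the register save area

   matching the formulas n_gpr * 8 and n_fpr * 16 + 8 * X86_64_REGPARM_MAX
   used in the code.  */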
8378 static void
8379 ix86_va_start (tree valist, rtx nextarg)
8380 {
8381 HOST_WIDE_INT words, n_gpr, n_fpr;
8382 tree f_gpr, f_fpr, f_ovf, f_sav;
8383 tree gpr, fpr, ovf, sav, t;
8384 tree type;
8385 rtx ovf_rtx;
8386
8387 if (flag_split_stack
8388 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8389 {
8390 unsigned int scratch_regno;
8391
8392 /* When we are splitting the stack, we can't refer to the stack
8393 arguments using internal_arg_pointer, because they may be on
8394 the old stack. The split stack prologue will arrange to
8395 leave a pointer to the old stack arguments in a scratch
8396 register, which we here copy to a pseudo-register. The split
8397 stack prologue can't set the pseudo-register directly because
8398 it (the prologue) runs before any registers have been saved. */
8399
8400 scratch_regno = split_stack_prologue_scratch_regno ();
8401 if (scratch_regno != INVALID_REGNUM)
8402 {
8403 rtx reg, seq;
8404
8405 reg = gen_reg_rtx (Pmode);
8406 cfun->machine->split_stack_varargs_pointer = reg;
8407
8408 start_sequence ();
8409 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
8410 seq = get_insns ();
8411 end_sequence ();
8412
8413 push_topmost_sequence ();
8414 emit_insn_after (seq, entry_of_function ());
8415 pop_topmost_sequence ();
8416 }
8417 }
8418
8419 /* Only the 64-bit target needs something special. */
8420 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8421 {
8422 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8423 std_expand_builtin_va_start (valist, nextarg);
8424 else
8425 {
8426 rtx va_r, next;
8427
8428 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
8429 next = expand_binop (ptr_mode, add_optab,
8430 cfun->machine->split_stack_varargs_pointer,
8431 crtl->args.arg_offset_rtx,
8432 NULL_RTX, 0, OPTAB_LIB_WIDEN);
8433 convert_move (va_r, next, 0);
8434 }
8435 return;
8436 }
8437
8438 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8439 f_fpr = DECL_CHAIN (f_gpr);
8440 f_ovf = DECL_CHAIN (f_fpr);
8441 f_sav = DECL_CHAIN (f_ovf);
8442
8443 valist = build_simple_mem_ref (valist);
8444 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
8445 /* The following should be folded into the MEM_REF offset. */
8446 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
8447 f_gpr, NULL_TREE);
8448 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
8449 f_fpr, NULL_TREE);
8450 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
8451 f_ovf, NULL_TREE);
8452 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
8453 f_sav, NULL_TREE);
8454
8455 /* Count number of gp and fp argument registers used. */
8456 words = crtl->args.info.words;
8457 n_gpr = crtl->args.info.regno;
8458 n_fpr = crtl->args.info.sse_regno;
8459
8460 if (cfun->va_list_gpr_size)
8461 {
8462 type = TREE_TYPE (gpr);
8463 t = build2 (MODIFY_EXPR, type,
8464 gpr, build_int_cst (type, n_gpr * 8));
8465 TREE_SIDE_EFFECTS (t) = 1;
8466 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8467 }
8468
8469 if (TARGET_SSE && cfun->va_list_fpr_size)
8470 {
8471 type = TREE_TYPE (fpr);
8472 t = build2 (MODIFY_EXPR, type, fpr,
8473 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
8474 TREE_SIDE_EFFECTS (t) = 1;
8475 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8476 }
8477
8478 /* Find the overflow area. */
8479 type = TREE_TYPE (ovf);
8480 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8481 ovf_rtx = crtl->args.internal_arg_pointer;
8482 else
8483 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
8484 t = make_tree (type, ovf_rtx);
8485 if (words != 0)
8486 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
8487 t = build2 (MODIFY_EXPR, type, ovf, t);
8488 TREE_SIDE_EFFECTS (t) = 1;
8489 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8490
8491 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
8492 {
8493 /* Find the register save area.
8494 The function prologue saves it right above the stack frame. */
8495 type = TREE_TYPE (sav);
8496 t = make_tree (type, frame_pointer_rtx);
8497 if (!ix86_varargs_gpr_size)
8498 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
8499 t = build2 (MODIFY_EXPR, type, sav, t);
8500 TREE_SIDE_EFFECTS (t) = 1;
8501 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8502 }
8503 }
8504
8505 /* Implement va_arg. */
8506
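/* A hedged sketch, in pseudo C, of what the gimplification below produces
   for a single integer argument under the SysV AMD64 ABI (the real code
   also handles SSE classes, aggregates split across several registers,
   and over-aligned stack slots):

     if (ap->gp_offset >= 6 * 8)
       goto overflow;
     addr = ap->reg_save_area + ap->gp_offset;
     ap->gp_offset += 8;
     goto done;
   overflow:
     addr = ap->overflow_arg_area;
     ap->overflow_arg_area += 8;
   done:
     result = *(int *) addr;
*/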
8507 static tree
8508 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
8509 gimple_seq *post_p)
8510 {
8511 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
8512 tree f_gpr, f_fpr, f_ovf, f_sav;
8513 tree gpr, fpr, ovf, sav, t;
8514 int size, rsize;
8515 tree lab_false, lab_over = NULL_TREE;
8516 tree addr, t2;
8517 rtx container;
8518 int indirect_p = 0;
8519 tree ptrtype;
8520 enum machine_mode nat_mode;
8521 unsigned int arg_boundary;
8522
8523 /* Only the 64-bit target needs something special. */
8524 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8525 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
8526
8527 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8528 f_fpr = DECL_CHAIN (f_gpr);
8529 f_ovf = DECL_CHAIN (f_fpr);
8530 f_sav = DECL_CHAIN (f_ovf);
8531
8532 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
8533 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
8534 valist = build_va_arg_indirect_ref (valist);
8535 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
8536 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
8537 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
8538
8539 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
8540 if (indirect_p)
8541 type = build_pointer_type (type);
8542 size = int_size_in_bytes (type);
8543 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
8544
8545 nat_mode = type_natural_mode (type, NULL, false);
8546 switch (nat_mode)
8547 {
8548 case V8SFmode:
8549 case V8SImode:
8550 case V32QImode:
8551 case V16HImode:
8552 case V4DFmode:
8553 case V4DImode:
8554 case V16SFmode:
8555 case V16SImode:
8556 case V64QImode:
8557 case V32HImode:
8558 case V8DFmode:
8559 case V8DImode:
8560 /* Unnamed 256- and 512-bit vector mode parameters are passed on the stack. */
8561 if (!TARGET_64BIT_MS_ABI)
8562 {
8563 container = NULL;
8564 break;
8565 }
8566
8567 default:
8568 container = construct_container (nat_mode, TYPE_MODE (type),
8569 type, 0, X86_64_REGPARM_MAX,
8570 X86_64_SSE_REGPARM_MAX, intreg,
8571 0);
8572 break;
8573 }
8574
8575 /* Pull the value out of the saved registers. */
8576
8577 addr = create_tmp_var (ptr_type_node, "addr");
8578
8579 if (container)
8580 {
8581 int needed_intregs, needed_sseregs;
8582 bool need_temp;
8583 tree int_addr, sse_addr;
8584
8585 lab_false = create_artificial_label (UNKNOWN_LOCATION);
8586 lab_over = create_artificial_label (UNKNOWN_LOCATION);
8587
8588 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
8589
8590 need_temp = (!REG_P (container)
8591 && ((needed_intregs && TYPE_ALIGN (type) > 64)
8592 || TYPE_ALIGN (type) > 128));
8593
8594 /* In case we are passing a structure, verify that it is a consecutive
8595 block in the register save area. If not, we need to do moves. */
8596 if (!need_temp && !REG_P (container))
8597 {
8598 /* Verify that all registers are strictly consecutive. */
8599 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
8600 {
8601 int i;
8602
8603 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
8604 {
8605 rtx slot = XVECEXP (container, 0, i);
8606 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
8607 || INTVAL (XEXP (slot, 1)) != i * 16)
8608 need_temp = 1;
8609 }
8610 }
8611 else
8612 {
8613 int i;
8614
8615 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
8616 {
8617 rtx slot = XVECEXP (container, 0, i);
8618 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
8619 || INTVAL (XEXP (slot, 1)) != i * 8)
8620 need_temp = 1;
8621 }
8622 }
8623 }
8624 if (!need_temp)
8625 {
8626 int_addr = addr;
8627 sse_addr = addr;
8628 }
8629 else
8630 {
8631 int_addr = create_tmp_var (ptr_type_node, "int_addr");
8632 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
8633 }
8634
8635 /* First ensure that we fit completely in registers. */
8636 if (needed_intregs)
8637 {
8638 t = build_int_cst (TREE_TYPE (gpr),
8639 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
8640 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
8641 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
8642 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
8643 gimplify_and_add (t, pre_p);
8644 }
8645 if (needed_sseregs)
8646 {
8647 t = build_int_cst (TREE_TYPE (fpr),
8648 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
8649 + X86_64_REGPARM_MAX * 8);
8650 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
8651 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
8652 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
8653 gimplify_and_add (t, pre_p);
8654 }
8655
8656 /* Compute index to start of area used for integer regs. */
8657 if (needed_intregs)
8658 {
8659 /* int_addr = gpr + sav; */
8660 t = fold_build_pointer_plus (sav, gpr);
8661 gimplify_assign (int_addr, t, pre_p);
8662 }
8663 if (needed_sseregs)
8664 {
8665 /* sse_addr = fpr + sav; */
8666 t = fold_build_pointer_plus (sav, fpr);
8667 gimplify_assign (sse_addr, t, pre_p);
8668 }
8669 if (need_temp)
8670 {
8671 int i, prev_size = 0;
8672 tree temp = create_tmp_var (type, "va_arg_tmp");
8673
8674 /* addr = &temp; */
8675 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
8676 gimplify_assign (addr, t, pre_p);
8677
8678 for (i = 0; i < XVECLEN (container, 0); i++)
8679 {
8680 rtx slot = XVECEXP (container, 0, i);
8681 rtx reg = XEXP (slot, 0);
8682 enum machine_mode mode = GET_MODE (reg);
8683 tree piece_type;
8684 tree addr_type;
8685 tree daddr_type;
8686 tree src_addr, src;
8687 int src_offset;
8688 tree dest_addr, dest;
8689 int cur_size = GET_MODE_SIZE (mode);
8690
8691 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
8692 prev_size = INTVAL (XEXP (slot, 1));
8693 if (prev_size + cur_size > size)
8694 {
8695 cur_size = size - prev_size;
8696 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
8697 if (mode == BLKmode)
8698 mode = QImode;
8699 }
8700 piece_type = lang_hooks.types.type_for_mode (mode, 1);
8701 if (mode == GET_MODE (reg))
8702 addr_type = build_pointer_type (piece_type);
8703 else
8704 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8705 true);
8706 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8707 true);
8708
8709 if (SSE_REGNO_P (REGNO (reg)))
8710 {
8711 src_addr = sse_addr;
8712 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
8713 }
8714 else
8715 {
8716 src_addr = int_addr;
8717 src_offset = REGNO (reg) * 8;
8718 }
8719 src_addr = fold_convert (addr_type, src_addr);
8720 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
8721
8722 dest_addr = fold_convert (daddr_type, addr);
8723 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
8724 if (cur_size == GET_MODE_SIZE (mode))
8725 {
8726 src = build_va_arg_indirect_ref (src_addr);
8727 dest = build_va_arg_indirect_ref (dest_addr);
8728
8729 gimplify_assign (dest, src, pre_p);
8730 }
8731 else
8732 {
8733 tree copy
8734 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
8735 3, dest_addr, src_addr,
8736 size_int (cur_size));
8737 gimplify_and_add (copy, pre_p);
8738 }
8739 prev_size += cur_size;
8740 }
8741 }
8742
8743 if (needed_intregs)
8744 {
8745 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
8746 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
8747 gimplify_assign (gpr, t, pre_p);
8748 }
8749
8750 if (needed_sseregs)
8751 {
8752 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
8753 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
8754 gimplify_assign (fpr, t, pre_p);
8755 }
8756
8757 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
8758
8759 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
8760 }
8761
8762 /* ... otherwise out of the overflow area. */
8763
8764 /* When we align a parameter on the stack for the caller, if the
8765 parameter alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will
8766 be aligned at MAX_SUPPORTED_STACK_ALIGNMENT. We will match the
8767 callee here with the caller. */
8768 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
8769 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
8770 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
8771
8772 /* Care for on-stack alignment if needed. */
8773 if (arg_boundary <= 64 || size == 0)
8774 t = ovf;
8775 else
8776 {
8777 HOST_WIDE_INT align = arg_boundary / 8;
8778 t = fold_build_pointer_plus_hwi (ovf, align - 1);
8779 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
8780 build_int_cst (TREE_TYPE (t), -align));
8781 }
8782
8783 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
8784 gimplify_assign (addr, t, pre_p);
8785
8786 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
8787 gimplify_assign (unshare_expr (ovf), t, pre_p);
8788
8789 if (container)
8790 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
8791
8792 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
8793 addr = fold_convert (ptrtype, addr);
8794
8795 if (indirect_p)
8796 addr = build_va_arg_indirect_ref (addr);
8797 return build_va_arg_indirect_ref (addr);
8798 }
8799 \f
8800 /* Return true if OPNUM's MEM should be matched
8801 in movabs* patterns. */
8802
8803 bool
8804 ix86_check_movabs (rtx insn, int opnum)
8805 {
8806 rtx set, mem;
8807
8808 set = PATTERN (insn);
8809 if (GET_CODE (set) == PARALLEL)
8810 set = XVECEXP (set, 0, 0);
8811 gcc_assert (GET_CODE (set) == SET);
8812 mem = XEXP (set, opnum);
8813 while (GET_CODE (mem) == SUBREG)
8814 mem = SUBREG_REG (mem);
8815 gcc_assert (MEM_P (mem));
8816 return volatile_ok || !MEM_VOLATILE_P (mem);
8817 }
8818 \f
8819 /* Initialize the table of extra 80387 mathematical constants. */
8820
8821 static void
8822 init_ext_80387_constants (void)
8823 {
8824 static const char * cst[5] =
8825 {
8826 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
8827 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
8828 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
8829 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
8830 "3.1415926535897932385128089594061862044", /* 4: fldpi */
8831 };
8832 int i;
8833
8834 for (i = 0; i < 5; i++)
8835 {
8836 real_from_string (&ext_80387_constants_table[i], cst[i]);
8837 /* Ensure each constant is rounded to XFmode precision. */
8838 real_convert (&ext_80387_constants_table[i],
8839 XFmode, &ext_80387_constants_table[i]);
8840 }
8841
8842 ext_80387_constants_init = 1;
8843 }
8844
8845 /* Return non-zero if the constant is something that
8846 can be loaded with a special instruction. */
8847
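/* For reference, the value returned here selects the loading sequence used
   by standard_80387_constant_opcode below:
     1 fldz (+0.0), 2 fld1 (+1.0), 3 fldlg2, 4 fldln2, 5 fldl2e,
     6 fldl2t, 7 fldpi, 8 fldz;fchs (-0.0), 9 fld1;fchs (-1.0),
     0 no special constant, -1 not an 80387 floating-point constant.  */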
8848 int
8849 standard_80387_constant_p (rtx x)
8850 {
8851 enum machine_mode mode = GET_MODE (x);
8852
8853 REAL_VALUE_TYPE r;
8854
8855 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
8856 return -1;
8857
8858 if (x == CONST0_RTX (mode))
8859 return 1;
8860 if (x == CONST1_RTX (mode))
8861 return 2;
8862
8863 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8864
8865 /* For XFmode constants, try to find a special 80387 instruction when
8866 optimizing for size or on those CPUs that benefit from them. */
8867 if (mode == XFmode
8868 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
8869 {
8870 int i;
8871
8872 if (! ext_80387_constants_init)
8873 init_ext_80387_constants ();
8874
8875 for (i = 0; i < 5; i++)
8876 if (real_identical (&r, &ext_80387_constants_table[i]))
8877 return i + 3;
8878 }
8879
8880 /* A load of the constant -0.0 or -1.0 will be split as a
8881 fldz;fchs or fld1;fchs sequence. */
8882 if (real_isnegzero (&r))
8883 return 8;
8884 if (real_identical (&r, &dconstm1))
8885 return 9;
8886
8887 return 0;
8888 }
8889
8890 /* Return the opcode of the special instruction to be used to load
8891 the constant X. */
8892
8893 const char *
8894 standard_80387_constant_opcode (rtx x)
8895 {
8896 switch (standard_80387_constant_p (x))
8897 {
8898 case 1:
8899 return "fldz";
8900 case 2:
8901 return "fld1";
8902 case 3:
8903 return "fldlg2";
8904 case 4:
8905 return "fldln2";
8906 case 5:
8907 return "fldl2e";
8908 case 6:
8909 return "fldl2t";
8910 case 7:
8911 return "fldpi";
8912 case 8:
8913 case 9:
8914 return "#";
8915 default:
8916 gcc_unreachable ();
8917 }
8918 }
8919
8920 /* Return the CONST_DOUBLE representing the 80387 constant that is
8921 loaded by the specified special instruction. The argument IDX
8922 matches the return value from standard_80387_constant_p. */
8923
8924 rtx
8925 standard_80387_constant_rtx (int idx)
8926 {
8927 int i;
8928
8929 if (! ext_80387_constants_init)
8930 init_ext_80387_constants ();
8931
8932 switch (idx)
8933 {
8934 case 3:
8935 case 4:
8936 case 5:
8937 case 6:
8938 case 7:
8939 i = idx - 3;
8940 break;
8941
8942 default:
8943 gcc_unreachable ();
8944 }
8945
8946 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
8947 XFmode);
8948 }
8949
8950 /* Return 1 if X is all 0s and 2 if X is all 1s
8951 in a supported SSE/AVX vector mode. */
8952
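/* These two constants are special because they can be materialized without
   a constant-pool load: all-zeros with a (dependency-breaking) xor of a
   register with itself, and all-ones with a pcmpeq of a register with
   itself; see standard_sse_constant_opcode below.  */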
8953 int
8954 standard_sse_constant_p (rtx x)
8955 {
8956 enum machine_mode mode = GET_MODE (x);
8957
8958 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
8959 return 1;
8960 if (vector_all_ones_operand (x, mode))
8961 switch (mode)
8962 {
8963 case V16QImode:
8964 case V8HImode:
8965 case V4SImode:
8966 case V2DImode:
8967 if (TARGET_SSE2)
8968 return 2;
8969 case V32QImode:
8970 case V16HImode:
8971 case V8SImode:
8972 case V4DImode:
8973 if (TARGET_AVX2)
8974 return 2;
8975 case V64QImode:
8976 case V32HImode:
8977 case V16SImode:
8978 case V8DImode:
8979 if (TARGET_AVX512F)
8980 return 2;
8981 default:
8982 break;
8983 }
8984
8985 return 0;
8986 }
8987
8988 /* Return the opcode of the special instruction to be used to load
8989 the constant X. */
8990
8991 const char *
8992 standard_sse_constant_opcode (rtx insn, rtx x)
8993 {
8994 switch (standard_sse_constant_p (x))
8995 {
8996 case 1:
8997 switch (get_attr_mode (insn))
8998 {
8999 case MODE_XI:
9000 case MODE_V16SF:
9001 return "vpxord\t%g0, %g0, %g0";
9002 case MODE_V8DF:
9003 return "vpxorq\t%g0, %g0, %g0";
9004 case MODE_TI:
9005 return "%vpxor\t%0, %d0";
9006 case MODE_V2DF:
9007 return "%vxorpd\t%0, %d0";
9008 case MODE_V4SF:
9009 return "%vxorps\t%0, %d0";
9010
9011 case MODE_OI:
9012 return "vpxor\t%x0, %x0, %x0";
9013 case MODE_V4DF:
9014 return "vxorpd\t%x0, %x0, %x0";
9015 case MODE_V8SF:
9016 return "vxorps\t%x0, %x0, %x0";
9017
9018 default:
9019 break;
9020 }
9021
9022 case 2:
9023 if (get_attr_mode (insn) == MODE_XI
9024 || get_attr_mode (insn) == MODE_V8DF
9025 || get_attr_mode (insn) == MODE_V16SF)
9026 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
9027 if (TARGET_AVX)
9028 return "vpcmpeqd\t%0, %0, %0";
9029 else
9030 return "pcmpeqd\t%0, %0";
9031
9032 default:
9033 break;
9034 }
9035 gcc_unreachable ();
9036 }
9037
9038 /* Returns true if OP contains a symbol reference. */
9039
9040 bool
9041 symbolic_reference_mentioned_p (rtx op)
9042 {
9043 const char *fmt;
9044 int i;
9045
9046 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
9047 return true;
9048
9049 fmt = GET_RTX_FORMAT (GET_CODE (op));
9050 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
9051 {
9052 if (fmt[i] == 'E')
9053 {
9054 int j;
9055
9056 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
9057 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
9058 return true;
9059 }
9060
9061 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
9062 return true;
9063 }
9064
9065 return false;
9066 }
9067
9068 /* Return true if it is appropriate to emit `ret' instructions in the
9069 body of a function. Do this only if the epilogue is simple, needing a
9070 couple of insns. Prior to reloading, we can't tell how many registers
9071 must be saved, so return false then. Return false if there is no frame
9072 marker to de-allocate. */
9073
9074 bool
9075 ix86_can_use_return_insn_p (void)
9076 {
9077 struct ix86_frame frame;
9078
9079 if (! reload_completed || frame_pointer_needed)
9080 return 0;
9081
9082 /* Don't allow more than 32k pop, since that's all we can do
9083 with one instruction. */
9084 if (crtl->args.pops_args && crtl->args.size >= 32768)
9085 return 0;
9086
9087 ix86_compute_frame_layout (&frame);
9088 return (frame.stack_pointer_offset == UNITS_PER_WORD
9089 && (frame.nregs + frame.nsseregs) == 0);
9090 }
9091 \f
9092 /* Value should be nonzero if functions must have frame pointers.
9093 Zero means the frame pointer need not be set up (and parms may
9094 be accessed via the stack pointer) in functions that seem suitable. */
9095
9096 static bool
9097 ix86_frame_pointer_required (void)
9098 {
9099 /* If we accessed previous frames, then the generated code expects
9100 to be able to access the saved ebp value in our frame. */
9101 if (cfun->machine->accesses_prev_frame)
9102 return true;
9103
9104 /* Several x86 OSes need a frame pointer for other reasons,
9105 usually pertaining to setjmp. */
9106 if (SUBTARGET_FRAME_POINTER_REQUIRED)
9107 return true;
9108
9109 /* For older 32-bit runtimes setjmp requires a valid frame pointer. */
9110 if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
9111 return true;
9112
9113 /* Under Win64 SEH, very large frames need a frame pointer, as the maximum
9114 stack allocation is 4GB. */
9115 if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
9116 return true;
9117
9118 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
9119 turns off the frame pointer by default. Turn it back on now if
9120 we've not got a leaf function. */
9121 if (TARGET_OMIT_LEAF_FRAME_POINTER
9122 && (!crtl->is_leaf
9123 || ix86_current_function_calls_tls_descriptor))
9124 return true;
9125
9126 if (crtl->profile && !flag_fentry)
9127 return true;
9128
9129 return false;
9130 }
9131
9132 /* Record that the current function accesses previous call frames. */
9133
9134 void
9135 ix86_setup_frame_addresses (void)
9136 {
9137 cfun->machine->accesses_prev_frame = 1;
9138 }
9139 \f
9140 #ifndef USE_HIDDEN_LINKONCE
9141 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
9142 # define USE_HIDDEN_LINKONCE 1
9143 # else
9144 # define USE_HIDDEN_LINKONCE 0
9145 # endif
9146 #endif
9147
9148 static int pic_labels_used;
9149
9150 /* Fills in the label name that should be used for a pc thunk for
9151 the given register. */
9152
9153 static void
9154 get_pc_thunk_name (char name[32], unsigned int regno)
9155 {
9156 gcc_assert (!TARGET_64BIT);
9157
9158 if (USE_HIDDEN_LINKONCE)
9159 sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
9160 else
9161 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
9162 }
9163
9164
9165 /* This function generates for -fpic the __x86.get_pc_thunk functions: each
9166 loads its register with the return address of the caller and then returns. */
9167
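/* Roughly, for the %ebx case the emitted thunk looks like (illustrative
   AT&T syntax, ignoring the optional short-function NOP padding):

     __x86.get_pc_thunk.bx:
             movl    (%esp), %ebx
             ret
*/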
9168 static void
9169 ix86_code_end (void)
9170 {
9171 rtx xops[2];
9172 int regno;
9173
9174 for (regno = AX_REG; regno <= SP_REG; regno++)
9175 {
9176 char name[32];
9177 tree decl;
9178
9179 if (!(pic_labels_used & (1 << regno)))
9180 continue;
9181
9182 get_pc_thunk_name (name, regno);
9183
9184 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
9185 get_identifier (name),
9186 build_function_type_list (void_type_node, NULL_TREE));
9187 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
9188 NULL_TREE, void_type_node);
9189 TREE_PUBLIC (decl) = 1;
9190 TREE_STATIC (decl) = 1;
9191 DECL_IGNORED_P (decl) = 1;
9192
9193 #if TARGET_MACHO
9194 if (TARGET_MACHO)
9195 {
9196 switch_to_section (darwin_sections[text_coal_section]);
9197 fputs ("\t.weak_definition\t", asm_out_file);
9198 assemble_name (asm_out_file, name);
9199 fputs ("\n\t.private_extern\t", asm_out_file);
9200 assemble_name (asm_out_file, name);
9201 putc ('\n', asm_out_file);
9202 ASM_OUTPUT_LABEL (asm_out_file, name);
9203 DECL_WEAK (decl) = 1;
9204 }
9205 else
9206 #endif
9207 if (USE_HIDDEN_LINKONCE)
9208 {
9209 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
9210
9211 targetm.asm_out.unique_section (decl, 0);
9212 switch_to_section (get_named_section (decl, NULL, 0));
9213
9214 targetm.asm_out.globalize_label (asm_out_file, name);
9215 fputs ("\t.hidden\t", asm_out_file);
9216 assemble_name (asm_out_file, name);
9217 putc ('\n', asm_out_file);
9218 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
9219 }
9220 else
9221 {
9222 switch_to_section (text_section);
9223 ASM_OUTPUT_LABEL (asm_out_file, name);
9224 }
9225
9226 DECL_INITIAL (decl) = make_node (BLOCK);
9227 current_function_decl = decl;
9228 init_function_start (decl);
9229 first_function_block_is_cold = false;
9230 /* Make sure unwind info is emitted for the thunk if needed. */
9231 final_start_function (emit_barrier (), asm_out_file, 1);
9232
9233 /* Pad stack IP move with 4 instructions (two NOPs count
9234 as one instruction). */
9235 if (TARGET_PAD_SHORT_FUNCTION)
9236 {
9237 int i = 8;
9238
9239 while (i--)
9240 fputs ("\tnop\n", asm_out_file);
9241 }
9242
9243 xops[0] = gen_rtx_REG (Pmode, regno);
9244 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
9245 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
9246 fputs ("\tret\n", asm_out_file);
9247 final_end_function ();
9248 init_insn_lengths ();
9249 free_after_compilation (cfun);
9250 set_cfun (NULL);
9251 current_function_decl = NULL;
9252 }
9253
9254 if (flag_split_stack)
9255 file_end_indicate_split_stack ();
9256 }
9257
9258 /* Emit code for the SET_GOT patterns. */
9259
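/* In the common -fpic case the sequence emitted below boils down to
   (sketch, AT&T syntax, for %ebx as the PIC register):

     call    __x86.get_pc_thunk.bx
     addl    $_GLOBAL_OFFSET_TABLE_, %ebx

   with the thunk from ix86_code_end supplying the address of the insn
   following the call.  */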
9260 const char *
9261 output_set_got (rtx dest, rtx label)
9262 {
9263 rtx xops[3];
9264
9265 xops[0] = dest;
9266
9267 if (TARGET_VXWORKS_RTP && flag_pic)
9268 {
9269 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
9270 xops[2] = gen_rtx_MEM (Pmode,
9271 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
9272 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
9273
9274 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
9275 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
9276 an unadorned address. */
9277 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
9278 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
9279 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
9280 return "";
9281 }
9282
9283 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
9284
9285 if (!flag_pic)
9286 {
9287 if (TARGET_MACHO)
9288 /* We don't need a pic base, we're not producing pic. */
9289 gcc_unreachable ();
9290
9291 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
9292 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
9293 targetm.asm_out.internal_label (asm_out_file, "L",
9294 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
9295 }
9296 else
9297 {
9298 char name[32];
9299 get_pc_thunk_name (name, REGNO (dest));
9300 pic_labels_used |= 1 << REGNO (dest);
9301
9302 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
9303 xops[2] = gen_rtx_MEM (QImode, xops[2]);
9304 output_asm_insn ("call\t%X2", xops);
9305
9306 #if TARGET_MACHO
9307 /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
9308 This is what will be referenced by the Mach-O PIC subsystem. */
9309 if (machopic_should_output_picbase_label () || !label)
9310 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
9311
9312 /* When we are restoring the pic base at the site of a nonlocal label,
9313 and we decided to emit the pic base above, we will still output a
9314 local label used for calculating the correction offset (even though
9315 the offset will be 0 in that case). */
9316 if (label)
9317 targetm.asm_out.internal_label (asm_out_file, "L",
9318 CODE_LABEL_NUMBER (label));
9319 #endif
9320 }
9321
9322 if (!TARGET_MACHO)
9323 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
9324
9325 return "";
9326 }
9327
9328 /* Generate a "push" pattern for input ARG. */
9329
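/* The insn built here is essentially (in word_mode, DImode shown):

     (set (mem:DI (pre_dec:DI (reg:DI sp))) (reg:DI arg))

   while also bumping the tracked CFA/SP offsets by UNITS_PER_WORD.  */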
9330 static rtx
9331 gen_push (rtx arg)
9332 {
9333 struct machine_function *m = cfun->machine;
9334
9335 if (m->fs.cfa_reg == stack_pointer_rtx)
9336 m->fs.cfa_offset += UNITS_PER_WORD;
9337 m->fs.sp_offset += UNITS_PER_WORD;
9338
9339 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9340 arg = gen_rtx_REG (word_mode, REGNO (arg));
9341
9342 return gen_rtx_SET (VOIDmode,
9343 gen_rtx_MEM (word_mode,
9344 gen_rtx_PRE_DEC (Pmode,
9345 stack_pointer_rtx)),
9346 arg);
9347 }
9348
9349 /* Generate a "pop" pattern for input ARG. */
9350
9351 static rtx
9352 gen_pop (rtx arg)
9353 {
9354 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9355 arg = gen_rtx_REG (word_mode, REGNO (arg));
9356
9357 return gen_rtx_SET (VOIDmode,
9358 arg,
9359 gen_rtx_MEM (word_mode,
9360 gen_rtx_POST_INC (Pmode,
9361 stack_pointer_rtx)));
9362 }
9363
9364 /* Return the regno of an unused call-clobbered register available for the
9365 entire function, or INVALID_REGNUM if there is none. */
9366
9367 static unsigned int
9368 ix86_select_alt_pic_regnum (void)
9369 {
9370 if (crtl->is_leaf
9371 && !crtl->profile
9372 && !ix86_current_function_calls_tls_descriptor)
9373 {
9374 int i, drap;
9375 /* Can't use the same register for both PIC and DRAP. */
9376 if (crtl->drap_reg)
9377 drap = REGNO (crtl->drap_reg);
9378 else
9379 drap = -1;
9380 for (i = 2; i >= 0; --i)
9381 if (i != drap && !df_regs_ever_live_p (i))
9382 return i;
9383 }
9384
9385 return INVALID_REGNUM;
9386 }
9387
9388 /* Return TRUE if we need to save REGNO. */
9389
9390 static bool
9391 ix86_save_reg (unsigned int regno, bool maybe_eh_return)
9392 {
9393 if (pic_offset_table_rtx
9394 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
9395 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
9396 || crtl->profile
9397 || crtl->calls_eh_return
9398 || crtl->uses_const_pool
9399 || cfun->has_nonlocal_label))
9400 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
9401
9402 if (crtl->calls_eh_return && maybe_eh_return)
9403 {
9404 unsigned i;
9405 for (i = 0; ; i++)
9406 {
9407 unsigned test = EH_RETURN_DATA_REGNO (i);
9408 if (test == INVALID_REGNUM)
9409 break;
9410 if (test == regno)
9411 return true;
9412 }
9413 }
9414
9415 if (crtl->drap_reg
9416 && regno == REGNO (crtl->drap_reg)
9417 && !cfun->machine->no_drap_save_restore)
9418 return true;
9419
9420 return (df_regs_ever_live_p (regno)
9421 && !call_used_regs[regno]
9422 && !fixed_regs[regno]
9423 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
9424 }
9425
9426 /* Return the number of saved general purpose registers. */
9427
9428 static int
9429 ix86_nsaved_regs (void)
9430 {
9431 int nregs = 0;
9432 int regno;
9433
9434 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9435 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9436 nregs ++;
9437 return nregs;
9438 }
9439
9440 /* Return the number of saved SSE registers. */
9441
9442 static int
9443 ix86_nsaved_sseregs (void)
9444 {
9445 int nregs = 0;
9446 int regno;
9447
9448 if (!TARGET_64BIT_MS_ABI)
9449 return 0;
9450 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9451 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9452 nregs ++;
9453 return nregs;
9454 }
9455
9456 /* Given FROM and TO register numbers, say whether this elimination is
9457 allowed. If stack alignment is needed, we can only replace argument
9458 pointer with hard frame pointer, or replace frame pointer with stack
9459 pointer. Otherwise, frame pointer elimination is automatically
9460 handled and all other eliminations are valid. */
9461
9462 static bool
9463 ix86_can_eliminate (const int from, const int to)
9464 {
9465 if (stack_realign_fp)
9466 return ((from == ARG_POINTER_REGNUM
9467 && to == HARD_FRAME_POINTER_REGNUM)
9468 || (from == FRAME_POINTER_REGNUM
9469 && to == STACK_POINTER_REGNUM));
9470 else
9471 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
9472 }
9473
9474 /* Return the offset between two registers, one to be eliminated, and the other
9475 its replacement, at the start of a routine. */
9476
9477 HOST_WIDE_INT
9478 ix86_initial_elimination_offset (int from, int to)
9479 {
9480 struct ix86_frame frame;
9481 ix86_compute_frame_layout (&frame);
9482
9483 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
9484 return frame.hard_frame_pointer_offset;
9485 else if (from == FRAME_POINTER_REGNUM
9486 && to == HARD_FRAME_POINTER_REGNUM)
9487 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
9488 else
9489 {
9490 gcc_assert (to == STACK_POINTER_REGNUM);
9491
9492 if (from == ARG_POINTER_REGNUM)
9493 return frame.stack_pointer_offset;
9494
9495 gcc_assert (from == FRAME_POINTER_REGNUM);
9496 return frame.stack_pointer_offset - frame.frame_pointer_offset;
9497 }
9498 }
9499
9500 /* In a dynamically-aligned function, we can't know the offset from
9501 stack pointer to frame pointer, so we must ensure that setjmp
9502 eliminates fp against the hard fp (%ebp) rather than trying to
9503 index from %esp up to the top of the frame across a gap that is
9504 of unknown (at compile-time) size. */
9505 static rtx
9506 ix86_builtin_setjmp_frame_value (void)
9507 {
9508 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
9509 }
9510
9511 /* When using -fsplit-stack, the allocation routines set a field in
9512 the TCB to the bottom of the stack plus this much space, measured
9513 in bytes. */
9514
9515 #define SPLIT_STACK_AVAILABLE 256
9516
9517 /* Fill structure ix86_frame describing the frame of the current function. */
9518
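/* A hedged sketch of the layout computed below, top (the CFA) to bottom:

     return address (plus pushed static chain, if any)
     saved frame pointer                      -> hard_frame_pointer_offset
     pushed call-saved GPRs                   -> reg_save_offset
     aligned SSE register save area (Win64)   -> sse_reg_save_offset
     va_arg register save area                -> frame_pointer_offset
     local variables and spill slots
     outgoing argument block                  -> stack_pointer_offset

   All offsets are measured from the CFA; the red zone, when usable, is
   subtracted from stack_pointer_offset at the end.  */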
9519 static void
9520 ix86_compute_frame_layout (struct ix86_frame *frame)
9521 {
9522 unsigned HOST_WIDE_INT stack_alignment_needed;
9523 HOST_WIDE_INT offset;
9524 unsigned HOST_WIDE_INT preferred_alignment;
9525 HOST_WIDE_INT size = get_frame_size ();
9526 HOST_WIDE_INT to_allocate;
9527
9528 frame->nregs = ix86_nsaved_regs ();
9529 frame->nsseregs = ix86_nsaved_sseregs ();
9530
9531 /* The 64-bit MS ABI seems to require stack alignment to always be 16, except
9532 for function prologues and leaf functions. */
9533 if ((TARGET_64BIT_MS_ABI && crtl->preferred_stack_boundary < 128)
9534 && (!crtl->is_leaf || cfun->calls_alloca != 0
9535 || ix86_current_function_calls_tls_descriptor))
9536 {
9537 crtl->preferred_stack_boundary = 128;
9538 crtl->stack_alignment_needed = 128;
9539 }
9540 /* preferred_stack_boundary is never updated for calls
9541 expanded from a tls descriptor. Update it here. We don't update it in
9542 the expand stage because, according to the comments before
9543 ix86_current_function_calls_tls_descriptor, tls calls may be optimized
9544 away. */
9545 else if (ix86_current_function_calls_tls_descriptor
9546 && crtl->preferred_stack_boundary < PREFERRED_STACK_BOUNDARY)
9547 {
9548 crtl->preferred_stack_boundary = PREFERRED_STACK_BOUNDARY;
9549 if (crtl->stack_alignment_needed < PREFERRED_STACK_BOUNDARY)
9550 crtl->stack_alignment_needed = PREFERRED_STACK_BOUNDARY;
9551 }
9552
9553 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
9554 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
9555
9556 gcc_assert (!size || stack_alignment_needed);
9557 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
9558 gcc_assert (preferred_alignment <= stack_alignment_needed);
9559
9560 /* For SEH we have to limit the amount of code movement into the prologue.
9561 At present we do this via a BLOCKAGE, at which point there's very little
9562 scheduling that can be done, which means that there's very little point
9563 in doing anything except PUSHs. */
9564 if (TARGET_SEH)
9565 cfun->machine->use_fast_prologue_epilogue = false;
9566
9567 /* During reload iteration the number of registers saved can change.
9568 Recompute the value as needed. Do not recompute when the number of
9569 registers didn't change, as reload does multiple calls to the function
9570 and does not expect the decision to change within a single iteration. */
9571 else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun))
9572 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
9573 {
9574 int count = frame->nregs;
9575 struct cgraph_node *node = cgraph_node::get (current_function_decl);
9576
9577 cfun->machine->use_fast_prologue_epilogue_nregs = count;
9578
9579 /* The fast prologue uses move instead of push to save registers. This
9580 is significantly longer, but also executes faster as modern hardware
9581 can execute the moves in parallel, but can't do that for push/pop.
9582
9583 Be careful about choosing what prologue to emit: when the function takes
9584 many instructions to execute we may use the slow version, as well as
9585 when the function is known to be outside a hot spot (this is known with
9586 feedback only). Weight the size of the function by the number of
9587 registers to save, as it is cheap to use one or two push instructions
9588 but very slow to use many of them. */
9589 if (count)
9590 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
9591 if (node->frequency < NODE_FREQUENCY_NORMAL
9592 || (flag_branch_probabilities
9593 && node->frequency < NODE_FREQUENCY_HOT))
9594 cfun->machine->use_fast_prologue_epilogue = false;
9595 else
9596 cfun->machine->use_fast_prologue_epilogue
9597 = !expensive_function_p (count);
9598 }
9599
9600 frame->save_regs_using_mov
9601 = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
9602 /* If static stack checking is enabled and done with probes,
9603 the registers need to be saved before allocating the frame. */
9604 && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
9605
9606 /* Skip return address. */
9607 offset = UNITS_PER_WORD;
9608
9609 /* Skip pushed static chain. */
9610 if (ix86_static_chain_on_stack)
9611 offset += UNITS_PER_WORD;
9612
9613 /* Skip saved base pointer. */
9614 if (frame_pointer_needed)
9615 offset += UNITS_PER_WORD;
9616 frame->hfp_save_offset = offset;
9617
9618 /* The traditional frame pointer location is at the top of the frame. */
9619 frame->hard_frame_pointer_offset = offset;
9620
9621 /* Register save area */
9622 offset += frame->nregs * UNITS_PER_WORD;
9623 frame->reg_save_offset = offset;
9624
9625 /* On SEH target, registers are pushed just before the frame pointer
9626 location. */
9627 if (TARGET_SEH)
9628 frame->hard_frame_pointer_offset = offset;
9629
9630 /* Align and set SSE register save area. */
9631 if (frame->nsseregs)
9632 {
9633 /* The only ABI that has saved SSE registers (Win64) also has a
9634 16-byte aligned default stack, and thus we don't need to be
9635 within the re-aligned local stack frame to save them. */
9636 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
9637 offset = (offset + 16 - 1) & -16;
9638 offset += frame->nsseregs * 16;
9639 }
9640 frame->sse_reg_save_offset = offset;
9641
9642 /* The re-aligned stack starts here. Values before this point are not
9643 directly comparable with values below this point. In order to make
9644 sure that no value happens to be the same before and after, force
9645 the alignment computation below to add a non-zero value. */
9646 if (stack_realign_fp)
9647 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
9648
9649 /* Va-arg area */
9650 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
9651 offset += frame->va_arg_size;
9652
9653 /* Align start of frame for local function. */
9654 if (stack_realign_fp
9655 || offset != frame->sse_reg_save_offset
9656 || size != 0
9657 || !crtl->is_leaf
9658 || cfun->calls_alloca
9659 || ix86_current_function_calls_tls_descriptor)
9660 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
9661
9662 /* Frame pointer points here. */
9663 frame->frame_pointer_offset = offset;
9664
9665 offset += size;
9666
9667 /* Add the outgoing arguments area. Can be skipped if we eliminated
9668 all the function calls as dead code.
9669 Skipping is however impossible when the function calls alloca. The
9670 alloca expander assumes that the last crtl->outgoing_args_size bytes
9671 of the stack frame are unused. */
9672 if (ACCUMULATE_OUTGOING_ARGS
9673 && (!crtl->is_leaf || cfun->calls_alloca
9674 || ix86_current_function_calls_tls_descriptor))
9675 {
9676 offset += crtl->outgoing_args_size;
9677 frame->outgoing_arguments_size = crtl->outgoing_args_size;
9678 }
9679 else
9680 frame->outgoing_arguments_size = 0;
9681
9682 /* Align stack boundary. Only needed if we're calling another function
9683 or using alloca. */
9684 if (!crtl->is_leaf || cfun->calls_alloca
9685 || ix86_current_function_calls_tls_descriptor)
9686 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
9687
9688 /* We've reached end of stack frame. */
9689 frame->stack_pointer_offset = offset;
9690
9691 /* Size prologue needs to allocate. */
9692 to_allocate = offset - frame->sse_reg_save_offset;
9693
9694 if ((!to_allocate && frame->nregs <= 1)
9695 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
9696 frame->save_regs_using_mov = false;
9697
9698 if (ix86_using_red_zone ()
9699 && crtl->sp_is_unchanging
9700 && crtl->is_leaf
9701 && !ix86_current_function_calls_tls_descriptor)
9702 {
9703 frame->red_zone_size = to_allocate;
9704 if (frame->save_regs_using_mov)
9705 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
9706 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
9707 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
9708 }
9709 else
9710 frame->red_zone_size = 0;
9711 frame->stack_pointer_offset -= frame->red_zone_size;
9712
9713 /* The SEH frame pointer location is near the bottom of the frame.
9714 This is enforced by the fact that the difference between the
9715 stack pointer and the frame pointer is limited to 240 bytes in
9716 the unwind data structure. */
9717 if (TARGET_SEH)
9718 {
9719 HOST_WIDE_INT diff;
9720
9721 /* If we can leave the frame pointer where it is, do so. Also, returns
9722 the establisher frame for __builtin_frame_address (0). */
9723 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
9724 if (diff <= SEH_MAX_FRAME_SIZE
9725 && (diff > 240 || (diff & 15) != 0)
9726 && !crtl->accesses_prior_frames)
9727 {
9728 /* Ideally we'd determine what portion of the local stack frame
9729 (within the constraint of the lowest 240) is most heavily used.
9730 But without that complication, simply bias the frame pointer
9731 by 128 bytes so as to maximize the amount of the local stack
9732 frame that is addressable with 8-bit offsets. */
9733 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
9734 }
9735 }
9736 }
9737
9738 /* This is semi-inlined memory_address_length, but simplified
9739 since we know that we're always dealing with reg+offset, and
9740 to avoid having to create and discard all that rtl. */
9741
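/* A few illustrative cases of the length computed here, matching the x86
   ModRM/SIB encoding rules:

     0(%eax)     -> 0 bytes of displacement
     0(%ebp)     -> 1 byte  (EBP/R13 always need a displacement)
     8(%ecx)     -> 1 byte  (disp8)
     256(%ecx)   -> 4 bytes (disp32)
     8(%esp)     -> 2 bytes (disp8 plus the mandatory SIB byte)
*/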
9742 static inline int
9743 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
9744 {
9745 int len = 4;
9746
9747 if (offset == 0)
9748 {
9749 /* EBP and R13 cannot be encoded without an offset. */
9750 len = (regno == BP_REG || regno == R13_REG);
9751 }
9752 else if (IN_RANGE (offset, -128, 127))
9753 len = 1;
9754
9755 /* ESP and R12 must be encoded with a SIB byte. */
9756 if (regno == SP_REG || regno == R12_REG)
9757 len++;
9758
9759 return len;
9760 }
9761
9762 /* Return an RTX that points to CFA_OFFSET within the stack frame.
9763 The valid base registers are taken from CFUN->MACHINE->FS. */
9764
9765 static rtx
9766 choose_baseaddr (HOST_WIDE_INT cfa_offset)
9767 {
9768 const struct machine_function *m = cfun->machine;
9769 rtx base_reg = NULL;
9770 HOST_WIDE_INT base_offset = 0;
9771
9772 if (m->use_fast_prologue_epilogue)
9773 {
9774 /* Choose the base register most likely to allow the most scheduling
9775 opportunities. Generally FP is valid throughout the function,
9776 while DRAP must be reloaded within the epilogue. But choose either
9777 over the SP due to increased encoding size. */
9778
9779 if (m->fs.fp_valid)
9780 {
9781 base_reg = hard_frame_pointer_rtx;
9782 base_offset = m->fs.fp_offset - cfa_offset;
9783 }
9784 else if (m->fs.drap_valid)
9785 {
9786 base_reg = crtl->drap_reg;
9787 base_offset = 0 - cfa_offset;
9788 }
9789 else if (m->fs.sp_valid)
9790 {
9791 base_reg = stack_pointer_rtx;
9792 base_offset = m->fs.sp_offset - cfa_offset;
9793 }
9794 }
9795 else
9796 {
9797 HOST_WIDE_INT toffset;
9798 int len = 16, tlen;
9799
9800 /* Choose the base register with the smallest address encoding.
9801 With a tie, choose FP > DRAP > SP. */
9802 if (m->fs.sp_valid)
9803 {
9804 base_reg = stack_pointer_rtx;
9805 base_offset = m->fs.sp_offset - cfa_offset;
9806 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
9807 }
9808 if (m->fs.drap_valid)
9809 {
9810 toffset = 0 - cfa_offset;
9811 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
9812 if (tlen <= len)
9813 {
9814 base_reg = crtl->drap_reg;
9815 base_offset = toffset;
9816 len = tlen;
9817 }
9818 }
9819 if (m->fs.fp_valid)
9820 {
9821 toffset = m->fs.fp_offset - cfa_offset;
9822 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
9823 if (tlen <= len)
9824 {
9825 base_reg = hard_frame_pointer_rtx;
9826 base_offset = toffset;
9827 len = tlen;
9828 }
9829 }
9830 }
9831 gcc_assert (base_reg != NULL);
9832
9833 return plus_constant (Pmode, base_reg, base_offset);
9834 }
9835
9836 /* Emit code to save registers in the prologue. */
9837
9838 static void
9839 ix86_emit_save_regs (void)
9840 {
9841 unsigned int regno;
9842 rtx insn;
9843
9844 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
9845 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9846 {
9847 insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
9848 RTX_FRAME_RELATED_P (insn) = 1;
9849 }
9850 }
9851
9852 /* Emit a single register save at CFA - CFA_OFFSET. */
9853
9854 static void
9855 ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno,
9856 HOST_WIDE_INT cfa_offset)
9857 {
9858 struct machine_function *m = cfun->machine;
9859 rtx reg = gen_rtx_REG (mode, regno);
9860 rtx mem, addr, base, insn;
9861
9862 addr = choose_baseaddr (cfa_offset);
9863 mem = gen_frame_mem (mode, addr);
9864
9865 /* For SSE saves, we need to indicate the 128-bit alignment. */
9866 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
9867
9868 insn = emit_move_insn (mem, reg);
9869 RTX_FRAME_RELATED_P (insn) = 1;
9870
9871 base = addr;
9872 if (GET_CODE (base) == PLUS)
9873 base = XEXP (base, 0);
9874 gcc_checking_assert (REG_P (base));
9875
9876 /* When saving registers into a re-aligned local stack frame, avoid
9877 any tricky guessing by dwarf2out. */
9878 if (m->fs.realigned)
9879 {
9880 gcc_checking_assert (stack_realign_drap);
9881
9882 if (regno == REGNO (crtl->drap_reg))
9883 {
9884 /* A bit of a hack. We force the DRAP register to be saved in
9885 the re-aligned stack frame, which provides us with a copy
9886 of the CFA that will last past the prologue. Install it. */
9887 gcc_checking_assert (cfun->machine->fs.fp_valid);
9888 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
9889 cfun->machine->fs.fp_offset - cfa_offset);
9890 mem = gen_rtx_MEM (mode, addr);
9891 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
9892 }
9893 else
9894 {
9895 /* The frame pointer is a stable reference within the
9896 aligned frame. Use it. */
9897 gcc_checking_assert (cfun->machine->fs.fp_valid);
9898 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
9899 cfun->machine->fs.fp_offset - cfa_offset);
9900 mem = gen_rtx_MEM (mode, addr);
9901 add_reg_note (insn, REG_CFA_EXPRESSION,
9902 gen_rtx_SET (VOIDmode, mem, reg));
9903 }
9904 }
9905
9906 /* The memory may not be relative to the current CFA register,
9907 which means that we may need to generate a new pattern for
9908 use by the unwind info. */
9909 else if (base != m->fs.cfa_reg)
9910 {
9911 addr = plus_constant (Pmode, m->fs.cfa_reg,
9912 m->fs.cfa_offset - cfa_offset);
9913 mem = gen_rtx_MEM (mode, addr);
9914 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
9915 }
9916 }
9917
9918 /* Emit code to save registers using MOV insns.
9919 First register is stored at CFA - CFA_OFFSET. */
9920 static void
9921 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
9922 {
9923 unsigned int regno;
9924
9925 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9926 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9927 {
9928 ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
9929 cfa_offset -= UNITS_PER_WORD;
9930 }
9931 }
9932
9933 /* Emit code to save SSE registers using MOV insns.
9934 First register is stored at CFA - CFA_OFFSET. */
9935 static void
9936 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
9937 {
9938 unsigned int regno;
9939
9940 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9941 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9942 {
9943 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
9944 cfa_offset -= 16;
9945 }
9946 }
9947
9948 static GTY(()) rtx queued_cfa_restores;
9949
9950 /* Add a REG_CFA_RESTORE REG note to INSN, or queue it until the next stack
9951 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
9952 Don't add the note if the previously saved value will be left untouched
9953 within the stack red-zone till return, as unwinders can find the same
9954 value in the register and on the stack. */
9955
9956 static void
9957 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
9958 {
9959 if (!crtl->shrink_wrapped
9960 && cfa_offset <= cfun->machine->fs.red_zone_offset)
9961 return;
9962
9963 if (insn)
9964 {
9965 add_reg_note (insn, REG_CFA_RESTORE, reg);
9966 RTX_FRAME_RELATED_P (insn) = 1;
9967 }
9968 else
9969 queued_cfa_restores
9970 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
9971 }
9972
9973 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
9974
9975 static void
9976 ix86_add_queued_cfa_restore_notes (rtx insn)
9977 {
9978 rtx last;
9979 if (!queued_cfa_restores)
9980 return;
9981 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
9982 ;
9983 XEXP (last, 1) = REG_NOTES (insn);
9984 REG_NOTES (insn) = queued_cfa_restores;
9985 queued_cfa_restores = NULL_RTX;
9986 RTX_FRAME_RELATED_P (insn) = 1;
9987 }
9988
9989 /* Expand prologue or epilogue stack adjustment.
9990 The pattern exists to put a dependency on all ebp-based memory accesses.
9991 STYLE should be negative if instructions should be marked as frame related,
9992 zero if the %r11 register is live and cannot be freely used, and positive
9993 otherwise. */
9994
9995 static void
9996 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
9997 int style, bool set_cfa)
9998 {
9999 struct machine_function *m = cfun->machine;
10000 rtx insn;
10001 bool add_frame_related_expr = false;
10002
10003 if (Pmode == SImode)
10004 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
10005 else if (x86_64_immediate_operand (offset, DImode))
10006 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
10007 else
10008 {
10009 rtx tmp;
10010 /* r11 is used by indirect sibcall return as well, set before the
10011 epilogue and used after the epilogue. */
10012 if (style)
10013 tmp = gen_rtx_REG (DImode, R11_REG);
10014 else
10015 {
10016 gcc_assert (src != hard_frame_pointer_rtx
10017 && dest != hard_frame_pointer_rtx);
10018 tmp = hard_frame_pointer_rtx;
10019 }
10020 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
10021 if (style < 0)
10022 add_frame_related_expr = true;
10023
10024 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
10025 }
10026
10027 insn = emit_insn (insn);
10028 if (style >= 0)
10029 ix86_add_queued_cfa_restore_notes (insn);
10030
10031 if (set_cfa)
10032 {
10033 rtx r;
10034
10035 gcc_assert (m->fs.cfa_reg == src);
10036 m->fs.cfa_offset += INTVAL (offset);
10037 m->fs.cfa_reg = dest;
10038
10039 r = gen_rtx_PLUS (Pmode, src, offset);
10040 r = gen_rtx_SET (VOIDmode, dest, r);
10041 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
10042 RTX_FRAME_RELATED_P (insn) = 1;
10043 }
10044 else if (style < 0)
10045 {
10046 RTX_FRAME_RELATED_P (insn) = 1;
10047 if (add_frame_related_expr)
10048 {
10049 rtx r = gen_rtx_PLUS (Pmode, src, offset);
10050 r = gen_rtx_SET (VOIDmode, dest, r);
10051 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
10052 }
10053 }
10054
10055 if (dest == stack_pointer_rtx)
10056 {
10057 HOST_WIDE_INT ooffset = m->fs.sp_offset;
10058 bool valid = m->fs.sp_valid;
10059
10060 if (src == hard_frame_pointer_rtx)
10061 {
10062 valid = m->fs.fp_valid;
10063 ooffset = m->fs.fp_offset;
10064 }
10065 else if (src == crtl->drap_reg)
10066 {
10067 valid = m->fs.drap_valid;
10068 ooffset = 0;
10069 }
10070 else
10071 {
10072 /* Else there are two possibilities: SP itself, which we set
10073 up as the default above. Or EH_RETURN_STACKADJ_RTX, which is
10074 taken care of by hand along the eh_return path. */
10075 gcc_checking_assert (src == stack_pointer_rtx
10076 || offset == const0_rtx);
10077 }
10078
10079 m->fs.sp_offset = ooffset - INTVAL (offset);
10080 m->fs.sp_valid = valid;
10081 }
10082 }
10083
10084 /* Find an available register to be used as dynamic realign argument
10085 pointer register. Such a register will be written in the prologue and
10086 used at the beginning of the body, so it must not be
10087 1. parameter passing register.
10088 2. GOT pointer.
10089 We reuse the static-chain register if it is available. Otherwise,
10090 DI is used for i386 and R10 for x86-64; R13 is used instead when the
10091 function needs a static chain or may emit tail calls.
10092
10093 Return: the regno of chosen register. */
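/* For example (following the code below), a 32-bit cdecl function with
   regparm <= 2 and no static chain gets CX_REG, while a 64-bit function
   that needs a static chain or emits tail calls gets R13_REG.  */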
10094
10095 static unsigned int
10096 find_drap_reg (void)
10097 {
10098 tree decl = cfun->decl;
10099
10100 if (TARGET_64BIT)
10101 {
10102 /* Use R13 for a nested function or a function that needs a static
10103 chain. Since a function with tail calls may use any caller-saved
10104 register in the epilogue, DRAP must not use a caller-saved
10105 register in that case. */
10106 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10107 return R13_REG;
10108
10109 return R10_REG;
10110 }
10111 else
10112 {
10113 /* Use DI for a nested function or a function that needs a static
10114 chain. Since a function with tail calls may use any caller-saved
10115 register in the epilogue, DRAP must not use a caller-saved
10116 register in that case. */
10117 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10118 return DI_REG;
10119
10120 /* Reuse static chain register if it isn't used for parameter
10121 passing. */
10122 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
10123 {
10124 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
10125 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
10126 return CX_REG;
10127 }
10128 return DI_REG;
10129 }
10130 }
10131
10132 /* Return minimum incoming stack alignment. */
10133
10134 static unsigned int
10135 ix86_minimum_incoming_stack_boundary (bool sibcall)
10136 {
10137 unsigned int incoming_stack_boundary;
10138
10139 /* Prefer the one specified at command line. */
10140 if (ix86_user_incoming_stack_boundary)
10141 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
10142 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
10143 if -mstackrealign is used, this is not a sibcall check, and the
10144 estimated stack alignment is 128 bits. */
10145 else if (!sibcall
10146 && !TARGET_64BIT
10147 && ix86_force_align_arg_pointer
10148 && crtl->stack_alignment_estimated == 128)
10149 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10150 else
10151 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
10152
10153 /* Incoming stack alignment can be changed on individual functions
10154 via force_align_arg_pointer attribute. We use the smallest
10155 incoming stack boundary. */
10156 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
10157 && lookup_attribute (ix86_force_align_arg_pointer_string,
10158 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
10159 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10160
10161 /* The incoming stack frame has to be aligned at least at
10162 parm_stack_boundary. */
10163 if (incoming_stack_boundary < crtl->parm_stack_boundary)
10164 incoming_stack_boundary = crtl->parm_stack_boundary;
10165
10166 /* The stack at the entry of main is aligned by the runtime. We use the
10167 smallest incoming stack boundary. */
10168 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
10169 && DECL_NAME (current_function_decl)
10170 && MAIN_NAME_P (DECL_NAME (current_function_decl))
10171 && DECL_FILE_SCOPE_P (current_function_decl))
10172 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
10173
10174 return incoming_stack_boundary;
10175 }
10176
10177 /* Update incoming stack boundary and estimated stack alignment. */
10178
10179 static void
10180 ix86_update_stack_boundary (void)
10181 {
10182 ix86_incoming_stack_boundary
10183 = ix86_minimum_incoming_stack_boundary (false);
10184
10185 /* x86_64 vararg needs 16byte stack alignment for register save
10186 area. */
10187 if (TARGET_64BIT
10188 && cfun->stdarg
10189 && crtl->stack_alignment_estimated < 128)
10190 crtl->stack_alignment_estimated = 128;
10191 }
10192
10193 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
10194 needed or an rtx for DRAP otherwise. */
10195
10196 static rtx
10197 ix86_get_drap_rtx (void)
10198 {
10199 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
10200 crtl->need_drap = true;
10201
10202 if (stack_realign_drap)
10203 {
10204 /* Assign DRAP to vDRAP and return vDRAP. */
10205 unsigned int regno = find_drap_reg ();
10206 rtx drap_vreg;
10207 rtx arg_ptr;
10208 rtx seq, insn;
10209
10210 arg_ptr = gen_rtx_REG (Pmode, regno);
10211 crtl->drap_reg = arg_ptr;
10212
10213 start_sequence ();
10214 drap_vreg = copy_to_reg (arg_ptr);
10215 seq = get_insns ();
10216 end_sequence ();
10217
10218 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
10219 if (!optimize)
10220 {
10221 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
10222 RTX_FRAME_RELATED_P (insn) = 1;
10223 }
10224 return drap_vreg;
10225 }
10226 else
10227 return NULL;
10228 }
10229
10230 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
10231
10232 static rtx
10233 ix86_internal_arg_pointer (void)
10234 {
10235 return virtual_incoming_args_rtx;
10236 }
10237
10238 struct scratch_reg {
10239 rtx reg;
10240 bool saved;
10241 };
10242
10243 /* Return a short-lived scratch register for use on function entry.
10244 In 32-bit mode, it is valid only after the registers are saved
10245 in the prologue. This register must be released by means of
10246 release_scratch_register_on_entry once it is dead. */
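/* For instance, ix86_adjust_stack_and_probe and ix86_emit_probe_stack_range
   below obtain the register with get_scratch_register_on_entry (&sr), use
   sr.reg as the probe loop counter, and free it again with
   release_scratch_register_on_entry (&sr).  */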
10247
10248 static void
10249 get_scratch_register_on_entry (struct scratch_reg *sr)
10250 {
10251 int regno;
10252
10253 sr->saved = false;
10254
10255 if (TARGET_64BIT)
10256 {
10257 /* We always use R11 in 64-bit mode. */
10258 regno = R11_REG;
10259 }
10260 else
10261 {
10262 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
10263 bool fastcall_p
10264 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10265 bool thiscall_p
10266 = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10267 bool static_chain_p = DECL_STATIC_CHAIN (decl);
10268 int regparm = ix86_function_regparm (fntype, decl);
10269 int drap_regno
10270 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
10271
10272 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
10273 for the static chain register. */
10274 if ((regparm < 1 || (fastcall_p && !static_chain_p))
10275 && drap_regno != AX_REG)
10276 regno = AX_REG;
10277 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
10278 for the static chain register. */
10279 else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
10280 regno = AX_REG;
10281 else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
10282 regno = DX_REG;
10283 /* ecx is the static chain register. */
10284 else if (regparm < 3 && !fastcall_p && !thiscall_p
10285 && !static_chain_p
10286 && drap_regno != CX_REG)
10287 regno = CX_REG;
10288 else if (ix86_save_reg (BX_REG, true))
10289 regno = BX_REG;
10290 /* esi is the static chain register. */
10291 else if (!(regparm == 3 && static_chain_p)
10292 && ix86_save_reg (SI_REG, true))
10293 regno = SI_REG;
10294 else if (ix86_save_reg (DI_REG, true))
10295 regno = DI_REG;
10296 else
10297 {
10298 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
10299 sr->saved = true;
10300 }
10301 }
10302
10303 sr->reg = gen_rtx_REG (Pmode, regno);
10304 if (sr->saved)
10305 {
10306 rtx insn = emit_insn (gen_push (sr->reg));
10307 RTX_FRAME_RELATED_P (insn) = 1;
10308 }
10309 }
10310
10311 /* Release a scratch register obtained from the preceding function. */
10312
10313 static void
10314 release_scratch_register_on_entry (struct scratch_reg *sr)
10315 {
10316 if (sr->saved)
10317 {
10318 struct machine_function *m = cfun->machine;
10319 rtx x, insn = emit_insn (gen_pop (sr->reg));
10320
10321 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
10322 RTX_FRAME_RELATED_P (insn) = 1;
10323 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
10324 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
10325 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
10326 m->fs.sp_offset -= UNITS_PER_WORD;
10327 }
10328 }
10329
10330 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
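/* Typically 4096 bytes, since STACK_CHECK_PROBE_INTERVAL_EXP defaults to 12
   unless the target overrides it.  */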
10331
10332 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
10333
10334 static void
10335 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
10336 {
10337 /* We skip the probe for the first interval + a small dope of 4 words and
10338 probe that many bytes past the specified size to maintain a protection
10339 area at the bottom of the stack. */
10340 const int dope = 4 * UNITS_PER_WORD;
10341 rtx size_rtx = GEN_INT (size), last;
10342
10343 /* See if we have a constant small number of probes to generate. If so,
10344 that's the easy case. The run-time loop is made up of 11 insns in the
10345 generic case while the compile-time loop is made up of 3+2*(n-1) insns
10346 for n # of intervals. */
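/* A rough worked example, assuming a 4096-byte PROBE_INTERVAL: for a
   12288-byte allocation, n = 3, so the unrolled form costs
   3 + 2*(3-1) = 7 insns, less than the 11-insn run-time loop; for larger
   sizes the loop below wins.  */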
10347 if (size <= 5 * PROBE_INTERVAL)
10348 {
10349 HOST_WIDE_INT i, adjust;
10350 bool first_probe = true;
10351
10352 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
10353 values of N from 1 until it exceeds SIZE. If only one probe is
10354 needed, this will not generate any code. Then adjust and probe
10355 to PROBE_INTERVAL + SIZE. */
10356 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10357 {
10358 if (first_probe)
10359 {
10360 adjust = 2 * PROBE_INTERVAL + dope;
10361 first_probe = false;
10362 }
10363 else
10364 adjust = PROBE_INTERVAL;
10365
10366 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10367 plus_constant (Pmode, stack_pointer_rtx,
10368 -adjust)));
10369 emit_stack_probe (stack_pointer_rtx);
10370 }
10371
10372 if (first_probe)
10373 adjust = size + PROBE_INTERVAL + dope;
10374 else
10375 adjust = size + PROBE_INTERVAL - i;
10376
10377 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10378 plus_constant (Pmode, stack_pointer_rtx,
10379 -adjust)));
10380 emit_stack_probe (stack_pointer_rtx);
10381
10382 /* Adjust back to account for the additional first interval. */
10383 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10384 plus_constant (Pmode, stack_pointer_rtx,
10385 PROBE_INTERVAL + dope)));
10386 }
10387
10388 /* Otherwise, do the same as above, but in a loop. Note that we must be
10389 extra careful with variables wrapping around because we might be at
10390 the very top (or the very bottom) of the address space and we have
10391 to be able to handle this case properly; in particular, we use an
10392 equality test for the loop condition. */
10393 else
10394 {
10395 HOST_WIDE_INT rounded_size;
10396 struct scratch_reg sr;
10397
10398 get_scratch_register_on_entry (&sr);
10399
10400
10401 /* Step 1: round SIZE to the previous multiple of the interval. */
10402
10403 rounded_size = size & -PROBE_INTERVAL;
10404
10405
10406 /* Step 2: compute initial and final value of the loop counter. */
10407
10408 /* SP = SP_0 + PROBE_INTERVAL. */
10409 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10410 plus_constant (Pmode, stack_pointer_rtx,
10411 - (PROBE_INTERVAL + dope))));
10412
10413 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
10414 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
10415 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
10416 gen_rtx_PLUS (Pmode, sr.reg,
10417 stack_pointer_rtx)));
10418
10419
10420 /* Step 3: the loop
10421
10422 while (SP != LAST_ADDR)
10423 {
10424 SP = SP + PROBE_INTERVAL
10425 probe at SP
10426 }
10427
10428 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
10429 values of N from 1 until it is equal to ROUNDED_SIZE. */
10430
10431 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
10432
10433
10434 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
10435 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
10436
10437 if (size != rounded_size)
10438 {
10439 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10440 plus_constant (Pmode, stack_pointer_rtx,
10441 rounded_size - size)));
10442 emit_stack_probe (stack_pointer_rtx);
10443 }
10444
10445 /* Adjust back to account for the additional first interval. */
10446 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10447 plus_constant (Pmode, stack_pointer_rtx,
10448 PROBE_INTERVAL + dope)));
10449
10450 release_scratch_register_on_entry (&sr);
10451 }
10452
10453 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
10454
10455 /* Even if the stack pointer isn't the CFA register, we need to correctly
10456 describe the adjustments made to it, in particular differentiate the
10457 frame-related ones from the frame-unrelated ones. */
10458 if (size > 0)
10459 {
10460 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
10461 XVECEXP (expr, 0, 0)
10462 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10463 plus_constant (Pmode, stack_pointer_rtx, -size));
10464 XVECEXP (expr, 0, 1)
10465 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10466 plus_constant (Pmode, stack_pointer_rtx,
10467 PROBE_INTERVAL + dope + size));
10468 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
10469 RTX_FRAME_RELATED_P (last) = 1;
10470
10471 cfun->machine->fs.sp_offset += size;
10472 }
10473
10474 /* Make sure nothing is scheduled before we are done. */
10475 emit_insn (gen_blockage ());
10476 }
10477
10478 /* Adjust the stack pointer up to REG while probing it. */
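/* As a sketch, assuming 32-bit code, a PROBE_INTERVAL of 4096 and %eax as
   the scratch register holding LAST_ADDR, the emitted sequence is roughly:

	.LPSRL0:
		cmpl	%eax, %esp
		je	.LPSRE0
		subl	$4096, %esp
		orl	$0, (%esp)
		jmp	.LPSRL0
	.LPSRE0:

   i.e. the stack pointer is moved down one interval at a time and each new
   page is touched before the next adjustment.  */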
10479
10480 const char *
10481 output_adjust_stack_and_probe (rtx reg)
10482 {
10483 static int labelno = 0;
10484 char loop_lab[32], end_lab[32];
10485 rtx xops[2];
10486
10487 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
10488 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
10489
10490 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
10491
10492 /* Jump to END_LAB if SP == LAST_ADDR. */
10493 xops[0] = stack_pointer_rtx;
10494 xops[1] = reg;
10495 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
10496 fputs ("\tje\t", asm_out_file);
10497 assemble_name_raw (asm_out_file, end_lab);
10498 fputc ('\n', asm_out_file);
10499
10500 /* SP = SP + PROBE_INTERVAL. */
10501 xops[1] = GEN_INT (PROBE_INTERVAL);
10502 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
10503
10504 /* Probe at SP. */
10505 xops[1] = const0_rtx;
10506 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
10507
10508 fprintf (asm_out_file, "\tjmp\t");
10509 assemble_name_raw (asm_out_file, loop_lab);
10510 fputc ('\n', asm_out_file);
10511
10512 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
10513
10514 return "";
10515 }
10516
10517 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
10518 inclusive. These are offsets from the current stack pointer. */
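/* As an illustrative case, with FIRST equal to STACK_CHECK_PROTECT and
   SIZE equal to 3 * PROBE_INTERVAL, probes are emitted at
   sp - (FIRST + PROBE_INTERVAL), sp - (FIRST + 2 * PROBE_INTERVAL) and
   sp - (FIRST + SIZE).  */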
10519
10520 static void
10521 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
10522 {
10523 /* See if we have a constant small number of probes to generate. If so,
10524 that's the easy case. The run-time loop is made up of 7 insns in the
10525 generic case while the compile-time loop is made up of n insns for n #
10526 of intervals. */
10527 if (size <= 7 * PROBE_INTERVAL)
10528 {
10529 HOST_WIDE_INT i;
10530
10531 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
10532 it exceeds SIZE. If only one probe is needed, this will not
10533 generate any code. Then probe at FIRST + SIZE. */
10534 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10535 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
10536 -(first + i)));
10537
10538 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
10539 -(first + size)));
10540 }
10541
10542 /* Otherwise, do the same as above, but in a loop. Note that we must be
10543 extra careful with variables wrapping around because we might be at
10544 the very top (or the very bottom) of the address space and we have
10545 to be able to handle this case properly; in particular, we use an
10546 equality test for the loop condition. */
10547 else
10548 {
10549 HOST_WIDE_INT rounded_size, last;
10550 struct scratch_reg sr;
10551
10552 get_scratch_register_on_entry (&sr);
10553
10554
10555 /* Step 1: round SIZE to the previous multiple of the interval. */
10556
10557 rounded_size = size & -PROBE_INTERVAL;
10558
10559
10560 /* Step 2: compute initial and final value of the loop counter. */
10561
10562 /* TEST_OFFSET = FIRST. */
10563 emit_move_insn (sr.reg, GEN_INT (-first));
10564
10565 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
10566 last = first + rounded_size;
10567
10568
10569 /* Step 3: the loop
10570
10571 while (TEST_ADDR != LAST_ADDR)
10572 {
10573 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
10574 probe at TEST_ADDR
10575 }
10576
10577 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
10578 until it is equal to ROUNDED_SIZE. */
10579
10580 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
10581
10582
10583 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
10584 that SIZE is equal to ROUNDED_SIZE. */
10585
10586 if (size != rounded_size)
10587 emit_stack_probe (plus_constant (Pmode,
10588 gen_rtx_PLUS (Pmode,
10589 stack_pointer_rtx,
10590 sr.reg),
10591 rounded_size - size));
10592
10593 release_scratch_register_on_entry (&sr);
10594 }
10595
10596 /* Make sure nothing is scheduled before we are done. */
10597 emit_insn (gen_blockage ());
10598 }
10599
10600 /* Probe a range of stack addresses from REG to END, inclusive. These are
10601 offsets from the current stack pointer. */
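/* As a sketch, assuming 32-bit code, a PROBE_INTERVAL of 4096, %eax as
   TEST_ADDR and the LAST_ADDR operand shown here as %edx, the emitted loop
   is roughly:

	.LPSRL0:
		cmpl	%edx, %eax
		je	.LPSRE0
		subl	$4096, %eax
		orl	$0, (%esp,%eax)
		jmp	.LPSRL0
	.LPSRE0:  */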
10602
10603 const char *
10604 output_probe_stack_range (rtx reg, rtx end)
10605 {
10606 static int labelno = 0;
10607 char loop_lab[32], end_lab[32];
10608 rtx xops[3];
10609
10610 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
10611 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
10612
10613 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
10614
10615 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
10616 xops[0] = reg;
10617 xops[1] = end;
10618 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
10619 fputs ("\tje\t", asm_out_file);
10620 assemble_name_raw (asm_out_file, end_lab);
10621 fputc ('\n', asm_out_file);
10622
10623 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
10624 xops[1] = GEN_INT (PROBE_INTERVAL);
10625 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
10626
10627 /* Probe at TEST_ADDR. */
10628 xops[0] = stack_pointer_rtx;
10629 xops[1] = reg;
10630 xops[2] = const0_rtx;
10631 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
10632
10633 fprintf (asm_out_file, "\tjmp\t");
10634 assemble_name_raw (asm_out_file, loop_lab);
10635 fputc ('\n', asm_out_file);
10636
10637 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
10638
10639 return "";
10640 }
10641
10642 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
10643 to be generated in correct form. */
10644 static void
10645 ix86_finalize_stack_realign_flags (void)
10646 {
10647 /* Check if stack realignment is really needed after reload, and
10648 store the result in cfun. */
10649 unsigned int incoming_stack_boundary
10650 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
10651 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
10652 unsigned int stack_realign = (incoming_stack_boundary
10653 < (crtl->is_leaf
10654 ? crtl->max_used_stack_slot_alignment
10655 : crtl->stack_alignment_needed));
10656
10657 if (crtl->stack_realign_finalized)
10658 {
10659 /* After stack_realign_needed is finalized, we can no longer
10660 change it. */
10661 gcc_assert (crtl->stack_realign_needed == stack_realign);
10662 return;
10663 }
10664
10665 /* If the only reason for frame_pointer_needed is that we conservatively
10666 assumed stack realignment might be needed, but in the end nothing that
10667 needed the stack alignment had been spilled, clear frame_pointer_needed
10668 and say we don't need stack realignment. */
10669 if (stack_realign
10670 && frame_pointer_needed
10671 && crtl->is_leaf
10672 && flag_omit_frame_pointer
10673 && crtl->sp_is_unchanging
10674 && !ix86_current_function_calls_tls_descriptor
10675 && !crtl->accesses_prior_frames
10676 && !cfun->calls_alloca
10677 && !crtl->calls_eh_return
10678 && !(flag_stack_check && STACK_CHECK_MOVING_SP)
10679 && !ix86_frame_pointer_required ()
10680 && get_frame_size () == 0
10681 && ix86_nsaved_sseregs () == 0
10682 && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
10683 {
10684 HARD_REG_SET set_up_by_prologue, prologue_used;
10685 basic_block bb;
10686
10687 CLEAR_HARD_REG_SET (prologue_used);
10688 CLEAR_HARD_REG_SET (set_up_by_prologue);
10689 add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
10690 add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
10691 add_to_hard_reg_set (&set_up_by_prologue, Pmode,
10692 HARD_FRAME_POINTER_REGNUM);
10693 FOR_EACH_BB_FN (bb, cfun)
10694 {
10695 rtx insn;
10696 FOR_BB_INSNS (bb, insn)
10697 if (NONDEBUG_INSN_P (insn)
10698 && requires_stack_frame_p (insn, prologue_used,
10699 set_up_by_prologue))
10700 {
10701 crtl->stack_realign_needed = stack_realign;
10702 crtl->stack_realign_finalized = true;
10703 return;
10704 }
10705 }
10706
10707 /* If drap has been set, but it actually isn't live at the start
10708 of the function, there is no reason to set it up. */
10709 if (crtl->drap_reg)
10710 {
10711 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
10712 if (! REGNO_REG_SET_P (DF_LR_IN (bb), REGNO (crtl->drap_reg)))
10713 {
10714 crtl->drap_reg = NULL_RTX;
10715 crtl->need_drap = false;
10716 }
10717 }
10718 else
10719 cfun->machine->no_drap_save_restore = true;
10720
10721 frame_pointer_needed = false;
10722 stack_realign = false;
10723 crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
10724 crtl->stack_alignment_needed = incoming_stack_boundary;
10725 crtl->stack_alignment_estimated = incoming_stack_boundary;
10726 if (crtl->preferred_stack_boundary > incoming_stack_boundary)
10727 crtl->preferred_stack_boundary = incoming_stack_boundary;
10728 df_finish_pass (true);
10729 df_scan_alloc (NULL);
10730 df_scan_blocks ();
10731 df_compute_regs_ever_live (true);
10732 df_analyze ();
10733 }
10734
10735 crtl->stack_realign_needed = stack_realign;
10736 crtl->stack_realign_finalized = true;
10737 }
10738
10739 /* Expand the prologue into a bunch of separate insns. */
10740
10741 void
10742 ix86_expand_prologue (void)
10743 {
10744 struct machine_function *m = cfun->machine;
10745 rtx insn, t;
10746 bool pic_reg_used;
10747 struct ix86_frame frame;
10748 HOST_WIDE_INT allocate;
10749 bool int_registers_saved;
10750 bool sse_registers_saved;
10751
10752 ix86_finalize_stack_realign_flags ();
10753
10754 /* DRAP should not coexist with stack_realign_fp */
10755 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
10756
10757 memset (&m->fs, 0, sizeof (m->fs));
10758
10759 /* Initialize CFA state for before the prologue. */
10760 m->fs.cfa_reg = stack_pointer_rtx;
10761 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
10762
10763 /* Track SP offset to the CFA. We continue tracking this after we've
10764 swapped the CFA register away from SP. In the case of re-alignment
10765 this is fudged; we're interested in offsets within the local frame. */
10766 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10767 m->fs.sp_valid = true;
10768
10769 ix86_compute_frame_layout (&frame);
10770
10771 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
10772 {
10773 /* We should have already generated an error for any use of
10774 ms_hook on a nested function. */
10775 gcc_checking_assert (!ix86_static_chain_on_stack);
10776
10777 /* Check if profiling is active and whether we shall use the
10778 profiling-before-prologue variant. If so, issue a sorry. */
10779 if (crtl->profile && flag_fentry != 0)
10780 sorry ("ms_hook_prologue attribute isn%'t compatible "
10781 "with -mfentry for 32-bit");
10782
10783 /* In ix86_asm_output_function_label we emitted:
10784 8b ff movl.s %edi,%edi
10785 55 push %ebp
10786 8b ec movl.s %esp,%ebp
10787
10788 This matches the hookable function prologue in Win32 API
10789 functions in Microsoft Windows XP Service Pack 2 and newer.
10790 Wine uses this to enable Windows apps to hook the Win32 API
10791 functions provided by Wine.
10792
10793 What that means is that we've already set up the frame pointer. */
10794
10795 if (frame_pointer_needed
10796 && !(crtl->drap_reg && crtl->stack_realign_needed))
10797 {
10798 rtx push, mov;
10799
10800 /* We've decided to use the frame pointer already set up.
10801 Describe this to the unwinder by pretending that both
10802 push and mov insns happen right here.
10803
10804 Putting the unwind info here at the end of the ms_hook
10805 is done so that we can make absolutely certain we get
10806 the required byte sequence at the start of the function,
10807 rather than relying on an assembler that can produce
10808 the exact encoding required.
10809
10810 However it does mean (in the unpatched case) that we have
10811 a 1 insn window where the asynchronous unwind info is
10812 incorrect. However, if we placed the unwind info at
10813 its correct location we would have incorrect unwind info
10814 in the patched case. Which is probably all moot since
10815 I don't expect Wine generates dwarf2 unwind info for the
10816 system libraries that use this feature. */
10817
10818 insn = emit_insn (gen_blockage ());
10819
10820 push = gen_push (hard_frame_pointer_rtx);
10821 mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
10822 stack_pointer_rtx);
10823 RTX_FRAME_RELATED_P (push) = 1;
10824 RTX_FRAME_RELATED_P (mov) = 1;
10825
10826 RTX_FRAME_RELATED_P (insn) = 1;
10827 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10828 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
10829
10830 /* Note that gen_push incremented m->fs.cfa_offset, even
10831 though we didn't emit the push insn here. */
10832 m->fs.cfa_reg = hard_frame_pointer_rtx;
10833 m->fs.fp_offset = m->fs.cfa_offset;
10834 m->fs.fp_valid = true;
10835 }
10836 else
10837 {
10838 /* The frame pointer is not needed so pop %ebp again.
10839 This leaves us with a pristine state. */
10840 emit_insn (gen_pop (hard_frame_pointer_rtx));
10841 }
10842 }
10843
10844 /* The first insn of a function that accepts its static chain on the
10845 stack is to push the register that would be filled in by a direct
10846 call. This insn will be skipped by the trampoline. */
10847 else if (ix86_static_chain_on_stack)
10848 {
10849 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
10850 emit_insn (gen_blockage ());
10851
10852 /* We don't want to interpret this push insn as a register save,
10853 only as a stack adjustment. The real copy of the register as
10854 a save will be done later, if needed. */
10855 t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
10856 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
10857 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
10858 RTX_FRAME_RELATED_P (insn) = 1;
10859 }
10860
10861 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
10862 DRAP is needed and stack realignment is really needed after reload. */
10863 if (stack_realign_drap)
10864 {
10865 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10866
10867 /* Only need to push parameter pointer reg if it is caller saved. */
10868 if (!call_used_regs[REGNO (crtl->drap_reg)])
10869 {
10870 /* Push arg pointer reg */
10871 insn = emit_insn (gen_push (crtl->drap_reg));
10872 RTX_FRAME_RELATED_P (insn) = 1;
10873 }
10874
10875 /* Grab the argument pointer. */
10876 t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
10877 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
10878 RTX_FRAME_RELATED_P (insn) = 1;
10879 m->fs.cfa_reg = crtl->drap_reg;
10880 m->fs.cfa_offset = 0;
10881
10882 /* Align the stack. */
10883 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10884 stack_pointer_rtx,
10885 GEN_INT (-align_bytes)));
10886 RTX_FRAME_RELATED_P (insn) = 1;
10887
10888 /* Replicate the return address on the stack so that the return
10889 address can be reached via the (argp - 1) slot. This is needed
10890 to implement the macro RETURN_ADDR_RTX and the intrinsic function
10891 expand_builtin_return_addr, etc. */
10892 t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
10893 t = gen_frame_mem (word_mode, t);
10894 insn = emit_insn (gen_push (t));
10895 RTX_FRAME_RELATED_P (insn) = 1;
10896
10897 /* For the purposes of frame and register save area addressing,
10898 we've started over with a new frame. */
10899 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10900 m->fs.realigned = true;
10901 }
10902
10903 int_registers_saved = (frame.nregs == 0);
10904 sse_registers_saved = (frame.nsseregs == 0);
10905
10906 if (frame_pointer_needed && !m->fs.fp_valid)
10907 {
10908 /* Note: AT&T enter does NOT have reversed args. Enter is probably
10909 slower on all targets. Also sdb doesn't like it. */
10910 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
10911 RTX_FRAME_RELATED_P (insn) = 1;
10912
10913 /* Push registers now, before setting the frame pointer
10914 on SEH target. */
10915 if (!int_registers_saved
10916 && TARGET_SEH
10917 && !frame.save_regs_using_mov)
10918 {
10919 ix86_emit_save_regs ();
10920 int_registers_saved = true;
10921 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
10922 }
10923
10924 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
10925 {
10926 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
10927 RTX_FRAME_RELATED_P (insn) = 1;
10928
10929 if (m->fs.cfa_reg == stack_pointer_rtx)
10930 m->fs.cfa_reg = hard_frame_pointer_rtx;
10931 m->fs.fp_offset = m->fs.sp_offset;
10932 m->fs.fp_valid = true;
10933 }
10934 }
10935
10936 if (!int_registers_saved)
10937 {
10938 /* If saving registers via PUSH, do so now. */
10939 if (!frame.save_regs_using_mov)
10940 {
10941 ix86_emit_save_regs ();
10942 int_registers_saved = true;
10943 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
10944 }
10945
10946 /* When using the red zone we may start register saving before allocating
10947 the stack frame, saving one cycle of the prologue. However, avoid
10948 doing this if we have to probe the stack; at least on x86_64 the
10949 stack probe can turn into a call that clobbers a red zone location. */
10950 else if (ix86_using_red_zone ()
10951 && (! TARGET_STACK_PROBE
10952 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
10953 {
10954 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
10955 int_registers_saved = true;
10956 }
10957 }
10958
10959 if (stack_realign_fp)
10960 {
10961 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10962 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
10963
10964 /* The computation of the size of the re-aligned stack frame means
10965 that we must allocate the size of the register save area before
10966 performing the actual alignment. Otherwise we cannot guarantee
10967 that there's enough storage above the realignment point. */
10968 if (m->fs.sp_offset != frame.sse_reg_save_offset)
10969 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10970 GEN_INT (m->fs.sp_offset
10971 - frame.sse_reg_save_offset),
10972 -1, false);
10973
10974 /* Align the stack. */
10975 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10976 stack_pointer_rtx,
10977 GEN_INT (-align_bytes)));
10978
10979 /* For the purposes of register save area addressing, the stack
10980 pointer is no longer valid. As for the value of sp_offset,
10981 see ix86_compute_frame_layout, which we need to match in order
10982 to pass verification of stack_pointer_offset at the end. */
10983 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
10984 m->fs.sp_valid = false;
10985 }
10986
10987 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
10988
10989 if (flag_stack_usage_info)
10990 {
10991 /* We start to count from ARG_POINTER. */
10992 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
10993
10994 /* If it was realigned, take into account the fake frame. */
10995 if (stack_realign_drap)
10996 {
10997 if (ix86_static_chain_on_stack)
10998 stack_size += UNITS_PER_WORD;
10999
11000 if (!call_used_regs[REGNO (crtl->drap_reg)])
11001 stack_size += UNITS_PER_WORD;
11002
11003 /* This over-estimates by 1 minimal-stack-alignment-unit but
11004 mitigates that by counting in the new return address slot. */
11005 current_function_dynamic_stack_size
11006 += crtl->stack_alignment_needed / BITS_PER_UNIT;
11007 }
11008
11009 current_function_static_stack_size = stack_size;
11010 }
11011
11012 /* On SEH target with very large frame size, allocate an area to save
11013 SSE registers (as the very large allocation won't be described). */
11014 if (TARGET_SEH
11015 && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
11016 && !sse_registers_saved)
11017 {
11018 HOST_WIDE_INT sse_size =
11019 frame.sse_reg_save_offset - frame.reg_save_offset;
11020
11021 gcc_assert (int_registers_saved);
11022
11023 /* No need to do stack checking as the area will be immediately
11024 written. */
11025 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11026 GEN_INT (-sse_size), -1,
11027 m->fs.cfa_reg == stack_pointer_rtx);
11028 allocate -= sse_size;
11029 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11030 sse_registers_saved = true;
11031 }
11032
11033 /* The stack has already been decremented by the instruction calling us
11034 so probe if the size is non-negative to preserve the protection area. */
11035 if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
11036 {
11037 /* We expect the registers to be saved when probes are used. */
11038 gcc_assert (int_registers_saved);
11039
11040 if (STACK_CHECK_MOVING_SP)
11041 {
11042 if (!(crtl->is_leaf && !cfun->calls_alloca
11043 && allocate <= PROBE_INTERVAL))
11044 {
11045 ix86_adjust_stack_and_probe (allocate);
11046 allocate = 0;
11047 }
11048 }
11049 else
11050 {
11051 HOST_WIDE_INT size = allocate;
11052
11053 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
11054 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
11055
11056 if (TARGET_STACK_PROBE)
11057 {
11058 if (crtl->is_leaf && !cfun->calls_alloca)
11059 {
11060 if (size > PROBE_INTERVAL)
11061 ix86_emit_probe_stack_range (0, size);
11062 }
11063 else
11064 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
11065 }
11066 else
11067 {
11068 if (crtl->is_leaf && !cfun->calls_alloca)
11069 {
11070 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
11071 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT,
11072 size - STACK_CHECK_PROTECT);
11073 }
11074 else
11075 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
11076 }
11077 }
11078 }
11079
11080 if (allocate == 0)
11081 ;
11082 else if (!ix86_target_stack_probe ()
11083 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
11084 {
11085 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11086 GEN_INT (-allocate), -1,
11087 m->fs.cfa_reg == stack_pointer_rtx);
11088 }
11089 else
11090 {
11091 rtx eax = gen_rtx_REG (Pmode, AX_REG);
11092 rtx r10 = NULL;
11093 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
11094 const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
11095 bool eax_live = ix86_eax_live_at_start_p ();
11096 bool r10_live = false;
11097
11098 if (TARGET_64BIT)
11099 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
11100
11101 if (eax_live)
11102 {
11103 insn = emit_insn (gen_push (eax));
11104 allocate -= UNITS_PER_WORD;
11105 /* Note that SEH directives need to continue tracking the stack
11106 pointer even after the frame pointer has been set up. */
11107 if (sp_is_cfa_reg || TARGET_SEH)
11108 {
11109 if (sp_is_cfa_reg)
11110 m->fs.cfa_offset += UNITS_PER_WORD;
11111 RTX_FRAME_RELATED_P (insn) = 1;
11112 }
11113 }
11114
11115 if (r10_live)
11116 {
11117 r10 = gen_rtx_REG (Pmode, R10_REG);
11118 insn = emit_insn (gen_push (r10));
11119 allocate -= UNITS_PER_WORD;
11120 if (sp_is_cfa_reg || TARGET_SEH)
11121 {
11122 if (sp_is_cfa_reg)
11123 m->fs.cfa_offset += UNITS_PER_WORD;
11124 RTX_FRAME_RELATED_P (insn) = 1;
11125 }
11126 }
11127
11128 emit_move_insn (eax, GEN_INT (allocate));
11129 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
11130
11131 /* Use the fact that AX still contains ALLOCATE. */
11132 adjust_stack_insn = (Pmode == DImode
11133 ? gen_pro_epilogue_adjust_stack_di_sub
11134 : gen_pro_epilogue_adjust_stack_si_sub);
11135
11136 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
11137 stack_pointer_rtx, eax));
11138
11139 if (sp_is_cfa_reg || TARGET_SEH)
11140 {
11141 if (sp_is_cfa_reg)
11142 m->fs.cfa_offset += allocate;
11143 RTX_FRAME_RELATED_P (insn) = 1;
11144 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11145 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11146 plus_constant (Pmode, stack_pointer_rtx,
11147 -allocate)));
11148 }
11149 m->fs.sp_offset += allocate;
11150
11151 /* Use stack_pointer_rtx for relative addressing so that code
11152 works for realigned stack, too. */
11153 if (r10_live && eax_live)
11154 {
11155 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11156 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
11157 gen_frame_mem (word_mode, t));
11158 t = plus_constant (Pmode, t, UNITS_PER_WORD);
11159 emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
11160 gen_frame_mem (word_mode, t));
11161 }
11162 else if (eax_live || r10_live)
11163 {
11164 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11165 emit_move_insn (gen_rtx_REG (word_mode,
11166 (eax_live ? AX_REG : R10_REG)),
11167 gen_frame_mem (word_mode, t));
11168 }
11169 }
11170 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
11171
11172 /* If we haven't already set up the frame pointer, do so now. */
11173 if (frame_pointer_needed && !m->fs.fp_valid)
11174 {
11175 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
11176 GEN_INT (frame.stack_pointer_offset
11177 - frame.hard_frame_pointer_offset));
11178 insn = emit_insn (insn);
11179 RTX_FRAME_RELATED_P (insn) = 1;
11180 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
11181
11182 if (m->fs.cfa_reg == stack_pointer_rtx)
11183 m->fs.cfa_reg = hard_frame_pointer_rtx;
11184 m->fs.fp_offset = frame.hard_frame_pointer_offset;
11185 m->fs.fp_valid = true;
11186 }
11187
11188 if (!int_registers_saved)
11189 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
11190 if (!sse_registers_saved)
11191 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11192
11193 pic_reg_used = false;
11194 /* We don't use a PIC register for the pe-coff target. */
11195 if (pic_offset_table_rtx
11196 && !TARGET_PECOFF
11197 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
11198 || crtl->profile))
11199 {
11200 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
11201
11202 if (alt_pic_reg_used != INVALID_REGNUM)
11203 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
11204
11205 pic_reg_used = true;
11206 }
11207
11208 if (pic_reg_used)
11209 {
11210 if (TARGET_64BIT)
11211 {
11212 if (ix86_cmodel == CM_LARGE_PIC)
11213 {
11214 rtx label, tmp_reg;
11215
11216 gcc_assert (Pmode == DImode);
11217 label = gen_label_rtx ();
11218 emit_label (label);
11219 LABEL_PRESERVE_P (label) = 1;
11220 tmp_reg = gen_rtx_REG (Pmode, R11_REG);
11221 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
11222 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
11223 label));
11224 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
11225 insn = emit_insn (ix86_gen_add3 (pic_offset_table_rtx,
11226 pic_offset_table_rtx, tmp_reg));
11227 }
11228 else
11229 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
11230 }
11231 else
11232 {
11233 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
11234 RTX_FRAME_RELATED_P (insn) = 1;
11235 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
11236 }
11237 }
11238
11239 /* In the pic_reg_used case, make sure that the got load isn't deleted
11240 when mcount needs it. A blockage to avoid call movement across the
11241 mcount call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
11242 note. */
11243 if (crtl->profile && !flag_fentry && pic_reg_used)
11244 emit_insn (gen_prologue_use (pic_offset_table_rtx));
11245
11246 if (crtl->drap_reg && !crtl->stack_realign_needed)
11247 {
11248 /* vDRAP is set up, but after reload it turns out stack realignment
11249 isn't necessary; here we emit the prologue to set up DRAP
11250 without the stack realignment adjustment. */
11251 t = choose_baseaddr (0);
11252 emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
11253 }
11254
11255 /* Prevent instructions from being scheduled into register save push
11256 sequence when access to the redzone area is done through frame pointer.
11257 The offset between the frame pointer and the stack pointer is calculated
11258 relative to the value of the stack pointer at the end of the function
11259 prologue, and moving instructions that access redzone area via frame
11260 pointer inside push sequence violates this assumption. */
11261 if (frame_pointer_needed && frame.red_zone_size)
11262 emit_insn (gen_memory_blockage ());
11263
11264 /* Emit cld instruction if stringops are used in the function. */
11265 if (TARGET_CLD && ix86_current_function_needs_cld)
11266 emit_insn (gen_cld ());
11267
11268 /* SEH requires that the prologue end within 256 bytes of the start of
11269 the function. Prevent instruction schedules that would extend that.
11270 Further, prevent alloca modifications to the stack pointer from being
11271 combined with prologue modifications. */
11272 if (TARGET_SEH)
11273 emit_insn (gen_prologue_use (stack_pointer_rtx));
11274 }
11275
11276 /* Emit code to restore REG using a POP insn. */
11277
11278 static void
11279 ix86_emit_restore_reg_using_pop (rtx reg)
11280 {
11281 struct machine_function *m = cfun->machine;
11282 rtx insn = emit_insn (gen_pop (reg));
11283
11284 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
11285 m->fs.sp_offset -= UNITS_PER_WORD;
11286
11287 if (m->fs.cfa_reg == crtl->drap_reg
11288 && REGNO (reg) == REGNO (crtl->drap_reg))
11289 {
11290 /* Previously we'd represented the CFA as an expression
11291 like *(%ebp - 8). We've just popped that value from
11292 the stack, which means we need to reset the CFA to
11293 the drap register. This will remain until we restore
11294 the stack pointer. */
11295 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11296 RTX_FRAME_RELATED_P (insn) = 1;
11297
11298 /* This means that the DRAP register is valid for addressing too. */
11299 m->fs.drap_valid = true;
11300 return;
11301 }
11302
11303 if (m->fs.cfa_reg == stack_pointer_rtx)
11304 {
11305 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
11306 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
11307 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
11308 RTX_FRAME_RELATED_P (insn) = 1;
11309
11310 m->fs.cfa_offset -= UNITS_PER_WORD;
11311 }
11312
11313 /* When the frame pointer is the CFA, and we pop it, we are
11314 swapping back to the stack pointer as the CFA. This happens
11315 for stack frames that don't allocate other data, so we assume
11316 the stack pointer is now pointing at the return address, i.e.
11317 the function entry state, which makes the offset be 1 word. */
11318 if (reg == hard_frame_pointer_rtx)
11319 {
11320 m->fs.fp_valid = false;
11321 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11322 {
11323 m->fs.cfa_reg = stack_pointer_rtx;
11324 m->fs.cfa_offset -= UNITS_PER_WORD;
11325
11326 add_reg_note (insn, REG_CFA_DEF_CFA,
11327 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11328 GEN_INT (m->fs.cfa_offset)));
11329 RTX_FRAME_RELATED_P (insn) = 1;
11330 }
11331 }
11332 }
11333
11334 /* Emit code to restore saved registers using POP insns. */
11335
11336 static void
11337 ix86_emit_restore_regs_using_pop (void)
11338 {
11339 unsigned int regno;
11340
11341 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11342 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
11343 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
11344 }
11345
11346 /* Emit code and notes for the LEAVE instruction. */
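/* leave is equivalent to "mov %ebp, %esp; pop %ebp" (mov %rbp, %rsp;
   pop %rbp in 64-bit code), which is why sp becomes valid again at
   fp_offset - UNITS_PER_WORD below.  */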
11347
11348 static void
11349 ix86_emit_leave (void)
11350 {
11351 struct machine_function *m = cfun->machine;
11352 rtx insn = emit_insn (ix86_gen_leave ());
11353
11354 ix86_add_queued_cfa_restore_notes (insn);
11355
11356 gcc_assert (m->fs.fp_valid);
11357 m->fs.sp_valid = true;
11358 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
11359 m->fs.fp_valid = false;
11360
11361 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11362 {
11363 m->fs.cfa_reg = stack_pointer_rtx;
11364 m->fs.cfa_offset = m->fs.sp_offset;
11365
11366 add_reg_note (insn, REG_CFA_DEF_CFA,
11367 plus_constant (Pmode, stack_pointer_rtx,
11368 m->fs.sp_offset));
11369 RTX_FRAME_RELATED_P (insn) = 1;
11370 }
11371 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
11372 m->fs.fp_offset);
11373 }
11374
11375 /* Emit code to restore saved registers using MOV insns.
11376 First register is restored from CFA - CFA_OFFSET. */
11377 static void
11378 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
11379 bool maybe_eh_return)
11380 {
11381 struct machine_function *m = cfun->machine;
11382 unsigned int regno;
11383
11384 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11385 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11386 {
11387 rtx reg = gen_rtx_REG (word_mode, regno);
11388 rtx insn, mem;
11389
11390 mem = choose_baseaddr (cfa_offset);
11391 mem = gen_frame_mem (word_mode, mem);
11392 insn = emit_move_insn (reg, mem);
11393
11394 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
11395 {
11396 /* Previously we'd represented the CFA as an expression
11397 like *(%ebp - 8). We've just reloaded that value from
11398 the stack, which means we need to reset the CFA to
11399 the drap register. This will remain until we restore
11400 the stack pointer. */
11401 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11402 RTX_FRAME_RELATED_P (insn) = 1;
11403
11404 /* This means that the DRAP register is valid for addressing. */
11405 m->fs.drap_valid = true;
11406 }
11407 else
11408 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
11409
11410 cfa_offset -= UNITS_PER_WORD;
11411 }
11412 }
11413
11414 /* Emit code to restore saved SSE registers using MOV insns.
11415 First register is restored from CFA - CFA_OFFSET. */
11416 static void
11417 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
11418 bool maybe_eh_return)
11419 {
11420 unsigned int regno;
11421
11422 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11423 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11424 {
11425 rtx reg = gen_rtx_REG (V4SFmode, regno);
11426 rtx mem;
11427
11428 mem = choose_baseaddr (cfa_offset);
11429 mem = gen_rtx_MEM (V4SFmode, mem);
11430 set_mem_align (mem, 128);
11431 emit_move_insn (reg, mem);
11432
11433 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
11434
11435 cfa_offset -= 16;
11436 }
11437 }
11438
11439 /* Restore function stack, frame, and registers. */
11440
11441 void
11442 ix86_expand_epilogue (int style)
11443 {
11444 struct machine_function *m = cfun->machine;
11445 struct machine_frame_state frame_state_save = m->fs;
11446 struct ix86_frame frame;
11447 bool restore_regs_via_mov;
11448 bool using_drap;
11449
11450 ix86_finalize_stack_realign_flags ();
11451 ix86_compute_frame_layout (&frame);
11452
11453 m->fs.sp_valid = (!frame_pointer_needed
11454 || (crtl->sp_is_unchanging
11455 && !stack_realign_fp));
11456 gcc_assert (!m->fs.sp_valid
11457 || m->fs.sp_offset == frame.stack_pointer_offset);
11458
11459 /* The FP must be valid if the frame pointer is present. */
11460 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
11461 gcc_assert (!m->fs.fp_valid
11462 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
11463
11464 /* We must have *some* valid pointer to the stack frame. */
11465 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
11466
11467 /* The DRAP is never valid at this point. */
11468 gcc_assert (!m->fs.drap_valid);
11469
11470 /* See the comment about red zone and frame
11471 pointer usage in ix86_expand_prologue. */
11472 if (frame_pointer_needed && frame.red_zone_size)
11473 emit_insn (gen_memory_blockage ());
11474
11475 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
11476 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
11477
11478 /* Determine the CFA offset of the end of the red-zone. */
11479 m->fs.red_zone_offset = 0;
11480 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
11481 {
11482 /* The red-zone begins below the return address. */
11483 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
11484
11485 /* When the register save area is in the aligned portion of
11486 the stack, determine the maximum runtime displacement that
11487 matches up with the aligned frame. */
11488 if (stack_realign_drap)
11489 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
11490 + UNITS_PER_WORD);
11491 }
11492
11493 /* Special care must be taken for the normal return case of a function
11494 using eh_return: the eax and edx registers are marked as saved, but
11495 not restored along this path. Adjust the save location to match. */
11496 if (crtl->calls_eh_return && style != 2)
11497 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
11498
11499 /* EH_RETURN requires the use of moves to function properly. */
11500 if (crtl->calls_eh_return)
11501 restore_regs_via_mov = true;
11502 /* SEH requires the use of pops to identify the epilogue. */
11503 else if (TARGET_SEH)
11504 restore_regs_via_mov = false;
11505 /* If we're only restoring one register and sp is not valid, then
11506 use a move instruction to restore the register, since it's
11507 less work than reloading sp and popping the register. */
11508 else if (!m->fs.sp_valid && frame.nregs <= 1)
11509 restore_regs_via_mov = true;
11510 else if (TARGET_EPILOGUE_USING_MOVE
11511 && cfun->machine->use_fast_prologue_epilogue
11512 && (frame.nregs > 1
11513 || m->fs.sp_offset != frame.reg_save_offset))
11514 restore_regs_via_mov = true;
11515 else if (frame_pointer_needed
11516 && !frame.nregs
11517 && m->fs.sp_offset != frame.reg_save_offset)
11518 restore_regs_via_mov = true;
11519 else if (frame_pointer_needed
11520 && TARGET_USE_LEAVE
11521 && cfun->machine->use_fast_prologue_epilogue
11522 && frame.nregs == 1)
11523 restore_regs_via_mov = true;
11524 else
11525 restore_regs_via_mov = false;
11526
11527 if (restore_regs_via_mov || frame.nsseregs)
11528 {
11529 /* Ensure that the entire register save area is addressable via
11530 the stack pointer, if we will restore via sp. */
11531 if (TARGET_64BIT
11532 && m->fs.sp_offset > 0x7fffffff
11533 && !(m->fs.fp_valid || m->fs.drap_valid)
11534 && (frame.nsseregs + frame.nregs) != 0)
11535 {
11536 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11537 GEN_INT (m->fs.sp_offset
11538 - frame.sse_reg_save_offset),
11539 style,
11540 m->fs.cfa_reg == stack_pointer_rtx);
11541 }
11542 }
11543
11544 /* If there are any SSE registers to restore, then we have to do it
11545 via moves, since there's obviously no pop for SSE regs. */
11546 if (frame.nsseregs)
11547 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
11548 style == 2);
11549
11550 if (restore_regs_via_mov)
11551 {
11552 rtx t;
11553
11554 if (frame.nregs)
11555 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
11556
11557 /* eh_return epilogues need %ecx added to the stack pointer. */
11558 if (style == 2)
11559 {
11560 rtx insn, sa = EH_RETURN_STACKADJ_RTX;
11561
11562 /* Stack align doesn't work with eh_return. */
11563 gcc_assert (!stack_realign_drap);
11564 /* Neither do regparm nested functions. */
11565 gcc_assert (!ix86_static_chain_on_stack);
11566
11567 if (frame_pointer_needed)
11568 {
11569 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
11570 t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
11571 emit_insn (gen_rtx_SET (VOIDmode, sa, t));
11572
11573 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
11574 insn = emit_move_insn (hard_frame_pointer_rtx, t);
11575
11576 /* Note that we use SA as a temporary CFA, as the return
11577 address is at the proper place relative to it. We
11578 pretend this happens at the FP restore insn because
11579 prior to this insn the FP would be stored at the wrong
11580 offset relative to SA, and after this insn we have no
11581 other reasonable register to use for the CFA. We don't
11582 bother resetting the CFA to the SP for the duration of
11583 the return insn. */
11584 add_reg_note (insn, REG_CFA_DEF_CFA,
11585 plus_constant (Pmode, sa, UNITS_PER_WORD));
11586 ix86_add_queued_cfa_restore_notes (insn);
11587 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
11588 RTX_FRAME_RELATED_P (insn) = 1;
11589
11590 m->fs.cfa_reg = sa;
11591 m->fs.cfa_offset = UNITS_PER_WORD;
11592 m->fs.fp_valid = false;
11593
11594 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
11595 const0_rtx, style, false);
11596 }
11597 else
11598 {
11599 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
11600 t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
11601 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
11602 ix86_add_queued_cfa_restore_notes (insn);
11603
11604 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
11605 if (m->fs.cfa_offset != UNITS_PER_WORD)
11606 {
11607 m->fs.cfa_offset = UNITS_PER_WORD;
11608 add_reg_note (insn, REG_CFA_DEF_CFA,
11609 plus_constant (Pmode, stack_pointer_rtx,
11610 UNITS_PER_WORD));
11611 RTX_FRAME_RELATED_P (insn) = 1;
11612 }
11613 }
11614 m->fs.sp_offset = UNITS_PER_WORD;
11615 m->fs.sp_valid = true;
11616 }
11617 }
11618 else
11619 {
11620 /* SEH requires that the function end with (1) a stack adjustment
11621 if necessary, (2) a sequence of pops, and (3) a return or
11622 jump instruction. Prevent insns from the function body from
11623 being scheduled into this sequence. */
11624 if (TARGET_SEH)
11625 {
11626 /* Prevent a catch region from being adjacent to the standard
11627 epilogue sequence. Unfortunately, crtl->uses_eh_lsda and
11628 several other flags that would be interesting to test are
11629 not yet set up. */
11630 if (flag_non_call_exceptions)
11631 emit_insn (gen_nops (const1_rtx));
11632 else
11633 emit_insn (gen_blockage ());
11634 }
11635
11636 /* The first step is to deallocate the stack frame so that we can
11637 pop the registers. Also do it on SEH targets for very large
11638 frames, as the emitted instructions aren't allowed by the ABI in
11639 epilogues. */
11640 if (!m->fs.sp_valid
11641 || (TARGET_SEH
11642 && (m->fs.sp_offset - frame.reg_save_offset
11643 >= SEH_MAX_FRAME_SIZE)))
11644 {
11645 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
11646 GEN_INT (m->fs.fp_offset
11647 - frame.reg_save_offset),
11648 style, false);
11649 }
11650 else if (m->fs.sp_offset != frame.reg_save_offset)
11651 {
11652 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11653 GEN_INT (m->fs.sp_offset
11654 - frame.reg_save_offset),
11655 style,
11656 m->fs.cfa_reg == stack_pointer_rtx);
11657 }
11658
11659 ix86_emit_restore_regs_using_pop ();
11660 }
11661
11662 /* If we used a frame pointer and haven't already got rid of it,
11663 then do so now. */
11664 if (m->fs.fp_valid)
11665 {
11666 /* If the stack pointer is valid and pointing at the frame
11667 pointer store address, then we only need a pop. */
11668 if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
11669 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
11670       /* The 'leave' insn results in shorter dependency chains on CPUs
11671 	 that are able to grok it fast.  */
11672 else if (TARGET_USE_LEAVE
11673 || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
11674 || !cfun->machine->use_fast_prologue_epilogue)
11675 ix86_emit_leave ();
11676 else
11677 {
11678 pro_epilogue_adjust_stack (stack_pointer_rtx,
11679 hard_frame_pointer_rtx,
11680 const0_rtx, style, !using_drap);
11681 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
11682 }
11683 }
11684
11685 if (using_drap)
11686 {
11687 int param_ptr_offset = UNITS_PER_WORD;
11688 rtx insn;
11689
11690 gcc_assert (stack_realign_drap);
11691
11692 if (ix86_static_chain_on_stack)
11693 param_ptr_offset += UNITS_PER_WORD;
11694 if (!call_used_regs[REGNO (crtl->drap_reg)])
11695 param_ptr_offset += UNITS_PER_WORD;
11696
11697 insn = emit_insn (gen_rtx_SET
11698 (VOIDmode, stack_pointer_rtx,
11699 gen_rtx_PLUS (Pmode,
11700 crtl->drap_reg,
11701 GEN_INT (-param_ptr_offset))));
11702 m->fs.cfa_reg = stack_pointer_rtx;
11703 m->fs.cfa_offset = param_ptr_offset;
11704 m->fs.sp_offset = param_ptr_offset;
11705 m->fs.realigned = false;
11706
11707 add_reg_note (insn, REG_CFA_DEF_CFA,
11708 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11709 GEN_INT (param_ptr_offset)));
11710 RTX_FRAME_RELATED_P (insn) = 1;
11711
11712 if (!call_used_regs[REGNO (crtl->drap_reg)])
11713 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
11714 }
11715
11716 /* At this point the stack pointer must be valid, and we must have
11717 restored all of the registers. We may not have deallocated the
11718 entire stack frame. We've delayed this until now because it may
11719 be possible to merge the local stack deallocation with the
11720 deallocation forced by ix86_static_chain_on_stack. */
11721 gcc_assert (m->fs.sp_valid);
11722 gcc_assert (!m->fs.fp_valid);
11723 gcc_assert (!m->fs.realigned);
11724 if (m->fs.sp_offset != UNITS_PER_WORD)
11725 {
11726 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11727 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
11728 style, true);
11729 }
11730 else
11731 ix86_add_queued_cfa_restore_notes (get_last_insn ());
11732
11733 /* Sibcall epilogues don't want a return instruction. */
11734 if (style == 0)
11735 {
11736 m->fs = frame_state_save;
11737 return;
11738 }
11739
11740 if (crtl->args.pops_args && crtl->args.size)
11741 {
11742 rtx popc = GEN_INT (crtl->args.pops_args);
11743
11744 /* i386 can only pop 64K bytes. If asked to pop more, pop return
11745 address, do explicit add, and jump indirectly to the caller. */
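      /* Illustrative sketch (schematic, not the literal emitted pattern):
	 for N >= 64K the sequence built below is equivalent to

	     pop   %ecx        # fetch the return address
	     add   $N, %esp    # discard the pushed arguments
	     jmp   *%ecx       # return to the caller

	 because "ret $imm16" can only encode a 16-bit pop count.  */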
11746
11747 if (crtl->args.pops_args >= 65536)
11748 {
11749 rtx ecx = gen_rtx_REG (SImode, CX_REG);
11750 rtx insn;
11751
11752 /* There is no "pascal" calling convention in any 64bit ABI. */
11753 gcc_assert (!TARGET_64BIT);
11754
11755 insn = emit_insn (gen_pop (ecx));
11756 m->fs.cfa_offset -= UNITS_PER_WORD;
11757 m->fs.sp_offset -= UNITS_PER_WORD;
11758
11759 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
11760 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
11761 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
11762 add_reg_note (insn, REG_CFA_REGISTER,
11763 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
11764 RTX_FRAME_RELATED_P (insn) = 1;
11765
11766 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11767 popc, -1, true);
11768 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
11769 }
11770 else
11771 emit_jump_insn (gen_simple_return_pop_internal (popc));
11772 }
11773 else
11774 emit_jump_insn (gen_simple_return_internal ());
11775
11776 /* Restore the state back to the state from the prologue,
11777 so that it's correct for the next epilogue. */
11778 m->fs = frame_state_save;
11779 }
11780
11781 /* Reset from the function's potential modifications. */
11782
11783 static void
11784 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED, HOST_WIDE_INT)
11785 {
11786 if (pic_offset_table_rtx)
11787 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
11788 #if TARGET_MACHO
11789 /* Mach-O doesn't support labels at the end of objects, so if
11790 it looks like we might want one, insert a NOP. */
11791 {
11792 rtx insn = get_last_insn ();
11793 rtx deleted_debug_label = NULL_RTX;
11794 while (insn
11795 && NOTE_P (insn)
11796 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
11797 {
11798 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
11799 notes only, instead set their CODE_LABEL_NUMBER to -1,
11800 otherwise there would be code generation differences
11801 in between -g and -g0. */
11802 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
11803 deleted_debug_label = insn;
11804 insn = PREV_INSN (insn);
11805 }
11806 if (insn
11807 && (LABEL_P (insn)
11808 || (NOTE_P (insn)
11809 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
11810 fputs ("\tnop\n", file);
11811 else if (deleted_debug_label)
11812 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
11813 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
11814 CODE_LABEL_NUMBER (insn) = -1;
11815 }
11816 #endif
11817
11818 }
11819
11820 /* Return a scratch register to use in the split stack prologue. The
11821    split stack prologue is used for -fsplit-stack.  These are the first
11822    instructions in the function, even before the regular prologue.
11823 The scratch register can be any caller-saved register which is not
11824 used for parameters or for the static chain. */
11825
11826 static unsigned int
11827 split_stack_prologue_scratch_regno (void)
11828 {
11829 if (TARGET_64BIT)
11830 return R11_REG;
11831 else
11832 {
11833 bool is_fastcall, is_thiscall;
11834 int regparm;
11835
11836 is_fastcall = (lookup_attribute ("fastcall",
11837 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
11838 != NULL);
11839 is_thiscall = (lookup_attribute ("thiscall",
11840 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
11841 != NULL);
11842 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
11843
11844 if (is_fastcall)
11845 {
11846 if (DECL_STATIC_CHAIN (cfun->decl))
11847 {
11848 sorry ("-fsplit-stack does not support fastcall with "
11849 "nested function");
11850 return INVALID_REGNUM;
11851 }
11852 return AX_REG;
11853 }
11854 else if (is_thiscall)
11855 {
11856 if (!DECL_STATIC_CHAIN (cfun->decl))
11857 return DX_REG;
11858 return AX_REG;
11859 }
11860 else if (regparm < 3)
11861 {
11862 if (!DECL_STATIC_CHAIN (cfun->decl))
11863 return CX_REG;
11864 else
11865 {
11866 if (regparm >= 2)
11867 {
11868 sorry ("-fsplit-stack does not support 2 register "
11869 "parameters for a nested function");
11870 return INVALID_REGNUM;
11871 }
11872 return DX_REG;
11873 }
11874 }
11875 else
11876 {
11877 /* FIXME: We could make this work by pushing a register
11878 around the addition and comparison. */
11879 sorry ("-fsplit-stack does not support 3 register parameters");
11880 return INVALID_REGNUM;
11881 }
11882 }
11883 }
11884
11885 /* A SYMBOL_REF for the function which allocates new stack space for
11886 -fsplit-stack. */
11887
11888 static GTY(()) rtx split_stack_fn;
11889
11890 /* A SYMBOL_REF for the more stack function when using the large
11891 model. */
11892
11893 static GTY(()) rtx split_stack_fn_large;
11894
11895 /* Handle -fsplit-stack. These are the first instructions in the
11896 function, even before the regular prologue. */
11897
11898 void
11899 ix86_expand_split_stack_prologue (void)
11900 {
11901 struct ix86_frame frame;
11902 HOST_WIDE_INT allocate;
11903 unsigned HOST_WIDE_INT args_size;
11904 rtx label, limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
11905 rtx scratch_reg = NULL_RTX;
11906 rtx varargs_label = NULL_RTX;
11907 rtx fn;
11908
11909 gcc_assert (flag_split_stack && reload_completed);
11910
11911 ix86_finalize_stack_realign_flags ();
11912 ix86_compute_frame_layout (&frame);
11913 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
11914
11915 /* This is the label we will branch to if we have enough stack
11916 space. We expect the basic block reordering pass to reverse this
11917 branch if optimizing, so that we branch in the unlikely case. */
11918 label = gen_label_rtx ();
11919
11920 /* We need to compare the stack pointer minus the frame size with
11921 the stack boundary in the TCB. The stack boundary always gives
11922 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
11923 can compare directly. Otherwise we need to do an addition. */
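  /* Illustrative sketch (assembly is schematic; the boundary lives at a
     target-defined offset in the TCB, referenced via UNSPEC_STACK_CHECK):
     when ALLOCATE exceeds SPLIT_STACK_AVAILABLE the 64-bit comparison
     built below is roughly

	 lea	-ALLOCATE(%rsp), %r11
	 cmp	%fs:<boundary-offset>, %r11
	 jae	.Lenough_stack

     and the fall-through path calls __morestack.  */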
11924
11925 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
11926 UNSPEC_STACK_CHECK);
11927 limit = gen_rtx_CONST (Pmode, limit);
11928 limit = gen_rtx_MEM (Pmode, limit);
11929 if (allocate < SPLIT_STACK_AVAILABLE)
11930 current = stack_pointer_rtx;
11931 else
11932 {
11933 unsigned int scratch_regno;
11934 rtx offset;
11935
11936 /* We need a scratch register to hold the stack pointer minus
11937 the required frame size. Since this is the very start of the
11938 function, the scratch register can be any caller-saved
11939 register which is not used for parameters. */
11940 offset = GEN_INT (- allocate);
11941 scratch_regno = split_stack_prologue_scratch_regno ();
11942 if (scratch_regno == INVALID_REGNUM)
11943 return;
11944 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
11945 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
11946 {
11947 /* We don't use ix86_gen_add3 in this case because it will
11948 want to split to lea, but when not optimizing the insn
11949 will not be split after this point. */
11950 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11951 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11952 offset)));
11953 }
11954 else
11955 {
11956 emit_move_insn (scratch_reg, offset);
11957 emit_insn (ix86_gen_add3 (scratch_reg, scratch_reg,
11958 stack_pointer_rtx));
11959 }
11960 current = scratch_reg;
11961 }
11962
11963 ix86_expand_branch (GEU, current, limit, label);
11964 jump_insn = get_last_insn ();
11965 JUMP_LABEL (jump_insn) = label;
11966
11967 /* Mark the jump as very likely to be taken. */
11968 add_int_reg_note (jump_insn, REG_BR_PROB,
11969 REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100);
11970
11971 if (split_stack_fn == NULL_RTX)
11972 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
11973 fn = split_stack_fn;
11974
11975 /* Get more stack space. We pass in the desired stack space and the
11976 size of the arguments to copy to the new stack. In 32-bit mode
11977 we push the parameters; __morestack will return on a new stack
11978 anyhow. In 64-bit mode we pass the parameters in r10 and
11979 r11. */
11980 allocate_rtx = GEN_INT (allocate);
11981 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
11982 call_fusage = NULL_RTX;
11983 if (TARGET_64BIT)
11984 {
11985 rtx reg10, reg11;
11986
11987 reg10 = gen_rtx_REG (Pmode, R10_REG);
11988 reg11 = gen_rtx_REG (Pmode, R11_REG);
11989
11990 /* If this function uses a static chain, it will be in %r10.
11991 Preserve it across the call to __morestack. */
11992 if (DECL_STATIC_CHAIN (cfun->decl))
11993 {
11994 rtx rax;
11995
11996 rax = gen_rtx_REG (word_mode, AX_REG);
11997 emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
11998 use_reg (&call_fusage, rax);
11999 }
12000
12001 if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
12002 && !TARGET_PECOFF)
12003 {
12004 HOST_WIDE_INT argval;
12005
12006 gcc_assert (Pmode == DImode);
12007 /* When using the large model we need to load the address
12008 into a register, and we've run out of registers. So we
12009 switch to a different calling convention, and we call a
12010 different function: __morestack_large. We pass the
12011 argument size in the upper 32 bits of r10 and pass the
12012 frame size in the lower 32 bits. */
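	  /* For illustration: with args_size == 0x18 and allocate == 0x400,
	     the value packed into r10 below is
		 argval = (0x18 << 32) + 0x400 = 0x0000001800000400,
	     i.e. the argument size in the upper half and the frame size in
	     the lower half.  */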
12013 gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
12014 gcc_assert ((args_size & 0xffffffff) == args_size);
12015
12016 if (split_stack_fn_large == NULL_RTX)
12017 split_stack_fn_large =
12018 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
12019
12020 if (ix86_cmodel == CM_LARGE_PIC)
12021 {
12022 rtx label, x;
12023
12024 label = gen_label_rtx ();
12025 emit_label (label);
12026 LABEL_PRESERVE_P (label) = 1;
12027 emit_insn (gen_set_rip_rex64 (reg10, label));
12028 emit_insn (gen_set_got_offset_rex64 (reg11, label));
12029 emit_insn (ix86_gen_add3 (reg10, reg10, reg11));
12030 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
12031 UNSPEC_GOT);
12032 x = gen_rtx_CONST (Pmode, x);
12033 emit_move_insn (reg11, x);
12034 x = gen_rtx_PLUS (Pmode, reg10, reg11);
12035 x = gen_const_mem (Pmode, x);
12036 emit_move_insn (reg11, x);
12037 }
12038 else
12039 emit_move_insn (reg11, split_stack_fn_large);
12040
12041 fn = reg11;
12042
12043 argval = ((args_size << 16) << 16) + allocate;
12044 emit_move_insn (reg10, GEN_INT (argval));
12045 }
12046 else
12047 {
12048 emit_move_insn (reg10, allocate_rtx);
12049 emit_move_insn (reg11, GEN_INT (args_size));
12050 use_reg (&call_fusage, reg11);
12051 }
12052
12053 use_reg (&call_fusage, reg10);
12054 }
12055 else
12056 {
12057 emit_insn (gen_push (GEN_INT (args_size)));
12058 emit_insn (gen_push (allocate_rtx));
12059 }
12060 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
12061 GEN_INT (UNITS_PER_WORD), constm1_rtx,
12062 NULL_RTX, false);
12063 add_function_usage_to (call_insn, call_fusage);
12064
12065 /* In order to make call/return prediction work right, we now need
12066 to execute a return instruction. See
12067 libgcc/config/i386/morestack.S for the details on how this works.
12068
12069 For flow purposes gcc must not see this as a return
12070 instruction--we need control flow to continue at the subsequent
12071 label. Therefore, we use an unspec. */
12072 gcc_assert (crtl->args.pops_args < 65536);
12073 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
12074
12075 /* If we are in 64-bit mode and this function uses a static chain,
12076      we saved %r10 in %rax before calling __morestack.  */
12077 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
12078 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
12079 gen_rtx_REG (word_mode, AX_REG));
12080
12081 /* If this function calls va_start, we need to store a pointer to
12082 the arguments on the old stack, because they may not have been
12083 all copied to the new stack. At this point the old stack can be
12084 found at the frame pointer value used by __morestack, because
12085 __morestack has set that up before calling back to us. Here we
12086 store that pointer in a scratch register, and in
12087 ix86_expand_prologue we store the scratch register in a stack
12088 slot. */
12089 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12090 {
12091 unsigned int scratch_regno;
12092 rtx frame_reg;
12093 int words;
12094
12095 scratch_regno = split_stack_prologue_scratch_regno ();
12096 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
12097 frame_reg = gen_rtx_REG (Pmode, BP_REG);
12098
12099 /* 64-bit:
12100 fp -> old fp value
12101 return address within this function
12102 return address of caller of this function
12103 stack arguments
12104 So we add three words to get to the stack arguments.
12105
12106 32-bit:
12107 fp -> old fp value
12108 return address within this function
12109 first argument to __morestack
12110 second argument to __morestack
12111 return address of caller of this function
12112 stack arguments
12113 So we add five words to get to the stack arguments.
12114 */
12115 words = TARGET_64BIT ? 3 : 5;
12116 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12117 gen_rtx_PLUS (Pmode, frame_reg,
12118 GEN_INT (words * UNITS_PER_WORD))));
12119
12120 varargs_label = gen_label_rtx ();
12121 emit_jump_insn (gen_jump (varargs_label));
12122 JUMP_LABEL (get_last_insn ()) = varargs_label;
12123
12124 emit_barrier ();
12125 }
12126
12127 emit_label (label);
12128 LABEL_NUSES (label) = 1;
12129
12130 /* If this function calls va_start, we now have to set the scratch
12131 register for the case where we do not call __morestack. In this
12132 case we need to set it based on the stack pointer. */
12133 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12134 {
12135 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12136 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12137 GEN_INT (UNITS_PER_WORD))));
12138
12139 emit_label (varargs_label);
12140 LABEL_NUSES (varargs_label) = 1;
12141 }
12142 }
12143
12144 /* We may have to tell the dataflow pass that the split stack prologue
12145 is initializing a scratch register. */
12146
12147 static void
12148 ix86_live_on_entry (bitmap regs)
12149 {
12150 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12151 {
12152 gcc_assert (flag_split_stack);
12153 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
12154 }
12155 }
12156 \f
12157 /* Extract the parts of an RTL expression that is a valid memory address
12158 for an instruction. Return 0 if the structure of the address is
12159 grossly off. Return -1 if the address contains ASHIFT, so it is not
12160    strictly valid, but still used for computing the length of lea instructions.  */
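/* For illustration (schematic RTL, not a literal dump): an address such as

       (plus (plus (mult (reg %ecx) (const_int 4))
		   (reg %ebx))
	     (const_int 12))

   decomposes into base = %ebx, index = %ecx, scale = 4, disp = 12,
   i.e. the operand 12(%ebx,%ecx,4).  */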
12161
12162 int
12163 ix86_decompose_address (rtx addr, struct ix86_address *out)
12164 {
12165 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
12166 rtx base_reg, index_reg;
12167 HOST_WIDE_INT scale = 1;
12168 rtx scale_rtx = NULL_RTX;
12169 rtx tmp;
12170 int retval = 1;
12171 enum ix86_address_seg seg = SEG_DEFAULT;
12172
12173 /* Allow zero-extended SImode addresses,
12174 they will be emitted with addr32 prefix. */
12175 if (TARGET_64BIT && GET_MODE (addr) == DImode)
12176 {
12177 if (GET_CODE (addr) == ZERO_EXTEND
12178 && GET_MODE (XEXP (addr, 0)) == SImode)
12179 {
12180 addr = XEXP (addr, 0);
12181 if (CONST_INT_P (addr))
12182 return 0;
12183 }
12184 else if (GET_CODE (addr) == AND
12185 && const_32bit_mask (XEXP (addr, 1), DImode))
12186 {
12187 addr = simplify_gen_subreg (SImode, XEXP (addr, 0), DImode, 0);
12188 if (addr == NULL_RTX)
12189 return 0;
12190
12191 if (CONST_INT_P (addr))
12192 return 0;
12193 }
12194 }
12195
12196 /* Allow SImode subregs of DImode addresses,
12197 they will be emitted with addr32 prefix. */
12198 if (TARGET_64BIT && GET_MODE (addr) == SImode)
12199 {
12200 if (GET_CODE (addr) == SUBREG
12201 && GET_MODE (SUBREG_REG (addr)) == DImode)
12202 {
12203 addr = SUBREG_REG (addr);
12204 if (CONST_INT_P (addr))
12205 return 0;
12206 }
12207 }
12208
12209 if (REG_P (addr))
12210 base = addr;
12211 else if (GET_CODE (addr) == SUBREG)
12212 {
12213 if (REG_P (SUBREG_REG (addr)))
12214 base = addr;
12215 else
12216 return 0;
12217 }
12218 else if (GET_CODE (addr) == PLUS)
12219 {
12220 rtx addends[4], op;
12221 int n = 0, i;
12222
12223 op = addr;
12224 do
12225 {
12226 if (n >= 4)
12227 return 0;
12228 addends[n++] = XEXP (op, 1);
12229 op = XEXP (op, 0);
12230 }
12231 while (GET_CODE (op) == PLUS);
12232 if (n >= 4)
12233 return 0;
12234 addends[n] = op;
12235
12236 for (i = n; i >= 0; --i)
12237 {
12238 op = addends[i];
12239 switch (GET_CODE (op))
12240 {
12241 case MULT:
12242 if (index)
12243 return 0;
12244 index = XEXP (op, 0);
12245 scale_rtx = XEXP (op, 1);
12246 break;
12247
12248 case ASHIFT:
12249 if (index)
12250 return 0;
12251 index = XEXP (op, 0);
12252 tmp = XEXP (op, 1);
12253 if (!CONST_INT_P (tmp))
12254 return 0;
12255 scale = INTVAL (tmp);
12256 if ((unsigned HOST_WIDE_INT) scale > 3)
12257 return 0;
12258 scale = 1 << scale;
12259 break;
12260
12261 case ZERO_EXTEND:
12262 op = XEXP (op, 0);
12263 if (GET_CODE (op) != UNSPEC)
12264 return 0;
12265 /* FALLTHRU */
12266
12267 case UNSPEC:
12268 if (XINT (op, 1) == UNSPEC_TP
12269 && TARGET_TLS_DIRECT_SEG_REFS
12270 && seg == SEG_DEFAULT)
12271 seg = DEFAULT_TLS_SEG_REG;
12272 else
12273 return 0;
12274 break;
12275
12276 case SUBREG:
12277 if (!REG_P (SUBREG_REG (op)))
12278 return 0;
12279 /* FALLTHRU */
12280
12281 case REG:
12282 if (!base)
12283 base = op;
12284 else if (!index)
12285 index = op;
12286 else
12287 return 0;
12288 break;
12289
12290 case CONST:
12291 case CONST_INT:
12292 case SYMBOL_REF:
12293 case LABEL_REF:
12294 if (disp)
12295 return 0;
12296 disp = op;
12297 break;
12298
12299 default:
12300 return 0;
12301 }
12302 }
12303 }
12304 else if (GET_CODE (addr) == MULT)
12305 {
12306 index = XEXP (addr, 0); /* index*scale */
12307 scale_rtx = XEXP (addr, 1);
12308 }
12309 else if (GET_CODE (addr) == ASHIFT)
12310 {
12311 /* We're called for lea too, which implements ashift on occasion. */
12312 index = XEXP (addr, 0);
12313 tmp = XEXP (addr, 1);
12314 if (!CONST_INT_P (tmp))
12315 return 0;
12316 scale = INTVAL (tmp);
12317 if ((unsigned HOST_WIDE_INT) scale > 3)
12318 return 0;
12319 scale = 1 << scale;
12320 retval = -1;
12321 }
12322 else
12323 disp = addr; /* displacement */
12324
12325 if (index)
12326 {
12327 if (REG_P (index))
12328 ;
12329 else if (GET_CODE (index) == SUBREG
12330 && REG_P (SUBREG_REG (index)))
12331 ;
12332 else
12333 return 0;
12334 }
12335
12336 /* Extract the integral value of scale. */
12337 if (scale_rtx)
12338 {
12339 if (!CONST_INT_P (scale_rtx))
12340 return 0;
12341 scale = INTVAL (scale_rtx);
12342 }
12343
12344 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
12345 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
12346
12347 /* Avoid useless 0 displacement. */
12348 if (disp == const0_rtx && (base || index))
12349 disp = NULL_RTX;
12350
12351   /* Allow arg pointer and stack pointer as index if there is no scaling.  */
12352 if (base_reg && index_reg && scale == 1
12353 && (index_reg == arg_pointer_rtx
12354 || index_reg == frame_pointer_rtx
12355 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
12356 {
12357 rtx tmp;
12358 tmp = base, base = index, index = tmp;
12359 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
12360 }
12361
12362 /* Special case: %ebp cannot be encoded as a base without a displacement.
12363 Similarly %r13. */
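  /* For illustration: in the ModRM byte, mod=00 with base 101 (%ebp, or
     %r13 with REX.B) means "disp32 and no base", so a plain (%ebp) operand
     must be emitted as 0(%ebp); the const0_rtx below supplies that
     explicit zero displacement.  */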
12364 if (!disp
12365 && base_reg
12366 && (base_reg == hard_frame_pointer_rtx
12367 || base_reg == frame_pointer_rtx
12368 || base_reg == arg_pointer_rtx
12369 || (REG_P (base_reg)
12370 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
12371 || REGNO (base_reg) == R13_REG))))
12372 disp = const0_rtx;
12373
12374   /* Special case: on K6, [%esi] causes the instruction to be vector decoded.
12375      Avoid this by transforming it to [%esi+0].
12376 Reload calls address legitimization without cfun defined, so we need
12377 to test cfun for being non-NULL. */
12378 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
12379 && base_reg && !index_reg && !disp
12380 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
12381 disp = const0_rtx;
12382
12383 /* Special case: encode reg+reg instead of reg*2. */
12384 if (!base && index && scale == 2)
12385 base = index, base_reg = index_reg, scale = 1;
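  /* For illustration: this prefers lea 4(%eax,%eax), %edx over
     lea 4(,%eax,2), %edx; a scaled index with no base always needs a
     32-bit displacement field, so the reg+reg form encodes shorter.  */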
12386
12387 /* Special case: scaling cannot be encoded without base or displacement. */
12388 if (!base && !disp && index && scale != 1)
12389 disp = const0_rtx;
12390
12391 out->base = base;
12392 out->index = index;
12393 out->disp = disp;
12394 out->scale = scale;
12395 out->seg = seg;
12396
12397 return retval;
12398 }
12399 \f
12400 /* Return cost of the memory address x.
12401 For i386, it is better to use a complex address than let gcc copy
12402 the address into a reg and make a new pseudo. But not if the address
12403    requires two regs - that would mean more pseudos with longer
12404 lifetimes. */
12405 static int
12406 ix86_address_cost (rtx x, enum machine_mode, addr_space_t, bool)
12407 {
12408 struct ix86_address parts;
12409 int cost = 1;
12410 int ok = ix86_decompose_address (x, &parts);
12411
12412 gcc_assert (ok);
12413
12414 if (parts.base && GET_CODE (parts.base) == SUBREG)
12415 parts.base = SUBREG_REG (parts.base);
12416 if (parts.index && GET_CODE (parts.index) == SUBREG)
12417 parts.index = SUBREG_REG (parts.index);
12418
12419 /* Attempt to minimize number of registers in the address. */
12420 if ((parts.base
12421 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
12422 || (parts.index
12423 && (!REG_P (parts.index)
12424 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
12425 cost++;
12426
12427 if (parts.base
12428 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
12429 && parts.index
12430 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
12431 && parts.base != parts.index)
12432 cost++;
12433
12434   /* AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
12435      since its predecode logic can't detect the length of instructions
12436      and they degenerate to vector decoding.  Increase the cost of such
12437      addresses here.  The penalty is at least 2 cycles.  It may be worthwhile
12438      to split such addresses or even refuse such addresses at all.
12439 
12440      The following addressing modes are affected:
12441       [base+scale*index]
12442       [scale*index+disp]
12443       [base+index]
12444 
12445      The first and last cases may be avoidable by explicitly coding the zero
12446      into the memory address, but I don't have an AMD-K6 machine handy to
12447      check this theory.  */
12448
12449 if (TARGET_K6
12450 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
12451 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
12452 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
12453 cost += 10;
12454
12455 return cost;
12456 }
12457 \f
12458 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
12459    this is used to form addresses to local data when -fPIC is in
12460 use. */
12461
12462 static bool
12463 darwin_local_data_pic (rtx disp)
12464 {
12465 return (GET_CODE (disp) == UNSPEC
12466 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
12467 }
12468
12469 /* Determine if a given RTX is a valid constant. We already know this
12470 satisfies CONSTANT_P. */
12471
12472 static bool
12473 ix86_legitimate_constant_p (enum machine_mode, rtx x)
12474 {
12475 switch (GET_CODE (x))
12476 {
12477 case CONST:
12478 x = XEXP (x, 0);
12479
12480 if (GET_CODE (x) == PLUS)
12481 {
12482 if (!CONST_INT_P (XEXP (x, 1)))
12483 return false;
12484 x = XEXP (x, 0);
12485 }
12486
12487 if (TARGET_MACHO && darwin_local_data_pic (x))
12488 return true;
12489
12490 /* Only some unspecs are valid as "constants". */
12491 if (GET_CODE (x) == UNSPEC)
12492 switch (XINT (x, 1))
12493 {
12494 case UNSPEC_GOT:
12495 case UNSPEC_GOTOFF:
12496 case UNSPEC_PLTOFF:
12497 return TARGET_64BIT;
12498 case UNSPEC_TPOFF:
12499 case UNSPEC_NTPOFF:
12500 x = XVECEXP (x, 0, 0);
12501 return (GET_CODE (x) == SYMBOL_REF
12502 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
12503 case UNSPEC_DTPOFF:
12504 x = XVECEXP (x, 0, 0);
12505 return (GET_CODE (x) == SYMBOL_REF
12506 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
12507 default:
12508 return false;
12509 }
12510
12511 /* We must have drilled down to a symbol. */
12512 if (GET_CODE (x) == LABEL_REF)
12513 return true;
12514 if (GET_CODE (x) != SYMBOL_REF)
12515 return false;
12516 /* FALLTHRU */
12517
12518 case SYMBOL_REF:
12519 /* TLS symbols are never valid. */
12520 if (SYMBOL_REF_TLS_MODEL (x))
12521 return false;
12522
12523 /* DLLIMPORT symbols are never valid. */
12524 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12525 && SYMBOL_REF_DLLIMPORT_P (x))
12526 return false;
12527
12528 #if TARGET_MACHO
12529 /* mdynamic-no-pic */
12530 if (MACHO_DYNAMIC_NO_PIC_P)
12531 return machopic_symbol_defined_p (x);
12532 #endif
12533 break;
12534
12535 case CONST_DOUBLE:
12536 if (GET_MODE (x) == TImode
12537 && x != CONST0_RTX (TImode)
12538 && !TARGET_64BIT)
12539 return false;
12540 break;
12541
12542 case CONST_VECTOR:
12543 if (!standard_sse_constant_p (x))
12544 return false;
12545
12546 default:
12547 break;
12548 }
12549
12550 /* Otherwise we handle everything else in the move patterns. */
12551 return true;
12552 }
12553
12554 /* Determine if it's legal to put X into the constant pool. This
12555 is not possible for the address of thread-local symbols, which
12556 is checked above. */
12557
12558 static bool
12559 ix86_cannot_force_const_mem (enum machine_mode mode, rtx x)
12560 {
12561 /* We can always put integral constants and vectors in memory. */
12562 switch (GET_CODE (x))
12563 {
12564 case CONST_INT:
12565 case CONST_DOUBLE:
12566 case CONST_VECTOR:
12567 return false;
12568
12569 default:
12570 break;
12571 }
12572 return !ix86_legitimate_constant_p (mode, x);
12573 }
12574
12575 /* Nonzero if the symbol is marked as dllimport, or as a stub variable;
12576    otherwise zero.  */
12577
12578 static bool
12579 is_imported_p (rtx x)
12580 {
12581 if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
12582 || GET_CODE (x) != SYMBOL_REF)
12583 return false;
12584
12585 return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x);
12586 }
12587
12588
12589 /* Nonzero if the constant value X is a legitimate general operand
12590 when generating PIC code. It is given that flag_pic is on and
12591 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
12592
12593 bool
12594 legitimate_pic_operand_p (rtx x)
12595 {
12596 rtx inner;
12597
12598 switch (GET_CODE (x))
12599 {
12600 case CONST:
12601 inner = XEXP (x, 0);
12602 if (GET_CODE (inner) == PLUS
12603 && CONST_INT_P (XEXP (inner, 1)))
12604 inner = XEXP (inner, 0);
12605
12606 /* Only some unspecs are valid as "constants". */
12607 if (GET_CODE (inner) == UNSPEC)
12608 switch (XINT (inner, 1))
12609 {
12610 case UNSPEC_GOT:
12611 case UNSPEC_GOTOFF:
12612 case UNSPEC_PLTOFF:
12613 return TARGET_64BIT;
12614 case UNSPEC_TPOFF:
12615 x = XVECEXP (inner, 0, 0);
12616 return (GET_CODE (x) == SYMBOL_REF
12617 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
12618 case UNSPEC_MACHOPIC_OFFSET:
12619 return legitimate_pic_address_disp_p (x);
12620 default:
12621 return false;
12622 }
12623 /* FALLTHRU */
12624
12625 case SYMBOL_REF:
12626 case LABEL_REF:
12627 return legitimate_pic_address_disp_p (x);
12628
12629 default:
12630 return true;
12631 }
12632 }
12633
12634 /* Determine if a given CONST RTX is a valid memory displacement
12635 in PIC mode. */
12636
12637 bool
12638 legitimate_pic_address_disp_p (rtx disp)
12639 {
12640 bool saw_plus;
12641
12642 /* In 64bit mode we can allow direct addresses of symbols and labels
12643 when they are not dynamic symbols. */
12644 if (TARGET_64BIT)
12645 {
12646 rtx op0 = disp, op1;
12647
12648 switch (GET_CODE (disp))
12649 {
12650 case LABEL_REF:
12651 return true;
12652
12653 case CONST:
12654 if (GET_CODE (XEXP (disp, 0)) != PLUS)
12655 break;
12656 op0 = XEXP (XEXP (disp, 0), 0);
12657 op1 = XEXP (XEXP (disp, 0), 1);
12658 if (!CONST_INT_P (op1)
12659 || INTVAL (op1) >= 16*1024*1024
12660 || INTVAL (op1) < -16*1024*1024)
12661 break;
12662 if (GET_CODE (op0) == LABEL_REF)
12663 return true;
12664 if (GET_CODE (op0) == CONST
12665 && GET_CODE (XEXP (op0, 0)) == UNSPEC
12666 && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
12667 return true;
12668 if (GET_CODE (op0) == UNSPEC
12669 && XINT (op0, 1) == UNSPEC_PCREL)
12670 return true;
12671 if (GET_CODE (op0) != SYMBOL_REF)
12672 break;
12673 /* FALLTHRU */
12674
12675 case SYMBOL_REF:
12676 /* TLS references should always be enclosed in UNSPEC.
12677 	         A dllimported symbol always needs to be resolved.  */
12678 if (SYMBOL_REF_TLS_MODEL (op0)
12679 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
12680 return false;
12681
12682 if (TARGET_PECOFF)
12683 {
12684 if (is_imported_p (op0))
12685 return true;
12686
12687 if (SYMBOL_REF_FAR_ADDR_P (op0)
12688 || !SYMBOL_REF_LOCAL_P (op0))
12689 break;
12690
12691 	      /* Function symbols need to be resolved only for
12692 	         the large model.
12693 	         For the small model we don't need to resolve anything
12694 	         here.  */
12695 if ((ix86_cmodel != CM_LARGE_PIC
12696 && SYMBOL_REF_FUNCTION_P (op0))
12697 || ix86_cmodel == CM_SMALL_PIC)
12698 return true;
12699 	      /* Non-external symbols don't need to be resolved for
12700 	         the large and medium models.  */
12701 if ((ix86_cmodel == CM_LARGE_PIC
12702 || ix86_cmodel == CM_MEDIUM_PIC)
12703 && !SYMBOL_REF_EXTERNAL_P (op0))
12704 return true;
12705 }
12706 else if (!SYMBOL_REF_FAR_ADDR_P (op0)
12707 && SYMBOL_REF_LOCAL_P (op0)
12708 && ix86_cmodel != CM_LARGE_PIC)
12709 return true;
12710 break;
12711
12712 default:
12713 break;
12714 }
12715 }
12716 if (GET_CODE (disp) != CONST)
12717 return false;
12718 disp = XEXP (disp, 0);
12719
12720 if (TARGET_64BIT)
12721 {
12722       /* It is unsafe to allow PLUS expressions here; this limits the allowed
12723 	 distance of GOT tables.  We should not need these anyway.  */
12724 if (GET_CODE (disp) != UNSPEC
12725 || (XINT (disp, 1) != UNSPEC_GOTPCREL
12726 && XINT (disp, 1) != UNSPEC_GOTOFF
12727 && XINT (disp, 1) != UNSPEC_PCREL
12728 && XINT (disp, 1) != UNSPEC_PLTOFF))
12729 return false;
12730
12731 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
12732 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
12733 return false;
12734 return true;
12735 }
12736
12737 saw_plus = false;
12738 if (GET_CODE (disp) == PLUS)
12739 {
12740 if (!CONST_INT_P (XEXP (disp, 1)))
12741 return false;
12742 disp = XEXP (disp, 0);
12743 saw_plus = true;
12744 }
12745
12746 if (TARGET_MACHO && darwin_local_data_pic (disp))
12747 return true;
12748
12749 if (GET_CODE (disp) != UNSPEC)
12750 return false;
12751
12752 switch (XINT (disp, 1))
12753 {
12754 case UNSPEC_GOT:
12755 if (saw_plus)
12756 return false;
12757 /* We need to check for both symbols and labels because VxWorks loads
12758 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
12759 details. */
12760 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
12761 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
12762 case UNSPEC_GOTOFF:
12763 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
12764 	 While the ABI also specifies a 32bit relocation, we don't produce it in
12765 	 the small PIC model at all.  */
12766 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
12767 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
12768 && !TARGET_64BIT)
12769 return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
12770 return false;
12771 case UNSPEC_GOTTPOFF:
12772 case UNSPEC_GOTNTPOFF:
12773 case UNSPEC_INDNTPOFF:
12774 if (saw_plus)
12775 return false;
12776 disp = XVECEXP (disp, 0, 0);
12777 return (GET_CODE (disp) == SYMBOL_REF
12778 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
12779 case UNSPEC_NTPOFF:
12780 disp = XVECEXP (disp, 0, 0);
12781 return (GET_CODE (disp) == SYMBOL_REF
12782 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
12783 case UNSPEC_DTPOFF:
12784 disp = XVECEXP (disp, 0, 0);
12785 return (GET_CODE (disp) == SYMBOL_REF
12786 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
12787 }
12788
12789 return false;
12790 }
12791
12792 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
12793 replace the input X, or the original X if no replacement is called for.
12794 The output parameter *WIN is 1 if the calling macro should goto WIN,
12795 0 if it should not. */
12796
12797 bool
12798 ix86_legitimize_reload_address (rtx x, enum machine_mode, int opnum, int type,
12799 int)
12800 {
12801 /* Reload can generate:
12802
12803 (plus:DI (plus:DI (unspec:DI [(const_int 0 [0])] UNSPEC_TP)
12804 (reg:DI 97))
12805 (reg:DI 2 cx))
12806
12807      This RTX is rejected by ix86_legitimate_address_p due to
12808      non-strictness of base register 97.  Following this rejection,
12809      reload pushes all three components into separate registers,
12810      creating an invalid memory address RTX.
12811 
12812      The following code reloads only the invalid part of the
12813      memory address RTX.  */
12814
12815 if (GET_CODE (x) == PLUS
12816 && REG_P (XEXP (x, 1))
12817 && GET_CODE (XEXP (x, 0)) == PLUS
12818 && REG_P (XEXP (XEXP (x, 0), 1)))
12819 {
12820 rtx base, index;
12821 bool something_reloaded = false;
12822
12823 base = XEXP (XEXP (x, 0), 1);
12824 if (!REG_OK_FOR_BASE_STRICT_P (base))
12825 {
12826 push_reload (base, NULL_RTX, &XEXP (XEXP (x, 0), 1), NULL,
12827 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
12828 opnum, (enum reload_type) type);
12829 something_reloaded = true;
12830 }
12831
12832 index = XEXP (x, 1);
12833 if (!REG_OK_FOR_INDEX_STRICT_P (index))
12834 {
12835 push_reload (index, NULL_RTX, &XEXP (x, 1), NULL,
12836 INDEX_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
12837 opnum, (enum reload_type) type);
12838 something_reloaded = true;
12839 }
12840
12841 gcc_assert (something_reloaded);
12842 return true;
12843 }
12844
12845 return false;
12846 }
12847
12848 /* Determine if op is a suitable RTX for an address register.
12849 Return naked register if a register or a register subreg is
12850 found, otherwise return NULL_RTX. */
12851
12852 static rtx
12853 ix86_validate_address_register (rtx op)
12854 {
12855 enum machine_mode mode = GET_MODE (op);
12856
12857 /* Only SImode or DImode registers can form the address. */
12858 if (mode != SImode && mode != DImode)
12859 return NULL_RTX;
12860
12861 if (REG_P (op))
12862 return op;
12863 else if (GET_CODE (op) == SUBREG)
12864 {
12865 rtx reg = SUBREG_REG (op);
12866
12867 if (!REG_P (reg))
12868 return NULL_RTX;
12869
12870 mode = GET_MODE (reg);
12871
12872 /* Don't allow SUBREGs that span more than a word. It can
12873 lead to spill failures when the register is one word out
12874 of a two word structure. */
12875 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
12876 return NULL_RTX;
12877
12878 /* Allow only SUBREGs of non-eliminable hard registers. */
12879 if (register_no_elim_operand (reg, mode))
12880 return reg;
12881 }
12882
12883 /* Op is not a register. */
12884 return NULL_RTX;
12885 }
12886
12887 /* Recognizes RTL expressions that are valid memory addresses for an
12888 instruction. The MODE argument is the machine mode for the MEM
12889 expression that wants to use this address.
12890
12891    It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS should
12892 convert common non-canonical forms to canonical form so that they will
12893 be recognized. */
12894
12895 static bool
12896 ix86_legitimate_address_p (enum machine_mode, rtx addr, bool strict)
12897 {
12898 struct ix86_address parts;
12899 rtx base, index, disp;
12900 HOST_WIDE_INT scale;
12901 enum ix86_address_seg seg;
12902
12903 if (ix86_decompose_address (addr, &parts) <= 0)
12904 /* Decomposition failed. */
12905 return false;
12906
12907 base = parts.base;
12908 index = parts.index;
12909 disp = parts.disp;
12910 scale = parts.scale;
12911 seg = parts.seg;
12912
12913 /* Validate base register. */
12914 if (base)
12915 {
12916 rtx reg = ix86_validate_address_register (base);
12917
12918 if (reg == NULL_RTX)
12919 return false;
12920
12921 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
12922 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
12923 /* Base is not valid. */
12924 return false;
12925 }
12926
12927 /* Validate index register. */
12928 if (index)
12929 {
12930 rtx reg = ix86_validate_address_register (index);
12931
12932 if (reg == NULL_RTX)
12933 return false;
12934
12935 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
12936 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
12937 /* Index is not valid. */
12938 return false;
12939 }
12940
12941 /* Index and base should have the same mode. */
12942 if (base && index
12943 && GET_MODE (base) != GET_MODE (index))
12944 return false;
12945
12946 /* Address override works only on the (%reg) part of %fs:(%reg). */
12947 if (seg != SEG_DEFAULT
12948 && ((base && GET_MODE (base) != word_mode)
12949 || (index && GET_MODE (index) != word_mode)))
12950 return false;
12951
12952 /* Validate scale factor. */
12953 if (scale != 1)
12954 {
12955 if (!index)
12956 /* Scale without index. */
12957 return false;
12958
12959 if (scale != 2 && scale != 4 && scale != 8)
12960 /* Scale is not a valid multiplier. */
12961 return false;
12962 }
12963
12964 /* Validate displacement. */
12965 if (disp)
12966 {
12967 if (GET_CODE (disp) == CONST
12968 && GET_CODE (XEXP (disp, 0)) == UNSPEC
12969 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
12970 switch (XINT (XEXP (disp, 0), 1))
12971 {
12972 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
12973 		 used.  While the ABI also specifies 32bit relocations, we don't produce
12974 them at all and use IP relative instead. */
12975 case UNSPEC_GOT:
12976 case UNSPEC_GOTOFF:
12977 gcc_assert (flag_pic);
12978 if (!TARGET_64BIT)
12979 goto is_legitimate_pic;
12980
12981 /* 64bit address unspec. */
12982 return false;
12983
12984 case UNSPEC_GOTPCREL:
12985 case UNSPEC_PCREL:
12986 gcc_assert (flag_pic);
12987 goto is_legitimate_pic;
12988
12989 case UNSPEC_GOTTPOFF:
12990 case UNSPEC_GOTNTPOFF:
12991 case UNSPEC_INDNTPOFF:
12992 case UNSPEC_NTPOFF:
12993 case UNSPEC_DTPOFF:
12994 break;
12995
12996 case UNSPEC_STACK_CHECK:
12997 gcc_assert (flag_split_stack);
12998 break;
12999
13000 default:
13001 /* Invalid address unspec. */
13002 return false;
13003 }
13004
13005 else if (SYMBOLIC_CONST (disp)
13006 && (flag_pic
13007 || (TARGET_MACHO
13008 #if TARGET_MACHO
13009 && MACHOPIC_INDIRECT
13010 && !machopic_operand_p (disp)
13011 #endif
13012 )))
13013 {
13014
13015 is_legitimate_pic:
13016 if (TARGET_64BIT && (index || base))
13017 {
13018 /* foo@dtpoff(%rX) is ok. */
13019 if (GET_CODE (disp) != CONST
13020 || GET_CODE (XEXP (disp, 0)) != PLUS
13021 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
13022 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
13023 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
13024 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
13025 /* Non-constant pic memory reference. */
13026 return false;
13027 }
13028 else if ((!TARGET_MACHO || flag_pic)
13029 && ! legitimate_pic_address_disp_p (disp))
13030 /* Displacement is an invalid pic construct. */
13031 return false;
13032 #if TARGET_MACHO
13033 else if (MACHO_DYNAMIC_NO_PIC_P
13034 && !ix86_legitimate_constant_p (Pmode, disp))
13035 	/* displacement must be referenced via non_lazy_pointer */
13036 return false;
13037 #endif
13038
13039 /* This code used to verify that a symbolic pic displacement
13040 includes the pic_offset_table_rtx register.
13041
13042 	     While this is a good idea, unfortunately these constructs may
13043 be created by "adds using lea" optimization for incorrect
13044 code like:
13045
13046 int a;
13047 int foo(int i)
13048 {
13049 return *(&a+i);
13050 }
13051
13052 	     This code is nonsensical, but results in addressing the
13053 	     GOT table with a pic_offset_table_rtx base.  We can't
13054 	     just refuse it easily, since it gets matched by the
13055 	     "addsi3" pattern, which later gets split into an lea when
13056 	     the output register differs from the input.  While this
13057 	     could be handled by a separate addsi pattern for this case
13058 	     that never results in an lea, disabling this test seems to
13059 	     be the easier and correct fix for the crash.  */
13060 }
13061 else if (GET_CODE (disp) != LABEL_REF
13062 && !CONST_INT_P (disp)
13063 && (GET_CODE (disp) != CONST
13064 || !ix86_legitimate_constant_p (Pmode, disp))
13065 && (GET_CODE (disp) != SYMBOL_REF
13066 || !ix86_legitimate_constant_p (Pmode, disp)))
13067 /* Displacement is not constant. */
13068 return false;
13069 else if (TARGET_64BIT
13070 && !x86_64_immediate_operand (disp, VOIDmode))
13071 /* Displacement is out of range. */
13072 return false;
13073 /* In x32 mode, constant addresses are sign extended to 64bit, so
13074 we have to prevent addresses from 0x80000000 to 0xffffffff. */
13075 else if (TARGET_X32 && !(index || base)
13076 && CONST_INT_P (disp)
13077 && val_signbit_known_set_p (SImode, INTVAL (disp)))
13078 return false;
13079 }
13080
13081 /* Everything looks valid. */
13082 return true;
13083 }
13084
13085 /* Determine if a given RTX is a valid constant address. */
13086
13087 bool
13088 constant_address_p (rtx x)
13089 {
13090 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
13091 }
13092 \f
13093 /* Return a unique alias set for the GOT. */
13094
13095 static alias_set_type
13096 ix86_GOT_alias_set (void)
13097 {
13098 static alias_set_type set = -1;
13099 if (set == -1)
13100 set = new_alias_set ();
13101 return set;
13102 }
13103
13104 /* Return a legitimate reference for ORIG (an address) using the
13105 register REG. If REG is 0, a new pseudo is generated.
13106
13107 There are two types of references that must be handled:
13108
13109 1. Global data references must load the address from the GOT, via
13110 the PIC reg. An insn is emitted to do this load, and the reg is
13111 returned.
13112
13113 2. Static data references, constant pool addresses, and code labels
13114 compute the address as an offset from the GOT, whose base is in
13115 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
13116 differentiate them from global data objects. The returned
13117 address is the PIC reg + an unspec constant.
13118
13119 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
13120 reg also appears in the address. */
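/* Schematic illustration of the two cases above (32-bit ELF, assuming the
   PIC register lives in %ebx):

       movl	x@GOT(%ebx), %reg	# case 1: load &x from the GOT
       movl	(%reg), %eax		#         then load x itself

       movl	x@GOTOFF(%ebx), %eax	# case 2: local data, GOT-relative

   The 64-bit equivalents use RIP-relative @GOTPCREL references instead.  */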
13121
13122 static rtx
13123 legitimize_pic_address (rtx orig, rtx reg)
13124 {
13125 rtx addr = orig;
13126 rtx new_rtx = orig;
13127
13128 #if TARGET_MACHO
13129 if (TARGET_MACHO && !TARGET_64BIT)
13130 {
13131 if (reg == 0)
13132 reg = gen_reg_rtx (Pmode);
13133 /* Use the generic Mach-O PIC machinery. */
13134 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
13135 }
13136 #endif
13137
13138 if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13139 {
13140 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13141 if (tmp)
13142 return tmp;
13143 }
13144
13145 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
13146 new_rtx = addr;
13147 else if (TARGET_64BIT && !TARGET_PECOFF
13148 && ix86_cmodel != CM_SMALL_PIC && gotoff_operand (addr, Pmode))
13149 {
13150 rtx tmpreg;
13151 /* This symbol may be referenced via a displacement from the PIC
13152 base address (@GOTOFF). */
13153
13154 if (reload_in_progress)
13155 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13156 if (GET_CODE (addr) == CONST)
13157 addr = XEXP (addr, 0);
13158 if (GET_CODE (addr) == PLUS)
13159 {
13160 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13161 UNSPEC_GOTOFF);
13162 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13163 }
13164 else
13165 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13166 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13167 if (!reg)
13168 tmpreg = gen_reg_rtx (Pmode);
13169 else
13170 tmpreg = reg;
13171 emit_move_insn (tmpreg, new_rtx);
13172
13173 if (reg != 0)
13174 {
13175 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
13176 tmpreg, 1, OPTAB_DIRECT);
13177 new_rtx = reg;
13178 }
13179 else
13180 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
13181 }
13182 else if (!TARGET_64BIT && !TARGET_PECOFF && gotoff_operand (addr, Pmode))
13183 {
13184 /* This symbol may be referenced via a displacement from the PIC
13185 base address (@GOTOFF). */
13186
13187 if (reload_in_progress)
13188 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13189 if (GET_CODE (addr) == CONST)
13190 addr = XEXP (addr, 0);
13191 if (GET_CODE (addr) == PLUS)
13192 {
13193 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13194 UNSPEC_GOTOFF);
13195 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13196 }
13197 else
13198 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13199 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13200 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13201
13202 if (reg != 0)
13203 {
13204 emit_move_insn (reg, new_rtx);
13205 new_rtx = reg;
13206 }
13207 }
13208 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
13209 /* We can't use @GOTOFF for text labels on VxWorks;
13210 see gotoff_operand. */
13211 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
13212 {
13213 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13214 if (tmp)
13215 return tmp;
13216
13217 	  /* For x64 PE-COFF there is no GOT table, so we use the address
13218 	     directly.  */
13219 if (TARGET_64BIT && TARGET_PECOFF)
13220 {
13221 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
13222 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13223
13224 if (reg == 0)
13225 reg = gen_reg_rtx (Pmode);
13226 emit_move_insn (reg, new_rtx);
13227 new_rtx = reg;
13228 }
13229 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
13230 {
13231 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
13232 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13233 new_rtx = gen_const_mem (Pmode, new_rtx);
13234 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13235
13236 if (reg == 0)
13237 reg = gen_reg_rtx (Pmode);
13238 	  /* Use gen_movsi directly, otherwise the address is loaded
13239 	     into a register for CSE.  We don't want to CSE these addresses;
13240 	     instead we CSE addresses from the GOT table, so skip this.  */
13241 emit_insn (gen_movsi (reg, new_rtx));
13242 new_rtx = reg;
13243 }
13244 else
13245 {
13246 /* This symbol must be referenced via a load from the
13247 Global Offset Table (@GOT). */
13248
13249 if (reload_in_progress)
13250 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13251 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
13252 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13253 if (TARGET_64BIT)
13254 new_rtx = force_reg (Pmode, new_rtx);
13255 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13256 new_rtx = gen_const_mem (Pmode, new_rtx);
13257 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13258
13259 if (reg == 0)
13260 reg = gen_reg_rtx (Pmode);
13261 emit_move_insn (reg, new_rtx);
13262 new_rtx = reg;
13263 }
13264 }
13265 else
13266 {
13267 if (CONST_INT_P (addr)
13268 && !x86_64_immediate_operand (addr, VOIDmode))
13269 {
13270 if (reg)
13271 {
13272 emit_move_insn (reg, addr);
13273 new_rtx = reg;
13274 }
13275 else
13276 new_rtx = force_reg (Pmode, addr);
13277 }
13278 else if (GET_CODE (addr) == CONST)
13279 {
13280 addr = XEXP (addr, 0);
13281
13282 /* We must match stuff we generate before. Assume the only
13283 unspecs that can get here are ours. Not that we could do
13284 anything with them anyway.... */
13285 if (GET_CODE (addr) == UNSPEC
13286 || (GET_CODE (addr) == PLUS
13287 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
13288 return orig;
13289 gcc_assert (GET_CODE (addr) == PLUS);
13290 }
13291 if (GET_CODE (addr) == PLUS)
13292 {
13293 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
13294
13295 /* Check first to see if this is a constant offset from a @GOTOFF
13296 symbol reference. */
13297 if (!TARGET_PECOFF && gotoff_operand (op0, Pmode)
13298 && CONST_INT_P (op1))
13299 {
13300 if (!TARGET_64BIT)
13301 {
13302 if (reload_in_progress)
13303 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13304 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
13305 UNSPEC_GOTOFF);
13306 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
13307 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13308 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13309
13310 if (reg != 0)
13311 {
13312 emit_move_insn (reg, new_rtx);
13313 new_rtx = reg;
13314 }
13315 }
13316 else
13317 {
13318 if (INTVAL (op1) < -16*1024*1024
13319 || INTVAL (op1) >= 16*1024*1024)
13320 {
13321 if (!x86_64_immediate_operand (op1, Pmode))
13322 op1 = force_reg (Pmode, op1);
13323 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
13324 }
13325 }
13326 }
13327 else
13328 {
13329 rtx base = legitimize_pic_address (op0, reg);
13330 enum machine_mode mode = GET_MODE (base);
13331 new_rtx
13332 = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);
13333
13334 if (CONST_INT_P (new_rtx))
13335 {
13336 if (INTVAL (new_rtx) < -16*1024*1024
13337 || INTVAL (new_rtx) >= 16*1024*1024)
13338 {
13339 if (!x86_64_immediate_operand (new_rtx, mode))
13340 new_rtx = force_reg (mode, new_rtx);
13341 new_rtx
13342 = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
13343 }
13344 else
13345 new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
13346 }
13347 else
13348 {
13349 if (GET_CODE (new_rtx) == PLUS
13350 && CONSTANT_P (XEXP (new_rtx, 1)))
13351 {
13352 base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
13353 new_rtx = XEXP (new_rtx, 1);
13354 }
13355 new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
13356 }
13357 }
13358 }
13359 }
13360 return new_rtx;
13361 }
13362 \f
13363 /* Load the thread pointer. If TO_REG is true, force it into a register. */
13364
13365 static rtx
13366 get_thread_pointer (enum machine_mode tp_mode, bool to_reg)
13367 {
13368 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
13369
13370 if (GET_MODE (tp) != tp_mode)
13371 {
13372 gcc_assert (GET_MODE (tp) == SImode);
13373 gcc_assert (tp_mode == DImode);
13374
13375 tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
13376 }
13377
13378 if (to_reg)
13379 tp = copy_to_mode_reg (tp_mode, tp);
13380
13381 return tp;
13382 }
13383
13384 /* Construct the SYMBOL_REF for the tls_get_addr function. */
13385
13386 static GTY(()) rtx ix86_tls_symbol;
13387
13388 static rtx
13389 ix86_tls_get_addr (void)
13390 {
13391 if (!ix86_tls_symbol)
13392 {
13393 const char *sym
13394 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
13395 ? "___tls_get_addr" : "__tls_get_addr");
13396
13397 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
13398 }
13399
13400 if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
13401 {
13402 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
13403 UNSPEC_PLTOFF);
13404 return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
13405 gen_rtx_CONST (Pmode, unspec));
13406 }
13407
13408 return ix86_tls_symbol;
13409 }
13410
13411 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
13412
13413 static GTY(()) rtx ix86_tls_module_base_symbol;
13414
13415 rtx
13416 ix86_tls_module_base (void)
13417 {
13418 if (!ix86_tls_module_base_symbol)
13419 {
13420 ix86_tls_module_base_symbol
13421 = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
13422
13423 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
13424 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
13425 }
13426
13427 return ix86_tls_module_base_symbol;
13428 }
13429
13430 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
13431 false if we expect this to be used for a memory address and true if
13432 we expect to load the address into a register. */
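/* Illustrative sketch of the local-exec case handled below (GNU TLS with
   direct segment references; exact relocation names vary by target):

       movl	%fs:x@tpoff, %eax	# 64-bit
       movl	%gs:x@ntpoff, %eax	# 32-bit

   i.e. the UNSPEC_NTPOFF displacement is folded into an address relative
   to the thread pointer obtained from get_thread_pointer.  */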
13433
13434 static rtx
13435 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
13436 {
13437 rtx dest, base, off;
13438 rtx pic = NULL_RTX, tp = NULL_RTX;
13439 enum machine_mode tp_mode = Pmode;
13440 int type;
13441
13442   /* Fall back to the global dynamic model if the toolchain cannot support
13443      local dynamic.  */
13444 if (TARGET_SUN_TLS && !TARGET_64BIT
13445 && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
13446 && model == TLS_MODEL_LOCAL_DYNAMIC)
13447 model = TLS_MODEL_GLOBAL_DYNAMIC;
13448
13449 switch (model)
13450 {
13451 case TLS_MODEL_GLOBAL_DYNAMIC:
13452 dest = gen_reg_rtx (Pmode);
13453
13454 if (!TARGET_64BIT)
13455 {
13456 if (flag_pic && !TARGET_PECOFF)
13457 pic = pic_offset_table_rtx;
13458 else
13459 {
13460 pic = gen_reg_rtx (Pmode);
13461 emit_insn (gen_set_got (pic));
13462 }
13463 }
13464
13465 if (TARGET_GNU2_TLS)
13466 {
13467 if (TARGET_64BIT)
13468 emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
13469 else
13470 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
13471
13472 tp = get_thread_pointer (Pmode, true);
13473 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
13474
13475 if (GET_MODE (x) != Pmode)
13476 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13477
13478 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
13479 }
13480 else
13481 {
13482 rtx caddr = ix86_tls_get_addr ();
13483
13484 if (TARGET_64BIT)
13485 {
13486 rtx rax = gen_rtx_REG (Pmode, AX_REG);
13487 rtx insns;
13488
13489 start_sequence ();
13490 emit_call_insn
13491 (ix86_gen_tls_global_dynamic_64 (rax, x, caddr));
13492 insns = get_insns ();
13493 end_sequence ();
13494
13495 if (GET_MODE (x) != Pmode)
13496 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13497
13498 RTL_CONST_CALL_P (insns) = 1;
13499 emit_libcall_block (insns, dest, rax, x);
13500 }
13501 else
13502 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
13503 }
13504 break;
13505
13506 case TLS_MODEL_LOCAL_DYNAMIC:
13507 base = gen_reg_rtx (Pmode);
13508
13509 if (!TARGET_64BIT)
13510 {
13511 if (flag_pic)
13512 pic = pic_offset_table_rtx;
13513 else
13514 {
13515 pic = gen_reg_rtx (Pmode);
13516 emit_insn (gen_set_got (pic));
13517 }
13518 }
13519
13520 if (TARGET_GNU2_TLS)
13521 {
13522 rtx tmp = ix86_tls_module_base ();
13523
13524 if (TARGET_64BIT)
13525 emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
13526 else
13527 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
13528
13529 tp = get_thread_pointer (Pmode, true);
13530 set_unique_reg_note (get_last_insn (), REG_EQUAL,
13531 gen_rtx_MINUS (Pmode, tmp, tp));
13532 }
13533 else
13534 {
13535 rtx caddr = ix86_tls_get_addr ();
13536
13537 if (TARGET_64BIT)
13538 {
13539 rtx rax = gen_rtx_REG (Pmode, AX_REG);
13540 rtx insns, eqv;
13541
13542 start_sequence ();
13543 emit_call_insn
13544 (ix86_gen_tls_local_dynamic_base_64 (rax, caddr));
13545 insns = get_insns ();
13546 end_sequence ();
13547
13548 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
13549 share the LD_BASE result with other LD model accesses. */
13550 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
13551 UNSPEC_TLS_LD_BASE);
13552
13553 RTL_CONST_CALL_P (insns) = 1;
13554 emit_libcall_block (insns, base, rax, eqv);
13555 }
13556 else
13557 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
13558 }
13559
13560 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
13561 off = gen_rtx_CONST (Pmode, off);
13562
13563 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
13564
13565 if (TARGET_GNU2_TLS)
13566 {
13567 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
13568
13569 if (GET_MODE (x) != Pmode)
13570 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13571
13572 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
13573 }
13574 break;
13575
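    /* Initial exec model: load the symbol's TP-relative offset from the GOT
       (@gottpoff/@indntpoff) and combine it with the thread pointer.  */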
13576 case TLS_MODEL_INITIAL_EXEC:
13577 if (TARGET_64BIT)
13578 {
13579 if (TARGET_SUN_TLS && !TARGET_X32)
13580 {
13581 /* The Sun linker took the AMD64 TLS spec literally
13582 and can only handle %rax as the destination of the
13583 initial-exec code sequence. */
13584
13585 dest = gen_reg_rtx (DImode);
13586 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
13587 return dest;
13588 }
13589
13590 /* Generate DImode references to avoid %fs:(%reg32)
13591 problems and the linker IE->LE relaxation bug. */
13592 tp_mode = DImode;
13593 pic = NULL;
13594 type = UNSPEC_GOTNTPOFF;
13595 }
13596 else if (flag_pic)
13597 {
13598 if (reload_in_progress)
13599 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13600 pic = pic_offset_table_rtx;
13601 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
13602 }
13603 else if (!TARGET_ANY_GNU_TLS)
13604 {
13605 pic = gen_reg_rtx (Pmode);
13606 emit_insn (gen_set_got (pic));
13607 type = UNSPEC_GOTTPOFF;
13608 }
13609 else
13610 {
13611 pic = NULL;
13612 type = UNSPEC_INDNTPOFF;
13613 }
13614
13615 off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
13616 off = gen_rtx_CONST (tp_mode, off);
13617 if (pic)
13618 off = gen_rtx_PLUS (tp_mode, pic, off);
13619 off = gen_const_mem (tp_mode, off);
13620 set_mem_alias_set (off, ix86_GOT_alias_set ());
13621
13622 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
13623 {
13624 base = get_thread_pointer (tp_mode,
13625 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
13626 off = force_reg (tp_mode, off);
13627 return gen_rtx_PLUS (tp_mode, base, off);
13628 }
13629 else
13630 {
13631 base = get_thread_pointer (Pmode, true);
13632 dest = gen_reg_rtx (Pmode);
13633 emit_insn (ix86_gen_sub3 (dest, base, off));
13634 }
13635 break;
13636
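    /* Local exec model: the offset from the thread pointer is a link-time
       constant (@tpoff/@ntpoff), so no GOT access is needed.  */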
13637 case TLS_MODEL_LOCAL_EXEC:
13638 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
13639 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
13640 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
13641 off = gen_rtx_CONST (Pmode, off);
13642
13643 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
13644 {
13645 base = get_thread_pointer (Pmode,
13646 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
13647 return gen_rtx_PLUS (Pmode, base, off);
13648 }
13649 else
13650 {
13651 base = get_thread_pointer (Pmode, true);
13652 dest = gen_reg_rtx (Pmode);
13653 emit_insn (ix86_gen_sub3 (dest, base, off));
13654 }
13655 break;
13656
13657 default:
13658 gcc_unreachable ();
13659 }
13660
13661 return dest;
13662 }
13663
13664 /* Create or return the unique __imp_DECL dllimport symbol corresponding
13665 to symbol DECL if BEIMPORT is true. Otherwise create or return the
13666 unique refptr-DECL symbol corresponding to symbol DECL. */
13667
13668 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
13669 htab_t dllimport_map;
13670
13671 static tree
13672 get_dllimport_decl (tree decl, bool beimport)
13673 {
13674 struct tree_map *h, in;
13675 void **loc;
13676 const char *name;
13677 const char *prefix;
13678 size_t namelen, prefixlen;
13679 char *imp_name;
13680 tree to;
13681 rtx rtl;
13682
13683 if (!dllimport_map)
13684 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
13685
13686 in.hash = htab_hash_pointer (decl);
13687 in.base.from = decl;
13688 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
13689 h = (struct tree_map *) *loc;
13690 if (h)
13691 return h->to;
13692
13693 *loc = h = ggc_alloc<tree_map> ();
13694 h->hash = in.hash;
13695 h->base.from = decl;
13696 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
13697 VAR_DECL, NULL, ptr_type_node);
13698 DECL_ARTIFICIAL (to) = 1;
13699 DECL_IGNORED_P (to) = 1;
13700 DECL_EXTERNAL (to) = 1;
13701 TREE_READONLY (to) = 1;
13702
13703 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
13704 name = targetm.strip_name_encoding (name);
13705 if (beimport)
13706 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
13707 ? "*__imp_" : "*__imp__";
13708 else
13709 prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr.";
13710 namelen = strlen (name);
13711 prefixlen = strlen (prefix);
13712 imp_name = (char *) alloca (namelen + prefixlen + 1);
13713 memcpy (imp_name, prefix, prefixlen);
13714 memcpy (imp_name + prefixlen, name, namelen + 1);
13715
13716 name = ggc_alloc_string (imp_name, namelen + prefixlen);
13717 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
13718 SET_SYMBOL_REF_DECL (rtl, to);
13719 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR;
13720 if (!beimport)
13721 {
13722 SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL;
13723 #ifdef SUB_TARGET_RECORD_STUB
13724 SUB_TARGET_RECORD_STUB (name);
13725 #endif
13726 }
13727
13728 rtl = gen_const_mem (Pmode, rtl);
13729 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
13730
13731 SET_DECL_RTL (to, rtl);
13732 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
13733
13734 return to;
13735 }
13736
13737 /* Expand SYMBOL into its corresponding far-address symbol.
13738 WANT_REG is true if we require the result be a register. */
13739
13740 static rtx
13741 legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg)
13742 {
13743 tree imp_decl;
13744 rtx x;
13745
13746 gcc_assert (SYMBOL_REF_DECL (symbol));
13747 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false);
13748
13749 x = DECL_RTL (imp_decl);
13750 if (want_reg)
13751 x = force_reg (Pmode, x);
13752 return x;
13753 }
13754
13755 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
13756 true if we require the result be a register. */
13757
13758 static rtx
13759 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
13760 {
13761 tree imp_decl;
13762 rtx x;
13763
13764 gcc_assert (SYMBOL_REF_DECL (symbol));
13765 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true);
13766
13767 x = DECL_RTL (imp_decl);
13768 if (want_reg)
13769 x = force_reg (Pmode, x);
13770 return x;
13771 }
13772
13773 /* Expand ADDR into its corresponding dllimport or refptr symbol.  INREG
13774 is true if we require the result be a register. */
13775
13776 static rtx
13777 legitimize_pe_coff_symbol (rtx addr, bool inreg)
13778 {
13779 if (!TARGET_PECOFF)
13780 return NULL_RTX;
13781
13782 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13783 {
13784 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
13785 return legitimize_dllimport_symbol (addr, inreg);
13786 if (GET_CODE (addr) == CONST
13787 && GET_CODE (XEXP (addr, 0)) == PLUS
13788 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
13789 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
13790 {
13791 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), inreg);
13792 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
13793 }
13794 }
13795
13796 if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC)
13797 return NULL_RTX;
13798 if (GET_CODE (addr) == SYMBOL_REF
13799 && !is_imported_p (addr)
13800 && SYMBOL_REF_EXTERNAL_P (addr)
13801 && SYMBOL_REF_DECL (addr))
13802 return legitimize_pe_coff_extern_decl (addr, inreg);
13803
13804 if (GET_CODE (addr) == CONST
13805 && GET_CODE (XEXP (addr, 0)) == PLUS
13806 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
13807 && !is_imported_p (XEXP (XEXP (addr, 0), 0))
13808 && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0))
13809 && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0)))
13810 {
13811 rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), inreg);
13812 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
13813 }
13814 return NULL_RTX;
13815 }
13816
13817 /* Try machine-dependent ways of modifying an illegitimate address
13818 to be legitimate. If we find one, return the new, valid address.
13819 This macro is used in only one place: `memory_address' in explow.c.
13820
13821 OLDX is the address as it was before break_out_memory_refs was called.
13822 In some cases it is useful to look at this to decide what needs to be done.
13823
13824 It is always safe for this macro to do nothing. It exists to recognize
13825 opportunities to optimize the output.
13826
13827 For the 80386, we handle X+REG by loading X into a register R and
13828 using R+REG. R will go in a general reg and indexing will be used.
13829 However, if REG is a broken-out memory address or multiplication,
13830 nothing needs to be done because REG can certainly go in a general reg.
13831
13832 When -fpic is used, special handling is needed for symbolic references.
13833 See comments by legitimize_pic_address in i386.c for details. */
13834
13835 static rtx
13836 ix86_legitimize_address (rtx x, rtx, enum machine_mode mode)
13837 {
13838 int changed = 0;
13839 unsigned log;
13840
13841 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
13842 if (log)
13843 return legitimize_tls_address (x, (enum tls_model) log, false);
13844 if (GET_CODE (x) == CONST
13845 && GET_CODE (XEXP (x, 0)) == PLUS
13846 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
13847 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
13848 {
13849 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
13850 (enum tls_model) log, false);
13851 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
13852 }
13853
13854 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13855 {
13856 rtx tmp = legitimize_pe_coff_symbol (x, true);
13857 if (tmp)
13858 return tmp;
13859 }
13860
13861 if (flag_pic && SYMBOLIC_CONST (x))
13862 return legitimize_pic_address (x, 0);
13863
13864 #if TARGET_MACHO
13865 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
13866 return machopic_indirect_data_reference (x, 0);
13867 #endif
13868
13869 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
13870 if (GET_CODE (x) == ASHIFT
13871 && CONST_INT_P (XEXP (x, 1))
13872 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
13873 {
13874 changed = 1;
13875 log = INTVAL (XEXP (x, 1));
13876 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
13877 GEN_INT (1 << log));
13878 }
13879
13880 if (GET_CODE (x) == PLUS)
13881 {
13882 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
13883
13884 if (GET_CODE (XEXP (x, 0)) == ASHIFT
13885 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
13886 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
13887 {
13888 changed = 1;
13889 log = INTVAL (XEXP (XEXP (x, 0), 1));
13890 XEXP (x, 0) = gen_rtx_MULT (Pmode,
13891 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
13892 GEN_INT (1 << log));
13893 }
13894
13895 if (GET_CODE (XEXP (x, 1)) == ASHIFT
13896 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
13897 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
13898 {
13899 changed = 1;
13900 log = INTVAL (XEXP (XEXP (x, 1), 1));
13901 XEXP (x, 1) = gen_rtx_MULT (Pmode,
13902 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
13903 GEN_INT (1 << log));
13904 }
13905
13906 /* Put multiply first if it isn't already. */
13907 if (GET_CODE (XEXP (x, 1)) == MULT)
13908 {
13909 rtx tmp = XEXP (x, 0);
13910 XEXP (x, 0) = XEXP (x, 1);
13911 XEXP (x, 1) = tmp;
13912 changed = 1;
13913 }
13914
13915 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
13916 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
13917 created by virtual register instantiation, register elimination, and
13918 similar optimizations. */
13919 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
13920 {
13921 changed = 1;
13922 x = gen_rtx_PLUS (Pmode,
13923 gen_rtx_PLUS (Pmode, XEXP (x, 0),
13924 XEXP (XEXP (x, 1), 0)),
13925 XEXP (XEXP (x, 1), 1));
13926 }
13927
13928 /* Canonicalize
13929 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
13930 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
13931 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
13932 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
13933 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
13934 && CONSTANT_P (XEXP (x, 1)))
13935 {
13936 rtx constant;
13937 rtx other = NULL_RTX;
13938
13939 if (CONST_INT_P (XEXP (x, 1)))
13940 {
13941 constant = XEXP (x, 1);
13942 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
13943 }
13944 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
13945 {
13946 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
13947 other = XEXP (x, 1);
13948 }
13949 else
13950 constant = 0;
13951
13952 if (constant)
13953 {
13954 changed = 1;
13955 x = gen_rtx_PLUS (Pmode,
13956 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
13957 XEXP (XEXP (XEXP (x, 0), 1), 0)),
13958 plus_constant (Pmode, other,
13959 INTVAL (constant)));
13960 }
13961 }
13962
13963 if (changed && ix86_legitimate_address_p (mode, x, false))
13964 return x;
13965
13966 if (GET_CODE (XEXP (x, 0)) == MULT)
13967 {
13968 changed = 1;
13969 XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
13970 }
13971
13972 if (GET_CODE (XEXP (x, 1)) == MULT)
13973 {
13974 changed = 1;
13975 XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
13976 }
13977
13978 if (changed
13979 && REG_P (XEXP (x, 1))
13980 && REG_P (XEXP (x, 0)))
13981 return x;
13982
13983 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
13984 {
13985 changed = 1;
13986 x = legitimize_pic_address (x, 0);
13987 }
13988
13989 if (changed && ix86_legitimate_address_p (mode, x, false))
13990 return x;
13991
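      /* As a last resort, if one side of the PLUS is already a register,
	 force the other side into a fresh register so that the address
	 has the valid base + index form.  */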
13992 if (REG_P (XEXP (x, 0)))
13993 {
13994 rtx temp = gen_reg_rtx (Pmode);
13995 rtx val = force_operand (XEXP (x, 1), temp);
13996 if (val != temp)
13997 {
13998 val = convert_to_mode (Pmode, val, 1);
13999 emit_move_insn (temp, val);
14000 }
14001
14002 XEXP (x, 1) = temp;
14003 return x;
14004 }
14005
14006 else if (REG_P (XEXP (x, 1)))
14007 {
14008 rtx temp = gen_reg_rtx (Pmode);
14009 rtx val = force_operand (XEXP (x, 0), temp);
14010 if (val != temp)
14011 {
14012 val = convert_to_mode (Pmode, val, 1);
14013 emit_move_insn (temp, val);
14014 }
14015
14016 XEXP (x, 0) = temp;
14017 return x;
14018 }
14019 }
14020
14021 return x;
14022 }
14023 \f
14024 /* Print an integer constant expression in assembler syntax. Addition
14025 and subtraction are the only arithmetic that may appear in these
14026 expressions. FILE is the stdio stream to write to, X is the rtx, and
14027 CODE is the operand print code from the output string. */
14028
14029 static void
14030 output_pic_addr_const (FILE *file, rtx x, int code)
14031 {
14032 char buf[256];
14033
14034 switch (GET_CODE (x))
14035 {
14036 case PC:
14037 gcc_assert (flag_pic);
14038 putc ('.', file);
14039 break;
14040
14041 case SYMBOL_REF:
14042 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
14043 output_addr_const (file, x);
14044 else
14045 {
14046 const char *name = XSTR (x, 0);
14047
14048 /* Mark the decl as referenced so that cgraph will
14049 output the function. */
14050 if (SYMBOL_REF_DECL (x))
14051 mark_decl_referenced (SYMBOL_REF_DECL (x));
14052
14053 #if TARGET_MACHO
14054 if (MACHOPIC_INDIRECT
14055 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
14056 name = machopic_indirection_name (x, /*stub_p=*/true);
14057 #endif
14058 assemble_name (file, name);
14059 }
14060 if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
14061 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
14062 fputs ("@PLT", file);
14063 break;
14064
14065 case LABEL_REF:
14066 x = XEXP (x, 0);
14067 /* FALLTHRU */
14068 case CODE_LABEL:
14069 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
14070 assemble_name (asm_out_file, buf);
14071 break;
14072
14073 case CONST_INT:
14074 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14075 break;
14076
14077 case CONST:
14078 /* This used to output parentheses around the expression,
14079 but that does not work on the 386 (either ATT or BSD assembler). */
14080 output_pic_addr_const (file, XEXP (x, 0), code);
14081 break;
14082
14083 case CONST_DOUBLE:
14084 if (GET_MODE (x) == VOIDmode)
14085 {
14086 /* We can use %d if the number is <32 bits and positive. */
14087 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
14088 fprintf (file, "0x%lx%08lx",
14089 (unsigned long) CONST_DOUBLE_HIGH (x),
14090 (unsigned long) CONST_DOUBLE_LOW (x));
14091 else
14092 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
14093 }
14094 else
14095 /* We can't handle floating point constants;
14096 TARGET_PRINT_OPERAND must handle them. */
14097 output_operand_lossage ("floating constant misused");
14098 break;
14099
14100 case PLUS:
14101 /* Some assemblers need integer constants to appear first. */
14102 if (CONST_INT_P (XEXP (x, 0)))
14103 {
14104 output_pic_addr_const (file, XEXP (x, 0), code);
14105 putc ('+', file);
14106 output_pic_addr_const (file, XEXP (x, 1), code);
14107 }
14108 else
14109 {
14110 gcc_assert (CONST_INT_P (XEXP (x, 1)));
14111 output_pic_addr_const (file, XEXP (x, 1), code);
14112 putc ('+', file);
14113 output_pic_addr_const (file, XEXP (x, 0), code);
14114 }
14115 break;
14116
14117 case MINUS:
14118 if (!TARGET_MACHO)
14119 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
14120 output_pic_addr_const (file, XEXP (x, 0), code);
14121 putc ('-', file);
14122 output_pic_addr_const (file, XEXP (x, 1), code);
14123 if (!TARGET_MACHO)
14124 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
14125 break;
14126
14127 case UNSPEC:
14128 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
14129 {
14130 bool f = i386_asm_output_addr_const_extra (file, x);
14131 gcc_assert (f);
14132 break;
14133 }
14134
14135 gcc_assert (XVECLEN (x, 0) == 1);
14136 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
14137 switch (XINT (x, 1))
14138 {
14139 case UNSPEC_GOT:
14140 fputs ("@GOT", file);
14141 break;
14142 case UNSPEC_GOTOFF:
14143 fputs ("@GOTOFF", file);
14144 break;
14145 case UNSPEC_PLTOFF:
14146 fputs ("@PLTOFF", file);
14147 break;
14148 case UNSPEC_PCREL:
14149 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14150 "(%rip)" : "[rip]", file);
14151 break;
14152 case UNSPEC_GOTPCREL:
14153 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14154 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
14155 break;
14156 case UNSPEC_GOTTPOFF:
14157 /* FIXME: This might be @TPOFF in Sun ld too. */
14158 fputs ("@gottpoff", file);
14159 break;
14160 case UNSPEC_TPOFF:
14161 fputs ("@tpoff", file);
14162 break;
14163 case UNSPEC_NTPOFF:
14164 if (TARGET_64BIT)
14165 fputs ("@tpoff", file);
14166 else
14167 fputs ("@ntpoff", file);
14168 break;
14169 case UNSPEC_DTPOFF:
14170 fputs ("@dtpoff", file);
14171 break;
14172 case UNSPEC_GOTNTPOFF:
14173 if (TARGET_64BIT)
14174 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14175 "@gottpoff(%rip)": "@gottpoff[rip]", file);
14176 else
14177 fputs ("@gotntpoff", file);
14178 break;
14179 case UNSPEC_INDNTPOFF:
14180 fputs ("@indntpoff", file);
14181 break;
14182 #if TARGET_MACHO
14183 case UNSPEC_MACHOPIC_OFFSET:
14184 putc ('-', file);
14185 machopic_output_function_base_name (file);
14186 break;
14187 #endif
14188 default:
14189 output_operand_lossage ("invalid UNSPEC as operand");
14190 break;
14191 }
14192 break;
14193
14194 default:
14195 output_operand_lossage ("invalid expression as operand");
14196 }
14197 }
14198
14199 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
14200 We need to emit DTP-relative relocations. */
14201
14202 static void ATTRIBUTE_UNUSED
14203 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
14204 {
14205 fputs (ASM_LONG, file);
14206 output_addr_const (file, x);
14207 fputs ("@dtpoff", file);
14208 switch (size)
14209 {
14210 case 4:
14211 break;
14212 case 8:
14213 fputs (", 0", file);
14214 break;
14215 default:
14216 gcc_unreachable ();
14217 }
14218 }
14219
14220 /* Return true if X is a representation of the PIC register. This copes
14221 with calls from ix86_find_base_term, where the register might have
14222 been replaced by a cselib value. */
14223
14224 static bool
14225 ix86_pic_register_p (rtx x)
14226 {
14227 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
14228 return (pic_offset_table_rtx
14229 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
14230 else
14231 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
14232 }
14233
14234 /* Helper function for ix86_delegitimize_address.
14235 Attempt to delegitimize TLS local-exec accesses. */
14236
14237 static rtx
14238 ix86_delegitimize_tls_address (rtx orig_x)
14239 {
14240 rtx x = orig_x, unspec;
14241 struct ix86_address addr;
14242
14243 if (!TARGET_TLS_DIRECT_SEG_REFS)
14244 return orig_x;
14245 if (MEM_P (x))
14246 x = XEXP (x, 0);
14247 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
14248 return orig_x;
14249 if (ix86_decompose_address (x, &addr) == 0
14250 || addr.seg != DEFAULT_TLS_SEG_REG
14251 || addr.disp == NULL_RTX
14252 || GET_CODE (addr.disp) != CONST)
14253 return orig_x;
14254 unspec = XEXP (addr.disp, 0);
14255 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
14256 unspec = XEXP (unspec, 0);
14257 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
14258 return orig_x;
14259 x = XVECEXP (unspec, 0, 0);
14260 gcc_assert (GET_CODE (x) == SYMBOL_REF);
14261 if (unspec != XEXP (addr.disp, 0))
14262 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
14263 if (addr.index)
14264 {
14265 rtx idx = addr.index;
14266 if (addr.scale != 1)
14267 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
14268 x = gen_rtx_PLUS (Pmode, idx, x);
14269 }
14270 if (addr.base)
14271 x = gen_rtx_PLUS (Pmode, addr.base, x);
14272 if (MEM_P (orig_x))
14273 x = replace_equiv_address_nv (orig_x, x);
14274 return x;
14275 }
14276
14277 /* In the name of slightly smaller debug output, and to cater to
14278 general assembler lossage, recognize PIC+GOTOFF and turn it back
14279 into a direct symbol reference.
14280
14281 On Darwin, this is necessary to avoid a crash, because Darwin
14282 has a different PIC label for each routine but the DWARF debugging
14283 information is not associated with any particular routine, so it's
14284 necessary to remove references to the PIC label from RTL stored by
14285 the DWARF output code. */
14286
14287 static rtx
14288 ix86_delegitimize_address (rtx x)
14289 {
14290 rtx orig_x = delegitimize_mem_from_attrs (x);
14291 /* addend is NULL or some rtx if x is something+GOTOFF where
14292 something doesn't include the PIC register. */
14293 rtx addend = NULL_RTX;
14294 /* reg_addend is NULL or a multiple of some register. */
14295 rtx reg_addend = NULL_RTX;
14296 /* const_addend is NULL or a const_int. */
14297 rtx const_addend = NULL_RTX;
14298 /* This is the result, or NULL. */
14299 rtx result = NULL_RTX;
14300
14301 x = orig_x;
14302
14303 if (MEM_P (x))
14304 x = XEXP (x, 0);
14305
14306 if (TARGET_64BIT)
14307 {
14308 if (GET_CODE (x) == CONST
14309 && GET_CODE (XEXP (x, 0)) == PLUS
14310 && GET_MODE (XEXP (x, 0)) == Pmode
14311 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
14312 && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
14313 && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
14314 {
14315 rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
14316 x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
14317 if (MEM_P (orig_x))
14318 x = replace_equiv_address_nv (orig_x, x);
14319 return x;
14320 }
14321
14322 if (GET_CODE (x) == CONST
14323 && GET_CODE (XEXP (x, 0)) == UNSPEC
14324 && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
14325 || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
14326 && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
14327 {
14328 x = XVECEXP (XEXP (x, 0), 0, 0);
14329 if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
14330 {
14331 x = simplify_gen_subreg (GET_MODE (orig_x), x,
14332 GET_MODE (x), 0);
14333 if (x == NULL_RTX)
14334 return orig_x;
14335 }
14336 return x;
14337 }
14338
14339 if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
14340 return ix86_delegitimize_tls_address (orig_x);
14341
14342 /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
14343 and -mcmodel=medium -fpic. */
14344 }
14345
14346 if (GET_CODE (x) != PLUS
14347 || GET_CODE (XEXP (x, 1)) != CONST)
14348 return ix86_delegitimize_tls_address (orig_x);
14349
14350 if (ix86_pic_register_p (XEXP (x, 0)))
14351 /* %ebx + GOT/GOTOFF */
14352 ;
14353 else if (GET_CODE (XEXP (x, 0)) == PLUS)
14354 {
14355 /* %ebx + %reg * scale + GOT/GOTOFF */
14356 reg_addend = XEXP (x, 0);
14357 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
14358 reg_addend = XEXP (reg_addend, 1);
14359 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
14360 reg_addend = XEXP (reg_addend, 0);
14361 else
14362 {
14363 reg_addend = NULL_RTX;
14364 addend = XEXP (x, 0);
14365 }
14366 }
14367 else
14368 addend = XEXP (x, 0);
14369
14370 x = XEXP (XEXP (x, 1), 0);
14371 if (GET_CODE (x) == PLUS
14372 && CONST_INT_P (XEXP (x, 1)))
14373 {
14374 const_addend = XEXP (x, 1);
14375 x = XEXP (x, 0);
14376 }
14377
14378 if (GET_CODE (x) == UNSPEC
14379 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
14380 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
14381 || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
14382 && !MEM_P (orig_x) && !addend)))
14383 result = XVECEXP (x, 0, 0);
14384
14385 if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
14386 && !MEM_P (orig_x))
14387 result = XVECEXP (x, 0, 0);
14388
14389 if (! result)
14390 return ix86_delegitimize_tls_address (orig_x);
14391
14392 if (const_addend)
14393 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
14394 if (reg_addend)
14395 result = gen_rtx_PLUS (Pmode, reg_addend, result);
14396 if (addend)
14397 {
14398 /* If the rest of original X doesn't involve the PIC register, add
14399 addend and subtract pic_offset_table_rtx. This can happen e.g.
14400 for code like:
14401 leal (%ebx, %ecx, 4), %ecx
14402 ...
14403 movl foo@GOTOFF(%ecx), %edx
14404 in which case we return (%ecx - %ebx) + foo. */
14405 if (pic_offset_table_rtx)
14406 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
14407 pic_offset_table_rtx),
14408 result);
14409 else
14410 return orig_x;
14411 }
14412 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
14413 {
14414 result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
14415 if (result == NULL_RTX)
14416 return orig_x;
14417 }
14418 return result;
14419 }
14420
14421 /* If X is a machine specific address (i.e. a symbol or label being
14422 referenced as a displacement from the GOT implemented using an
14423 UNSPEC), then return the base term. Otherwise return X. */
14424
14425 rtx
14426 ix86_find_base_term (rtx x)
14427 {
14428 rtx term;
14429
14430 if (TARGET_64BIT)
14431 {
14432 if (GET_CODE (x) != CONST)
14433 return x;
14434 term = XEXP (x, 0);
14435 if (GET_CODE (term) == PLUS
14436 && (CONST_INT_P (XEXP (term, 1))
14437 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
14438 term = XEXP (term, 0);
14439 if (GET_CODE (term) != UNSPEC
14440 || (XINT (term, 1) != UNSPEC_GOTPCREL
14441 && XINT (term, 1) != UNSPEC_PCREL))
14442 return x;
14443
14444 return XVECEXP (term, 0, 0);
14445 }
14446
14447 return ix86_delegitimize_address (x);
14448 }
14449 \f
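/* Write to FILE the condition-code suffix (e, ne, g, l, a, b, ...) matching
   comparison CODE in mode MODE.  If REVERSE is true, emit the suffix for the
   reversed condition; FP selects the spellings needed by fcmov and floating
   point compares for a few unsigned and (un)ordered cases.  */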
14450 static void
14451 put_condition_code (enum rtx_code code, enum machine_mode mode, bool reverse,
14452 bool fp, FILE *file)
14453 {
14454 const char *suffix;
14455
14456 if (mode == CCFPmode || mode == CCFPUmode)
14457 {
14458 code = ix86_fp_compare_code_to_integer (code);
14459 mode = CCmode;
14460 }
14461 if (reverse)
14462 code = reverse_condition (code);
14463
14464 switch (code)
14465 {
14466 case EQ:
14467 switch (mode)
14468 {
14469 case CCAmode:
14470 suffix = "a";
14471 break;
14472
14473 case CCCmode:
14474 suffix = "c";
14475 break;
14476
14477 case CCOmode:
14478 suffix = "o";
14479 break;
14480
14481 case CCSmode:
14482 suffix = "s";
14483 break;
14484
14485 default:
14486 suffix = "e";
14487 }
14488 break;
14489 case NE:
14490 switch (mode)
14491 {
14492 case CCAmode:
14493 suffix = "na";
14494 break;
14495
14496 case CCCmode:
14497 suffix = "nc";
14498 break;
14499
14500 case CCOmode:
14501 suffix = "no";
14502 break;
14503
14504 case CCSmode:
14505 suffix = "ns";
14506 break;
14507
14508 default:
14509 suffix = "ne";
14510 }
14511 break;
14512 case GT:
14513 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
14514 suffix = "g";
14515 break;
14516 case GTU:
14517 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
14518 Those same assemblers have the same but opposite lossage on cmov. */
14519 if (mode == CCmode)
14520 suffix = fp ? "nbe" : "a";
14521 else
14522 gcc_unreachable ();
14523 break;
14524 case LT:
14525 switch (mode)
14526 {
14527 case CCNOmode:
14528 case CCGOCmode:
14529 suffix = "s";
14530 break;
14531
14532 case CCmode:
14533 case CCGCmode:
14534 suffix = "l";
14535 break;
14536
14537 default:
14538 gcc_unreachable ();
14539 }
14540 break;
14541 case LTU:
14542 if (mode == CCmode)
14543 suffix = "b";
14544 else if (mode == CCCmode)
14545 suffix = "c";
14546 else
14547 gcc_unreachable ();
14548 break;
14549 case GE:
14550 switch (mode)
14551 {
14552 case CCNOmode:
14553 case CCGOCmode:
14554 suffix = "ns";
14555 break;
14556
14557 case CCmode:
14558 case CCGCmode:
14559 suffix = "ge";
14560 break;
14561
14562 default:
14563 gcc_unreachable ();
14564 }
14565 break;
14566 case GEU:
14567 if (mode == CCmode)
14568 suffix = fp ? "nb" : "ae";
14569 else if (mode == CCCmode)
14570 suffix = "nc";
14571 else
14572 gcc_unreachable ();
14573 break;
14574 case LE:
14575 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
14576 suffix = "le";
14577 break;
14578 case LEU:
14579 if (mode == CCmode)
14580 suffix = "be";
14581 else
14582 gcc_unreachable ();
14583 break;
14584 case UNORDERED:
14585 suffix = fp ? "u" : "p";
14586 break;
14587 case ORDERED:
14588 suffix = fp ? "nu" : "np";
14589 break;
14590 default:
14591 gcc_unreachable ();
14592 }
14593 fputs (suffix, file);
14594 }
14595
14596 /* Print the name of register X to FILE based on its machine mode and number.
14597 If CODE is 'w', pretend the mode is HImode.
14598 If CODE is 'b', pretend the mode is QImode.
14599 If CODE is 'k', pretend the mode is SImode.
14600 If CODE is 'q', pretend the mode is DImode.
14601 If CODE is 'x', pretend the mode is V4SFmode.
14602 If CODE is 't', pretend the mode is V8SFmode.
14603 If CODE is 'g', pretend the mode is V16SFmode.
14604 If CODE is 'h', pretend the reg is the 'high' byte register.
14605 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
14606 If CODE is 'd', duplicate the operand for AVX instruction.
14607 */
14608
14609 void
14610 print_reg (rtx x, int code, FILE *file)
14611 {
14612 const char *reg;
14613 unsigned int regno;
14614 bool duplicated = code == 'd' && TARGET_AVX;
14615
14616 if (ASSEMBLER_DIALECT == ASM_ATT)
14617 putc ('%', file);
14618
14619 if (x == pc_rtx)
14620 {
14621 gcc_assert (TARGET_64BIT);
14622 fputs ("rip", file);
14623 return;
14624 }
14625
14626 regno = true_regnum (x);
14627 gcc_assert (regno != ARG_POINTER_REGNUM
14628 && regno != FRAME_POINTER_REGNUM
14629 && regno != FLAGS_REG
14630 && regno != FPSR_REG
14631 && regno != FPCR_REG);
14632
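  /* Re-encode CODE as an operand width in bytes, with the special values
     0 for a high QImode part, 3 for the x87 stack top, and 16/32/64 for
     SSE/AVX/AVX-512 vector widths.  */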
14633 if (code == 'w' || MMX_REG_P (x))
14634 code = 2;
14635 else if (code == 'b')
14636 code = 1;
14637 else if (code == 'k')
14638 code = 4;
14639 else if (code == 'q')
14640 code = 8;
14641 else if (code == 'y')
14642 code = 3;
14643 else if (code == 'h')
14644 code = 0;
14645 else if (code == 'x')
14646 code = 16;
14647 else if (code == 't')
14648 code = 32;
14649 else if (code == 'g')
14650 code = 64;
14651 else
14652 code = GET_MODE_SIZE (GET_MODE (x));
14653
14654 /* Irritatingly, AMD extended registers use a different naming convention
14655 from the normal registers: "r%d[bwd]" */
14656 if (REX_INT_REGNO_P (regno))
14657 {
14658 gcc_assert (TARGET_64BIT);
14659 putc ('r', file);
14660 fprint_ul (file, regno - FIRST_REX_INT_REG + 8);
14661 switch (code)
14662 {
14663 case 0:
14664 error ("extended registers have no high halves");
14665 break;
14666 case 1:
14667 putc ('b', file);
14668 break;
14669 case 2:
14670 putc ('w', file);
14671 break;
14672 case 4:
14673 putc ('d', file);
14674 break;
14675 case 8:
14676 /* no suffix */
14677 break;
14678 default:
14679 error ("unsupported operand size for extended register");
14680 break;
14681 }
14682 return;
14683 }
14684
14685 reg = NULL;
14686 switch (code)
14687 {
14688 case 3:
14689 if (STACK_TOP_P (x))
14690 {
14691 reg = "st(0)";
14692 break;
14693 }
14694 /* FALLTHRU */
14695 case 8:
14696 case 4:
14697 case 12:
14698 if (! ANY_FP_REG_P (x))
14699 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
14700 /* FALLTHRU */
14701 case 16:
14702 case 2:
14703 normal:
14704 reg = hi_reg_name[regno];
14705 break;
14706 case 1:
14707 if (regno >= ARRAY_SIZE (qi_reg_name))
14708 goto normal;
14709 reg = qi_reg_name[regno];
14710 break;
14711 case 0:
14712 if (regno >= ARRAY_SIZE (qi_high_reg_name))
14713 goto normal;
14714 reg = qi_high_reg_name[regno];
14715 break;
14716 case 32:
14717 if (SSE_REG_P (x))
14718 {
14719 gcc_assert (!duplicated);
14720 putc ('y', file);
14721 fputs (hi_reg_name[regno] + 1, file);
14722 return;
14723 }
14724 case 64:
14725 if (SSE_REG_P (x))
14726 {
14727 gcc_assert (!duplicated);
14728 putc ('z', file);
14729 fputs (hi_reg_name[REGNO (x)] + 1, file);
14730 return;
14731 }
14732 break;
14733 default:
14734 gcc_unreachable ();
14735 }
14736
14737 fputs (reg, file);
14738 if (duplicated)
14739 {
14740 if (ASSEMBLER_DIALECT == ASM_ATT)
14741 fprintf (file, ", %%%s", reg);
14742 else
14743 fprintf (file, ", %s", reg);
14744 }
14745 }
14746
14747 /* Locate some local-dynamic symbol still in use by this function
14748 so that we can print its name in some tls_local_dynamic_base
14749 pattern. */
14750
14751 static int
14752 get_some_local_dynamic_name_1 (rtx *px, void *)
14753 {
14754 rtx x = *px;
14755
14756 if (GET_CODE (x) == SYMBOL_REF
14757 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
14758 {
14759 cfun->machine->some_ld_name = XSTR (x, 0);
14760 return 1;
14761 }
14762
14763 return 0;
14764 }
14765
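/* Return the name of a local-dynamic TLS symbol referenced by the current
   function, caching it in cfun->machine->some_ld_name, or NULL if the
   function references none.  */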
14766 static const char *
14767 get_some_local_dynamic_name (void)
14768 {
14769 rtx insn;
14770
14771 if (cfun->machine->some_ld_name)
14772 return cfun->machine->some_ld_name;
14773
14774 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
14775 if (NONDEBUG_INSN_P (insn)
14776 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
14777 return cfun->machine->some_ld_name;
14778
14779 return NULL;
14780 }
14781
14782 /* Meaning of CODE:
14783 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
14784 C -- print opcode suffix for set/cmov insn.
14785 c -- like C, but print reversed condition
14786 F,f -- likewise, but for floating-point.
14787 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
14788 otherwise nothing
14789 R -- print embedded rounding and sae.
14790 r -- print only sae.
14791 z -- print the opcode suffix for the size of the current operand.
14792 Z -- likewise, with special suffixes for x87 instructions.
14793 * -- print a star (in certain assembler syntax)
14794 A -- print an absolute memory reference.
14795 E -- print address with DImode register names if TARGET_64BIT.
14796 w -- print the operand as if it's a "word" (HImode) even if it isn't.
14797 s -- print a shift double count, followed by the assembler's argument
14798 delimiter.
14799 b -- print the QImode name of the register for the indicated operand.
14800 %b0 would print %al if operands[0] is reg 0.
14801 w -- likewise, print the HImode name of the register.
14802 k -- likewise, print the SImode name of the register.
14803 q -- likewise, print the DImode name of the register.
14804 x -- likewise, print the V4SFmode name of the register.
14805 t -- likewise, print the V8SFmode name of the register.
14806 g -- likewise, print the V16SFmode name of the register.
14807 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
14808 y -- print "st(0)" instead of "st" as a register.
14809 d -- print duplicated register operand for AVX instruction.
14810 D -- print condition for SSE cmp instruction.
14811 P -- if PIC, print an @PLT suffix.
14812 p -- print raw symbol name.
14813 X -- don't print any sort of PIC '@' suffix for a symbol.
14814 & -- print some in-use local-dynamic symbol name.
14815 H -- print a memory address offset by 8; used for sse high-parts
14816 Y -- print condition for XOP pcom* instruction.
14817 + -- print a branch hint as 'cs' or 'ds' prefix
14818 ; -- print a semicolon (after prefixes due to bug in older gas).
14819 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
14820 @ -- print a segment register of thread base pointer load
14821 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
14822 */
14823
14824 void
14825 ix86_print_operand (FILE *file, rtx x, int code)
14826 {
14827 if (code)
14828 {
14829 switch (code)
14830 {
14831 case 'A':
14832 switch (ASSEMBLER_DIALECT)
14833 {
14834 case ASM_ATT:
14835 putc ('*', file);
14836 break;
14837
14838 case ASM_INTEL:
14839 /* Intel syntax. For absolute addresses, registers should not
14840 be surrounded by brackets. */
14841 if (!REG_P (x))
14842 {
14843 putc ('[', file);
14844 ix86_print_operand (file, x, 0);
14845 putc (']', file);
14846 return;
14847 }
14848 break;
14849
14850 default:
14851 gcc_unreachable ();
14852 }
14853
14854 ix86_print_operand (file, x, 0);
14855 return;
14856
14857 case 'E':
14858 /* Wrap address in an UNSPEC to declare special handling. */
14859 if (TARGET_64BIT)
14860 x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
14861
14862 output_address (x);
14863 return;
14864
14865 case 'L':
14866 if (ASSEMBLER_DIALECT == ASM_ATT)
14867 putc ('l', file);
14868 return;
14869
14870 case 'W':
14871 if (ASSEMBLER_DIALECT == ASM_ATT)
14872 putc ('w', file);
14873 return;
14874
14875 case 'B':
14876 if (ASSEMBLER_DIALECT == ASM_ATT)
14877 putc ('b', file);
14878 return;
14879
14880 case 'Q':
14881 if (ASSEMBLER_DIALECT == ASM_ATT)
14882 putc ('l', file);
14883 return;
14884
14885 case 'S':
14886 if (ASSEMBLER_DIALECT == ASM_ATT)
14887 putc ('s', file);
14888 return;
14889
14890 case 'T':
14891 if (ASSEMBLER_DIALECT == ASM_ATT)
14892 putc ('t', file);
14893 return;
14894
14895 case 'O':
14896 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14897 if (ASSEMBLER_DIALECT != ASM_ATT)
14898 return;
14899
14900 switch (GET_MODE_SIZE (GET_MODE (x)))
14901 {
14902 case 2:
14903 putc ('w', file);
14904 break;
14905
14906 case 4:
14907 putc ('l', file);
14908 break;
14909
14910 case 8:
14911 putc ('q', file);
14912 break;
14913
14914 default:
14915 output_operand_lossage
14916 ("invalid operand size for operand code 'O'");
14917 return;
14918 }
14919
14920 putc ('.', file);
14921 #endif
14922 return;
14923
14924 case 'z':
14925 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
14926 {
14927 /* Opcodes don't get size suffixes if using Intel syntax. */
14928 if (ASSEMBLER_DIALECT == ASM_INTEL)
14929 return;
14930
14931 switch (GET_MODE_SIZE (GET_MODE (x)))
14932 {
14933 case 1:
14934 putc ('b', file);
14935 return;
14936
14937 case 2:
14938 putc ('w', file);
14939 return;
14940
14941 case 4:
14942 putc ('l', file);
14943 return;
14944
14945 case 8:
14946 putc ('q', file);
14947 return;
14948
14949 default:
14950 output_operand_lossage
14951 ("invalid operand size for operand code 'z'");
14952 return;
14953 }
14954 }
14955
14956 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14957 warning
14958 (0, "non-integer operand used with operand code 'z'");
14959 /* FALLTHRU */
14960
14961 case 'Z':
14962 /* 387 opcodes don't get size suffixes if using Intel syntax. */
14963 if (ASSEMBLER_DIALECT == ASM_INTEL)
14964 return;
14965
14966 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
14967 {
14968 switch (GET_MODE_SIZE (GET_MODE (x)))
14969 {
14970 case 2:
14971 #ifdef HAVE_AS_IX86_FILDS
14972 putc ('s', file);
14973 #endif
14974 return;
14975
14976 case 4:
14977 putc ('l', file);
14978 return;
14979
14980 case 8:
14981 #ifdef HAVE_AS_IX86_FILDQ
14982 putc ('q', file);
14983 #else
14984 fputs ("ll", file);
14985 #endif
14986 return;
14987
14988 default:
14989 break;
14990 }
14991 }
14992 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14993 {
14994 /* 387 opcodes don't get size suffixes
14995 if the operands are registers. */
14996 if (STACK_REG_P (x))
14997 return;
14998
14999 switch (GET_MODE_SIZE (GET_MODE (x)))
15000 {
15001 case 4:
15002 putc ('s', file);
15003 return;
15004
15005 case 8:
15006 putc ('l', file);
15007 return;
15008
15009 case 12:
15010 case 16:
15011 putc ('t', file);
15012 return;
15013
15014 default:
15015 break;
15016 }
15017 }
15018 else
15019 {
15020 output_operand_lossage
15021 ("invalid operand type used with operand code 'Z'");
15022 return;
15023 }
15024
15025 output_operand_lossage
15026 ("invalid operand size for operand code 'Z'");
15027 return;
15028
15029 case 'd':
15030 case 'b':
15031 case 'w':
15032 case 'k':
15033 case 'q':
15034 case 'h':
15035 case 't':
15036 case 'g':
15037 case 'y':
15038 case 'x':
15039 case 'X':
15040 case 'P':
15041 case 'p':
15042 break;
15043
15044 case 's':
15045 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
15046 {
15047 ix86_print_operand (file, x, 0);
15048 fputs (", ", file);
15049 }
15050 return;
15051
15052 case 'Y':
15053 switch (GET_CODE (x))
15054 {
15055 case NE:
15056 fputs ("neq", file);
15057 break;
15058 case EQ:
15059 fputs ("eq", file);
15060 break;
15061 case GE:
15062 case GEU:
15063 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
15064 break;
15065 case GT:
15066 case GTU:
15067 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
15068 break;
15069 case LE:
15070 case LEU:
15071 fputs ("le", file);
15072 break;
15073 case LT:
15074 case LTU:
15075 fputs ("lt", file);
15076 break;
15077 case UNORDERED:
15078 fputs ("unord", file);
15079 break;
15080 case ORDERED:
15081 fputs ("ord", file);
15082 break;
15083 case UNEQ:
15084 fputs ("ueq", file);
15085 break;
15086 case UNGE:
15087 fputs ("nlt", file);
15088 break;
15089 case UNGT:
15090 fputs ("nle", file);
15091 break;
15092 case UNLE:
15093 fputs ("ule", file);
15094 break;
15095 case UNLT:
15096 fputs ("ult", file);
15097 break;
15098 case LTGT:
15099 fputs ("une", file);
15100 break;
15101 default:
15102 output_operand_lossage ("operand is not a condition code, "
15103 "invalid operand code 'Y'");
15104 return;
15105 }
15106 return;
15107
15108 case 'D':
15109 /* Little bit of braindamage here.  The SSE compare instructions
15110 use completely different names for the comparisons than the
15111 fp conditional moves do. */
15112 switch (GET_CODE (x))
15113 {
15114 case UNEQ:
15115 if (TARGET_AVX)
15116 {
15117 fputs ("eq_us", file);
15118 break;
15119 }
15120 case EQ:
15121 fputs ("eq", file);
15122 break;
15123 case UNLT:
15124 if (TARGET_AVX)
15125 {
15126 fputs ("nge", file);
15127 break;
15128 }
15129 case LT:
15130 fputs ("lt", file);
15131 break;
15132 case UNLE:
15133 if (TARGET_AVX)
15134 {
15135 fputs ("ngt", file);
15136 break;
15137 }
15138 case LE:
15139 fputs ("le", file);
15140 break;
15141 case UNORDERED:
15142 fputs ("unord", file);
15143 break;
15144 case LTGT:
15145 if (TARGET_AVX)
15146 {
15147 fputs ("neq_oq", file);
15148 break;
15149 }
15150 case NE:
15151 fputs ("neq", file);
15152 break;
15153 case GE:
15154 if (TARGET_AVX)
15155 {
15156 fputs ("ge", file);
15157 break;
15158 }
15159 case UNGE:
15160 fputs ("nlt", file);
15161 break;
15162 case GT:
15163 if (TARGET_AVX)
15164 {
15165 fputs ("gt", file);
15166 break;
15167 }
15168 case UNGT:
15169 fputs ("nle", file);
15170 break;
15171 case ORDERED:
15172 fputs ("ord", file);
15173 break;
15174 default:
15175 output_operand_lossage ("operand is not a condition code, "
15176 "invalid operand code 'D'");
15177 return;
15178 }
15179 return;
15180
15181 case 'F':
15182 case 'f':
15183 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
15184 if (ASSEMBLER_DIALECT == ASM_ATT)
15185 putc ('.', file);
15186 #endif
15187
15188 case 'C':
15189 case 'c':
15190 if (!COMPARISON_P (x))
15191 {
15192 output_operand_lossage ("operand is not a condition code, "
15193 "invalid operand code '%c'", code);
15194 return;
15195 }
15196 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
15197 code == 'c' || code == 'f',
15198 code == 'F' || code == 'f',
15199 file);
15200 return;
15201
15202 case 'H':
15203 if (!offsettable_memref_p (x))
15204 {
15205 output_operand_lossage ("operand is not an offsettable memory "
15206 "reference, invalid operand code 'H'");
15207 return;
15208 }
15209 /* It doesn't actually matter what mode we use here, as we're
15210 only going to use this for printing. */
15211 x = adjust_address_nv (x, DImode, 8);
15212 /* Output 'qword ptr' for intel assembler dialect. */
15213 if (ASSEMBLER_DIALECT == ASM_INTEL)
15214 code = 'q';
15215 break;
15216
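	  /* 'K' -- print an HLE xacquire/xrelease prefix (or the raw prefix
	     byte if the assembler lacks HLE support) according to the
	     IX86_HLE_ACQUIRE/IX86_HLE_RELEASE bits in the operand.  */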
15217 case 'K':
15218 gcc_assert (CONST_INT_P (x));
15219
15220 if (INTVAL (x) & IX86_HLE_ACQUIRE)
15221 #ifdef HAVE_AS_IX86_HLE
15222 fputs ("xacquire ", file);
15223 #else
15224 fputs ("\n" ASM_BYTE "0xf2\n\t", file);
15225 #endif
15226 else if (INTVAL (x) & IX86_HLE_RELEASE)
15227 #ifdef HAVE_AS_IX86_HLE
15228 fputs ("xrelease ", file);
15229 #else
15230 fputs ("\n" ASM_BYTE "0xf3\n\t", file);
15231 #endif
15232 /* We do not want to print the value of the operand. */
15233 return;
15234
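	  /* 'N' -- print the "{z}" zeroing-mask modifier when the operand is
	     a zero constant.  */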
15235 case 'N':
15236 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
15237 fputs ("{z}", file);
15238 return;
15239
15240 case 'r':
15241 gcc_assert (CONST_INT_P (x));
15242 gcc_assert (INTVAL (x) == ROUND_SAE);
15243
15244 if (ASSEMBLER_DIALECT == ASM_INTEL)
15245 fputs (", ", file);
15246
15247 fputs ("{sae}", file);
15248
15249 if (ASSEMBLER_DIALECT == ASM_ATT)
15250 fputs (", ", file);
15251
15252 return;
15253
15254 case 'R':
15255 gcc_assert (CONST_INT_P (x));
15256
15257 if (ASSEMBLER_DIALECT == ASM_INTEL)
15258 fputs (", ", file);
15259
15260 switch (INTVAL (x))
15261 {
15262 case ROUND_NEAREST_INT | ROUND_SAE:
15263 fputs ("{rn-sae}", file);
15264 break;
15265 case ROUND_NEG_INF | ROUND_SAE:
15266 fputs ("{rd-sae}", file);
15267 break;
15268 case ROUND_POS_INF | ROUND_SAE:
15269 fputs ("{ru-sae}", file);
15270 break;
15271 case ROUND_ZERO | ROUND_SAE:
15272 fputs ("{rz-sae}", file);
15273 break;
15274 default:
15275 gcc_unreachable ();
15276 }
15277
15278 if (ASSEMBLER_DIALECT == ASM_ATT)
15279 fputs (", ", file);
15280
15281 return;
15282
15283 case '*':
15284 if (ASSEMBLER_DIALECT == ASM_ATT)
15285 putc ('*', file);
15286 return;
15287
15288 case '&':
15289 {
15290 const char *name = get_some_local_dynamic_name ();
15291 if (name == NULL)
15292 output_operand_lossage ("'%%&' used without any "
15293 "local dynamic TLS references");
15294 else
15295 assemble_name (file, name);
15296 return;
15297 }
15298
15299 case '+':
15300 {
15301 rtx x;
15302
15303 if (!optimize
15304 || optimize_function_for_size_p (cfun)
15305 || !TARGET_BRANCH_PREDICTION_HINTS)
15306 return;
15307
15308 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
15309 if (x)
15310 {
15311 int pred_val = XINT (x, 0);
15312
15313 if (pred_val < REG_BR_PROB_BASE * 45 / 100
15314 || pred_val > REG_BR_PROB_BASE * 55 / 100)
15315 {
15316 bool taken = pred_val > REG_BR_PROB_BASE / 2;
15317 bool cputaken
15318 = final_forward_branch_p (current_output_insn) == 0;
15319
15320 /* Emit hints only in the case default branch prediction
15321 heuristics would fail. */
15322 if (taken != cputaken)
15323 {
15324 /* We use 3e (DS) prefix for taken branches and
15325 2e (CS) prefix for not taken branches. */
15326 if (taken)
15327 fputs ("ds ; ", file);
15328 else
15329 fputs ("cs ; ", file);
15330 }
15331 }
15332 }
15333 return;
15334 }
15335
15336 case ';':
15337 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
15338 putc (';', file);
15339 #endif
15340 return;
15341
15342 case '@':
15343 if (ASSEMBLER_DIALECT == ASM_ATT)
15344 putc ('%', file);
15345
15346 /* The kernel uses a different segment register for performance
15347 reasons: that way a system call does not have to trash the userspace
15348 segment register, which would be expensive. */
15349 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
15350 fputs ("fs", file);
15351 else
15352 fputs ("gs", file);
15353 return;
15354
15355 case '~':
15356 putc (TARGET_AVX2 ? 'i' : 'f', file);
15357 return;
15358
15359 case '^':
15360 if (TARGET_64BIT && Pmode != word_mode)
15361 fputs ("addr32 ", file);
15362 return;
15363
15364 default:
15365 output_operand_lossage ("invalid operand code '%c'", code);
15366 }
15367 }
15368
15369 if (REG_P (x))
15370 print_reg (x, code, file);
15371
15372 else if (MEM_P (x))
15373 {
15374 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
15375 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
15376 && GET_MODE (x) != BLKmode)
15377 {
15378 const char * size;
15379 switch (GET_MODE_SIZE (GET_MODE (x)))
15380 {
15381 case 1: size = "BYTE"; break;
15382 case 2: size = "WORD"; break;
15383 case 4: size = "DWORD"; break;
15384 case 8: size = "QWORD"; break;
15385 case 12: size = "TBYTE"; break;
15386 case 16:
15387 if (GET_MODE (x) == XFmode)
15388 size = "TBYTE";
15389 else
15390 size = "XMMWORD";
15391 break;
15392 case 32: size = "YMMWORD"; break;
15393 case 64: size = "ZMMWORD"; break;
15394 default:
15395 gcc_unreachable ();
15396 }
15397
15398 /* Check for explicit size override (codes 'b', 'w', 'k',
15399 'q' and 'x') */
15400 if (code == 'b')
15401 size = "BYTE";
15402 else if (code == 'w')
15403 size = "WORD";
15404 else if (code == 'k')
15405 size = "DWORD";
15406 else if (code == 'q')
15407 size = "QWORD";
15408 else if (code == 'x')
15409 size = "XMMWORD";
15410
15411 fputs (size, file);
15412 fputs (" PTR ", file);
15413 }
15414
15415 x = XEXP (x, 0);
15416 /* Avoid (%rip) for call operands. */
15417 if (CONSTANT_ADDRESS_P (x) && code == 'P'
15418 && !CONST_INT_P (x))
15419 output_addr_const (file, x);
15420 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
15421 output_operand_lossage ("invalid constraints for operand");
15422 else
15423 output_address (x);
15424 }
15425
15426 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
15427 {
15428 REAL_VALUE_TYPE r;
15429 long l;
15430
15431 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15432 REAL_VALUE_TO_TARGET_SINGLE (r, l);
15433
15434 if (ASSEMBLER_DIALECT == ASM_ATT)
15435 putc ('$', file);
15436 /* Sign extend 32bit SFmode immediate to 8 bytes. */
15437 if (code == 'q')
15438 fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
15439 (unsigned long long) (int) l);
15440 else
15441 fprintf (file, "0x%08x", (unsigned int) l);
15442 }
15443
15444 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
15445 {
15446 REAL_VALUE_TYPE r;
15447 long l[2];
15448
15449 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15450 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
15451
15452 if (ASSEMBLER_DIALECT == ASM_ATT)
15453 putc ('$', file);
15454 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
15455 }
15456
15457 /* These float cases don't actually occur as immediate operands. */
15458 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
15459 {
15460 char dstr[30];
15461
15462 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
15463 fputs (dstr, file);
15464 }
15465
15466 else
15467 {
15468 /* We have patterns that allow zero sets of memory, for instance.
15469 In 64-bit mode, we should probably support all 8-byte vectors,
15470 since we can in fact encode that into an immediate. */
15471 if (GET_CODE (x) == CONST_VECTOR)
15472 {
15473 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
15474 x = const0_rtx;
15475 }
15476
15477 if (code != 'P' && code != 'p')
15478 {
15479 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
15480 {
15481 if (ASSEMBLER_DIALECT == ASM_ATT)
15482 putc ('$', file);
15483 }
15484 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
15485 || GET_CODE (x) == LABEL_REF)
15486 {
15487 if (ASSEMBLER_DIALECT == ASM_ATT)
15488 putc ('$', file);
15489 else
15490 fputs ("OFFSET FLAT:", file);
15491 }
15492 }
15493 if (CONST_INT_P (x))
15494 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
15495 else if (flag_pic || MACHOPIC_INDIRECT)
15496 output_pic_addr_const (file, x, code);
15497 else
15498 output_addr_const (file, x);
15499 }
15500 }
15501
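/* Return true if CODE is a punctuation character specially handled by
   ix86_print_operand ('@', '*', '+', '&', ';', '~', '^').  */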
15502 static bool
15503 ix86_print_operand_punct_valid_p (unsigned char code)
15504 {
15505 return (code == '@' || code == '*' || code == '+' || code == '&'
15506 || code == ';' || code == '~' || code == '^');
15507 }
15508 \f
15509 /* Print a memory operand whose address is ADDR. */
15510
15511 static void
15512 ix86_print_operand_address (FILE *file, rtx addr)
15513 {
15514 struct ix86_address parts;
15515 rtx base, index, disp;
15516 int scale;
15517 int ok;
15518 bool vsib = false;
15519 int code = 0;
15520
15521 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
15522 {
15523 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
15524 gcc_assert (parts.index == NULL_RTX);
15525 parts.index = XVECEXP (addr, 0, 1);
15526 parts.scale = INTVAL (XVECEXP (addr, 0, 2));
15527 addr = XVECEXP (addr, 0, 0);
15528 vsib = true;
15529 }
15530 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
15531 {
15532 gcc_assert (TARGET_64BIT);
15533 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
15534 code = 'q';
15535 }
15536 else
15537 ok = ix86_decompose_address (addr, &parts);
15538
15539 gcc_assert (ok);
15540
15541 base = parts.base;
15542 index = parts.index;
15543 disp = parts.disp;
15544 scale = parts.scale;
15545
15546 switch (parts.seg)
15547 {
15548 case SEG_DEFAULT:
15549 break;
15550 case SEG_FS:
15551 case SEG_GS:
15552 if (ASSEMBLER_DIALECT == ASM_ATT)
15553 putc ('%', file);
15554 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
15555 break;
15556 default:
15557 gcc_unreachable ();
15558 }
15559
15560 /* Use the one-byte-shorter RIP-relative addressing in 64-bit mode. */
15561 if (TARGET_64BIT && !base && !index)
15562 {
15563 rtx symbol = disp;
15564
15565 if (GET_CODE (disp) == CONST
15566 && GET_CODE (XEXP (disp, 0)) == PLUS
15567 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
15568 symbol = XEXP (XEXP (disp, 0), 0);
15569
15570 if (GET_CODE (symbol) == LABEL_REF
15571 || (GET_CODE (symbol) == SYMBOL_REF
15572 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
15573 base = pc_rtx;
15574 }
15575 if (!base && !index)
15576 {
15577 /* Displacement only requires special attention. */
15578
15579 if (CONST_INT_P (disp))
15580 {
15581 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
15582 fputs ("ds:", file);
15583 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
15584 }
15585 else if (flag_pic)
15586 output_pic_addr_const (file, disp, 0);
15587 else
15588 output_addr_const (file, disp);
15589 }
15590 else
15591 {
15592 /* Print SImode register names to force addr32 prefix. */
15593 if (SImode_address_operand (addr, VOIDmode))
15594 {
15595 #ifdef ENABLE_CHECKING
15596 gcc_assert (TARGET_64BIT);
15597 switch (GET_CODE (addr))
15598 {
15599 case SUBREG:
15600 gcc_assert (GET_MODE (addr) == SImode);
15601 gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
15602 break;
15603 case ZERO_EXTEND:
15604 case AND:
15605 gcc_assert (GET_MODE (addr) == DImode);
15606 break;
15607 default:
15608 gcc_unreachable ();
15609 }
15610 #endif
15611 gcc_assert (!code);
15612 code = 'k';
15613 }
15614 else if (code == 0
15615 && TARGET_X32
15616 && disp
15617 && CONST_INT_P (disp)
15618 && INTVAL (disp) < -16*1024*1024)
15619 {
15620 /* X32 runs in 64-bit mode, where the displacement DISP in an
15621 address DISP(%r64) is encoded as a 32-bit immediate and sign-
15622 extended to 64 bits.  For -0x40000300(%r64), the address is
15623 %r64 + 0xffffffffbffffd00.  When %r64 < 0x40000300, e.g.
15624 0x37ffe064, the address is 0xfffffffff7ffdd64, which is invalid
15625 for x32.  The correct address is %r64 - 0x40000300 ==
15626 0xf7ffdd64.  To properly encode -0x40000300(%r64) for x32, we
15627 zero-extend the negative displacement by forcing the addr32
15628 prefix, which truncates 0xfffffffff7ffdd64 to 0xf7ffdd64.  In
15629 theory we should zero-extend all negative displacements,
15630 including -1(%rsp).  However, for small negative displacements,
15631 sign-extension won't cause overflow.  We only zero-extend
15632 negative displacements if they are less than -16*1024*1024,
15633 the bound that is also used to check legitimate address
15634 displacements for PIC. */
15635 code = 'k';
15636 }
15637
15638 if (ASSEMBLER_DIALECT == ASM_ATT)
15639 {
15640 if (disp)
15641 {
15642 if (flag_pic)
15643 output_pic_addr_const (file, disp, 0);
15644 else if (GET_CODE (disp) == LABEL_REF)
15645 output_asm_label (disp);
15646 else
15647 output_addr_const (file, disp);
15648 }
15649
15650 putc ('(', file);
15651 if (base)
15652 print_reg (base, code, file);
15653 if (index)
15654 {
15655 putc (',', file);
15656 print_reg (index, vsib ? 0 : code, file);
15657 if (scale != 1 || vsib)
15658 fprintf (file, ",%d", scale);
15659 }
15660 putc (')', file);
15661 }
15662 else
15663 {
15664 rtx offset = NULL_RTX;
15665
15666 if (disp)
15667 {
15668 /* Pull out the offset of a symbol; print any symbol itself. */
15669 if (GET_CODE (disp) == CONST
15670 && GET_CODE (XEXP (disp, 0)) == PLUS
15671 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
15672 {
15673 offset = XEXP (XEXP (disp, 0), 1);
15674 disp = gen_rtx_CONST (VOIDmode,
15675 XEXP (XEXP (disp, 0), 0));
15676 }
15677
15678 if (flag_pic)
15679 output_pic_addr_const (file, disp, 0);
15680 else if (GET_CODE (disp) == LABEL_REF)
15681 output_asm_label (disp);
15682 else if (CONST_INT_P (disp))
15683 offset = disp;
15684 else
15685 output_addr_const (file, disp);
15686 }
15687
15688 putc ('[', file);
15689 if (base)
15690 {
15691 print_reg (base, code, file);
15692 if (offset)
15693 {
15694 if (INTVAL (offset) >= 0)
15695 putc ('+', file);
15696 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
15697 }
15698 }
15699 else if (offset)
15700 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
15701 else
15702 putc ('0', file);
15703
15704 if (index)
15705 {
15706 putc ('+', file);
15707 print_reg (index, vsib ? 0 : code, file);
15708 if (scale != 1 || vsib)
15709 fprintf (file, "*%d", scale);
15710 }
15711 putc (']', file);
15712 }
15713 }
15714 }
15715
15716 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
15717
15718 static bool
15719 i386_asm_output_addr_const_extra (FILE *file, rtx x)
15720 {
15721 rtx op;
15722
15723 if (GET_CODE (x) != UNSPEC)
15724 return false;
15725
15726 op = XVECEXP (x, 0, 0);
15727 switch (XINT (x, 1))
15728 {
15729 case UNSPEC_GOTTPOFF:
15730 output_addr_const (file, op);
15731 /* FIXME: This might be @TPOFF in Sun ld. */
15732 fputs ("@gottpoff", file);
15733 break;
15734 case UNSPEC_TPOFF:
15735 output_addr_const (file, op);
15736 fputs ("@tpoff", file);
15737 break;
15738 case UNSPEC_NTPOFF:
15739 output_addr_const (file, op);
15740 if (TARGET_64BIT)
15741 fputs ("@tpoff", file);
15742 else
15743 fputs ("@ntpoff", file);
15744 break;
15745 case UNSPEC_DTPOFF:
15746 output_addr_const (file, op);
15747 fputs ("@dtpoff", file);
15748 break;
15749 case UNSPEC_GOTNTPOFF:
15750 output_addr_const (file, op);
15751 if (TARGET_64BIT)
15752 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
15753 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
15754 else
15755 fputs ("@gotntpoff", file);
15756 break;
15757 case UNSPEC_INDNTPOFF:
15758 output_addr_const (file, op);
15759 fputs ("@indntpoff", file);
15760 break;
15761 #if TARGET_MACHO
15762 case UNSPEC_MACHOPIC_OFFSET:
15763 output_addr_const (file, op);
15764 putc ('-', file);
15765 machopic_output_function_base_name (file);
15766 break;
15767 #endif
15768
15769 case UNSPEC_STACK_CHECK:
15770 {
15771 int offset;
15772
15773 gcc_assert (flag_split_stack);
15774
15775 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
15776 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
15777 #else
15778 gcc_unreachable ();
15779 #endif
15780
15781 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
15782 }
15783 break;
15784
15785 default:
15786 return false;
15787 }
15788
15789 return true;
15790 }
15791 \f
15792 /* Split one or more double-mode RTL references into pairs of half-mode
15793 references. The RTL can be REG, offsettable MEM, integer constant, or
15794 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
15795 split and "num" is its length. lo_half and hi_half are output arrays
15796 that parallel "operands". */
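/* For illustration: splitting a DImode register or offsettable MEM
   yields two SImode halves, the low part at byte offset 0 and the
   high part at byte offset 4; splitting TImode yields DImode halves
   at byte offsets 0 and 8.  */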
15797
15798 void
15799 split_double_mode (enum machine_mode mode, rtx operands[],
15800 int num, rtx lo_half[], rtx hi_half[])
15801 {
15802 enum machine_mode half_mode;
15803 unsigned int byte;
15804
15805 switch (mode)
15806 {
15807 case TImode:
15808 half_mode = DImode;
15809 break;
15810 case DImode:
15811 half_mode = SImode;
15812 break;
15813 default:
15814 gcc_unreachable ();
15815 }
15816
15817 byte = GET_MODE_SIZE (half_mode);
15818
15819 while (num--)
15820 {
15821 rtx op = operands[num];
15822
15823 /* simplify_subreg refuses to split volatile memory references,
15824 but we still have to handle them. */
15825 if (MEM_P (op))
15826 {
15827 lo_half[num] = adjust_address (op, half_mode, 0);
15828 hi_half[num] = adjust_address (op, half_mode, byte);
15829 }
15830 else
15831 {
15832 lo_half[num] = simplify_gen_subreg (half_mode, op,
15833 GET_MODE (op) == VOIDmode
15834 ? mode : GET_MODE (op), 0);
15835 hi_half[num] = simplify_gen_subreg (half_mode, op,
15836 GET_MODE (op) == VOIDmode
15837 ? mode : GET_MODE (op), byte);
15838 }
15839 }
15840 }
15841 \f
15842 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
15843 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
15844 is the expression of the binary operation. The output may either be
15845 emitted here, or returned to the caller, like all output_* functions.
15846
15847 There is no guarantee that the operands are the same mode, as they
15848 might be within FLOAT or FLOAT_EXTEND expressions. */
15849
15850 #ifndef SYSV386_COMPAT
15851 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
15852 wants to fix the assemblers because that causes incompatibility
15853 with gcc. No-one wants to fix gcc because that causes
15854 incompatibility with assemblers... You can use the option of
15855 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
15856 #define SYSV386_COMPAT 1
15857 #endif
15858
15859 const char *
15860 output_387_binary_op (rtx insn, rtx *operands)
15861 {
15862 static char buf[40];
15863 const char *p;
15864 const char *ssep;
15865 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
15866
15867 #ifdef ENABLE_CHECKING
15868 /* Even if we do not want to check the inputs, this documents the input
15869 constraints, which helps in understanding the following code. */
15870 if (STACK_REG_P (operands[0])
15871 && ((REG_P (operands[1])
15872 && REGNO (operands[0]) == REGNO (operands[1])
15873 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
15874 || (REG_P (operands[2])
15875 && REGNO (operands[0]) == REGNO (operands[2])
15876 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
15877 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
15878 ; /* ok */
15879 else
15880 gcc_assert (is_sse);
15881 #endif
15882
15883 switch (GET_CODE (operands[3]))
15884 {
15885 case PLUS:
15886 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15887 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15888 p = "fiadd";
15889 else
15890 p = "fadd";
15891 ssep = "vadd";
15892 break;
15893
15894 case MINUS:
15895 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15896 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15897 p = "fisub";
15898 else
15899 p = "fsub";
15900 ssep = "vsub";
15901 break;
15902
15903 case MULT:
15904 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15905 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15906 p = "fimul";
15907 else
15908 p = "fmul";
15909 ssep = "vmul";
15910 break;
15911
15912 case DIV:
15913 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15914 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15915 p = "fidiv";
15916 else
15917 p = "fdiv";
15918 ssep = "vdiv";
15919 break;
15920
15921 default:
15922 gcc_unreachable ();
15923 }
15924
15925 if (is_sse)
15926 {
15927 if (TARGET_AVX)
15928 {
15929 strcpy (buf, ssep);
15930 if (GET_MODE (operands[0]) == SFmode)
15931 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
15932 else
15933 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
15934 }
15935 else
15936 {
15937 strcpy (buf, ssep + 1);
15938 if (GET_MODE (operands[0]) == SFmode)
15939 strcat (buf, "ss\t{%2, %0|%0, %2}");
15940 else
15941 strcat (buf, "sd\t{%2, %0|%0, %2}");
15942 }
15943 return buf;
15944 }
15945 strcpy (buf, p);
15946
15947 switch (GET_CODE (operands[3]))
15948 {
15949 case MULT:
15950 case PLUS:
15951 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
15952 {
15953 rtx temp = operands[2];
15954 operands[2] = operands[1];
15955 operands[1] = temp;
15956 }
15957
15958 /* We know operands[0] == operands[1]. */
15959
15960 if (MEM_P (operands[2]))
15961 {
15962 p = "%Z2\t%2";
15963 break;
15964 }
15965
15966 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
15967 {
15968 if (STACK_TOP_P (operands[0]))
15969 /* How is it that we are storing to a dead operand[2]?
15970 Well, presumably operands[1] is dead too. We can't
15971 store the result to st(0) as st(0) gets popped on this
15972 instruction. Instead store to operands[2] (which I
15973 think has to be st(1)). st(1) will be popped later.
15974 gcc <= 2.8.1 didn't have this check and generated
15975 assembly code that the Unixware assembler rejected. */
15976 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
15977 else
15978 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
15979 break;
15980 }
15981
15982 if (STACK_TOP_P (operands[0]))
15983 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
15984 else
15985 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
15986 break;
15987
15988 case MINUS:
15989 case DIV:
15990 if (MEM_P (operands[1]))
15991 {
15992 p = "r%Z1\t%1";
15993 break;
15994 }
15995
15996 if (MEM_P (operands[2]))
15997 {
15998 p = "%Z2\t%2";
15999 break;
16000 }
16001
16002 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
16003 {
16004 #if SYSV386_COMPAT
16005 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
16006 derived assemblers, confusingly reverse the direction of
16007 the operation for fsub{r} and fdiv{r} when the
16008 destination register is not st(0). The Intel assembler
16009 doesn't have this brain damage. Read !SYSV386_COMPAT to
16010 figure out what the hardware really does. */
16011 if (STACK_TOP_P (operands[0]))
16012 p = "{p\t%0, %2|rp\t%2, %0}";
16013 else
16014 p = "{rp\t%2, %0|p\t%0, %2}";
16015 #else
16016 if (STACK_TOP_P (operands[0]))
16017 /* As above for fmul/fadd, we can't store to st(0). */
16018 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
16019 else
16020 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
16021 #endif
16022 break;
16023 }
16024
16025 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
16026 {
16027 #if SYSV386_COMPAT
16028 if (STACK_TOP_P (operands[0]))
16029 p = "{rp\t%0, %1|p\t%1, %0}";
16030 else
16031 p = "{p\t%1, %0|rp\t%0, %1}";
16032 #else
16033 if (STACK_TOP_P (operands[0]))
16034 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
16035 else
16036 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
16037 #endif
16038 break;
16039 }
16040
16041 if (STACK_TOP_P (operands[0]))
16042 {
16043 if (STACK_TOP_P (operands[1]))
16044 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
16045 else
16046 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
16047 break;
16048 }
16049 else if (STACK_TOP_P (operands[1]))
16050 {
16051 #if SYSV386_COMPAT
16052 p = "{\t%1, %0|r\t%0, %1}";
16053 #else
16054 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
16055 #endif
16056 }
16057 else
16058 {
16059 #if SYSV386_COMPAT
16060 p = "{r\t%2, %0|\t%0, %2}";
16061 #else
16062 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
16063 #endif
16064 }
16065 break;
16066
16067 default:
16068 gcc_unreachable ();
16069 }
16070
16071 strcat (buf, p);
16072 return buf;
16073 }
16074
16075 /* Check if a 256bit AVX register is referenced inside of EXP. */
16076
16077 static int
16078 ix86_check_avx256_register (rtx *pexp, void *)
16079 {
16080 rtx exp = *pexp;
16081
16082 if (GET_CODE (exp) == SUBREG)
16083 exp = SUBREG_REG (exp);
16084
16085 if (REG_P (exp)
16086 && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp)))
16087 return 1;
16088
16089 return 0;
16090 }
16091
16092 /* Return needed mode for entity in optimize_mode_switching pass. */
16093
16094 static int
16095 ix86_avx_u128_mode_needed (rtx insn)
16096 {
16097 if (CALL_P (insn))
16098 {
16099 rtx link;
16100
16101 /* Needed mode is set to AVX_U128_CLEAN if there are
16102 no 256bit modes used in function arguments. */
16103 for (link = CALL_INSN_FUNCTION_USAGE (insn);
16104 link;
16105 link = XEXP (link, 1))
16106 {
16107 if (GET_CODE (XEXP (link, 0)) == USE)
16108 {
16109 rtx arg = XEXP (XEXP (link, 0), 0);
16110
16111 if (ix86_check_avx256_register (&arg, NULL))
16112 return AVX_U128_DIRTY;
16113 }
16114 }
16115
16116 return AVX_U128_CLEAN;
16117 }
16118
16119 /* Require DIRTY mode if a 256bit AVX register is referenced.  The
16120 hardware changes state only when a 256bit register is written to,
16121 but we need to prevent the compiler from moving the optimal
16122 insertion point above an eventual read from a 256bit register. */
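/* (Background note: on many microarchitectures, executing a legacy
   non-VEX SSE instruction while the upper halves of the YMM registers
   are dirty incurs a transition penalty; tracking the CLEAN/DIRTY
   state here lets the mode-switching pass place vzeroupper only where
   it is needed.)  */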
16123 if (for_each_rtx (&PATTERN (insn), ix86_check_avx256_register, NULL))
16124 return AVX_U128_DIRTY;
16125
16126 return AVX_U128_ANY;
16127 }
16128
16129 /* Return mode that i387 must be switched into
16130 prior to the execution of insn. */
16131
16132 static int
16133 ix86_i387_mode_needed (int entity, rtx insn)
16134 {
16135 enum attr_i387_cw mode;
16136
16137 /* The mode UNINITIALIZED is used to store the control word after a
16138 function call or ASM pattern.  The mode ANY specifies that the
16139 function has no requirements on the control word and makes no
16140 changes in the bits we are interested in. */
16141
16142 if (CALL_P (insn)
16143 || (NONJUMP_INSN_P (insn)
16144 && (asm_noperands (PATTERN (insn)) >= 0
16145 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
16146 return I387_CW_UNINITIALIZED;
16147
16148 if (recog_memoized (insn) < 0)
16149 return I387_CW_ANY;
16150
16151 mode = get_attr_i387_cw (insn);
16152
16153 switch (entity)
16154 {
16155 case I387_TRUNC:
16156 if (mode == I387_CW_TRUNC)
16157 return mode;
16158 break;
16159
16160 case I387_FLOOR:
16161 if (mode == I387_CW_FLOOR)
16162 return mode;
16163 break;
16164
16165 case I387_CEIL:
16166 if (mode == I387_CW_CEIL)
16167 return mode;
16168 break;
16169
16170 case I387_MASK_PM:
16171 if (mode == I387_CW_MASK_PM)
16172 return mode;
16173 break;
16174
16175 default:
16176 gcc_unreachable ();
16177 }
16178
16179 return I387_CW_ANY;
16180 }
16181
16182 /* Return mode that entity must be switched into
16183 prior to the execution of insn. */
16184
16185 static int
16186 ix86_mode_needed (int entity, rtx insn)
16187 {
16188 switch (entity)
16189 {
16190 case AVX_U128:
16191 return ix86_avx_u128_mode_needed (insn);
16192 case I387_TRUNC:
16193 case I387_FLOOR:
16194 case I387_CEIL:
16195 case I387_MASK_PM:
16196 return ix86_i387_mode_needed (entity, insn);
16197 default:
16198 gcc_unreachable ();
16199 }
16200 return 0;
16201 }
16202
16203 /* Check if a 256bit AVX register is referenced in stores. */
16204
16205 static void
16206 ix86_check_avx256_stores (rtx dest, const_rtx, void *data)
16207 {
16208 if (ix86_check_avx256_register (&dest, NULL))
16209 {
16210 bool *used = (bool *) data;
16211 *used = true;
16212 }
16213 }
16214
16215 /* Calculate mode of upper 128bit AVX registers after the insn. */
16216
16217 static int
16218 ix86_avx_u128_mode_after (int mode, rtx insn)
16219 {
16220 rtx pat = PATTERN (insn);
16221
16222 if (vzeroupper_operation (pat, VOIDmode)
16223 || vzeroall_operation (pat, VOIDmode))
16224 return AVX_U128_CLEAN;
16225
16226 /* We know that the state is clean after a CALL insn if no 256bit
16227 register is used for the function return value. */
16228 if (CALL_P (insn))
16229 {
16230 bool avx_reg256_found = false;
16231 note_stores (pat, ix86_check_avx256_stores, &avx_reg256_found);
16232
16233 return avx_reg256_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
16234 }
16235
16236 /* Otherwise, return the current mode.  Remember that if the insn
16237 references AVX 256bit registers, the mode was already changed
16238 to DIRTY by MODE_NEEDED. */
16239 return mode;
16240 }
16241
16242 /* Return the mode that an insn results in. */
16243
16244 int
16245 ix86_mode_after (int entity, int mode, rtx insn)
16246 {
16247 switch (entity)
16248 {
16249 case AVX_U128:
16250 return ix86_avx_u128_mode_after (mode, insn);
16251 case I387_TRUNC:
16252 case I387_FLOOR:
16253 case I387_CEIL:
16254 case I387_MASK_PM:
16255 return mode;
16256 default:
16257 gcc_unreachable ();
16258 }
16259 }
16260
16261 static int
16262 ix86_avx_u128_mode_entry (void)
16263 {
16264 tree arg;
16265
16266 /* Entry mode is set to AVX_U128_DIRTY if there are
16267 256bit modes used in function arguments. */
16268 for (arg = DECL_ARGUMENTS (current_function_decl); arg;
16269 arg = TREE_CHAIN (arg))
16270 {
16271 rtx incoming = DECL_INCOMING_RTL (arg);
16272
16273 if (incoming && ix86_check_avx256_register (&incoming, NULL))
16274 return AVX_U128_DIRTY;
16275 }
16276
16277 return AVX_U128_CLEAN;
16278 }
16279
16280 /* Return a mode that ENTITY is assumed to be
16281 switched to at function entry. */
16282
16283 static int
16284 ix86_mode_entry (int entity)
16285 {
16286 switch (entity)
16287 {
16288 case AVX_U128:
16289 return ix86_avx_u128_mode_entry ();
16290 case I387_TRUNC:
16291 case I387_FLOOR:
16292 case I387_CEIL:
16293 case I387_MASK_PM:
16294 return I387_CW_ANY;
16295 default:
16296 gcc_unreachable ();
16297 }
16298 }
16299
16300 static int
16301 ix86_avx_u128_mode_exit (void)
16302 {
16303 rtx reg = crtl->return_rtx;
16304
16305 /* Exit mode is set to AVX_U128_DIRTY if a 256bit mode is
16306 used in the function return register. */
16307 if (reg && ix86_check_avx256_register (&reg, NULL))
16308 return AVX_U128_DIRTY;
16309
16310 return AVX_U128_CLEAN;
16311 }
16312
16313 /* Return a mode that ENTITY is assumed to be
16314 switched to at function exit. */
16315
16316 static int
16317 ix86_mode_exit (int entity)
16318 {
16319 switch (entity)
16320 {
16321 case AVX_U128:
16322 return ix86_avx_u128_mode_exit ();
16323 case I387_TRUNC:
16324 case I387_FLOOR:
16325 case I387_CEIL:
16326 case I387_MASK_PM:
16327 return I387_CW_ANY;
16328 default:
16329 gcc_unreachable ();
16330 }
16331 }
16332
16333 static int
16334 ix86_mode_priority (int, int n)
16335 {
16336 return n;
16337 }
16338
16339 /* Output code to initialize control word copies used by trunc?f?i and
16340 rounding patterns.  The current control word is saved to one stack
16341 slot, and a copy with the bits required for MODE is stored to another. */
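/* A sketch of the relevant x87 control word bits, assuming the standard
   FPU control word layout: bits 10-11 form the rounding control field
   (0x0000 nearest, 0x0400 down, 0x0800 up, 0x0c00 toward zero), and
   bit 5 (0x0020) masks the precision exception; the constants used
   below set exactly these bits.  */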
16342
16343 static void
16344 emit_i387_cw_initialization (int mode)
16345 {
16346 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
16347 rtx new_mode;
16348
16349 enum ix86_stack_slot slot;
16350
16351 rtx reg = gen_reg_rtx (HImode);
16352
16353 emit_insn (gen_x86_fnstcw_1 (stored_mode));
16354 emit_move_insn (reg, copy_rtx (stored_mode));
16355
16356 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
16357 || optimize_insn_for_size_p ())
16358 {
16359 switch (mode)
16360 {
16361 case I387_CW_TRUNC:
16362 /* round toward zero (truncate) */
16363 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
16364 slot = SLOT_CW_TRUNC;
16365 break;
16366
16367 case I387_CW_FLOOR:
16368 /* round down toward -oo */
16369 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16370 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
16371 slot = SLOT_CW_FLOOR;
16372 break;
16373
16374 case I387_CW_CEIL:
16375 /* round up toward +oo */
16376 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16377 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
16378 slot = SLOT_CW_CEIL;
16379 break;
16380
16381 case I387_CW_MASK_PM:
16382 /* mask precision exception for nearbyint() */
16383 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16384 slot = SLOT_CW_MASK_PM;
16385 break;
16386
16387 default:
16388 gcc_unreachable ();
16389 }
16390 }
16391 else
16392 {
16393 switch (mode)
16394 {
16395 case I387_CW_TRUNC:
16396 /* round toward zero (truncate) */
16397 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
16398 slot = SLOT_CW_TRUNC;
16399 break;
16400
16401 case I387_CW_FLOOR:
16402 /* round down toward -oo */
16403 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
16404 slot = SLOT_CW_FLOOR;
16405 break;
16406
16407 case I387_CW_CEIL:
16408 /* round up toward +oo */
16409 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
16410 slot = SLOT_CW_CEIL;
16411 break;
16412
16413 case I387_CW_MASK_PM:
16414 /* mask precision exception for nearbyint() */
16415 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16416 slot = SLOT_CW_MASK_PM;
16417 break;
16418
16419 default:
16420 gcc_unreachable ();
16421 }
16422 }
16423
16424 gcc_assert (slot < MAX_386_STACK_LOCALS);
16425
16426 new_mode = assign_386_stack_local (HImode, slot);
16427 emit_move_insn (new_mode, reg);
16428 }
16429
16430 /* Emit vzeroupper. */
16431
16432 void
16433 ix86_avx_emit_vzeroupper (HARD_REG_SET regs_live)
16434 {
16435 int i;
16436
16437 /* Cancel automatic vzeroupper insertion if there are
16438 live call-saved SSE registers at the insertion point. */
16439
16440 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
16441 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
16442 return;
16443
16444 if (TARGET_64BIT)
16445 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
16446 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
16447 return;
16448
16449 emit_insn (gen_avx_vzeroupper ());
16450 }
16451
16454 /* Generate one or more insns to set ENTITY to MODE.  REGS_LIVE
16455 is the set of hard registers live at the point where the insn(s)
16456 are to be inserted. */
16457
16458 static void
16459 ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED,
16460 HARD_REG_SET regs_live)
16461 {
16462 switch (entity)
16463 {
16464 case AVX_U128:
16465 if (mode == AVX_U128_CLEAN)
16466 ix86_avx_emit_vzeroupper (regs_live);
16467 break;
16468 case I387_TRUNC:
16469 case I387_FLOOR:
16470 case I387_CEIL:
16471 case I387_MASK_PM:
16472 if (mode != I387_CW_ANY
16473 && mode != I387_CW_UNINITIALIZED)
16474 emit_i387_cw_initialization (mode);
16475 break;
16476 default:
16477 gcc_unreachable ();
16478 }
16479 }
16480
16481 /* Output code for INSN to convert a float to a signed int. OPERANDS
16482 are the insn operands. The output may be [HSD]Imode and the input
16483 operand may be [SDX]Fmode. */
16484
16485 const char *
16486 output_fix_trunc (rtx insn, rtx *operands, bool fisttp)
16487 {
16488 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
16489 int dimode_p = GET_MODE (operands[0]) == DImode;
16490 int round_mode = get_attr_i387_cw (insn);
16491
16492 /* Jump through a hoop or two for DImode, since the hardware has no
16493 non-popping instruction. We used to do this a different way, but
16494 that was somewhat fragile and broke with post-reload splitters. */
16495 if ((dimode_p || fisttp) && !stack_top_dies)
16496 output_asm_insn ("fld\t%y1", operands);
16497
16498 gcc_assert (STACK_TOP_P (operands[1]));
16499 gcc_assert (MEM_P (operands[0]));
16500 gcc_assert (GET_MODE (operands[1]) != TFmode);
16501
16502 if (fisttp)
16503 output_asm_insn ("fisttp%Z0\t%0", operands);
16504 else
16505 {
16506 if (round_mode != I387_CW_ANY)
16507 output_asm_insn ("fldcw\t%3", operands);
16508 if (stack_top_dies || dimode_p)
16509 output_asm_insn ("fistp%Z0\t%0", operands);
16510 else
16511 output_asm_insn ("fist%Z0\t%0", operands);
16512 if (round_mode != I387_CW_ANY)
16513 output_asm_insn ("fldcw\t%2", operands);
16514 }
16515
16516 return "";
16517 }
16518
16519 /* Output code for x87 ffreep insn. The OPNO argument, which may only
16520 have the values zero or one, indicates the ffreep insn's operand
16521 from the OPERANDS array. */
16522
16523 static const char *
16524 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
16525 {
16526 if (TARGET_USE_FFREEP)
16527 #ifdef HAVE_AS_IX86_FFREEP
16528 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
16529 #else
16530 {
16531 static char retval[32];
16532 int regno = REGNO (operands[opno]);
16533
16534 gcc_assert (STACK_REGNO_P (regno));
16535
16536 regno -= FIRST_STACK_REG;
16537
16538 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
16539 return retval;
16540 }
16541 #endif
16542
16543 return opno ? "fstp\t%y1" : "fstp\t%y0";
16544 }
16545
16546
16547 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
16548 should be used. UNORDERED_P is true when fucom should be used. */
16549
16550 const char *
16551 output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
16552 {
16553 int stack_top_dies;
16554 rtx cmp_op0, cmp_op1;
16555 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
16556
16557 if (eflags_p)
16558 {
16559 cmp_op0 = operands[0];
16560 cmp_op1 = operands[1];
16561 }
16562 else
16563 {
16564 cmp_op0 = operands[1];
16565 cmp_op1 = operands[2];
16566 }
16567
16568 if (is_sse)
16569 {
16570 if (GET_MODE (operands[0]) == SFmode)
16571 if (unordered_p)
16572 return "%vucomiss\t{%1, %0|%0, %1}";
16573 else
16574 return "%vcomiss\t{%1, %0|%0, %1}";
16575 else
16576 if (unordered_p)
16577 return "%vucomisd\t{%1, %0|%0, %1}";
16578 else
16579 return "%vcomisd\t{%1, %0|%0, %1}";
16580 }
16581
16582 gcc_assert (STACK_TOP_P (cmp_op0));
16583
16584 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
16585
16586 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
16587 {
16588 if (stack_top_dies)
16589 {
16590 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
16591 return output_387_ffreep (operands, 1);
16592 }
16593 else
16594 return "ftst\n\tfnstsw\t%0";
16595 }
16596
16597 if (STACK_REG_P (cmp_op1)
16598 && stack_top_dies
16599 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
16600 && REGNO (cmp_op1) != FIRST_STACK_REG)
16601 {
16602 /* If the top of the 387 stack dies, and the other operand
16603 is also a stack register that dies, then this must be an
16604 `fcompp' float compare. */
16605
16606 if (eflags_p)
16607 {
16608 /* There is no double popping fcomi variant. Fortunately,
16609 eflags is immune from the fstp's cc clobbering. */
16610 if (unordered_p)
16611 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
16612 else
16613 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
16614 return output_387_ffreep (operands, 0);
16615 }
16616 else
16617 {
16618 if (unordered_p)
16619 return "fucompp\n\tfnstsw\t%0";
16620 else
16621 return "fcompp\n\tfnstsw\t%0";
16622 }
16623 }
16624 else
16625 {
16626 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
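/* For example, an ordered eflags compare where the stack top dies gives
   mask (1<<3) | (0<<2) | (0<<1) | 1 == 9, selecting "fcomip" below.  */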
16627
16628 static const char * const alt[16] =
16629 {
16630 "fcom%Z2\t%y2\n\tfnstsw\t%0",
16631 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
16632 "fucom%Z2\t%y2\n\tfnstsw\t%0",
16633 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
16634
16635 "ficom%Z2\t%y2\n\tfnstsw\t%0",
16636 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
16637 NULL,
16638 NULL,
16639
16640 "fcomi\t{%y1, %0|%0, %y1}",
16641 "fcomip\t{%y1, %0|%0, %y1}",
16642 "fucomi\t{%y1, %0|%0, %y1}",
16643 "fucomip\t{%y1, %0|%0, %y1}",
16644
16645 NULL,
16646 NULL,
16647 NULL,
16648 NULL
16649 };
16650
16651 int mask;
16652 const char *ret;
16653
16654 mask = eflags_p << 3;
16655 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
16656 mask |= unordered_p << 1;
16657 mask |= stack_top_dies;
16658
16659 gcc_assert (mask < 16);
16660 ret = alt[mask];
16661 gcc_assert (ret);
16662
16663 return ret;
16664 }
16665 }
16666
16667 void
16668 ix86_output_addr_vec_elt (FILE *file, int value)
16669 {
16670 const char *directive = ASM_LONG;
16671
16672 #ifdef ASM_QUAD
16673 if (TARGET_LP64)
16674 directive = ASM_QUAD;
16675 #else
16676 gcc_assert (!TARGET_64BIT);
16677 #endif
16678
16679 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
16680 }
16681
16682 void
16683 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
16684 {
16685 const char *directive = ASM_LONG;
16686
16687 #ifdef ASM_QUAD
16688 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
16689 directive = ASM_QUAD;
16690 #else
16691 gcc_assert (!TARGET_64BIT);
16692 #endif
16693 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
16694 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
16695 fprintf (file, "%s%s%d-%s%d\n",
16696 directive, LPREFIX, value, LPREFIX, rel);
16697 else if (HAVE_AS_GOTOFF_IN_DATA)
16698 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
16699 #if TARGET_MACHO
16700 else if (TARGET_MACHO)
16701 {
16702 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
16703 machopic_output_function_base_name (file);
16704 putc ('\n', file);
16705 }
16706 #endif
16707 else
16708 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
16709 GOT_SYMBOL_NAME, LPREFIX, value);
16710 }
16711 \f
16712 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
16713 for the target. */
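/* Illustrative note: "xor %eax, %eax" is a 2-byte encoding but clobbers
   the flags, hence the CLOBBER of FLAGS_REG added below, while
   "mov $0, %eax" is a 5-byte encoding that leaves the flags untouched.  */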
16714
16715 void
16716 ix86_expand_clear (rtx dest)
16717 {
16718 rtx tmp;
16719
16720 /* We play register width games, which are only valid after reload. */
16721 gcc_assert (reload_completed);
16722
16723 /* Avoid HImode and its attendant prefix byte. */
16724 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
16725 dest = gen_rtx_REG (SImode, REGNO (dest));
16726 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
16727
16728 if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
16729 {
16730 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
16731 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
16732 }
16733
16734 emit_insn (tmp);
16735 }
16736
16737 /* X is an unchanging MEM. If it is a constant pool reference, return
16738 the constant pool rtx, else NULL. */
16739
16740 rtx
16741 maybe_get_pool_constant (rtx x)
16742 {
16743 x = ix86_delegitimize_address (XEXP (x, 0));
16744
16745 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
16746 return get_pool_constant (x);
16747
16748 return NULL_RTX;
16749 }
16750
16751 void
16752 ix86_expand_move (enum machine_mode mode, rtx operands[])
16753 {
16754 rtx op0, op1;
16755 enum tls_model model;
16756
16757 op0 = operands[0];
16758 op1 = operands[1];
16759
16760 if (GET_CODE (op1) == SYMBOL_REF)
16761 {
16762 rtx tmp;
16763
16764 model = SYMBOL_REF_TLS_MODEL (op1);
16765 if (model)
16766 {
16767 op1 = legitimize_tls_address (op1, model, true);
16768 op1 = force_operand (op1, op0);
16769 if (op1 == op0)
16770 return;
16771 op1 = convert_to_mode (mode, op1, 1);
16772 }
16773 else if ((tmp = legitimize_pe_coff_symbol (op1, false)) != NULL_RTX)
16774 op1 = tmp;
16775 }
16776 else if (GET_CODE (op1) == CONST
16777 && GET_CODE (XEXP (op1, 0)) == PLUS
16778 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
16779 {
16780 rtx addend = XEXP (XEXP (op1, 0), 1);
16781 rtx symbol = XEXP (XEXP (op1, 0), 0);
16782 rtx tmp;
16783
16784 model = SYMBOL_REF_TLS_MODEL (symbol);
16785 if (model)
16786 tmp = legitimize_tls_address (symbol, model, true);
16787 else
16788 tmp = legitimize_pe_coff_symbol (symbol, true);
16789
16790 if (tmp)
16791 {
16792 tmp = force_operand (tmp, NULL);
16793 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
16794 op0, 1, OPTAB_DIRECT);
16795 if (tmp == op0)
16796 return;
16797 op1 = convert_to_mode (mode, tmp, 1);
16798 }
16799 }
16800
16801 if ((flag_pic || MACHOPIC_INDIRECT)
16802 && symbolic_operand (op1, mode))
16803 {
16804 if (TARGET_MACHO && !TARGET_64BIT)
16805 {
16806 #if TARGET_MACHO
16807 /* dynamic-no-pic */
16808 if (MACHOPIC_INDIRECT)
16809 {
16810 rtx temp = ((reload_in_progress
16811 || ((op0 && REG_P (op0))
16812 && mode == Pmode))
16813 ? op0 : gen_reg_rtx (Pmode));
16814 op1 = machopic_indirect_data_reference (op1, temp);
16815 if (MACHOPIC_PURE)
16816 op1 = machopic_legitimize_pic_address (op1, mode,
16817 temp == op1 ? 0 : temp);
16818 }
16819 if (op0 != op1 && GET_CODE (op0) != MEM)
16820 {
16821 rtx insn = gen_rtx_SET (VOIDmode, op0, op1);
16822 emit_insn (insn);
16823 return;
16824 }
16825 if (GET_CODE (op0) == MEM)
16826 op1 = force_reg (Pmode, op1);
16827 else
16828 {
16829 rtx temp = op0;
16830 if (GET_CODE (temp) != REG)
16831 temp = gen_reg_rtx (Pmode);
16832 temp = legitimize_pic_address (op1, temp);
16833 if (temp == op0)
16834 return;
16835 op1 = temp;
16836 }
16837 /* dynamic-no-pic */
16838 #endif
16839 }
16840 else
16841 {
16842 if (MEM_P (op0))
16843 op1 = force_reg (mode, op1);
16844 else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
16845 {
16846 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
16847 op1 = legitimize_pic_address (op1, reg);
16848 if (op0 == op1)
16849 return;
16850 op1 = convert_to_mode (mode, op1, 1);
16851 }
16852 }
16853 }
16854 else
16855 {
16856 if (MEM_P (op0)
16857 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
16858 || !push_operand (op0, mode))
16859 && MEM_P (op1))
16860 op1 = force_reg (mode, op1);
16861
16862 if (push_operand (op0, mode)
16863 && ! general_no_elim_operand (op1, mode))
16864 op1 = copy_to_mode_reg (mode, op1);
16865
16866 /* Force large constants in 64bit compilation into a register
16867 to get them CSEed. */
16868 if (can_create_pseudo_p ()
16869 && (mode == DImode) && TARGET_64BIT
16870 && immediate_operand (op1, mode)
16871 && !x86_64_zext_immediate_operand (op1, VOIDmode)
16872 && !register_operand (op0, mode)
16873 && optimize)
16874 op1 = copy_to_mode_reg (mode, op1);
16875
16876 if (can_create_pseudo_p ()
16877 && FLOAT_MODE_P (mode)
16878 && GET_CODE (op1) == CONST_DOUBLE)
16879 {
16880 /* If we are loading a floating point constant to a register,
16881 force the value to memory now, since we'll get better code
16882 out of the back end. */
16883
16884 op1 = validize_mem (force_const_mem (mode, op1));
16885 if (!register_operand (op0, mode))
16886 {
16887 rtx temp = gen_reg_rtx (mode);
16888 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
16889 emit_move_insn (op0, temp);
16890 return;
16891 }
16892 }
16893 }
16894
16895 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
16896 }
16897
16898 void
16899 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
16900 {
16901 rtx op0 = operands[0], op1 = operands[1];
16902 unsigned int align = GET_MODE_ALIGNMENT (mode);
16903
16904 if (push_operand (op0, VOIDmode))
16905 op0 = emit_move_resolve_push (mode, op0);
16906
16907 /* Force constants other than zero into memory. We do not know how
16908 the instructions used to build constants modify the upper 64 bits
16909 of the register; once we have that information we may be able
16910 to handle some of them more efficiently. */
16911 if (can_create_pseudo_p ()
16912 && register_operand (op0, mode)
16913 && (CONSTANT_P (op1)
16914 || (GET_CODE (op1) == SUBREG
16915 && CONSTANT_P (SUBREG_REG (op1))))
16916 && !standard_sse_constant_p (op1))
16917 op1 = validize_mem (force_const_mem (mode, op1));
16918
16919 /* We need to check memory alignment for SSE mode since an attribute
16920 can make operands unaligned. */
16921 if (can_create_pseudo_p ()
16922 && SSE_REG_MODE_P (mode)
16923 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
16924 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
16925 {
16926 rtx tmp[2];
16927
16928 /* ix86_expand_vector_move_misalign() does not like constants ... */
16929 if (CONSTANT_P (op1)
16930 || (GET_CODE (op1) == SUBREG
16931 && CONSTANT_P (SUBREG_REG (op1))))
16932 op1 = validize_mem (force_const_mem (mode, op1));
16933
16934 /* ... nor both arguments in memory. */
16935 if (!register_operand (op0, mode)
16936 && !register_operand (op1, mode))
16937 op1 = force_reg (mode, op1);
16938
16939 tmp[0] = op0; tmp[1] = op1;
16940 ix86_expand_vector_move_misalign (mode, tmp);
16941 return;
16942 }
16943
16944 /* Make operand1 a register if it isn't already. */
16945 if (can_create_pseudo_p ()
16946 && !register_operand (op0, mode)
16947 && !register_operand (op1, mode))
16948 {
16949 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
16950 return;
16951 }
16952
16953 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
16954 }
16955
16956 /* Split 32-byte AVX unaligned load and store if needed. */
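/* Roughly, and only as an illustration: when splitting is enabled, an
   unaligned 256-bit load becomes a 128-bit load of the low half plus an
   insertion (vinsertf128) of the high half from mem+16, and an
   unaligned 256-bit store becomes a 128-bit store of the low half plus
   a vextractf128 of the high half to mem+16.  */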
16957
16958 static void
16959 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
16960 {
16961 rtx m;
16962 rtx (*extract) (rtx, rtx, rtx);
16963 rtx (*load_unaligned) (rtx, rtx);
16964 rtx (*store_unaligned) (rtx, rtx);
16965 enum machine_mode mode;
16966
16967 switch (GET_MODE (op0))
16968 {
16969 default:
16970 gcc_unreachable ();
16971 case V32QImode:
16972 extract = gen_avx_vextractf128v32qi;
16973 load_unaligned = gen_avx_loaddquv32qi;
16974 store_unaligned = gen_avx_storedquv32qi;
16975 mode = V16QImode;
16976 break;
16977 case V8SFmode:
16978 extract = gen_avx_vextractf128v8sf;
16979 load_unaligned = gen_avx_loadups256;
16980 store_unaligned = gen_avx_storeups256;
16981 mode = V4SFmode;
16982 break;
16983 case V4DFmode:
16984 extract = gen_avx_vextractf128v4df;
16985 load_unaligned = gen_avx_loadupd256;
16986 store_unaligned = gen_avx_storeupd256;
16987 mode = V2DFmode;
16988 break;
16989 }
16990
16991 if (MEM_P (op1))
16992 {
16993 if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
16994 {
16995 rtx r = gen_reg_rtx (mode);
16996 m = adjust_address (op1, mode, 0);
16997 emit_move_insn (r, m);
16998 m = adjust_address (op1, mode, 16);
16999 r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
17000 emit_move_insn (op0, r);
17001 }
17002 /* Normal *mov<mode>_internal pattern will handle
17003 unaligned loads just fine if misaligned_operand
17004 is true, and without the UNSPEC it can be combined
17005 with arithmetic instructions. */
17006 else if (misaligned_operand (op1, GET_MODE (op1)))
17007 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17008 else
17009 emit_insn (load_unaligned (op0, op1));
17010 }
17011 else if (MEM_P (op0))
17012 {
17013 if (TARGET_AVX256_SPLIT_UNALIGNED_STORE)
17014 {
17015 m = adjust_address (op0, mode, 0);
17016 emit_insn (extract (m, op1, const0_rtx));
17017 m = adjust_address (op0, mode, 16);
17018 emit_insn (extract (m, op1, const1_rtx));
17019 }
17020 else
17021 emit_insn (store_unaligned (op0, op1));
17022 }
17023 else
17024 gcc_unreachable ();
17025 }
17026
17027 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
17028 straight to ix86_expand_vector_move. */
17029 /* Code generation for scalar reg-reg moves of single and double precision data:
17030 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
17031 movaps reg, reg
17032 else
17033 movss reg, reg
17034 if (x86_sse_partial_reg_dependency == true)
17035 movapd reg, reg
17036 else
17037 movsd reg, reg
17038
17039 Code generation for scalar loads of double precision data:
17040 if (x86_sse_split_regs == true)
17041 movlpd mem, reg (gas syntax)
17042 else
17043 movsd mem, reg
17044
17045 Code generation for unaligned packed loads of single precision data
17046 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
17047 if (x86_sse_unaligned_move_optimal)
17048 movups mem, reg
17049
17050 if (x86_sse_partial_reg_dependency == true)
17051 {
17052 xorps reg, reg
17053 movlps mem, reg
17054 movhps mem+8, reg
17055 }
17056 else
17057 {
17058 movlps mem, reg
17059 movhps mem+8, reg
17060 }
17061
17062 Code generation for unaligned packed loads of double precision data
17063 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
17064 if (x86_sse_unaligned_move_optimal)
17065 movupd mem, reg
17066
17067 if (x86_sse_split_regs == true)
17068 {
17069 movlpd mem, reg
17070 movhpd mem+8, reg
17071 }
17072 else
17073 {
17074 movsd mem, reg
17075 movhpd mem+8, reg
17076 }
17077 */
17078
17079 void
17080 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
17081 {
17082 rtx op0, op1, orig_op0 = NULL_RTX, m;
17083 rtx (*load_unaligned) (rtx, rtx);
17084 rtx (*store_unaligned) (rtx, rtx);
17085
17086 op0 = operands[0];
17087 op1 = operands[1];
17088
17089 if (GET_MODE_SIZE (mode) == 64)
17090 {
17091 switch (GET_MODE_CLASS (mode))
17092 {
17093 case MODE_VECTOR_INT:
17094 case MODE_INT:
17095 if (GET_MODE (op0) != V16SImode)
17096 {
17097 if (!MEM_P (op0))
17098 {
17099 orig_op0 = op0;
17100 op0 = gen_reg_rtx (V16SImode);
17101 }
17102 else
17103 op0 = gen_lowpart (V16SImode, op0);
17104 }
17105 op1 = gen_lowpart (V16SImode, op1);
17106 /* FALLTHRU */
17107
17108 case MODE_VECTOR_FLOAT:
17109 switch (GET_MODE (op0))
17110 {
17111 default:
17112 gcc_unreachable ();
17113 case V16SImode:
17114 load_unaligned = gen_avx512f_loaddquv16si;
17115 store_unaligned = gen_avx512f_storedquv16si;
17116 break;
17117 case V16SFmode:
17118 load_unaligned = gen_avx512f_loadups512;
17119 store_unaligned = gen_avx512f_storeups512;
17120 break;
17121 case V8DFmode:
17122 load_unaligned = gen_avx512f_loadupd512;
17123 store_unaligned = gen_avx512f_storeupd512;
17124 break;
17125 }
17126
17127 if (MEM_P (op1))
17128 emit_insn (load_unaligned (op0, op1));
17129 else if (MEM_P (op0))
17130 emit_insn (store_unaligned (op0, op1));
17131 else
17132 gcc_unreachable ();
17133 if (orig_op0)
17134 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17135 break;
17136
17137 default:
17138 gcc_unreachable ();
17139 }
17140
17141 return;
17142 }
17143
17144 if (TARGET_AVX
17145 && GET_MODE_SIZE (mode) == 32)
17146 {
17147 switch (GET_MODE_CLASS (mode))
17148 {
17149 case MODE_VECTOR_INT:
17150 case MODE_INT:
17151 if (GET_MODE (op0) != V32QImode)
17152 {
17153 if (!MEM_P (op0))
17154 {
17155 orig_op0 = op0;
17156 op0 = gen_reg_rtx (V32QImode);
17157 }
17158 else
17159 op0 = gen_lowpart (V32QImode, op0);
17160 }
17161 op1 = gen_lowpart (V32QImode, op1);
17162 /* FALLTHRU */
17163
17164 case MODE_VECTOR_FLOAT:
17165 ix86_avx256_split_vector_move_misalign (op0, op1);
17166 if (orig_op0)
17167 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17168 break;
17169
17170 default:
17171 gcc_unreachable ();
17172 }
17173
17174 return;
17175 }
17176
17177 if (MEM_P (op1))
17178 {
17179 /* Normal *mov<mode>_internal pattern will handle
17180 unaligned loads just fine if misaligned_operand
17181 is true, and without the UNSPEC it can be combined
17182 with arithmetic instructions. */
17183 if (TARGET_AVX
17184 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
17185 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
17186 && misaligned_operand (op1, GET_MODE (op1)))
17187 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17188 /* ??? If we have typed data, then it would appear that using
17189 movdqu is the only way to get unaligned data loaded with
17190 integer type. */
17191 else if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17192 {
17193 if (GET_MODE (op0) != V16QImode)
17194 {
17195 orig_op0 = op0;
17196 op0 = gen_reg_rtx (V16QImode);
17197 }
17198 op1 = gen_lowpart (V16QImode, op1);
17199 /* We will eventually emit movups based on insn attributes. */
17200 emit_insn (gen_sse2_loaddquv16qi (op0, op1));
17201 if (orig_op0)
17202 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17203 }
17204 else if (TARGET_SSE2 && mode == V2DFmode)
17205 {
17206 rtx zero;
17207
17208 if (TARGET_AVX
17209 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17210 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17211 || optimize_insn_for_size_p ())
17212 {
17213 /* We will eventually emit movups based on insn attributes. */
17214 emit_insn (gen_sse2_loadupd (op0, op1));
17215 return;
17216 }
17217
17218 /* When SSE registers are split into halves, we can avoid
17219 writing to the top half twice. */
17220 if (TARGET_SSE_SPLIT_REGS)
17221 {
17222 emit_clobber (op0);
17223 zero = op0;
17224 }
17225 else
17226 {
17227 /* ??? Not sure about the best option for the Intel chips.
17228 The following would seem to satisfy; the register is
17229 entirely cleared, breaking the dependency chain. We
17230 then store to the upper half, with a dependency depth
17231 of one. A rumor has it that Intel recommends two movsd
17232 followed by an unpacklpd, but this is unconfirmed. And
17233 given that the dependency depth of the unpacklpd would
17234 still be one, I'm not sure why this would be better. */
17235 zero = CONST0_RTX (V2DFmode);
17236 }
17237
17238 m = adjust_address (op1, DFmode, 0);
17239 emit_insn (gen_sse2_loadlpd (op0, zero, m));
17240 m = adjust_address (op1, DFmode, 8);
17241 emit_insn (gen_sse2_loadhpd (op0, op0, m));
17242 }
17243 else
17244 {
17245 rtx t;
17246
17247 if (TARGET_AVX
17248 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17249 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17250 || optimize_insn_for_size_p ())
17251 {
17252 if (GET_MODE (op0) != V4SFmode)
17253 {
17254 orig_op0 = op0;
17255 op0 = gen_reg_rtx (V4SFmode);
17256 }
17257 op1 = gen_lowpart (V4SFmode, op1);
17258 emit_insn (gen_sse_loadups (op0, op1));
17259 if (orig_op0)
17260 emit_move_insn (orig_op0,
17261 gen_lowpart (GET_MODE (orig_op0), op0));
17262 return;
17263 }
17264
17265 if (mode != V4SFmode)
17266 t = gen_reg_rtx (V4SFmode);
17267 else
17268 t = op0;
17269
17270 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
17271 emit_move_insn (t, CONST0_RTX (V4SFmode));
17272 else
17273 emit_clobber (t);
17274
17275 m = adjust_address (op1, V2SFmode, 0);
17276 emit_insn (gen_sse_loadlps (t, t, m));
17277 m = adjust_address (op1, V2SFmode, 8);
17278 emit_insn (gen_sse_loadhps (t, t, m));
17279 if (mode != V4SFmode)
17280 emit_move_insn (op0, gen_lowpart (mode, t));
17281 }
17282 }
17283 else if (MEM_P (op0))
17284 {
17285 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17286 {
17287 op0 = gen_lowpart (V16QImode, op0);
17288 op1 = gen_lowpart (V16QImode, op1);
17289 /* We will eventually emit movups based on insn attributes. */
17290 emit_insn (gen_sse2_storedquv16qi (op0, op1));
17291 }
17292 else if (TARGET_SSE2 && mode == V2DFmode)
17293 {
17294 if (TARGET_AVX
17295 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17296 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17297 || optimize_insn_for_size_p ())
17298 /* We will eventually emit movups based on insn attributes. */
17299 emit_insn (gen_sse2_storeupd (op0, op1));
17300 else
17301 {
17302 m = adjust_address (op0, DFmode, 0);
17303 emit_insn (gen_sse2_storelpd (m, op1));
17304 m = adjust_address (op0, DFmode, 8);
17305 emit_insn (gen_sse2_storehpd (m, op1));
17306 }
17307 }
17308 else
17309 {
17310 if (mode != V4SFmode)
17311 op1 = gen_lowpart (V4SFmode, op1);
17312
17313 if (TARGET_AVX
17314 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17315 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17316 || optimize_insn_for_size_p ())
17317 {
17318 op0 = gen_lowpart (V4SFmode, op0);
17319 emit_insn (gen_sse_storeups (op0, op1));
17320 }
17321 else
17322 {
17323 m = adjust_address (op0, V2SFmode, 0);
17324 emit_insn (gen_sse_storelps (m, op1));
17325 m = adjust_address (op0, V2SFmode, 8);
17326 emit_insn (gen_sse_storehps (m, op1));
17327 }
17328 }
17329 }
17330 else
17331 gcc_unreachable ();
17332 }
17333
17334 /* Helper function of ix86_fixup_binary_operands to canonicalize
17335 operand order. Returns true if the operands should be swapped. */
17336
17337 static bool
17338 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
17339 rtx operands[])
17340 {
17341 rtx dst = operands[0];
17342 rtx src1 = operands[1];
17343 rtx src2 = operands[2];
17344
17345 /* If the operation is not commutative, we can't do anything. */
17346 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
17347 return false;
17348
17349 /* Highest priority is that src1 should match dst. */
17350 if (rtx_equal_p (dst, src1))
17351 return false;
17352 if (rtx_equal_p (dst, src2))
17353 return true;
17354
17355 /* Next highest priority is that immediate constants come second. */
17356 if (immediate_operand (src2, mode))
17357 return false;
17358 if (immediate_operand (src1, mode))
17359 return true;
17360
17361 /* Lowest priority is that memory references should come second. */
17362 if (MEM_P (src2))
17363 return false;
17364 if (MEM_P (src1))
17365 return true;
17366
17367 return false;
17368 }
17369
17370
17371 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
17372 destination to use for the operation. If different from the true
17373 destination in operands[0], a copy operation will be required. */
17374
17375 rtx
17376 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
17377 rtx operands[])
17378 {
17379 rtx dst = operands[0];
17380 rtx src1 = operands[1];
17381 rtx src2 = operands[2];
17382
17383 /* Canonicalize operand order. */
17384 if (ix86_swap_binary_operands_p (code, mode, operands))
17385 {
17386 rtx temp;
17387
17388 /* It is invalid to swap operands of different modes. */
17389 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
17390
17391 temp = src1;
17392 src1 = src2;
17393 src2 = temp;
17394 }
17395
17396 /* Both source operands cannot be in memory. */
17397 if (MEM_P (src1) && MEM_P (src2))
17398 {
17399 /* Optimization: Only read from memory once. */
17400 if (rtx_equal_p (src1, src2))
17401 {
17402 src2 = force_reg (mode, src2);
17403 src1 = src2;
17404 }
17405 else if (rtx_equal_p (dst, src1))
17406 src2 = force_reg (mode, src2);
17407 else
17408 src1 = force_reg (mode, src1);
17409 }
17410
17411 /* If the destination is memory, and we do not have matching source
17412 operands, do things in registers. */
17413 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
17414 dst = gen_reg_rtx (mode);
17415
17416 /* Source 1 cannot be a constant. */
17417 if (CONSTANT_P (src1))
17418 src1 = force_reg (mode, src1);
17419
17420 /* Source 1 cannot be a non-matching memory. */
17421 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
17422 src1 = force_reg (mode, src1);
17423
17424 /* Improve address combine. */
17425 if (code == PLUS
17426 && GET_MODE_CLASS (mode) == MODE_INT
17427 && MEM_P (src2))
17428 src2 = force_reg (mode, src2);
17429
17430 operands[1] = src1;
17431 operands[2] = src2;
17432 return dst;
17433 }
17434
17435 /* Similarly, but assume that the destination has already been
17436 set up properly. */
17437
17438 void
17439 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
17440 enum machine_mode mode, rtx operands[])
17441 {
17442 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
17443 gcc_assert (dst == operands[0]);
17444 }
17445
17446 /* Attempt to expand a binary operator. Make the expansion closer to the
17447 actual machine than just general_operand, which would allow 3 separate
17448 memory references (one output, two inputs) in a single insn. */
17449
17450 void
17451 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
17452 rtx operands[])
17453 {
17454 rtx src1, src2, dst, op, clob;
17455
17456 dst = ix86_fixup_binary_operands (code, mode, operands);
17457 src1 = operands[1];
17458 src2 = operands[2];
17459
17460 /* Emit the instruction. */
17461
17462 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
17463 if (reload_in_progress)
17464 {
17465 /* Reload doesn't know about the flags register, and doesn't know that
17466 it doesn't want to clobber it. We can only do this with PLUS. */
17467 gcc_assert (code == PLUS);
17468 emit_insn (op);
17469 }
17470 else if (reload_completed
17471 && code == PLUS
17472 && !rtx_equal_p (dst, src1))
17473 {
17474 /* This is going to be an LEA; avoid splitting it later. */
17475 emit_insn (op);
17476 }
17477 else
17478 {
17479 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17480 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
17481 }
17482
17483 /* Fix up the destination if needed. */
17484 if (dst != operands[0])
17485 emit_move_insn (operands[0], dst);
17486 }
17487
17488 /* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with
17489 the given OPERANDS. */
17490
17491 void
17492 ix86_expand_vector_logical_operator (enum rtx_code code, enum machine_mode mode,
17493 rtx operands[])
17494 {
17495 rtx op1 = NULL_RTX, op2 = NULL_RTX;
17496 if (GET_CODE (operands[1]) == SUBREG)
17497 {
17498 op1 = operands[1];
17499 op2 = operands[2];
17500 }
17501 else if (GET_CODE (operands[2]) == SUBREG)
17502 {
17503 op1 = operands[2];
17504 op2 = operands[1];
17505 }
17506 /* Optimize (__m128i) d | (__m128i) e and similar code, where d and e
17507 are float vectors, into a float vector logical insn.  In C/C++,
17508 without using intrinsics, there is no other way to express a vector
17509 logical operation on float vectors than to cast them temporarily to
17510 integer vectors. */
17511 if (op1
17512 && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17513 && ((GET_CODE (op2) == SUBREG || GET_CODE (op2) == CONST_VECTOR))
17514 && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1))) == MODE_VECTOR_FLOAT
17515 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1))) == GET_MODE_SIZE (mode)
17516 && SUBREG_BYTE (op1) == 0
17517 && (GET_CODE (op2) == CONST_VECTOR
17518 || (GET_MODE (SUBREG_REG (op1)) == GET_MODE (SUBREG_REG (op2))
17519 && SUBREG_BYTE (op2) == 0))
17520 && can_create_pseudo_p ())
17521 {
17522 rtx dst;
17523 switch (GET_MODE (SUBREG_REG (op1)))
17524 {
17525 case V4SFmode:
17526 case V8SFmode:
17527 case V2DFmode:
17528 case V4DFmode:
17529 dst = gen_reg_rtx (GET_MODE (SUBREG_REG (op1)));
17530 if (GET_CODE (op2) == CONST_VECTOR)
17531 {
17532 op2 = gen_lowpart (GET_MODE (dst), op2);
17533 op2 = force_reg (GET_MODE (dst), op2);
17534 }
17535 else
17536 {
17537 op1 = operands[1];
17538 op2 = SUBREG_REG (operands[2]);
17539 if (!nonimmediate_operand (op2, GET_MODE (dst)))
17540 op2 = force_reg (GET_MODE (dst), op2);
17541 }
17542 op1 = SUBREG_REG (op1);
17543 if (!nonimmediate_operand (op1, GET_MODE (dst)))
17544 op1 = force_reg (GET_MODE (dst), op1);
17545 emit_insn (gen_rtx_SET (VOIDmode, dst,
17546 gen_rtx_fmt_ee (code, GET_MODE (dst),
17547 op1, op2)));
17548 emit_move_insn (operands[0], gen_lowpart (mode, dst));
17549 return;
17550 default:
17551 break;
17552 }
17553 }
17554 if (!nonimmediate_operand (operands[1], mode))
17555 operands[1] = force_reg (mode, operands[1]);
17556 if (!nonimmediate_operand (operands[2], mode))
17557 operands[2] = force_reg (mode, operands[2]);
17558 ix86_fixup_binary_operands_no_copy (code, mode, operands);
17559 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
17560 gen_rtx_fmt_ee (code, mode, operands[1],
17561 operands[2])));
17562 }
17563
17564 /* Return TRUE or FALSE depending on whether the binary operator meets the
17565 appropriate constraints. */
17566
17567 bool
17568 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
17569 rtx operands[3])
17570 {
17571 rtx dst = operands[0];
17572 rtx src1 = operands[1];
17573 rtx src2 = operands[2];
17574
17575 /* Both source operands cannot be in memory. */
17576 if (MEM_P (src1) && MEM_P (src2))
17577 return false;
17578
17579 /* Canonicalize operand order for commutative operators. */
17580 if (ix86_swap_binary_operands_p (code, mode, operands))
17581 {
17582 rtx temp = src1;
17583 src1 = src2;
17584 src2 = temp;
17585 }
17586
17587 /* If the destination is memory, we must have a matching source operand. */
17588 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
17589 return false;
17590
17591 /* Source 1 cannot be a constant. */
17592 if (CONSTANT_P (src1))
17593 return false;
17594
17595 /* Source 1 cannot be a non-matching memory. */
17596 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
17597 /* Support "andhi/andsi/anddi" as a zero-extending move. */
17598 return (code == AND
17599 && (mode == HImode
17600 || mode == SImode
17601 || (TARGET_64BIT && mode == DImode))
17602 && satisfies_constraint_L (src2));
17603
17604 return true;
17605 }
17606
17607 /* Attempt to expand a unary operator. Make the expansion closer to the
17608 actual machine than just general_operand, which would allow 2 separate
17609 memory references (one output, one input) in a single insn. */
17610
17611 void
17612 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
17613 rtx operands[])
17614 {
17615 int matching_memory;
17616 rtx src, dst, op, clob;
17617
17618 dst = operands[0];
17619 src = operands[1];
17620
17621 /* If the destination is memory, and we do not have matching source
17622 operands, do things in registers. */
17623 matching_memory = 0;
17624 if (MEM_P (dst))
17625 {
17626 if (rtx_equal_p (dst, src))
17627 matching_memory = 1;
17628 else
17629 dst = gen_reg_rtx (mode);
17630 }
17631
17632 /* When source operand is memory, destination must match. */
17633 if (MEM_P (src) && !matching_memory)
17634 src = force_reg (mode, src);
17635
17636 /* Emit the instruction. */
17637
17638 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
17639 if (reload_in_progress || code == NOT)
17640 {
17641 /* Reload doesn't know about the flags register, and doesn't know that
17642 it doesn't want to clobber it. */
17643 gcc_assert (code == NOT);
17644 emit_insn (op);
17645 }
17646 else
17647 {
17648 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17649 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
17650 }
17651
17652 /* Fix up the destination if needed. */
17653 if (dst != operands[0])
17654 emit_move_insn (operands[0], dst);
17655 }
17656
17657 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
17658 divisor are within the range [0-255]. */
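/* An illustrative sketch of the generated code for a 32-bit unsigned
   division (register names and labels are arbitrary; this is not the
   exact emitted RTL):

	mov	dividend, scratch
	or	divisor, scratch
	test	$0xffffff00, scratch
	je	qimode_label
	<full 32-bit divide/modulo>
	jmp	end_label
     qimode_label:
	<8-bit divb: quotient lands in AL, remainder in AH>
     end_label:  */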
17659
17660 void
17661 ix86_split_idivmod (enum machine_mode mode, rtx operands[],
17662 bool signed_p)
17663 {
17664 rtx end_label, qimode_label;
17665 rtx insn, div, mod;
17666 rtx scratch, tmp0, tmp1, tmp2;
17667 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
17668 rtx (*gen_zero_extend) (rtx, rtx);
17669 rtx (*gen_test_ccno_1) (rtx, rtx);
17670
17671 switch (mode)
17672 {
17673 case SImode:
17674 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
17675 gen_test_ccno_1 = gen_testsi_ccno_1;
17676 gen_zero_extend = gen_zero_extendqisi2;
17677 break;
17678 case DImode:
17679 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
17680 gen_test_ccno_1 = gen_testdi_ccno_1;
17681 gen_zero_extend = gen_zero_extendqidi2;
17682 break;
17683 default:
17684 gcc_unreachable ();
17685 }
17686
17687 end_label = gen_label_rtx ();
17688 qimode_label = gen_label_rtx ();
17689
17690 scratch = gen_reg_rtx (mode);
17691
17692 /* Use 8bit unsigned divmod if dividend and divisor are within
17693 the range [0-255]. */
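  /* ORing the two operands and testing the result against -0x100 checks
     with a single compare that no bit above bit 7 is set in either
     value, i.e. that both fit in 8 bits.  */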
17694 emit_move_insn (scratch, operands[2]);
17695 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
17696 scratch, 1, OPTAB_DIRECT);
17697 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
17698 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
17699 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
17700 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
17701 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
17702 pc_rtx);
17703 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
17704 predict_jump (REG_BR_PROB_BASE * 50 / 100);
17705 JUMP_LABEL (insn) = qimode_label;
17706
17707 /* Generate original signed/unsigned divmod. */
17708 div = gen_divmod4_1 (operands[0], operands[1],
17709 operands[2], operands[3]);
17710 emit_insn (div);
17711
17712 /* Branch to the end. */
17713 emit_jump_insn (gen_jump (end_label));
17714 emit_barrier ();
17715
17716 /* Generate 8bit unsigned divide. */
17717 emit_label (qimode_label);
17718 /* Don't use operands[0] for result of 8bit divide since not all
17719 registers support QImode ZERO_EXTRACT. */
17720 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
17721 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
17722 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
17723 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
17724
17725 if (signed_p)
17726 {
17727 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
17728 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
17729 }
17730 else
17731 {
17732 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
17733 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
17734 }
17735
17736 /* Extract remainder from AH. */
17737 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
17738 if (REG_P (operands[1]))
17739 insn = emit_move_insn (operands[1], tmp1);
17740 else
17741 {
17742 /* Need a new scratch register since the old one has result
17743 of 8bit divide. */
17744 scratch = gen_reg_rtx (mode);
17745 emit_move_insn (scratch, tmp1);
17746 insn = emit_move_insn (operands[1], scratch);
17747 }
17748 set_unique_reg_note (insn, REG_EQUAL, mod);
17749
17750 /* Zero extend quotient from AL. */
17751 tmp1 = gen_lowpart (QImode, tmp0);
17752 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
17753 set_unique_reg_note (insn, REG_EQUAL, div);
17754
17755 emit_label (end_label);
17756 }
17757
17758 /* Whether it is OK to emit CFI directives when emitting asm code. */
17759
17760 bool
17761 ix86_emit_cfi ()
17762 {
17763 return dwarf2out_do_cfi_asm ();
17764 }
17765
17766 #define LEA_MAX_STALL (3)
17767 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
17768
17769 /* Increase given DISTANCE in half-cycles according to
17770 dependencies between PREV and NEXT instructions.
17771 Add 1 half-cycle if there is no dependency and
17772 go to the next cycle if there is some dependency. */
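/* As an illustration of the accounting implied by the code below: with
   DISTANCE == 3 half-cycles, a dependency between PREV and NEXT rounds
   up to the cycle boundary and adds a full cycle, 3 + (3 & 1) + 2 = 6,
   while independent instructions simply give 3 + 1 = 4.  */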
17773
17774 static unsigned int
17775 increase_distance (rtx prev, rtx next, unsigned int distance)
17776 {
17777 df_ref def, use;
17778
17779 if (!prev || !next)
17780 return distance + (distance & 1) + 2;
17781
17782 if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
17783 return distance + 1;
17784
17785 FOR_EACH_INSN_USE (use, next)
17786 FOR_EACH_INSN_DEF (def, prev)
17787 if (!DF_REF_IS_ARTIFICIAL (def)
17788 && DF_REF_REGNO (use) == DF_REF_REGNO (def))
17789 return distance + (distance & 1) + 2;
17790
17791 return distance + 1;
17792 }
17793
17794 /* Check whether instruction INSN defines register number
17795 REGNO1 or REGNO2. */
17796
17797 static bool
17798 insn_defines_reg (unsigned int regno1, unsigned int regno2,
17799 rtx insn)
17800 {
17801 df_ref def;
17802
17803 FOR_EACH_INSN_DEF (def, insn)
17804 if (DF_REF_REG_DEF_P (def)
17805 && !DF_REF_IS_ARTIFICIAL (def)
17806 && (regno1 == DF_REF_REGNO (def)
17807 || regno2 == DF_REF_REGNO (def)))
17808 return true;
17809
17810 return false;
17811 }
17812
17813 /* Check whether instruction INSN uses register number
17814 REGNO as part of an address expression. */
17815
17816 static bool
17817 insn_uses_reg_mem (unsigned int regno, rtx insn)
17818 {
17819 df_ref use;
17820
17821 FOR_EACH_INSN_USE (use, insn)
17822 if (DF_REF_REG_MEM_P (use) && regno == DF_REF_REGNO (use))
17823 return true;
17824
17825 return false;
17826 }
17827
17828 /* Search backward for non-agu definition of register number REGNO1
17829 or register number REGNO2 in basic block starting from instruction
17830 START up to head of basic block or instruction INSN.
17831
17832 The function sets *FOUND to true if a definition was found
17833 and to false otherwise.
17834
17835 Distance in half-cycles between START and found instruction or head
17836 of BB is added to DISTANCE and returned. */
17837
17838 static int
17839 distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
17840 rtx insn, int distance,
17841 rtx start, bool *found)
17842 {
17843 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
17844 rtx prev = start;
17845 rtx next = NULL;
17846
17847 *found = false;
17848
17849 while (prev
17850 && prev != insn
17851 && distance < LEA_SEARCH_THRESHOLD)
17852 {
17853 if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
17854 {
17855 distance = increase_distance (prev, next, distance);
17856 if (insn_defines_reg (regno1, regno2, prev))
17857 {
17858 if (recog_memoized (prev) < 0
17859 || get_attr_type (prev) != TYPE_LEA)
17860 {
17861 *found = true;
17862 return distance;
17863 }
17864 }
17865
17866 next = prev;
17867 }
17868 if (prev == BB_HEAD (bb))
17869 break;
17870
17871 prev = PREV_INSN (prev);
17872 }
17873
17874 return distance;
17875 }
17876
17877 /* Search backward for non-agu definition of register number REGNO1
17878 or register number REGNO2 in INSN's basic block until
17879 1. Pass LEA_SEARCH_THRESHOLD instructions, or
17880 2. Reach neighbour BBs boundary, or
17881 3. Reach agu definition.
17882 Returns the distance between the non-agu definition point and INSN.
17883 If no definition point, returns -1. */
17884
17885 static int
17886 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
17887 rtx insn)
17888 {
17889 basic_block bb = BLOCK_FOR_INSN (insn);
17890 int distance = 0;
17891 bool found = false;
17892
17893 if (insn != BB_HEAD (bb))
17894 distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
17895 distance, PREV_INSN (insn),
17896 &found);
17897
17898 if (!found && distance < LEA_SEARCH_THRESHOLD)
17899 {
17900 edge e;
17901 edge_iterator ei;
17902 bool simple_loop = false;
17903
17904 FOR_EACH_EDGE (e, ei, bb->preds)
17905 if (e->src == bb)
17906 {
17907 simple_loop = true;
17908 break;
17909 }
17910
17911 if (simple_loop)
17912 distance = distance_non_agu_define_in_bb (regno1, regno2,
17913 insn, distance,
17914 BB_END (bb), &found);
17915 else
17916 {
17917 int shortest_dist = -1;
17918 bool found_in_bb = false;
17919
17920 FOR_EACH_EDGE (e, ei, bb->preds)
17921 {
17922 int bb_dist
17923 = distance_non_agu_define_in_bb (regno1, regno2,
17924 insn, distance,
17925 BB_END (e->src),
17926 &found_in_bb);
17927 if (found_in_bb)
17928 {
17929 if (shortest_dist < 0)
17930 shortest_dist = bb_dist;
17931 else if (bb_dist > 0)
17932 shortest_dist = MIN (bb_dist, shortest_dist);
17933
17934 found = true;
17935 }
17936 }
17937
17938 distance = shortest_dist;
17939 }
17940 }
17941
17942 /* get_attr_type may modify recog data. We want to make sure
17943 that recog data is valid for instruction INSN, on which
17944 distance_non_agu_define is called. INSN is unchanged here. */
17945 extract_insn_cached (insn);
17946
17947 if (!found)
17948 return -1;
17949
17950 return distance >> 1;
17951 }
17952
17953 /* Return the distance in half-cycles between INSN and the next
17954 insn that uses register number REGNO in a memory address, added
17955 to DISTANCE. Return -1 if REGNO is set.
17956
17957 Set *FOUND to true if a use of the register was found and
17958 to false otherwise.
17959 Set *REDEFINED to true if a redefinition of the register was
17960 found and to false otherwise. */
17961
17962 static int
17963 distance_agu_use_in_bb (unsigned int regno,
17964 rtx insn, int distance, rtx start,
17965 bool *found, bool *redefined)
17966 {
17967 basic_block bb = NULL;
17968 rtx next = start;
17969 rtx prev = NULL;
17970
17971 *found = false;
17972 *redefined = false;
17973
17974 if (start != NULL_RTX)
17975 {
17976 bb = BLOCK_FOR_INSN (start);
17977 if (start != BB_HEAD (bb))
17978 /* If insn and start belong to the same bb, set prev to insn,
17979 so the call to increase_distance will increase the distance
17980 between insns by 1. */
17981 prev = insn;
17982 }
17983
17984 while (next
17985 && next != insn
17986 && distance < LEA_SEARCH_THRESHOLD)
17987 {
17988 if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
17989 {
17990 distance = increase_distance (prev, next, distance);
17991 if (insn_uses_reg_mem (regno, next))
17992 {
17993 /* Return DISTANCE if OP0 is used in memory
17994 address in NEXT. */
17995 *found = true;
17996 return distance;
17997 }
17998
17999 if (insn_defines_reg (regno, INVALID_REGNUM, next))
18000 {
18001 /* Return -1 if OP0 is set in NEXT. */
18002 *redefined = true;
18003 return -1;
18004 }
18005
18006 prev = next;
18007 }
18008
18009 if (next == BB_END (bb))
18010 break;
18011
18012 next = NEXT_INSN (next);
18013 }
18014
18015 return distance;
18016 }
18017
18018 /* Return the distance between INSN and the next insn that uses
18019 register number REGNO0 in a memory address. Return -1 if no
18020 such use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
18021
18022 static int
18023 distance_agu_use (unsigned int regno0, rtx insn)
18024 {
18025 basic_block bb = BLOCK_FOR_INSN (insn);
18026 int distance = 0;
18027 bool found = false;
18028 bool redefined = false;
18029
18030 if (insn != BB_END (bb))
18031 distance = distance_agu_use_in_bb (regno0, insn, distance,
18032 NEXT_INSN (insn),
18033 &found, &redefined);
18034
18035 if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
18036 {
18037 edge e;
18038 edge_iterator ei;
18039 bool simple_loop = false;
18040
18041 FOR_EACH_EDGE (e, ei, bb->succs)
18042 if (e->dest == bb)
18043 {
18044 simple_loop = true;
18045 break;
18046 }
18047
18048 if (simple_loop)
18049 distance = distance_agu_use_in_bb (regno0, insn,
18050 distance, BB_HEAD (bb),
18051 &found, &redefined);
18052 else
18053 {
18054 int shortest_dist = -1;
18055 bool found_in_bb = false;
18056 bool redefined_in_bb = false;
18057
18058 FOR_EACH_EDGE (e, ei, bb->succs)
18059 {
18060 int bb_dist
18061 = distance_agu_use_in_bb (regno0, insn,
18062 distance, BB_HEAD (e->dest),
18063 &found_in_bb, &redefined_in_bb);
18064 if (found_in_bb)
18065 {
18066 if (shortest_dist < 0)
18067 shortest_dist = bb_dist;
18068 else if (bb_dist > 0)
18069 shortest_dist = MIN (bb_dist, shortest_dist);
18070
18071 found = true;
18072 }
18073 }
18074
18075 distance = shortest_dist;
18076 }
18077 }
18078
18079 if (!found || redefined)
18080 return -1;
18081
18082 return distance >> 1;
18083 }
18084
18085 /* Define this macro to tune LEA priority vs. ADD; it takes effect when
18086 there is a choice between LEA and ADD.
18087 Negative value: ADD is preferred over LEA
18088 Zero: Neutral
18089 Positive value: LEA is preferred over ADD */
18090 #define IX86_LEA_PRIORITY 0
18091
18092 /* Return true if use of the lea INSN has a performance advantage
18093 over a sequence of instructions. The instruction sequence has
18094 SPLIT_COST cycles higher latency than the lea latency. */
18095
18096 static bool
18097 ix86_lea_outperforms (rtx insn, unsigned int regno0, unsigned int regno1,
18098 unsigned int regno2, int split_cost, bool has_scale)
18099 {
18100 int dist_define, dist_use;
18101
18102 /* For Silvermont, if a 2-source or 3-source LEA is used for a
18103 non-destructive destination, or for the ability to use SCALE,
18104 the use of LEA is justified. */
18105 if (TARGET_SILVERMONT || TARGET_INTEL)
18106 {
18107 if (has_scale)
18108 return true;
18109 if (split_cost < 1)
18110 return false;
18111 if (regno0 == regno1 || regno0 == regno2)
18112 return false;
18113 return true;
18114 }
18115
18116 dist_define = distance_non_agu_define (regno1, regno2, insn);
18117 dist_use = distance_agu_use (regno0, insn);
18118
18119 if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
18120 {
18121 /* If there is no non-AGU operand definition, no AGU
18122 operand use and the split cost is 0, then both the lea
18123 and non-lea variants have the same priority. Currently
18124 we prefer lea for 64-bit code and non-lea for 32-bit
18125 code. */
18126 if (dist_use < 0 && split_cost == 0)
18127 return TARGET_64BIT || IX86_LEA_PRIORITY;
18128 else
18129 return true;
18130 }
18131
18132 /* With a longer definition distance, lea is preferable.
18133 Adjust the distance to take into account the splitting cost
18134 and the lea priority. */
18135 dist_define += split_cost + IX86_LEA_PRIORITY;
18136
18137 /* If there is no use in a memory address, then we just check
18138 that the split cost exceeds the AGU stall. */
18139 if (dist_use < 0)
18140 return dist_define > LEA_MAX_STALL;
18141
18142 /* If this insn has both backward non-agu dependence and forward
18143 agu dependence, the one with the shorter distance takes effect. */
18144 return dist_define >= dist_use;
18145 }
18146
18147 /* Return true if it is legal to clobber flags by INSN and
18148 false otherwise. */
18149
18150 static bool
18151 ix86_ok_to_clobber_flags (rtx insn)
18152 {
18153 basic_block bb = BLOCK_FOR_INSN (insn);
18154 df_ref use;
18155 bitmap live;
18156
18157 while (insn)
18158 {
18159 if (NONDEBUG_INSN_P (insn))
18160 {
18161 FOR_EACH_INSN_USE (use, insn)
18162 if (DF_REF_REG_USE_P (use) && DF_REF_REGNO (use) == FLAGS_REG)
18163 return false;
18164
18165 if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
18166 return true;
18167 }
18168
18169 if (insn == BB_END (bb))
18170 break;
18171
18172 insn = NEXT_INSN (insn);
18173 }
18174
18175 live = df_get_live_out (bb);
18176 return !REGNO_REG_SET_P (live, FLAGS_REG);
18177 }
18178
18179 /* Return true if we need to split op0 = op1 + op2 into a sequence of
18180 move and add to avoid AGU stalls. */
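/* For example (registers illustrative), a three-operand add r0 = r1 + r2
   that would otherwise be emitted as

	lea	(%rbx,%rcx), %rax

   is split into

	mov	%rbx, %rax
	add	%rcx, %rax

   when the lea form is predicted to stall the AGU.  */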
18181
18182 bool
18183 ix86_avoid_lea_for_add (rtx insn, rtx operands[])
18184 {
18185 unsigned int regno0, regno1, regno2;
18186
18187 /* Check if we need to optimize. */
18188 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18189 return false;
18190
18191 /* Check it is correct to split here. */
18192 if (!ix86_ok_to_clobber_flags (insn))
18193 return false;
18194
18195 regno0 = true_regnum (operands[0]);
18196 regno1 = true_regnum (operands[1]);
18197 regno2 = true_regnum (operands[2]);
18198
18199 /* We need to split only adds with a non-destructive
18200 destination operand. */
18201 if (regno0 == regno1 || regno0 == regno2)
18202 return false;
18203 else
18204 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
18205 }
18206
18207 /* Return true if we should emit lea instruction instead of mov
18208 instruction. */
18209
18210 bool
18211 ix86_use_lea_for_mov (rtx insn, rtx operands[])
18212 {
18213 unsigned int regno0, regno1;
18214
18215 /* Check if we need to optimize. */
18216 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18217 return false;
18218
18219 /* Use lea for reg to reg moves only. */
18220 if (!REG_P (operands[0]) || !REG_P (operands[1]))
18221 return false;
18222
18223 regno0 = true_regnum (operands[0]);
18224 regno1 = true_regnum (operands[1]);
18225
18226 return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
18227 }
18228
18229 /* Return true if we need to split lea into a sequence of
18230 instructions to avoid AGU stalls. */
18231
18232 bool
18233 ix86_avoid_lea_for_addr (rtx insn, rtx operands[])
18234 {
18235 unsigned int regno0, regno1, regno2;
18236 int split_cost;
18237 struct ix86_address parts;
18238 int ok;
18239
18240 /* Check we need to optimize. */
18241 if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
18242 return false;
18243
18244 /* The "at least two components" test below might not catch simple
18245 move or zero extension insns if parts.base is non-NULL and parts.disp
18246 is const0_rtx as the only components in the address, e.g. if the
18247 register is %rbp or %r13. As this test is much cheaper and moves or
18248 zero extensions are the common case, do this check first. */
18249 if (REG_P (operands[1])
18250 || (SImode_address_operand (operands[1], VOIDmode)
18251 && REG_P (XEXP (operands[1], 0))))
18252 return false;
18253
18254 /* Check if it is OK to split here. */
18255 if (!ix86_ok_to_clobber_flags (insn))
18256 return false;
18257
18258 ok = ix86_decompose_address (operands[1], &parts);
18259 gcc_assert (ok);
18260
18261 /* There should be at least two components in the address. */
18262 if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
18263 + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
18264 return false;
18265
18266 /* We should not split into add if a non-legitimate PIC
18267 operand is used as the displacement. */
18268 if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
18269 return false;
18270
18271 regno0 = true_regnum (operands[0]);
18272 regno1 = INVALID_REGNUM;
18273 regno2 = INVALID_REGNUM;
18274
18275 if (parts.base)
18276 regno1 = true_regnum (parts.base);
18277 if (parts.index)
18278 regno2 = true_regnum (parts.index);
18279
18280 split_cost = 0;
18281
18282 /* Compute how many cycles we will add to the execution time
18283 if we split the lea into a sequence of instructions. */
18284 if (parts.base || parts.index)
18285 {
18286 /* Have to use a mov instruction if the non-destructive
18287 destination form is used. */
18288 if (regno1 != regno0 && regno2 != regno0)
18289 split_cost += 1;
18290
18291 /* Have to add index to base if both exist. */
18292 if (parts.base && parts.index)
18293 split_cost += 1;
18294
18295 /* Have to use shift and adds if scale is 2 or greater. */
18296 if (parts.scale > 1)
18297 {
18298 if (regno0 != regno1)
18299 split_cost += 1;
18300 else if (regno2 == regno0)
18301 split_cost += 4;
18302 else
18303 split_cost += parts.scale;
18304 }
18305
18306 /* Have to use an add instruction with an immediate if
18307 disp is nonzero. */
18308 if (parts.disp && parts.disp != const0_rtx)
18309 split_cost += 1;
18310
18311 /* Subtract the price of lea. */
18312 split_cost -= 1;
18313 }
18314
18315 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
18316 parts.scale > 1);
18317 }
18318
18319 /* Emit x86 binary operator CODE in mode MODE, where the first operand
18320 matches the destination. The emitted RTX includes a clobber of FLAGS_REG. */
18321
18322 static void
18323 ix86_emit_binop (enum rtx_code code, enum machine_mode mode,
18324 rtx dst, rtx src)
18325 {
18326 rtx op, clob;
18327
18328 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, dst, src));
18329 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18330
18331 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
18332 }
18333
18334 /* Return true if the definition of REGNO1 is nearer to INSN than that of REGNO2. */
18335
18336 static bool
18337 find_nearest_reg_def (rtx insn, int regno1, int regno2)
18338 {
18339 rtx prev = insn;
18340 rtx start = BB_HEAD (BLOCK_FOR_INSN (insn));
18341
18342 if (insn == start)
18343 return false;
18344 while (prev && prev != start)
18345 {
18346 if (!INSN_P (prev) || !NONDEBUG_INSN_P (prev))
18347 {
18348 prev = PREV_INSN (prev);
18349 continue;
18350 }
18351 if (insn_defines_reg (regno1, INVALID_REGNUM, prev))
18352 return true;
18353 else if (insn_defines_reg (regno2, INVALID_REGNUM, prev))
18354 return false;
18355 prev = PREV_INSN (prev);
18356 }
18357
18358 /* None of the regs is defined in the bb. */
18359 return false;
18360 }
18361
18362 /* Split a lea instruction into a sequence of instructions
18363 which are executed on the ALU to avoid AGU stalls.
18364 It is assumed that it is allowed to clobber the flags register
18365 at the lea position. */
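/* For example (register names are arbitrary), an insn such as

	lea	0x8(%rbx,%rcx,4), %rax

   is split, when %rax is distinct from %rbx and %rcx, into roughly

	mov	%rcx, %rax
	shl	$2, %rax
	add	%rbx, %rax
	add	$0x8, %rax  */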
18366
18367 void
18368 ix86_split_lea_for_addr (rtx insn, rtx operands[], enum machine_mode mode)
18369 {
18370 unsigned int regno0, regno1, regno2;
18371 struct ix86_address parts;
18372 rtx target, tmp;
18373 int ok, adds;
18374
18375 ok = ix86_decompose_address (operands[1], &parts);
18376 gcc_assert (ok);
18377
18378 target = gen_lowpart (mode, operands[0]);
18379
18380 regno0 = true_regnum (target);
18381 regno1 = INVALID_REGNUM;
18382 regno2 = INVALID_REGNUM;
18383
18384 if (parts.base)
18385 {
18386 parts.base = gen_lowpart (mode, parts.base);
18387 regno1 = true_regnum (parts.base);
18388 }
18389
18390 if (parts.index)
18391 {
18392 parts.index = gen_lowpart (mode, parts.index);
18393 regno2 = true_regnum (parts.index);
18394 }
18395
18396 if (parts.disp)
18397 parts.disp = gen_lowpart (mode, parts.disp);
18398
18399 if (parts.scale > 1)
18400 {
18401 /* Case r1 = r1 + ... */
18402 if (regno1 == regno0)
18403 {
18404 /* If we have the case r1 = r1 + C * r2 then we
18405 would need a multiplication, which is very
18406 expensive. Assume the cost model is wrong if we
18407 have such a case here. */
18408 gcc_assert (regno2 != regno0);
18409
18410 for (adds = parts.scale; adds > 0; adds--)
18411 ix86_emit_binop (PLUS, mode, target, parts.index);
18412 }
18413 else
18414 {
18415 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
18416 if (regno0 != regno2)
18417 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
18418
18419 /* Use shift for scaling. */
18420 ix86_emit_binop (ASHIFT, mode, target,
18421 GEN_INT (exact_log2 (parts.scale)));
18422
18423 if (parts.base)
18424 ix86_emit_binop (PLUS, mode, target, parts.base);
18425
18426 if (parts.disp && parts.disp != const0_rtx)
18427 ix86_emit_binop (PLUS, mode, target, parts.disp);
18428 }
18429 }
18430 else if (!parts.base && !parts.index)
18431 {
18432 gcc_assert (parts.disp);
18433 emit_insn (gen_rtx_SET (VOIDmode, target, parts.disp));
18434 }
18435 else
18436 {
18437 if (!parts.base)
18438 {
18439 if (regno0 != regno2)
18440 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
18441 }
18442 else if (!parts.index)
18443 {
18444 if (regno0 != regno1)
18445 emit_insn (gen_rtx_SET (VOIDmode, target, parts.base));
18446 }
18447 else
18448 {
18449 if (regno0 == regno1)
18450 tmp = parts.index;
18451 else if (regno0 == regno2)
18452 tmp = parts.base;
18453 else
18454 {
18455 rtx tmp1;
18456
18457 /* Find better operand for SET instruction, depending
18458 on which definition is farther from the insn. */
18459 if (find_nearest_reg_def (insn, regno1, regno2))
18460 tmp = parts.index, tmp1 = parts.base;
18461 else
18462 tmp = parts.base, tmp1 = parts.index;
18463
18464 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18465
18466 if (parts.disp && parts.disp != const0_rtx)
18467 ix86_emit_binop (PLUS, mode, target, parts.disp);
18468
18469 ix86_emit_binop (PLUS, mode, target, tmp1);
18470 return;
18471 }
18472
18473 ix86_emit_binop (PLUS, mode, target, tmp);
18474 }
18475
18476 if (parts.disp && parts.disp != const0_rtx)
18477 ix86_emit_binop (PLUS, mode, target, parts.disp);
18478 }
18479 }
18480
18481 /* Return true if it is OK to optimize an ADD operation into a LEA
18482 operation to avoid flag register consumption. For most processors,
18483 ADD is faster than LEA. For processors like BONNELL, if the
18484 destination register of the LEA holds an actual address which will be
18485 used soon, LEA is better; otherwise ADD is better. */
18486
18487 bool
18488 ix86_lea_for_add_ok (rtx insn, rtx operands[])
18489 {
18490 unsigned int regno0 = true_regnum (operands[0]);
18491 unsigned int regno1 = true_regnum (operands[1]);
18492 unsigned int regno2 = true_regnum (operands[2]);
18493
18494 /* If a = b + c, (a!=b && a!=c), must use lea form. */
18495 if (regno0 != regno1 && regno0 != regno2)
18496 return true;
18497
18498 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18499 return false;
18500
18501 return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
18502 }
18503
18504 /* Return true if destination reg of SET_BODY is shift count of
18505 USE_BODY. */
18506
18507 static bool
18508 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
18509 {
18510 rtx set_dest;
18511 rtx shift_rtx;
18512 int i;
18513
18514 /* Retrieve destination of SET_BODY. */
18515 switch (GET_CODE (set_body))
18516 {
18517 case SET:
18518 set_dest = SET_DEST (set_body);
18519 if (!set_dest || !REG_P (set_dest))
18520 return false;
18521 break;
18522 case PARALLEL:
18523 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
18524 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
18525 use_body))
18526 return true;
18527 default:
18528 return false;
18529 break;
18530 }
18531
18532 /* Retrieve shift count of USE_BODY. */
18533 switch (GET_CODE (use_body))
18534 {
18535 case SET:
18536 shift_rtx = XEXP (use_body, 1);
18537 break;
18538 case PARALLEL:
18539 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
18540 if (ix86_dep_by_shift_count_body (set_body,
18541 XVECEXP (use_body, 0, i)))
18542 return true;
18543 default:
18544 return false;
18545 break;
18546 }
18547
18548 if (shift_rtx
18549 && (GET_CODE (shift_rtx) == ASHIFT
18550 || GET_CODE (shift_rtx) == LSHIFTRT
18551 || GET_CODE (shift_rtx) == ASHIFTRT
18552 || GET_CODE (shift_rtx) == ROTATE
18553 || GET_CODE (shift_rtx) == ROTATERT))
18554 {
18555 rtx shift_count = XEXP (shift_rtx, 1);
18556
18557 /* Return true if shift count is dest of SET_BODY. */
18558 if (REG_P (shift_count))
18559 {
18560 /* Add check since it can be invoked before register
18561 allocation in pre-reload schedule. */
18562 if (reload_completed
18563 && true_regnum (set_dest) == true_regnum (shift_count))
18564 return true;
18565 else if (REGNO (set_dest) == REGNO (shift_count))
18566 return true;
18567 }
18568 }
18569
18570 return false;
18571 }
18572
18573 /* Return true if destination reg of SET_INSN is shift count of
18574 USE_INSN. */
18575
18576 bool
18577 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
18578 {
18579 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
18580 PATTERN (use_insn));
18581 }
18582
18583 /* Return TRUE or FALSE depending on whether the unary operator meets the
18584 appropriate constraints. */
18585
18586 bool
18587 ix86_unary_operator_ok (enum rtx_code,
18588 enum machine_mode,
18589 rtx operands[2])
18590 {
18591 /* If one of operands is memory, source and destination must match. */
18592 if ((MEM_P (operands[0])
18593 || MEM_P (operands[1]))
18594 && ! rtx_equal_p (operands[0], operands[1]))
18595 return false;
18596 return true;
18597 }
18598
18599 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
18600 are ok, keeping in mind the possible movddup alternative. */
18601
18602 bool
18603 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
18604 {
18605 if (MEM_P (operands[0]))
18606 return rtx_equal_p (operands[0], operands[1 + high]);
18607 if (MEM_P (operands[1]) && MEM_P (operands[2]))
18608 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
18609 return true;
18610 }
18611
18612 /* Post-reload splitter for converting an SF or DFmode value in an
18613 SSE register into an unsigned SImode. */
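/* The underlying idea, sketched with scalar values: inputs below 0x1p31
   are converted directly by the signed cvtt instruction; inputs at or
   above 0x1p31 first have 0x1p31 subtracted and then have bit 31 XORed
   back into the integer result.  E.g. 3000000000.0 - 2147483648.0 =
   852516352.0, (signed) 852516352, XOR 0x80000000 gives 3000000000.  */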
18614
18615 void
18616 ix86_split_convert_uns_si_sse (rtx operands[])
18617 {
18618 enum machine_mode vecmode;
18619 rtx value, large, zero_or_two31, input, two31, x;
18620
18621 large = operands[1];
18622 zero_or_two31 = operands[2];
18623 input = operands[3];
18624 two31 = operands[4];
18625 vecmode = GET_MODE (large);
18626 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
18627
18628 /* Load up the value into the low element. We must ensure that the other
18629 elements are valid floats -- zero is the easiest such value. */
18630 if (MEM_P (input))
18631 {
18632 if (vecmode == V4SFmode)
18633 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
18634 else
18635 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
18636 }
18637 else
18638 {
18639 input = gen_rtx_REG (vecmode, REGNO (input));
18640 emit_move_insn (value, CONST0_RTX (vecmode));
18641 if (vecmode == V4SFmode)
18642 emit_insn (gen_sse_movss (value, value, input));
18643 else
18644 emit_insn (gen_sse2_movsd (value, value, input));
18645 }
18646
18647 emit_move_insn (large, two31);
18648 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
18649
18650 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
18651 emit_insn (gen_rtx_SET (VOIDmode, large, x));
18652
18653 x = gen_rtx_AND (vecmode, zero_or_two31, large);
18654 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
18655
18656 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
18657 emit_insn (gen_rtx_SET (VOIDmode, value, x));
18658
18659 large = gen_rtx_REG (V4SImode, REGNO (large));
18660 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
18661
18662 x = gen_rtx_REG (V4SImode, REGNO (value));
18663 if (vecmode == V4SFmode)
18664 emit_insn (gen_fix_truncv4sfv4si2 (x, value));
18665 else
18666 emit_insn (gen_sse2_cvttpd2dq (x, value));
18667 value = x;
18668
18669 emit_insn (gen_xorv4si3 (value, value, large));
18670 }
18671
18672 /* Convert an unsigned DImode value into a DFmode, using only SSE.
18673 Expects the 64-bit DImode to be supplied in a pair of integral
18674 registers. Requires SSE2; will use SSE3 if available. For x86_32,
18675 -mfpmath=sse, !optimize_size only. */
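/* A sketch of the arithmetic (details in the comments below): viewing
   the input as hi:lo, the bit pattern 0x43300000:lo read as a double is
   exactly 0x1.0p52 + lo, and 0x45300000:hi is exactly
   0x1.0p84 + hi * 0x1.0p32.  Subtracting the 0x1.0p52 and 0x1.0p84
   biases and adding the two halves therefore yields hi * 2^32 + lo,
   the original unsigned 64-bit value.  */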
18676
18677 void
18678 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
18679 {
18680 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
18681 rtx int_xmm, fp_xmm;
18682 rtx biases, exponents;
18683 rtx x;
18684
18685 int_xmm = gen_reg_rtx (V4SImode);
18686 if (TARGET_INTER_UNIT_MOVES_TO_VEC)
18687 emit_insn (gen_movdi_to_sse (int_xmm, input));
18688 else if (TARGET_SSE_SPLIT_REGS)
18689 {
18690 emit_clobber (int_xmm);
18691 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
18692 }
18693 else
18694 {
18695 x = gen_reg_rtx (V2DImode);
18696 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
18697 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
18698 }
18699
18700 x = gen_rtx_CONST_VECTOR (V4SImode,
18701 gen_rtvec (4, GEN_INT (0x43300000UL),
18702 GEN_INT (0x45300000UL),
18703 const0_rtx, const0_rtx));
18704 exponents = validize_mem (force_const_mem (V4SImode, x));
18705
18706 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
18707 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
18708
18709 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_lo_xmm)
18710 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
18711 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
18712 (0x1.0p84 + double(fp_value_hi_xmm)).
18713 Note these exponents differ by 32. */
18714
18715 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
18716
18717 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
18718 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
18719 real_ldexp (&bias_lo_rvt, &dconst1, 52);
18720 real_ldexp (&bias_hi_rvt, &dconst1, 84);
18721 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
18722 x = const_double_from_real_value (bias_hi_rvt, DFmode);
18723 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
18724 biases = validize_mem (force_const_mem (V2DFmode, biases));
18725 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
18726
18727 /* Add the upper and lower DFmode values together. */
18728 if (TARGET_SSE3)
18729 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
18730 else
18731 {
18732 x = copy_to_mode_reg (V2DFmode, fp_xmm);
18733 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
18734 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
18735 }
18736
18737 ix86_expand_vector_extract (false, target, fp_xmm, 0);
18738 }
18739
18740 /* Not used, but eases macroization of patterns. */
18741 void
18742 ix86_expand_convert_uns_sixf_sse (rtx, rtx)
18743 {
18744 gcc_unreachable ();
18745 }
18746
18747 /* Convert an unsigned SImode value into a DFmode. Only currently used
18748 for SSE, but applicable anywhere. */
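/* Illustrative arithmetic: the input is biased by -2^31 as a 32-bit
   integer (wrapping into the signed range), converted with the ordinary
   signed SImode -> DFmode conversion, and then 2^31.0 is added back.
   E.g. 3000000000 - 2^31 = 852516352, and 852516352.0 + 2147483648.0
   = 3000000000.0.  */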
18749
18750 void
18751 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
18752 {
18753 REAL_VALUE_TYPE TWO31r;
18754 rtx x, fp;
18755
18756 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
18757 NULL, 1, OPTAB_DIRECT);
18758
18759 fp = gen_reg_rtx (DFmode);
18760 emit_insn (gen_floatsidf2 (fp, x));
18761
18762 real_ldexp (&TWO31r, &dconst1, 31);
18763 x = const_double_from_real_value (TWO31r, DFmode);
18764
18765 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
18766 if (x != target)
18767 emit_move_insn (target, x);
18768 }
18769
18770 /* Convert a signed DImode value into a DFmode. Only used for SSE in
18771 32-bit mode; otherwise we have a direct convert instruction. */
18772
18773 void
18774 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
18775 {
18776 REAL_VALUE_TYPE TWO32r;
18777 rtx fp_lo, fp_hi, x;
18778
18779 fp_lo = gen_reg_rtx (DFmode);
18780 fp_hi = gen_reg_rtx (DFmode);
18781
18782 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
18783
18784 real_ldexp (&TWO32r, &dconst1, 32);
18785 x = const_double_from_real_value (TWO32r, DFmode);
18786 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
18787
18788 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
18789
18790 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
18791 0, OPTAB_DIRECT);
18792 if (x != target)
18793 emit_move_insn (target, x);
18794 }
18795
18796 /* Convert an unsigned SImode value into a SFmode, using only SSE.
18797 For x86_32, -mfpmath=sse, !optimize_size only. */
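/* A sketch of the arithmetic: the input is split as hi * 0x10000 + lo
   with both halves in [0, 0xffff].  Each half converts to SFmode
   exactly, and hi * 0x1.0p16 is also exact, so only the final addition
   can round.  */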
18798 void
18799 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
18800 {
18801 REAL_VALUE_TYPE ONE16r;
18802 rtx fp_hi, fp_lo, int_hi, int_lo, x;
18803
18804 real_ldexp (&ONE16r, &dconst1, 16);
18805 x = const_double_from_real_value (ONE16r, SFmode);
18806 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
18807 NULL, 0, OPTAB_DIRECT);
18808 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
18809 NULL, 0, OPTAB_DIRECT);
18810 fp_hi = gen_reg_rtx (SFmode);
18811 fp_lo = gen_reg_rtx (SFmode);
18812 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
18813 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
18814 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
18815 0, OPTAB_DIRECT);
18816 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
18817 0, OPTAB_DIRECT);
18818 if (!rtx_equal_p (target, fp_hi))
18819 emit_move_insn (target, fp_hi);
18820 }
18821
18822 /* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
18823 a vector of unsigned ints VAL to vector of floats TARGET. */
18824
18825 void
18826 ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
18827 {
18828 rtx tmp[8];
18829 REAL_VALUE_TYPE TWO16r;
18830 enum machine_mode intmode = GET_MODE (val);
18831 enum machine_mode fltmode = GET_MODE (target);
18832 rtx (*cvt) (rtx, rtx);
18833
18834 if (intmode == V4SImode)
18835 cvt = gen_floatv4siv4sf2;
18836 else
18837 cvt = gen_floatv8siv8sf2;
18838 tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
18839 tmp[0] = force_reg (intmode, tmp[0]);
18840 tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
18841 OPTAB_DIRECT);
18842 tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
18843 NULL_RTX, 1, OPTAB_DIRECT);
18844 tmp[3] = gen_reg_rtx (fltmode);
18845 emit_insn (cvt (tmp[3], tmp[1]));
18846 tmp[4] = gen_reg_rtx (fltmode);
18847 emit_insn (cvt (tmp[4], tmp[2]));
18848 real_ldexp (&TWO16r, &dconst1, 16);
18849 tmp[5] = const_double_from_real_value (TWO16r, SFmode);
18850 tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
18851 tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
18852 OPTAB_DIRECT);
18853 tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
18854 OPTAB_DIRECT);
18855 if (tmp[7] != target)
18856 emit_move_insn (target, tmp[7]);
18857 }
18858
18859 /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
18860 pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
18861 This is done by doing just a signed conversion if < 0x1p31, and otherwise by
18862 subtracting 0x1p31 first and XORing in 0x80000000 from *XORP afterwards. */
18863
18864 rtx
18865 ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
18866 {
18867 REAL_VALUE_TYPE TWO31r;
18868 rtx two31r, tmp[4];
18869 enum machine_mode mode = GET_MODE (val);
18870 enum machine_mode scalarmode = GET_MODE_INNER (mode);
18871 enum machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
18872 rtx (*cmp) (rtx, rtx, rtx, rtx);
18873 int i;
18874
18875 for (i = 0; i < 3; i++)
18876 tmp[i] = gen_reg_rtx (mode);
18877 real_ldexp (&TWO31r, &dconst1, 31);
18878 two31r = const_double_from_real_value (TWO31r, scalarmode);
18879 two31r = ix86_build_const_vector (mode, 1, two31r);
18880 two31r = force_reg (mode, two31r);
18881 switch (mode)
18882 {
18883 case V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
18884 case V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
18885 case V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
18886 case V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
18887 default: gcc_unreachable ();
18888 }
18889 tmp[3] = gen_rtx_LE (mode, two31r, val);
18890 emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
18891 tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
18892 0, OPTAB_DIRECT);
18893 if (intmode == V4SImode || TARGET_AVX2)
18894 *xorp = expand_simple_binop (intmode, ASHIFT,
18895 gen_lowpart (intmode, tmp[0]),
18896 GEN_INT (31), NULL_RTX, 0,
18897 OPTAB_DIRECT);
18898 else
18899 {
18900 rtx two31 = GEN_INT ((unsigned HOST_WIDE_INT) 1 << 31);
18901 two31 = ix86_build_const_vector (intmode, 1, two31);
18902 *xorp = expand_simple_binop (intmode, AND,
18903 gen_lowpart (intmode, tmp[0]),
18904 two31, NULL_RTX, 0,
18905 OPTAB_DIRECT);
18906 }
18907 return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
18908 0, OPTAB_DIRECT);
18909 }
18910
18911 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
18912 then replicate the value for all elements of the vector
18913 register. */
18914
18915 rtx
18916 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
18917 {
18918 int i, n_elt;
18919 rtvec v;
18920 enum machine_mode scalar_mode;
18921
18922 switch (mode)
18923 {
18924 case V64QImode:
18925 case V32QImode:
18926 case V16QImode:
18927 case V32HImode:
18928 case V16HImode:
18929 case V8HImode:
18930 case V16SImode:
18931 case V8SImode:
18932 case V4SImode:
18933 case V8DImode:
18934 case V4DImode:
18935 case V2DImode:
18936 gcc_assert (vect);
18937 case V16SFmode:
18938 case V8SFmode:
18939 case V4SFmode:
18940 case V8DFmode:
18941 case V4DFmode:
18942 case V2DFmode:
18943 n_elt = GET_MODE_NUNITS (mode);
18944 v = rtvec_alloc (n_elt);
18945 scalar_mode = GET_MODE_INNER (mode);
18946
18947 RTVEC_ELT (v, 0) = value;
18948
18949 for (i = 1; i < n_elt; ++i)
18950 RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
18951
18952 return gen_rtx_CONST_VECTOR (mode, v);
18953
18954 default:
18955 gcc_unreachable ();
18956 }
18957 }
18958
18959 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
18960 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
18961 for an SSE register. If VECT is true, then replicate the mask for
18962 all elements of the vector register. If INVERT is true, then create
18963 a mask excluding the sign bit. */
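/* For example, for V4SFmode with VECT set and INVERT clear the mask
   built below has 0x80000000 in every lane (the bit pattern of -0.0f);
   with INVERT set each lane holds 0x7fffffff instead.  */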
18964
18965 rtx
18966 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
18967 {
18968 enum machine_mode vec_mode, imode;
18969 HOST_WIDE_INT hi, lo;
18970 int shift = 63;
18971 rtx v;
18972 rtx mask;
18973
18974 /* Find the sign bit, sign extended to 2*HWI. */
18975 switch (mode)
18976 {
18977 case V16SImode:
18978 case V16SFmode:
18979 case V8SImode:
18980 case V4SImode:
18981 case V8SFmode:
18982 case V4SFmode:
18983 vec_mode = mode;
18984 mode = GET_MODE_INNER (mode);
18985 imode = SImode;
18986 lo = 0x80000000, hi = lo < 0;
18987 break;
18988
18989 case V8DImode:
18990 case V4DImode:
18991 case V2DImode:
18992 case V8DFmode:
18993 case V4DFmode:
18994 case V2DFmode:
18995 vec_mode = mode;
18996 mode = GET_MODE_INNER (mode);
18997 imode = DImode;
18998 if (HOST_BITS_PER_WIDE_INT >= 64)
18999 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
19000 else
19001 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
19002 break;
19003
19004 case TImode:
19005 case TFmode:
19006 vec_mode = VOIDmode;
19007 if (HOST_BITS_PER_WIDE_INT >= 64)
19008 {
19009 imode = TImode;
19010 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
19011 }
19012 else
19013 {
19014 rtvec vec;
19015
19016 imode = DImode;
19017 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
19018
19019 if (invert)
19020 {
19021 lo = ~lo, hi = ~hi;
19022 v = constm1_rtx;
19023 }
19024 else
19025 v = const0_rtx;
19026
19027 mask = immed_double_const (lo, hi, imode);
19028
19029 vec = gen_rtvec (2, v, mask);
19030 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
19031 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
19032
19033 return v;
19034 }
19035 break;
19036
19037 default:
19038 gcc_unreachable ();
19039 }
19040
19041 if (invert)
19042 lo = ~lo, hi = ~hi;
19043
19044 /* Force this value into the low part of a fp vector constant. */
19045 mask = immed_double_const (lo, hi, imode);
19046 mask = gen_lowpart (mode, mask);
19047
19048 if (vec_mode == VOIDmode)
19049 return force_reg (mode, mask);
19050
19051 v = ix86_build_const_vector (vec_mode, vect, mask);
19052 return force_reg (vec_mode, v);
19053 }
19054
19055 /* Generate code for floating point ABS or NEG. */
19056
19057 void
19058 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
19059 rtx operands[])
19060 {
19061 rtx mask, set, dst, src;
19062 bool use_sse = false;
19063 bool vector_mode = VECTOR_MODE_P (mode);
19064 enum machine_mode vmode = mode;
19065
19066 if (vector_mode)
19067 use_sse = true;
19068 else if (mode == TFmode)
19069 use_sse = true;
19070 else if (TARGET_SSE_MATH)
19071 {
19072 use_sse = SSE_FLOAT_MODE_P (mode);
19073 if (mode == SFmode)
19074 vmode = V4SFmode;
19075 else if (mode == DFmode)
19076 vmode = V2DFmode;
19077 }
19078
19079 /* NEG and ABS performed with SSE use bitwise mask operations.
19080 Create the appropriate mask now. */
19081 if (use_sse)
19082 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
19083 else
19084 mask = NULL_RTX;
19085
19086 dst = operands[0];
19087 src = operands[1];
19088
19089 set = gen_rtx_fmt_e (code, mode, src);
19090 set = gen_rtx_SET (VOIDmode, dst, set);
19091
19092 if (mask)
19093 {
19094 rtx use, clob;
19095 rtvec par;
19096
19097 use = gen_rtx_USE (VOIDmode, mask);
19098 if (vector_mode)
19099 par = gen_rtvec (2, set, use);
19100 else
19101 {
19102 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
19103 par = gen_rtvec (3, set, use, clob);
19104 }
19105 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
19106 }
19107 else
19108 emit_insn (set);
19109 }
19110
19111 /* Expand a copysign operation. Special case operand 0 being a constant. */
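/* In bit terms the operation computed is

     dest = (op0 & ~signbit_mask) | (op1 & signbit_mask)

   i.e. the magnitude of op0 combined with the sign of op1.  The masks
   built here let the splitters below emit plain AND/ANDNOT/OR ops.  */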
19112
19113 void
19114 ix86_expand_copysign (rtx operands[])
19115 {
19116 enum machine_mode mode, vmode;
19117 rtx dest, op0, op1, mask, nmask;
19118
19119 dest = operands[0];
19120 op0 = operands[1];
19121 op1 = operands[2];
19122
19123 mode = GET_MODE (dest);
19124
19125 if (mode == SFmode)
19126 vmode = V4SFmode;
19127 else if (mode == DFmode)
19128 vmode = V2DFmode;
19129 else
19130 vmode = mode;
19131
19132 if (GET_CODE (op0) == CONST_DOUBLE)
19133 {
19134 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
19135
19136 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
19137 op0 = simplify_unary_operation (ABS, mode, op0, mode);
19138
19139 if (mode == SFmode || mode == DFmode)
19140 {
19141 if (op0 == CONST0_RTX (mode))
19142 op0 = CONST0_RTX (vmode);
19143 else
19144 {
19145 rtx v = ix86_build_const_vector (vmode, false, op0);
19146
19147 op0 = force_reg (vmode, v);
19148 }
19149 }
19150 else if (op0 != CONST0_RTX (mode))
19151 op0 = force_reg (mode, op0);
19152
19153 mask = ix86_build_signbit_mask (vmode, 0, 0);
19154
19155 if (mode == SFmode)
19156 copysign_insn = gen_copysignsf3_const;
19157 else if (mode == DFmode)
19158 copysign_insn = gen_copysigndf3_const;
19159 else
19160 copysign_insn = gen_copysigntf3_const;
19161
19162 emit_insn (copysign_insn (dest, op0, op1, mask));
19163 }
19164 else
19165 {
19166 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
19167
19168 nmask = ix86_build_signbit_mask (vmode, 0, 1);
19169 mask = ix86_build_signbit_mask (vmode, 0, 0);
19170
19171 if (mode == SFmode)
19172 copysign_insn = gen_copysignsf3_var;
19173 else if (mode == DFmode)
19174 copysign_insn = gen_copysigndf3_var;
19175 else
19176 copysign_insn = gen_copysigntf3_var;
19177
19178 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
19179 }
19180 }
19181
19182 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
19183 be a constant, and so has already been expanded into a vector constant. */
19184
19185 void
19186 ix86_split_copysign_const (rtx operands[])
19187 {
19188 enum machine_mode mode, vmode;
19189 rtx dest, op0, mask, x;
19190
19191 dest = operands[0];
19192 op0 = operands[1];
19193 mask = operands[3];
19194
19195 mode = GET_MODE (dest);
19196 vmode = GET_MODE (mask);
19197
19198 dest = simplify_gen_subreg (vmode, dest, mode, 0);
19199 x = gen_rtx_AND (vmode, dest, mask);
19200 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19201
19202 if (op0 != CONST0_RTX (vmode))
19203 {
19204 x = gen_rtx_IOR (vmode, dest, op0);
19205 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19206 }
19207 }
19208
19209 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
19210 so we have to do two masks. */
19211
19212 void
19213 ix86_split_copysign_var (rtx operands[])
19214 {
19215 enum machine_mode mode, vmode;
19216 rtx dest, scratch, op0, op1, mask, nmask, x;
19217
19218 dest = operands[0];
19219 scratch = operands[1];
19220 op0 = operands[2];
19221 op1 = operands[3];
19222 nmask = operands[4];
19223 mask = operands[5];
19224
19225 mode = GET_MODE (dest);
19226 vmode = GET_MODE (mask);
19227
19228 if (rtx_equal_p (op0, op1))
19229 {
19230 /* Shouldn't happen often (it's useless, obviously), but when it does
19231 we'd generate incorrect code if we continue below. */
19232 emit_move_insn (dest, op0);
19233 return;
19234 }
19235
19236 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
19237 {
19238 gcc_assert (REGNO (op1) == REGNO (scratch));
19239
19240 x = gen_rtx_AND (vmode, scratch, mask);
19241 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
19242
19243 dest = mask;
19244 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19245 x = gen_rtx_NOT (vmode, dest);
19246 x = gen_rtx_AND (vmode, x, op0);
19247 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19248 }
19249 else
19250 {
19251 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
19252 {
19253 x = gen_rtx_AND (vmode, scratch, mask);
19254 }
19255 else /* alternative 2,4 */
19256 {
19257 gcc_assert (REGNO (mask) == REGNO (scratch));
19258 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
19259 x = gen_rtx_AND (vmode, scratch, op1);
19260 }
19261 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
19262
19263 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
19264 {
19265 dest = simplify_gen_subreg (vmode, op0, mode, 0);
19266 x = gen_rtx_AND (vmode, dest, nmask);
19267 }
19268 else /* alternative 3,4 */
19269 {
19270 gcc_assert (REGNO (nmask) == REGNO (dest));
19271 dest = nmask;
19272 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19273 x = gen_rtx_AND (vmode, dest, op0);
19274 }
19275 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19276 }
19277
19278 x = gen_rtx_IOR (vmode, dest, scratch);
19279 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19280 }
19281
19282 /* Return TRUE or FALSE depending on whether the first SET in INSN
19283 has source and destination with matching CC modes, and that the
19284 CC mode is at least as constrained as REQ_MODE. */
19285
19286 bool
19287 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
19288 {
19289 rtx set;
19290 enum machine_mode set_mode;
19291
19292 set = PATTERN (insn);
19293 if (GET_CODE (set) == PARALLEL)
19294 set = XVECEXP (set, 0, 0);
19295 gcc_assert (GET_CODE (set) == SET);
19296 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
19297
19298 set_mode = GET_MODE (SET_DEST (set));
19299 switch (set_mode)
19300 {
19301 case CCNOmode:
19302 if (req_mode != CCNOmode
19303 && (req_mode != CCmode
19304 || XEXP (SET_SRC (set), 1) != const0_rtx))
19305 return false;
19306 break;
19307 case CCmode:
19308 if (req_mode == CCGCmode)
19309 return false;
19310 /* FALLTHRU */
19311 case CCGCmode:
19312 if (req_mode == CCGOCmode || req_mode == CCNOmode)
19313 return false;
19314 /* FALLTHRU */
19315 case CCGOCmode:
19316 if (req_mode == CCZmode)
19317 return false;
19318 /* FALLTHRU */
19319 case CCZmode:
19320 break;
19321
19322 case CCAmode:
19323 case CCCmode:
19324 case CCOmode:
19325 case CCSmode:
19326 if (set_mode != req_mode)
19327 return false;
19328 break;
19329
19330 default:
19331 gcc_unreachable ();
19332 }
19333
19334 return GET_MODE (SET_SRC (set)) == set_mode;
19335 }
19336
19337 /* Generate insn patterns to do an integer compare of OPERANDS. */
19338
19339 static rtx
19340 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
19341 {
19342 enum machine_mode cmpmode;
19343 rtx tmp, flags;
19344
19345 cmpmode = SELECT_CC_MODE (code, op0, op1);
19346 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
19347
19348 /* This is very simple, but making the interface the same as in the
19349 FP case makes the rest of the code easier. */
19350 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
19351 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
19352
19353 /* Return the test that should be put into the flags user, i.e.
19354 the bcc, scc, or cmov instruction. */
19355 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
19356 }
19357
19358 /* Figure out whether to use ordered or unordered fp comparisons.
19359 Return the appropriate mode to use. */
19360
19361 enum machine_mode
19362 ix86_fp_compare_mode (enum rtx_code)
19363 {
19364 /* ??? In order to make all comparisons reversible, we do all comparisons
19365 non-trapping when compiling for IEEE. Once gcc is able to distinguish
19366 between all forms of trapping and nontrapping comparisons, we can make inequality
19367 comparisons trapping again, since it results in better code when using
19368 FCOM based compares. */
19369 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
19370 }
19371
19372 enum machine_mode
19373 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
19374 {
19375 enum machine_mode mode = GET_MODE (op0);
19376
19377 if (SCALAR_FLOAT_MODE_P (mode))
19378 {
19379 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
19380 return ix86_fp_compare_mode (code);
19381 }
19382
19383 switch (code)
19384 {
19385 /* Only zero flag is needed. */
19386 case EQ: /* ZF=0 */
19387 case NE: /* ZF!=0 */
19388 return CCZmode;
19389 /* Codes needing carry flag. */
19390 case GEU: /* CF=0 */
19391 case LTU: /* CF=1 */
19392 /* Detect overflow checks. They need just the carry flag. */
19393 if (GET_CODE (op0) == PLUS
19394 && rtx_equal_p (op1, XEXP (op0, 0)))
19395 return CCCmode;
19396 else
19397 return CCmode;
19398 case GTU: /* CF=0 & ZF=0 */
19399 case LEU: /* CF=1 | ZF=1 */
19400 return CCmode;
19401 /* Codes possibly doable only with sign flag when
19402 comparing against zero. */
19403 case GE: /* SF=OF or SF=0 */
19404 case LT: /* SF<>OF or SF=1 */
19405 if (op1 == const0_rtx)
19406 return CCGOCmode;
19407 else
19408 /* For other cases Carry flag is not required. */
19409 return CCGCmode;
19410 /* Codes doable only with the sign flag when comparing
19411 against zero, but we lack a jump instruction for it,
19412 so we need to use relational tests against overflow,
19413 which thus needs to be zero. */
19414 case GT: /* ZF=0 & SF=OF */
19415 case LE: /* ZF=1 | SF<>OF */
19416 if (op1 == const0_rtx)
19417 return CCNOmode;
19418 else
19419 return CCGCmode;
19420 /* The strcmp pattern does (use flags) and combine may ask us for the
19421 proper mode. */
19422 case USE:
19423 return CCmode;
19424 default:
19425 gcc_unreachable ();
19426 }
19427 }
19428
19429 /* Return the fixed registers used for condition codes. */
19430
19431 static bool
19432 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
19433 {
19434 *p1 = FLAGS_REG;
19435 *p2 = FPSR_REG;
19436 return true;
19437 }
19438
19439 /* If two condition code modes are compatible, return a condition code
19440 mode which is compatible with both. Otherwise, return
19441 VOIDmode. */
19442
19443 static enum machine_mode
19444 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
19445 {
19446 if (m1 == m2)
19447 return m1;
19448
19449 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
19450 return VOIDmode;
19451
19452 if ((m1 == CCGCmode && m2 == CCGOCmode)
19453 || (m1 == CCGOCmode && m2 == CCGCmode))
19454 return CCGCmode;
19455
19456 if (m1 == CCZmode && (m2 == CCGCmode || m2 == CCGOCmode))
19457 return m2;
19458 else if (m2 == CCZmode && (m1 == CCGCmode || m1 == CCGOCmode))
19459 return m1;
19460
19461 switch (m1)
19462 {
19463 default:
19464 gcc_unreachable ();
19465
19466 case CCmode:
19467 case CCGCmode:
19468 case CCGOCmode:
19469 case CCNOmode:
19470 case CCAmode:
19471 case CCCmode:
19472 case CCOmode:
19473 case CCSmode:
19474 case CCZmode:
19475 switch (m2)
19476 {
19477 default:
19478 return VOIDmode;
19479
19480 case CCmode:
19481 case CCGCmode:
19482 case CCGOCmode:
19483 case CCNOmode:
19484 case CCAmode:
19485 case CCCmode:
19486 case CCOmode:
19487 case CCSmode:
19488 case CCZmode:
19489 return CCmode;
19490 }
19491
19492 case CCFPmode:
19493 case CCFPUmode:
19494 /* These are only compatible with themselves, which we already
19495 checked above. */
19496 return VOIDmode;
19497 }
19498 }
19499
19500
19501 /* Return a comparison we can do that is equivalent to
19502 swap_condition (code), apart possibly from orderedness.
19503 Never change orderedness if TARGET_IEEE_FP, returning
19504 UNKNOWN in that case if necessary. */
19505
19506 static enum rtx_code
19507 ix86_fp_swap_condition (enum rtx_code code)
19508 {
19509 switch (code)
19510 {
19511 case GT: /* GTU - CF=0 & ZF=0 */
19512 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
19513 case GE: /* GEU - CF=0 */
19514 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
19515 case UNLT: /* LTU - CF=1 */
19516 return TARGET_IEEE_FP ? UNKNOWN : GT;
19517 case UNLE: /* LEU - CF=1 | ZF=1 */
19518 return TARGET_IEEE_FP ? UNKNOWN : GE;
19519 default:
19520 return swap_condition (code);
19521 }
19522 }
19523
19524 /* Return the cost of comparison CODE using the best strategy for performance.
19525 All following functions use the number of instructions as the cost metric.
19526 In the future this should be tweaked to compute bytes for optimize_size and
19527 take into account performance of various instructions on various CPUs. */
19528
19529 static int
19530 ix86_fp_comparison_cost (enum rtx_code code)
19531 {
19532 int arith_cost;
19533
19534 /* The cost of code using bit-twiddling on %ah. */
19535 switch (code)
19536 {
19537 case UNLE:
19538 case UNLT:
19539 case LTGT:
19540 case GT:
19541 case GE:
19542 case UNORDERED:
19543 case ORDERED:
19544 case UNEQ:
19545 arith_cost = 4;
19546 break;
19547 case LT:
19548 case NE:
19549 case EQ:
19550 case UNGE:
19551 arith_cost = TARGET_IEEE_FP ? 5 : 4;
19552 break;
19553 case LE:
19554 case UNGT:
19555 arith_cost = TARGET_IEEE_FP ? 6 : 4;
19556 break;
19557 default:
19558 gcc_unreachable ();
19559 }
19560
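  /* For example, UNGT with TARGET_IEEE_FP has arith_cost 6, so the fcomi
     sequence (cost 3) or fnstsw+sahf (cost 4) wins whenever available.  */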
19561 switch (ix86_fp_comparison_strategy (code))
19562 {
19563 case IX86_FPCMP_COMI:
19564 return arith_cost > 4 ? 3 : 2;
19565 case IX86_FPCMP_SAHF:
19566 return arith_cost > 4 ? 4 : 3;
19567 default:
19568 return arith_cost;
19569 }
19570 }
19571
19572 /* Return the strategy to use for a floating-point comparison.  We assume
19573    that fcomi is always preferable where available, since that is also true
19574    when looking at size (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test).  */
19575
19576 enum ix86_fpcmp_strategy
19577 ix86_fp_comparison_strategy (enum rtx_code)
19578 {
19579 /* Do fcomi/sahf based test when profitable. */
19580
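  /* FCOMI/FUCOMI were introduced together with CMOV on the P6 family, so
     TARGET_CMOVE is also the right gate for their availability.  */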
19581 if (TARGET_CMOVE)
19582 return IX86_FPCMP_COMI;
19583
19584 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
19585 return IX86_FPCMP_SAHF;
19586
19587 return IX86_FPCMP_ARITH;
19588 }
19589
19590 /* Swap, force into registers, or otherwise massage the two operands
19591 to a fp comparison. The operands are updated in place; the new
19592 comparison code is returned. */
19593
19594 static enum rtx_code
19595 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
19596 {
19597 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
19598 rtx op0 = *pop0, op1 = *pop1;
19599 enum machine_mode op_mode = GET_MODE (op0);
19600 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
19601
19602 /* All of the unordered compare instructions only work on registers.
19603 The same is true of the fcomi compare instructions. The XFmode
19604 compare instructions require registers except when comparing
19605 against zero or when converting operand 1 from fixed point to
19606 floating point. */
19607
19608 if (!is_sse
19609 && (fpcmp_mode == CCFPUmode
19610 || (op_mode == XFmode
19611 && ! (standard_80387_constant_p (op0) == 1
19612 || standard_80387_constant_p (op1) == 1)
19613 && GET_CODE (op1) != FLOAT)
19614 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
19615 {
19616 op0 = force_reg (op_mode, op0);
19617 op1 = force_reg (op_mode, op1);
19618 }
19619 else
19620 {
19621 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
19622 things around if they appear profitable, otherwise force op0
19623 into a register. */
19624
19625 if (standard_80387_constant_p (op0) == 0
19626 || (MEM_P (op0)
19627 && ! (standard_80387_constant_p (op1) == 0
19628 || MEM_P (op1))))
19629 {
19630 enum rtx_code new_code = ix86_fp_swap_condition (code);
19631 if (new_code != UNKNOWN)
19632 {
19633 rtx tmp;
19634 tmp = op0, op0 = op1, op1 = tmp;
19635 code = new_code;
19636 }
19637 }
19638
19639 if (!REG_P (op0))
19640 op0 = force_reg (op_mode, op0);
19641
19642 if (CONSTANT_P (op1))
19643 {
19644 int tmp = standard_80387_constant_p (op1);
19645 if (tmp == 0)
19646 op1 = validize_mem (force_const_mem (op_mode, op1));
19647 else if (tmp == 1)
19648 {
19649 if (TARGET_CMOVE)
19650 op1 = force_reg (op_mode, op1);
19651 }
19652 else
19653 op1 = force_reg (op_mode, op1);
19654 }
19655 }
19656
19657 /* Try to rearrange the comparison to make it cheaper. */
19658 if (ix86_fp_comparison_cost (code)
19659 > ix86_fp_comparison_cost (swap_condition (code))
19660 && (REG_P (op1) || can_create_pseudo_p ()))
19661 {
19662 rtx tmp;
19663 tmp = op0, op0 = op1, op1 = tmp;
19664 code = swap_condition (code);
19665 if (!REG_P (op0))
19666 op0 = force_reg (op_mode, op0);
19667 }
19668
19669 *pop0 = op0;
19670 *pop1 = op1;
19671 return code;
19672 }
19673
19674 /* Convert the comparison codes we use to represent an FP comparison to the
19675    integer code that will result in a proper branch.  Return UNKNOWN if no
19676    such code is available.  */
19677
19678 enum rtx_code
19679 ix86_fp_compare_code_to_integer (enum rtx_code code)
19680 {
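  /* After fcomi or fnstsw+sahf, the x87 result lands in ZF, PF and CF just
     as an unsigned integer compare would set them (unordered sets all
     three), so the ordered FP codes map onto the unsigned codes below.  */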
19681 switch (code)
19682 {
19683 case GT:
19684 return GTU;
19685 case GE:
19686 return GEU;
19687 case ORDERED:
19688 case UNORDERED:
19689 return code;
19690 break;
19691 case UNEQ:
19692 return EQ;
19693 break;
19694 case UNLT:
19695 return LTU;
19696 break;
19697 case UNLE:
19698 return LEU;
19699 break;
19700 case LTGT:
19701 return NE;
19702 break;
19703 default:
19704 return UNKNOWN;
19705 }
19706 }
19707
19708 /* Generate insn patterns to do a floating point compare of OPERANDS. */
19709
19710 static rtx
19711 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
19712 {
19713 enum machine_mode fpcmp_mode, intcmp_mode;
19714 rtx tmp, tmp2;
19715
19716 fpcmp_mode = ix86_fp_compare_mode (code);
19717 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
19718
19719 /* Do fcomi/sahf based test when profitable. */
19720 switch (ix86_fp_comparison_strategy (code))
19721 {
19722 case IX86_FPCMP_COMI:
19723 intcmp_mode = fpcmp_mode;
19724 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
19725 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
19726 tmp);
19727 emit_insn (tmp);
19728 break;
19729
19730 case IX86_FPCMP_SAHF:
19731 intcmp_mode = fpcmp_mode;
19732 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
19733 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
19734 tmp);
19735
19736 if (!scratch)
19737 scratch = gen_reg_rtx (HImode);
19738 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
19739 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
19740 break;
19741
19742 case IX86_FPCMP_ARITH:
19743 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
19744 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
19745 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
19746 if (!scratch)
19747 scratch = gen_reg_rtx (HImode);
19748 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
19749
19750 /* In the unordered case, we have to check C2 for NaN's, which
19751 doesn't happen to work out to anything nice combination-wise.
19752 So do some bit twiddling on the value we've got in AH to come
19753 up with an appropriate set of condition codes. */
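      /* For reference: after fnstsw the x87 status flags end up in AH with
	 C0 = 0x01, C2 = 0x04 and C3 = 0x40, which is where the 0x45, 0x44,
	 0x05, 0x40, 0x04 and 0x01 masks below come from.  fcom sets
	 C3 C2 C0 to 000 for >, 001 for <, 100 for = and 111 for unordered.  */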
19754
19755 intcmp_mode = CCNOmode;
19756 switch (code)
19757 {
19758 case GT:
19759 case UNGT:
19760 if (code == GT || !TARGET_IEEE_FP)
19761 {
19762 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
19763 code = EQ;
19764 }
19765 else
19766 {
19767 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19768 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
19769 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
19770 intcmp_mode = CCmode;
19771 code = GEU;
19772 }
19773 break;
19774 case LT:
19775 case UNLT:
19776 if (code == LT && TARGET_IEEE_FP)
19777 {
19778 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19779 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
19780 intcmp_mode = CCmode;
19781 code = EQ;
19782 }
19783 else
19784 {
19785 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
19786 code = NE;
19787 }
19788 break;
19789 case GE:
19790 case UNGE:
19791 if (code == GE || !TARGET_IEEE_FP)
19792 {
19793 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
19794 code = EQ;
19795 }
19796 else
19797 {
19798 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19799 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
19800 code = NE;
19801 }
19802 break;
19803 case LE:
19804 case UNLE:
19805 if (code == LE && TARGET_IEEE_FP)
19806 {
19807 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19808 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
19809 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
19810 intcmp_mode = CCmode;
19811 code = LTU;
19812 }
19813 else
19814 {
19815 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
19816 code = NE;
19817 }
19818 break;
19819 case EQ:
19820 case UNEQ:
19821 if (code == EQ && TARGET_IEEE_FP)
19822 {
19823 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19824 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
19825 intcmp_mode = CCmode;
19826 code = EQ;
19827 }
19828 else
19829 {
19830 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
19831 code = NE;
19832 }
19833 break;
19834 case NE:
19835 case LTGT:
19836 if (code == NE && TARGET_IEEE_FP)
19837 {
19838 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19839 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
19840 GEN_INT (0x40)));
19841 code = NE;
19842 }
19843 else
19844 {
19845 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
19846 code = EQ;
19847 }
19848 break;
19849
19850 case UNORDERED:
19851 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
19852 code = NE;
19853 break;
19854 case ORDERED:
19855 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
19856 code = EQ;
19857 break;
19858
19859 default:
19860 gcc_unreachable ();
19861 }
19862 break;
19863
19864 default:
19865 gcc_unreachable();
19866 }
19867
19868 /* Return the test that should be put into the flags user, i.e.
19869 the bcc, scc, or cmov instruction. */
19870 return gen_rtx_fmt_ee (code, VOIDmode,
19871 gen_rtx_REG (intcmp_mode, FLAGS_REG),
19872 const0_rtx);
19873 }
19874
19875 static rtx
19876 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
19877 {
19878 rtx ret;
19879
19880 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
19881 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
19882
19883 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
19884 {
19885 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
19886 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
19887 }
19888 else
19889 ret = ix86_expand_int_compare (code, op0, op1);
19890
19891 return ret;
19892 }
19893
19894 void
19895 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
19896 {
19897 enum machine_mode mode = GET_MODE (op0);
19898 rtx tmp;
19899
19900 switch (mode)
19901 {
19902 case SFmode:
19903 case DFmode:
19904 case XFmode:
19905 case QImode:
19906 case HImode:
19907 case SImode:
19908 simple:
19909 tmp = ix86_expand_compare (code, op0, op1);
19910 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
19911 gen_rtx_LABEL_REF (VOIDmode, label),
19912 pc_rtx);
19913 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
19914 return;
19915
19916 case DImode:
19917 if (TARGET_64BIT)
19918 goto simple;
19919 case TImode:
19920 /* Expand DImode branch into multiple compare+branch. */
19921 {
19922 rtx lo[2], hi[2], label2;
19923 enum rtx_code code1, code2, code3;
19924 enum machine_mode submode;
19925
19926 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
19927 {
19928 tmp = op0, op0 = op1, op1 = tmp;
19929 code = swap_condition (code);
19930 }
19931
19932 split_double_mode (mode, &op0, 1, lo+0, hi+0);
19933 split_double_mode (mode, &op1, 1, lo+1, hi+1);
19934
19935 submode = mode == DImode ? SImode : DImode;
19936
19937 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
19938 avoid two branches. This costs one extra insn, so disable when
19939 optimizing for size. */
19940
19941 if ((code == EQ || code == NE)
19942 && (!optimize_insn_for_size_p ()
19943 || hi[1] == const0_rtx || lo[1] == const0_rtx))
19944 {
19945 rtx xor0, xor1;
19946
19947 xor1 = hi[0];
19948 if (hi[1] != const0_rtx)
19949 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
19950 NULL_RTX, 0, OPTAB_WIDEN);
19951
19952 xor0 = lo[0];
19953 if (lo[1] != const0_rtx)
19954 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
19955 NULL_RTX, 0, OPTAB_WIDEN);
19956
19957 tmp = expand_binop (submode, ior_optab, xor1, xor0,
19958 NULL_RTX, 0, OPTAB_WIDEN);
19959
19960 ix86_expand_branch (code, tmp, const0_rtx, label);
19961 return;
19962 }
19963
19964 /* Otherwise, if we are doing less-than or greater-or-equal-than,
19965 op1 is a constant and the low word is zero, then we can just
19966 examine the high word. Similarly for low word -1 and
19967 less-or-equal-than or greater-than. */
19968
19969 if (CONST_INT_P (hi[1]))
19970 switch (code)
19971 {
19972 case LT: case LTU: case GE: case GEU:
19973 if (lo[1] == const0_rtx)
19974 {
19975 ix86_expand_branch (code, hi[0], hi[1], label);
19976 return;
19977 }
19978 break;
19979 case LE: case LEU: case GT: case GTU:
19980 if (lo[1] == constm1_rtx)
19981 {
19982 ix86_expand_branch (code, hi[0], hi[1], label);
19983 return;
19984 }
19985 break;
19986 default:
19987 break;
19988 }
19989
19990 /* Otherwise, we need two or three jumps. */
19991
19992 label2 = gen_label_rtx ();
19993
19994 code1 = code;
19995 code2 = swap_condition (code);
19996 code3 = unsigned_condition (code);
19997
19998 switch (code)
19999 {
20000 case LT: case GT: case LTU: case GTU:
20001 break;
20002
20003 case LE: code1 = LT; code2 = GT; break;
20004 case GE: code1 = GT; code2 = LT; break;
20005 case LEU: code1 = LTU; code2 = GTU; break;
20006 case GEU: code1 = GTU; code2 = LTU; break;
20007
20008 case EQ: code1 = UNKNOWN; code2 = NE; break;
20009 case NE: code2 = UNKNOWN; break;
20010
20011 default:
20012 gcc_unreachable ();
20013 }
20014
20015 /*
20016 * a < b =>
20017 * if (hi(a) < hi(b)) goto true;
20018 * if (hi(a) > hi(b)) goto false;
20019 * if (lo(a) < lo(b)) goto true;
20020 * false:
20021 */
20022
20023 if (code1 != UNKNOWN)
20024 ix86_expand_branch (code1, hi[0], hi[1], label);
20025 if (code2 != UNKNOWN)
20026 ix86_expand_branch (code2, hi[0], hi[1], label2);
20027
20028 ix86_expand_branch (code3, lo[0], lo[1], label);
20029
20030 if (code2 != UNKNOWN)
20031 emit_label (label2);
20032 return;
20033 }
20034
20035 default:
20036 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
20037 goto simple;
20038 }
20039 }
20040
20041 /* Split branch based on floating point condition. */
20042 void
20043 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
20044 rtx target1, rtx target2, rtx tmp)
20045 {
20046 rtx condition;
20047 rtx i;
20048
20049 if (target2 != pc_rtx)
20050 {
20051 rtx tmp = target2;
20052 code = reverse_condition_maybe_unordered (code);
20053 target2 = target1;
20054 target1 = tmp;
20055 }
20056
20057 condition = ix86_expand_fp_compare (code, op1, op2,
20058 tmp);
20059
20060 i = emit_jump_insn (gen_rtx_SET
20061 (VOIDmode, pc_rtx,
20062 gen_rtx_IF_THEN_ELSE (VOIDmode,
20063 condition, target1, target2)));
20064 if (split_branch_probability >= 0)
20065 add_int_reg_note (i, REG_BR_PROB, split_branch_probability);
20066 }
20067
20068 void
20069 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
20070 {
20071 rtx ret;
20072
20073 gcc_assert (GET_MODE (dest) == QImode);
20074
20075 ret = ix86_expand_compare (code, op0, op1);
20076 PUT_MODE (ret, QImode);
20077 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
20078 }
20079
20080 /* Expand comparison setting or clearing carry flag. Return true when
20081 successful and set pop for the operation. */
20082 static bool
20083 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
20084 {
20085 enum machine_mode mode =
20086 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
20087
20088   /* Do not handle double-mode compares that go through the special path.  */
20089 if (mode == (TARGET_64BIT ? TImode : DImode))
20090 return false;
20091
20092 if (SCALAR_FLOAT_MODE_P (mode))
20093 {
20094 rtx compare_op, compare_seq;
20095
20096 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
20097
20098       /* Shortcut: the following common codes never translate
20099          into carry-flag compares.  */
20100 if (code == EQ || code == NE || code == UNEQ || code == LTGT
20101 || code == ORDERED || code == UNORDERED)
20102 return false;
20103
20104       /* These comparisons require the zero flag; swap the operands so that they no longer do.  */
20105 if ((code == GT || code == UNLE || code == LE || code == UNGT)
20106 && !TARGET_IEEE_FP)
20107 {
20108 rtx tmp = op0;
20109 op0 = op1;
20110 op1 = tmp;
20111 code = swap_condition (code);
20112 }
20113
20114       /* Try to expand the comparison and verify that we end up with
20115          a carry-flag-based comparison.  This fails only when we decide
20116          to expand the comparison using arithmetic, which is not a
20117          common scenario.  */
20118 start_sequence ();
20119 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
20120 compare_seq = get_insns ();
20121 end_sequence ();
20122
20123 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
20124 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
20125 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
20126 else
20127 code = GET_CODE (compare_op);
20128
20129 if (code != LTU && code != GEU)
20130 return false;
20131
20132 emit_insn (compare_seq);
20133 *pop = compare_op;
20134 return true;
20135 }
20136
20137 if (!INTEGRAL_MODE_P (mode))
20138 return false;
20139
20140 switch (code)
20141 {
20142 case LTU:
20143 case GEU:
20144 break;
20145
20146 /* Convert a==0 into (unsigned)a<1. */
20147 case EQ:
20148 case NE:
20149 if (op1 != const0_rtx)
20150 return false;
20151 op1 = const1_rtx;
20152 code = (code == EQ ? LTU : GEU);
20153 break;
20154
20155       /* Convert a>b into b<a or a>=b+1.  */
20156 case GTU:
20157 case LEU:
20158 if (CONST_INT_P (op1))
20159 {
20160 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
20161 	  /* Bail out on overflow.  We could still swap the operands, but that
20162 	     would force loading the constant into a register.  */
20163 if (op1 == const0_rtx
20164 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
20165 return false;
20166 code = (code == GTU ? GEU : LTU);
20167 }
20168 else
20169 {
20170 rtx tmp = op1;
20171 op1 = op0;
20172 op0 = tmp;
20173 code = (code == GTU ? LTU : GEU);
20174 }
20175 break;
20176
20177 /* Convert a>=0 into (unsigned)a<0x80000000. */
20178 case LT:
20179 case GE:
20180 if (mode == DImode || op1 != const0_rtx)
20181 return false;
20182 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20183 code = (code == LT ? GEU : LTU);
20184 break;
20185 case LE:
20186 case GT:
20187 if (mode == DImode || op1 != constm1_rtx)
20188 return false;
20189 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20190 code = (code == LE ? GEU : LTU);
20191 break;
20192
20193 default:
20194 return false;
20195 }
20196 /* Swapping operands may cause constant to appear as first operand. */
20197 if (!nonimmediate_operand (op0, VOIDmode))
20198 {
20199 if (!can_create_pseudo_p ())
20200 return false;
20201 op0 = force_reg (mode, op0);
20202 }
20203 *pop = ix86_expand_compare (code, op0, op1);
20204 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
20205 return true;
20206 }
20207
20208 bool
20209 ix86_expand_int_movcc (rtx operands[])
20210 {
20211 enum rtx_code code = GET_CODE (operands[1]), compare_code;
20212 rtx compare_seq, compare_op;
20213 enum machine_mode mode = GET_MODE (operands[0]);
20214 bool sign_bit_compare_p = false;
20215 rtx op0 = XEXP (operands[1], 0);
20216 rtx op1 = XEXP (operands[1], 1);
20217
20218 if (GET_MODE (op0) == TImode
20219 || (GET_MODE (op0) == DImode
20220 && !TARGET_64BIT))
20221 return false;
20222
20223 start_sequence ();
20224 compare_op = ix86_expand_compare (code, op0, op1);
20225 compare_seq = get_insns ();
20226 end_sequence ();
20227
20228 compare_code = GET_CODE (compare_op);
20229
20230 if ((op1 == const0_rtx && (code == GE || code == LT))
20231 || (op1 == constm1_rtx && (code == GT || code == LE)))
20232 sign_bit_compare_p = true;
20233
20234 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
20235 HImode insns, we'd be swallowed in word prefix ops. */
20236
20237 if ((mode != HImode || TARGET_FAST_PREFIX)
20238 && (mode != (TARGET_64BIT ? TImode : DImode))
20239 && CONST_INT_P (operands[2])
20240 && CONST_INT_P (operands[3]))
20241 {
20242 rtx out = operands[0];
20243 HOST_WIDE_INT ct = INTVAL (operands[2]);
20244 HOST_WIDE_INT cf = INTVAL (operands[3]);
20245 HOST_WIDE_INT diff;
20246
20247 diff = ct - cf;
20248       /* Sign-bit compares are better done using shifts than
20249          using sbb.  */
20250 if (sign_bit_compare_p
20251 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
20252 {
20253 /* Detect overlap between destination and compare sources. */
20254 rtx tmp = out;
20255
20256 if (!sign_bit_compare_p)
20257 {
20258 rtx flags;
20259 bool fpcmp = false;
20260
20261 compare_code = GET_CODE (compare_op);
20262
20263 flags = XEXP (compare_op, 0);
20264
20265 if (GET_MODE (flags) == CCFPmode
20266 || GET_MODE (flags) == CCFPUmode)
20267 {
20268 fpcmp = true;
20269 compare_code
20270 = ix86_fp_compare_code_to_integer (compare_code);
20271 }
20272
20273 /* To simplify rest of code, restrict to the GEU case. */
20274 if (compare_code == LTU)
20275 {
20276 HOST_WIDE_INT tmp = ct;
20277 ct = cf;
20278 cf = tmp;
20279 compare_code = reverse_condition (compare_code);
20280 code = reverse_condition (code);
20281 }
20282 else
20283 {
20284 if (fpcmp)
20285 PUT_CODE (compare_op,
20286 reverse_condition_maybe_unordered
20287 (GET_CODE (compare_op)));
20288 else
20289 PUT_CODE (compare_op,
20290 reverse_condition (GET_CODE (compare_op)));
20291 }
20292 diff = ct - cf;
20293
20294 if (reg_overlap_mentioned_p (out, op0)
20295 || reg_overlap_mentioned_p (out, op1))
20296 tmp = gen_reg_rtx (mode);
20297
20298 if (mode == DImode)
20299 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
20300 else
20301 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
20302 flags, compare_op));
20303 }
20304 else
20305 {
20306 if (code == GT || code == GE)
20307 code = reverse_condition (code);
20308 else
20309 {
20310 HOST_WIDE_INT tmp = ct;
20311 ct = cf;
20312 cf = tmp;
20313 diff = ct - cf;
20314 }
20315 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
20316 }
20317
20318 if (diff == 1)
20319 {
20320 /*
20321 * cmpl op0,op1
20322 * sbbl dest,dest
20323 * [addl dest, ct]
20324 *
20325 * Size 5 - 8.
20326 */
20327 if (ct)
20328 tmp = expand_simple_binop (mode, PLUS,
20329 tmp, GEN_INT (ct),
20330 copy_rtx (tmp), 1, OPTAB_DIRECT);
20331 }
20332 else if (cf == -1)
20333 {
20334 /*
20335 * cmpl op0,op1
20336 * sbbl dest,dest
20337 * orl $ct, dest
20338 *
20339 * Size 8.
20340 */
20341 tmp = expand_simple_binop (mode, IOR,
20342 tmp, GEN_INT (ct),
20343 copy_rtx (tmp), 1, OPTAB_DIRECT);
20344 }
20345 else if (diff == -1 && ct)
20346 {
20347 /*
20348 * cmpl op0,op1
20349 * sbbl dest,dest
20350 * notl dest
20351 * [addl dest, cf]
20352 *
20353 * Size 8 - 11.
20354 */
20355 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20356 if (cf)
20357 tmp = expand_simple_binop (mode, PLUS,
20358 copy_rtx (tmp), GEN_INT (cf),
20359 copy_rtx (tmp), 1, OPTAB_DIRECT);
20360 }
20361 else
20362 {
20363 /*
20364 * cmpl op0,op1
20365 * sbbl dest,dest
20366 * [notl dest]
20367 * andl cf - ct, dest
20368 * [addl dest, ct]
20369 *
20370 * Size 8 - 11.
20371 */
20372
20373 if (cf == 0)
20374 {
20375 cf = ct;
20376 ct = 0;
20377 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20378 }
20379
20380 tmp = expand_simple_binop (mode, AND,
20381 copy_rtx (tmp),
20382 gen_int_mode (cf - ct, mode),
20383 copy_rtx (tmp), 1, OPTAB_DIRECT);
20384 if (ct)
20385 tmp = expand_simple_binop (mode, PLUS,
20386 copy_rtx (tmp), GEN_INT (ct),
20387 copy_rtx (tmp), 1, OPTAB_DIRECT);
20388 }
20389
20390 if (!rtx_equal_p (tmp, out))
20391 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
20392
20393 return true;
20394 }
20395
20396 if (diff < 0)
20397 {
20398 enum machine_mode cmp_mode = GET_MODE (op0);
20399
20400 HOST_WIDE_INT tmp;
20401 tmp = ct, ct = cf, cf = tmp;
20402 diff = -diff;
20403
20404 if (SCALAR_FLOAT_MODE_P (cmp_mode))
20405 {
20406 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
20407
20408 	      /* We may be reversing an unordered compare to a normal compare,
20409 	         which is not valid in general (we may convert a non-trapping
20410 	         condition to a trapping one); however, on i386 we currently
20411 	         emit all comparisons unordered.  */
20412 compare_code = reverse_condition_maybe_unordered (compare_code);
20413 code = reverse_condition_maybe_unordered (code);
20414 }
20415 else
20416 {
20417 compare_code = reverse_condition (compare_code);
20418 code = reverse_condition (code);
20419 }
20420 }
20421
20422 compare_code = UNKNOWN;
20423 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
20424 && CONST_INT_P (op1))
20425 {
20426 if (op1 == const0_rtx
20427 && (code == LT || code == GE))
20428 compare_code = code;
20429 else if (op1 == constm1_rtx)
20430 {
20431 if (code == LE)
20432 compare_code = LT;
20433 else if (code == GT)
20434 compare_code = GE;
20435 }
20436 }
20437
20438 /* Optimize dest = (op0 < 0) ? -1 : cf. */
20439 if (compare_code != UNKNOWN
20440 && GET_MODE (op0) == GET_MODE (out)
20441 && (cf == -1 || ct == -1))
20442 {
20443 	  /* If the lea code below could be used, only optimize
20444 	     if it results in a 2-insn sequence.  */
20445
20446 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
20447 || diff == 3 || diff == 5 || diff == 9)
20448 || (compare_code == LT && ct == -1)
20449 || (compare_code == GE && cf == -1))
20450 {
20451 /*
20452 * notl op1 (if necessary)
20453 * sarl $31, op1
20454 * orl cf, op1
20455 */
20456 if (ct != -1)
20457 {
20458 cf = ct;
20459 ct = -1;
20460 code = reverse_condition (code);
20461 }
20462
20463 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
20464
20465 out = expand_simple_binop (mode, IOR,
20466 out, GEN_INT (cf),
20467 out, 1, OPTAB_DIRECT);
20468 if (out != operands[0])
20469 emit_move_insn (operands[0], out);
20470
20471 return true;
20472 }
20473 }
20474
20475
20476 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
20477 || diff == 3 || diff == 5 || diff == 9)
20478 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
20479 && (mode != DImode
20480 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
20481 {
20482 /*
20483 * xorl dest,dest
20484 * cmpl op1,op2
20485 * setcc dest
20486 * lea cf(dest*(ct-cf)),dest
20487 *
20488 * Size 14.
20489 *
20490 * This also catches the degenerate setcc-only case.
20491 */
20492
20493 rtx tmp;
20494 int nops;
20495
20496 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
20497
20498 nops = 0;
20499 	  /* On x86_64 the lea instruction operates on Pmode, so we need
20500 	     to get the arithmetic done in the proper mode to match.  */
20501 if (diff == 1)
20502 tmp = copy_rtx (out);
20503 else
20504 {
20505 rtx out1;
20506 out1 = copy_rtx (out);
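	      /* For diff of 3, 5 or 9, (diff & ~1) is 2, 4 or 8 and the PLUS
	         below adds the base once more, matching the base + index*scale
	         addressing forms that lea accepts.  */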
20507 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
20508 nops++;
20509 if (diff & 1)
20510 {
20511 tmp = gen_rtx_PLUS (mode, tmp, out1);
20512 nops++;
20513 }
20514 }
20515 if (cf != 0)
20516 {
20517 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
20518 nops++;
20519 }
20520 if (!rtx_equal_p (tmp, out))
20521 {
20522 if (nops == 1)
20523 out = force_operand (tmp, copy_rtx (out));
20524 else
20525 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
20526 }
20527 if (!rtx_equal_p (out, operands[0]))
20528 emit_move_insn (operands[0], copy_rtx (out));
20529
20530 return true;
20531 }
20532
20533 /*
20534 * General case: Jumpful:
20535 * xorl dest,dest cmpl op1, op2
20536 * cmpl op1, op2 movl ct, dest
20537 * setcc dest jcc 1f
20538 * decl dest movl cf, dest
20539 * andl (cf-ct),dest 1:
20540 * addl ct,dest
20541 *
20542 * Size 20. Size 14.
20543 *
20544 * This is reasonably steep, but branch mispredict costs are
20545 * high on modern cpus, so consider failing only if optimizing
20546 * for space.
20547 */
20548
20549 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
20550 && BRANCH_COST (optimize_insn_for_speed_p (),
20551 false) >= 2)
20552 {
20553 if (cf == 0)
20554 {
20555 enum machine_mode cmp_mode = GET_MODE (op0);
20556
20557 cf = ct;
20558 ct = 0;
20559
20560 if (SCALAR_FLOAT_MODE_P (cmp_mode))
20561 {
20562 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
20563
20564 	      /* We may be reversing an unordered compare to a normal compare,
20565 	         which is not valid in general (we may convert a non-trapping
20566 	         condition to a trapping one); however, on i386 we currently
20567 	         emit all comparisons unordered.  */
20568 code = reverse_condition_maybe_unordered (code);
20569 }
20570 else
20571 {
20572 code = reverse_condition (code);
20573 if (compare_code != UNKNOWN)
20574 compare_code = reverse_condition (compare_code);
20575 }
20576 }
20577
20578 if (compare_code != UNKNOWN)
20579 {
20580 /* notl op1 (if needed)
20581 sarl $31, op1
20582 andl (cf-ct), op1
20583 addl ct, op1
20584
20585 For x < 0 (resp. x <= -1) there will be no notl,
20586 so if possible swap the constants to get rid of the
20587 complement.
20588 True/false will be -1/0 while code below (store flag
20589 followed by decrement) is 0/-1, so the constants need
20590 to be exchanged once more. */
20591
20592 if (compare_code == GE || !cf)
20593 {
20594 code = reverse_condition (code);
20595 compare_code = LT;
20596 }
20597 else
20598 {
20599 HOST_WIDE_INT tmp = cf;
20600 cf = ct;
20601 ct = tmp;
20602 }
20603
20604 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
20605 }
20606 else
20607 {
20608 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
20609
20610 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
20611 constm1_rtx,
20612 copy_rtx (out), 1, OPTAB_DIRECT);
20613 }
20614
20615 out = expand_simple_binop (mode, AND, copy_rtx (out),
20616 gen_int_mode (cf - ct, mode),
20617 copy_rtx (out), 1, OPTAB_DIRECT);
20618 if (ct)
20619 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
20620 copy_rtx (out), 1, OPTAB_DIRECT);
20621 if (!rtx_equal_p (out, operands[0]))
20622 emit_move_insn (operands[0], copy_rtx (out));
20623
20624 return true;
20625 }
20626 }
20627
20628 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
20629 {
20630 /* Try a few things more with specific constants and a variable. */
20631
20632 optab op;
20633 rtx var, orig_out, out, tmp;
20634
20635 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
20636 return false;
20637
20638 /* If one of the two operands is an interesting constant, load a
20639 constant with the above and mask it in with a logical operation. */
20640
20641 if (CONST_INT_P (operands[2]))
20642 {
20643 var = operands[3];
20644 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
20645 operands[3] = constm1_rtx, op = and_optab;
20646 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
20647 operands[3] = const0_rtx, op = ior_optab;
20648 else
20649 return false;
20650 }
20651 else if (CONST_INT_P (operands[3]))
20652 {
20653 var = operands[2];
20654 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
20655 operands[2] = constm1_rtx, op = and_optab;
20656 	  else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
20657 operands[2] = const0_rtx, op = ior_optab;
20658 else
20659 return false;
20660 }
20661 else
20662 return false;
20663
20664 orig_out = operands[0];
20665 tmp = gen_reg_rtx (mode);
20666 operands[0] = tmp;
20667
20668 /* Recurse to get the constant loaded. */
20669 if (ix86_expand_int_movcc (operands) == 0)
20670 return false;
20671
20672 /* Mask in the interesting variable. */
20673 out = expand_binop (mode, op, var, tmp, orig_out, 0,
20674 OPTAB_WIDEN);
20675 if (!rtx_equal_p (out, orig_out))
20676 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
20677
20678 return true;
20679 }
20680
20681 /*
20682 * For comparison with above,
20683 *
20684 * movl cf,dest
20685 * movl ct,tmp
20686 * cmpl op1,op2
20687 * cmovcc tmp,dest
20688 *
20689 * Size 15.
20690 */
20691
20692 if (! nonimmediate_operand (operands[2], mode))
20693 operands[2] = force_reg (mode, operands[2]);
20694 if (! nonimmediate_operand (operands[3], mode))
20695 operands[3] = force_reg (mode, operands[3]);
20696
20697 if (! register_operand (operands[2], VOIDmode)
20698 && (mode == QImode
20699 || ! register_operand (operands[3], VOIDmode)))
20700 operands[2] = force_reg (mode, operands[2]);
20701
20702 if (mode == QImode
20703 && ! register_operand (operands[3], VOIDmode))
20704 operands[3] = force_reg (mode, operands[3]);
20705
20706 emit_insn (compare_seq);
20707 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
20708 gen_rtx_IF_THEN_ELSE (mode,
20709 compare_op, operands[2],
20710 operands[3])));
20711 return true;
20712 }
20713
20714 /* Swap, force into registers, or otherwise massage the two operands
20715 to an sse comparison with a mask result. Thus we differ a bit from
20716 ix86_prepare_fp_compare_args which expects to produce a flags result.
20717
20718 The DEST operand exists to help determine whether to commute commutative
20719 operators. The POP0/POP1 operands are updated in place. The new
20720 comparison code is returned, or UNKNOWN if not implementable. */
20721
20722 static enum rtx_code
20723 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
20724 rtx *pop0, rtx *pop1)
20725 {
20726 rtx tmp;
20727
20728 switch (code)
20729 {
20730 case LTGT:
20731 case UNEQ:
20732 /* AVX supports all the needed comparisons. */
20733 if (TARGET_AVX)
20734 break;
20735 /* We have no LTGT as an operator. We could implement it with
20736 NE & ORDERED, but this requires an extra temporary. It's
20737 not clear that it's worth it. */
20738 return UNKNOWN;
20739
20740 case LT:
20741 case LE:
20742 case UNGT:
20743 case UNGE:
20744 /* These are supported directly. */
20745 break;
20746
20747 case EQ:
20748 case NE:
20749 case UNORDERED:
20750 case ORDERED:
20751 /* AVX has 3 operand comparisons, no need to swap anything. */
20752 if (TARGET_AVX)
20753 break;
20754 /* For commutative operators, try to canonicalize the destination
20755 operand to be first in the comparison - this helps reload to
20756 avoid extra moves. */
20757 if (!dest || !rtx_equal_p (dest, *pop1))
20758 break;
20759 /* FALLTHRU */
20760
20761 case GE:
20762 case GT:
20763 case UNLE:
20764 case UNLT:
20765 /* These are not supported directly before AVX, and furthermore
20766 ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
20767 comparison operands to transform into something that is
20768 supported. */
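      /* E.g. GT (a, b) becomes LT (b, a): the SSE cmpps/cmppd predicate set
	 provides lt, le, nlt and nle but no gt or ge before AVX.  */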
20769 tmp = *pop0;
20770 *pop0 = *pop1;
20771 *pop1 = tmp;
20772 code = swap_condition (code);
20773 break;
20774
20775 default:
20776 gcc_unreachable ();
20777 }
20778
20779 return code;
20780 }
20781
20782 /* Detect conditional moves that exactly match min/max operational
20783 semantics. Note that this is IEEE safe, as long as we don't
20784 interchange the operands.
20785
20786 Returns FALSE if this conditional move doesn't match a MIN/MAX,
20787 and TRUE if the operation is successful and instructions are emitted. */
20788
20789 static bool
20790 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
20791 rtx cmp_op1, rtx if_true, rtx if_false)
20792 {
20793 enum machine_mode mode;
20794 bool is_min;
20795 rtx tmp;
20796
20797 if (code == LT)
20798 ;
20799 else if (code == UNGE)
20800 {
20801 tmp = if_true;
20802 if_true = if_false;
20803 if_false = tmp;
20804 }
20805 else
20806 return false;
20807
20808 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
20809 is_min = true;
20810 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
20811 is_min = false;
20812 else
20813 return false;
20814
20815 mode = GET_MODE (dest);
20816
20817 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
20818 but MODE may be a vector mode and thus not appropriate. */
20819 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
20820 {
20821 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
20822 rtvec v;
20823
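      /* Use an UNSPEC so that later passes cannot commute the operands;
	 plain SMIN/SMAX are commutative, which would lose the operand order
	 that the IEEE min/max semantics depend on.  */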
20824 if_true = force_reg (mode, if_true);
20825 v = gen_rtvec (2, if_true, if_false);
20826 tmp = gen_rtx_UNSPEC (mode, v, u);
20827 }
20828 else
20829 {
20830 code = is_min ? SMIN : SMAX;
20831 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
20832 }
20833
20834 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
20835 return true;
20836 }
20837
20838 /* Expand an sse vector comparison. Return the register with the result. */
20839
20840 static rtx
20841 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
20842 rtx op_true, rtx op_false)
20843 {
20844 enum machine_mode mode = GET_MODE (dest);
20845 enum machine_mode cmp_ops_mode = GET_MODE (cmp_op0);
20846
20847   /* In the general case the result of the comparison can differ from the operands' type.  */
20848 enum machine_mode cmp_mode;
20849
20850 /* In AVX512F the result of comparison is an integer mask. */
20851 bool maskcmp = false;
20852 rtx x;
20853
20854 if (GET_MODE_SIZE (cmp_ops_mode) == 64)
20855 {
20856 cmp_mode = mode_for_size (GET_MODE_NUNITS (cmp_ops_mode), MODE_INT, 0);
20857 gcc_assert (cmp_mode != BLKmode);
20858
20859 maskcmp = true;
20860 }
20861 else
20862 cmp_mode = cmp_ops_mode;
20863
20864
20865 cmp_op0 = force_reg (cmp_ops_mode, cmp_op0);
20866 if (!nonimmediate_operand (cmp_op1, cmp_ops_mode))
20867 cmp_op1 = force_reg (cmp_ops_mode, cmp_op1);
20868
20869 if (optimize
20870 || reg_overlap_mentioned_p (dest, op_true)
20871 || reg_overlap_mentioned_p (dest, op_false))
20872 dest = gen_reg_rtx (maskcmp ? cmp_mode : mode);
20873
20874 /* Compare patterns for int modes are unspec in AVX512F only. */
20875 if (maskcmp && (code == GT || code == EQ))
20876 {
20877 rtx (*gen)(rtx, rtx, rtx);
20878
20879 switch (cmp_ops_mode)
20880 {
20881 case V16SImode:
20882 gen = code == GT ? gen_avx512f_gtv16si3 : gen_avx512f_eqv16si3_1;
20883 break;
20884 case V8DImode:
20885 gen = code == GT ? gen_avx512f_gtv8di3 : gen_avx512f_eqv8di3_1;
20886 break;
20887 default:
20888 gen = NULL;
20889 }
20890
20891 if (gen)
20892 {
20893 emit_insn (gen (dest, cmp_op0, cmp_op1));
20894 return dest;
20895 }
20896 }
20897 x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
20898
20899 if (cmp_mode != mode && !maskcmp)
20900 {
20901 x = force_reg (cmp_ops_mode, x);
20902 convert_move (dest, x, false);
20903 }
20904 else
20905 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20906
20907 return dest;
20908 }
20909
20910 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
20911 operations. This is used for both scalar and vector conditional moves. */
20912
20913 static void
20914 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
20915 {
20916 enum machine_mode mode = GET_MODE (dest);
20917 enum machine_mode cmpmode = GET_MODE (cmp);
20918
20919 /* In AVX512F the result of comparison is an integer mask. */
20920 bool maskcmp = (mode != cmpmode && TARGET_AVX512F);
20921
20922 rtx t2, t3, x;
20923
20924 if (vector_all_ones_operand (op_true, mode)
20925 && rtx_equal_p (op_false, CONST0_RTX (mode))
20926 && !maskcmp)
20927 {
20928 emit_insn (gen_rtx_SET (VOIDmode, dest, cmp));
20929 }
20930 else if (op_false == CONST0_RTX (mode)
20931 && !maskcmp)
20932 {
20933 op_true = force_reg (mode, op_true);
20934 x = gen_rtx_AND (mode, cmp, op_true);
20935 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20936 }
20937 else if (op_true == CONST0_RTX (mode)
20938 && !maskcmp)
20939 {
20940 op_false = force_reg (mode, op_false);
20941 x = gen_rtx_NOT (mode, cmp);
20942 x = gen_rtx_AND (mode, x, op_false);
20943 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20944 }
20945 else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode)
20946 && !maskcmp)
20947 {
20948 op_false = force_reg (mode, op_false);
20949 x = gen_rtx_IOR (mode, cmp, op_false);
20950 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20951 }
20952 else if (TARGET_XOP
20953 && !maskcmp)
20954 {
20955 op_true = force_reg (mode, op_true);
20956
20957 if (!nonimmediate_operand (op_false, mode))
20958 op_false = force_reg (mode, op_false);
20959
20960 emit_insn (gen_rtx_SET (mode, dest,
20961 gen_rtx_IF_THEN_ELSE (mode, cmp,
20962 op_true,
20963 op_false)));
20964 }
20965 else
20966 {
20967 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
20968 rtx d = dest;
20969
20970 if (!nonimmediate_operand (op_true, mode))
20971 op_true = force_reg (mode, op_true);
20972
20973 op_false = force_reg (mode, op_false);
20974
20975 switch (mode)
20976 {
20977 case V4SFmode:
20978 if (TARGET_SSE4_1)
20979 gen = gen_sse4_1_blendvps;
20980 break;
20981 case V2DFmode:
20982 if (TARGET_SSE4_1)
20983 gen = gen_sse4_1_blendvpd;
20984 break;
20985 case V16QImode:
20986 case V8HImode:
20987 case V4SImode:
20988 case V2DImode:
20989 if (TARGET_SSE4_1)
20990 {
20991 gen = gen_sse4_1_pblendvb;
20992 if (mode != V16QImode)
20993 d = gen_reg_rtx (V16QImode);
20994 op_false = gen_lowpart (V16QImode, op_false);
20995 op_true = gen_lowpart (V16QImode, op_true);
20996 cmp = gen_lowpart (V16QImode, cmp);
20997 }
20998 break;
20999 case V8SFmode:
21000 if (TARGET_AVX)
21001 gen = gen_avx_blendvps256;
21002 break;
21003 case V4DFmode:
21004 if (TARGET_AVX)
21005 gen = gen_avx_blendvpd256;
21006 break;
21007 case V32QImode:
21008 case V16HImode:
21009 case V8SImode:
21010 case V4DImode:
21011 if (TARGET_AVX2)
21012 {
21013 gen = gen_avx2_pblendvb;
21014 if (mode != V32QImode)
21015 d = gen_reg_rtx (V32QImode);
21016 op_false = gen_lowpart (V32QImode, op_false);
21017 op_true = gen_lowpart (V32QImode, op_true);
21018 cmp = gen_lowpart (V32QImode, cmp);
21019 }
21020 break;
21021
21022 case V16SImode:
21023 gen = gen_avx512f_blendmv16si;
21024 break;
21025 case V8DImode:
21026 gen = gen_avx512f_blendmv8di;
21027 break;
21028 case V8DFmode:
21029 gen = gen_avx512f_blendmv8df;
21030 break;
21031 case V16SFmode:
21032 gen = gen_avx512f_blendmv16sf;
21033 break;
21034
21035 default:
21036 break;
21037 }
21038
21039 if (gen != NULL)
21040 {
21041 emit_insn (gen (d, op_false, op_true, cmp));
21042 if (d != dest)
21043 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
21044 }
21045 else
21046 {
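	  /* No blend instruction is available; compute
	     dest = (cmp & op_true) | (~cmp & op_false), which is correct
	     because the comparison result is all-ones or all-zeros in each
	     element.  */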
21047 op_true = force_reg (mode, op_true);
21048
21049 t2 = gen_reg_rtx (mode);
21050 if (optimize)
21051 t3 = gen_reg_rtx (mode);
21052 else
21053 t3 = dest;
21054
21055 x = gen_rtx_AND (mode, op_true, cmp);
21056 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
21057
21058 x = gen_rtx_NOT (mode, cmp);
21059 x = gen_rtx_AND (mode, x, op_false);
21060 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
21061
21062 x = gen_rtx_IOR (mode, t3, t2);
21063 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21064 }
21065 }
21066 }
21067
21068 /* Expand a floating-point conditional move. Return true if successful. */
21069
21070 bool
21071 ix86_expand_fp_movcc (rtx operands[])
21072 {
21073 enum machine_mode mode = GET_MODE (operands[0]);
21074 enum rtx_code code = GET_CODE (operands[1]);
21075 rtx tmp, compare_op;
21076 rtx op0 = XEXP (operands[1], 0);
21077 rtx op1 = XEXP (operands[1], 1);
21078
21079 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
21080 {
21081 enum machine_mode cmode;
21082
21083 /* Since we've no cmove for sse registers, don't force bad register
21084 allocation just to gain access to it. Deny movcc when the
21085 comparison mode doesn't match the move mode. */
21086 cmode = GET_MODE (op0);
21087 if (cmode == VOIDmode)
21088 cmode = GET_MODE (op1);
21089 if (cmode != mode)
21090 return false;
21091
21092 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
21093 if (code == UNKNOWN)
21094 return false;
21095
21096 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
21097 operands[2], operands[3]))
21098 return true;
21099
21100 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
21101 operands[2], operands[3]);
21102 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
21103 return true;
21104 }
21105
21106 if (GET_MODE (op0) == TImode
21107 || (GET_MODE (op0) == DImode
21108 && !TARGET_64BIT))
21109 return false;
21110
21111 /* The floating point conditional move instructions don't directly
21112 support conditions resulting from a signed integer comparison. */
21113
21114 compare_op = ix86_expand_compare (code, op0, op1);
21115 if (!fcmov_comparison_operator (compare_op, VOIDmode))
21116 {
21117 tmp = gen_reg_rtx (QImode);
21118 ix86_expand_setcc (tmp, code, op0, op1);
21119
21120 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
21121 }
21122
21123 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
21124 gen_rtx_IF_THEN_ELSE (mode, compare_op,
21125 operands[2], operands[3])));
21126
21127 return true;
21128 }
21129
21130 /* Expand a floating-point vector conditional move; a vcond operation
21131 rather than a movcc operation. */
21132
21133 bool
21134 ix86_expand_fp_vcond (rtx operands[])
21135 {
21136 enum rtx_code code = GET_CODE (operands[3]);
21137 rtx cmp;
21138
21139 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
21140 &operands[4], &operands[5]);
21141 if (code == UNKNOWN)
21142 {
21143 rtx temp;
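      /* LTGT and UNEQ have no direct SSE predicate here; build them from
	 two compares as ORDERED & NE and UNORDERED | EQ respectively.  */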
21144 switch (GET_CODE (operands[3]))
21145 {
21146 case LTGT:
21147 temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
21148 operands[5], operands[0], operands[0]);
21149 cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
21150 operands[5], operands[1], operands[2]);
21151 code = AND;
21152 break;
21153 case UNEQ:
21154 temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
21155 operands[5], operands[0], operands[0]);
21156 cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
21157 operands[5], operands[1], operands[2]);
21158 code = IOR;
21159 break;
21160 default:
21161 gcc_unreachable ();
21162 }
21163 cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
21164 OPTAB_DIRECT);
21165 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21166 return true;
21167 }
21168
21169 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
21170 operands[5], operands[1], operands[2]))
21171 return true;
21172
21173 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
21174 operands[1], operands[2]);
21175 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21176 return true;
21177 }
21178
21179 /* Expand a signed/unsigned integral vector conditional move. */
21180
21181 bool
21182 ix86_expand_int_vcond (rtx operands[])
21183 {
21184 enum machine_mode data_mode = GET_MODE (operands[0]);
21185 enum machine_mode mode = GET_MODE (operands[4]);
21186 enum rtx_code code = GET_CODE (operands[3]);
21187 bool negate = false;
21188 rtx x, cop0, cop1;
21189
21190 cop0 = operands[4];
21191 cop1 = operands[5];
21192
21193 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
21194 and x < 0 ? 1 : 0 into (unsigned) x >> 31. */
21195 if ((code == LT || code == GE)
21196 && data_mode == mode
21197 && cop1 == CONST0_RTX (mode)
21198 && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
21199 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) > 1
21200 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) <= 8
21201 && (GET_MODE_SIZE (data_mode) == 16
21202 || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
21203 {
21204 rtx negop = operands[2 - (code == LT)];
21205 int shift = GET_MODE_BITSIZE (GET_MODE_INNER (data_mode)) - 1;
21206 if (negop == CONST1_RTX (data_mode))
21207 {
21208 rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
21209 operands[0], 1, OPTAB_DIRECT);
21210 if (res != operands[0])
21211 emit_move_insn (operands[0], res);
21212 return true;
21213 }
21214 else if (GET_MODE_INNER (data_mode) != DImode
21215 && vector_all_ones_operand (negop, data_mode))
21216 {
21217 rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
21218 operands[0], 0, OPTAB_DIRECT);
21219 if (res != operands[0])
21220 emit_move_insn (operands[0], res);
21221 return true;
21222 }
21223 }
21224
21225 if (!nonimmediate_operand (cop1, mode))
21226 cop1 = force_reg (mode, cop1);
21227 if (!general_operand (operands[1], data_mode))
21228 operands[1] = force_reg (data_mode, operands[1]);
21229 if (!general_operand (operands[2], data_mode))
21230 operands[2] = force_reg (data_mode, operands[2]);
21231
21232 /* XOP supports all of the comparisons on all 128-bit vector int types. */
21233 if (TARGET_XOP
21234 && (mode == V16QImode || mode == V8HImode
21235 || mode == V4SImode || mode == V2DImode))
21236 ;
21237 else
21238 {
21239 /* Canonicalize the comparison to EQ, GT, GTU. */
21240 switch (code)
21241 {
21242 case EQ:
21243 case GT:
21244 case GTU:
21245 break;
21246
21247 case NE:
21248 case LE:
21249 case LEU:
21250 code = reverse_condition (code);
21251 negate = true;
21252 break;
21253
21254 case GE:
21255 case GEU:
21256 code = reverse_condition (code);
21257 negate = true;
21258 /* FALLTHRU */
21259
21260 case LT:
21261 case LTU:
21262 code = swap_condition (code);
21263 x = cop0, cop0 = cop1, cop1 = x;
21264 break;
21265
21266 default:
21267 gcc_unreachable ();
21268 }
21269
21270 /* Only SSE4.1/SSE4.2 supports V2DImode. */
21271 if (mode == V2DImode)
21272 {
21273 switch (code)
21274 {
21275 case EQ:
21276 /* SSE4.1 supports EQ. */
21277 if (!TARGET_SSE4_1)
21278 return false;
21279 break;
21280
21281 case GT:
21282 case GTU:
21283 /* SSE4.2 supports GT/GTU. */
21284 if (!TARGET_SSE4_2)
21285 return false;
21286 break;
21287
21288 default:
21289 gcc_unreachable ();
21290 }
21291 }
21292
21293 /* Unsigned parallel compare is not supported by the hardware.
21294 Play some tricks to turn this into a signed comparison
21295 against 0. */
21296 if (code == GTU)
21297 {
21298 cop0 = force_reg (mode, cop0);
21299
21300 switch (mode)
21301 {
21302 case V16SImode:
21303 case V8DImode:
21304 case V8SImode:
21305 case V4DImode:
21306 case V4SImode:
21307 case V2DImode:
21308 {
21309 rtx t1, t2, mask;
21310 rtx (*gen_sub3) (rtx, rtx, rtx);
21311
21312 switch (mode)
21313 {
21314 case V16SImode: gen_sub3 = gen_subv16si3; break;
21315 case V8DImode: gen_sub3 = gen_subv8di3; break;
21316 case V8SImode: gen_sub3 = gen_subv8si3; break;
21317 case V4DImode: gen_sub3 = gen_subv4di3; break;
21318 case V4SImode: gen_sub3 = gen_subv4si3; break;
21319 case V2DImode: gen_sub3 = gen_subv2di3; break;
21320 default:
21321 gcc_unreachable ();
21322 }
21323 /* Subtract (-(INT MAX) - 1) from both operands to make
21324 them signed. */
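	    /* E.g. for 32-bit elements, subtracting 0x80000000 just flips the
	       sign bit (mod 2^32), so a >u b becomes
	       (a ^ 0x80000000) >s (b ^ 0x80000000).  */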
21325 mask = ix86_build_signbit_mask (mode, true, false);
21326 t1 = gen_reg_rtx (mode);
21327 emit_insn (gen_sub3 (t1, cop0, mask));
21328
21329 t2 = gen_reg_rtx (mode);
21330 emit_insn (gen_sub3 (t2, cop1, mask));
21331
21332 cop0 = t1;
21333 cop1 = t2;
21334 code = GT;
21335 }
21336 break;
21337
21338 case V32QImode:
21339 case V16HImode:
21340 case V16QImode:
21341 case V8HImode:
21342 /* Perform a parallel unsigned saturating subtraction. */
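	  /* The saturating difference is zero exactly when a <=u b, so
	     a >u b is equivalent to (a -us b) != 0; compute EQ against zero
	     and flip NEGATE to get the NE.  */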
21343 x = gen_reg_rtx (mode);
21344 emit_insn (gen_rtx_SET (VOIDmode, x,
21345 gen_rtx_US_MINUS (mode, cop0, cop1)));
21346
21347 cop0 = x;
21348 cop1 = CONST0_RTX (mode);
21349 code = EQ;
21350 negate = !negate;
21351 break;
21352
21353 default:
21354 gcc_unreachable ();
21355 }
21356 }
21357 }
21358
21359 /* Allow the comparison to be done in one mode, but the movcc to
21360 happen in another mode. */
21361 if (data_mode == mode)
21362 {
21363 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
21364 operands[1+negate], operands[2-negate]);
21365 }
21366 else
21367 {
21368 gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
21369 x = ix86_expand_sse_cmp (gen_reg_rtx (mode), code, cop0, cop1,
21370 operands[1+negate], operands[2-negate]);
21371 if (GET_MODE (x) == mode)
21372 x = gen_lowpart (data_mode, x);
21373 }
21374
21375 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
21376 operands[2-negate]);
21377 return true;
21378 }
21379
21380 static bool
21381 ix86_expand_vec_perm_vpermi2 (rtx target, rtx op0, rtx mask, rtx op1)
21382 {
21383 enum machine_mode mode = GET_MODE (op0);
21384 switch (mode)
21385 {
21386 case V16SImode:
21387 emit_insn (gen_avx512f_vpermi2varv16si3 (target, op0,
21388 force_reg (V16SImode, mask),
21389 op1));
21390 return true;
21391 case V16SFmode:
21392 emit_insn (gen_avx512f_vpermi2varv16sf3 (target, op0,
21393 force_reg (V16SImode, mask),
21394 op1));
21395 return true;
21396 case V8DImode:
21397 emit_insn (gen_avx512f_vpermi2varv8di3 (target, op0,
21398 force_reg (V8DImode, mask), op1));
21399 return true;
21400 case V8DFmode:
21401 emit_insn (gen_avx512f_vpermi2varv8df3 (target, op0,
21402 force_reg (V8DImode, mask), op1));
21403 return true;
21404 default:
21405 return false;
21406 }
21407 }
21408
21409 /* Expand a variable vector permutation. */
21410
21411 void
21412 ix86_expand_vec_perm (rtx operands[])
21413 {
21414 rtx target = operands[0];
21415 rtx op0 = operands[1];
21416 rtx op1 = operands[2];
21417 rtx mask = operands[3];
21418 rtx t1, t2, t3, t4, t5, t6, t7, t8, vt, vt2, vec[32];
21419 enum machine_mode mode = GET_MODE (op0);
21420 enum machine_mode maskmode = GET_MODE (mask);
21421 int w, e, i;
21422 bool one_operand_shuffle = rtx_equal_p (op0, op1);
21423
21424 /* Number of elements in the vector. */
21425 w = GET_MODE_NUNITS (mode);
21426 e = GET_MODE_UNIT_SIZE (mode);
21427 gcc_assert (w <= 64);
21428
21429 if (ix86_expand_vec_perm_vpermi2 (target, op0, mask, op1))
21430 return;
21431
21432 if (TARGET_AVX2)
21433 {
21434 if (mode == V4DImode || mode == V4DFmode || mode == V16HImode)
21435 {
21436 	  /* Unfortunately, the VPERMQ and VPERMPD instructions only support
21437 	     a constant shuffle operand.  With a tiny bit of effort we can
21438 	     use VPERMD instead.  A re-interpretation stall for V4DFmode is
21439 	     unfortunate but there's no avoiding it.
21440 	     Similarly, for V16HImode we don't have instructions for variable
21441 	     shuffling, while for V32QImode we can, after preparing suitable
21442 	     masks, use vpshufb; vpshufb; vpermq; vpor.  */
21443
21444 if (mode == V16HImode)
21445 {
21446 maskmode = mode = V32QImode;
21447 w = 32;
21448 e = 1;
21449 }
21450 else
21451 {
21452 maskmode = mode = V8SImode;
21453 w = 8;
21454 e = 4;
21455 }
21456 t1 = gen_reg_rtx (maskmode);
21457
21458 /* Replicate the low bits of the V4DImode mask into V8SImode:
21459 mask = { A B C D }
21460 t1 = { A A B B C C D D }. */
21461 for (i = 0; i < w / 2; ++i)
21462 vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
21463 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21464 vt = force_reg (maskmode, vt);
21465 mask = gen_lowpart (maskmode, mask);
21466 if (maskmode == V8SImode)
21467 emit_insn (gen_avx2_permvarv8si (t1, mask, vt));
21468 else
21469 emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));
21470
21471 	  /* Multiply the shuffle indices by two.  */
21472 t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
21473 OPTAB_DIRECT);
21474
21475 	  /* Add one to the odd shuffle indices:
21476 t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
21477 for (i = 0; i < w / 2; ++i)
21478 {
21479 vec[i * 2] = const0_rtx;
21480 vec[i * 2 + 1] = const1_rtx;
21481 }
21482 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21483 vt = validize_mem (force_const_mem (maskmode, vt));
21484 t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
21485 OPTAB_DIRECT);
21486
21487 /* Continue as if V8SImode (resp. V32QImode) was used initially. */
21488 operands[3] = mask = t1;
21489 target = gen_reg_rtx (mode);
21490 op0 = gen_lowpart (mode, op0);
21491 op1 = gen_lowpart (mode, op1);
21492 }
21493
21494 switch (mode)
21495 {
21496 case V8SImode:
21497 /* The VPERMD and VPERMPS instructions already properly ignore
21498 the high bits of the shuffle elements. No need for us to
21499 perform an AND ourselves. */
21500 if (one_operand_shuffle)
21501 {
21502 emit_insn (gen_avx2_permvarv8si (target, op0, mask));
21503 if (target != operands[0])
21504 emit_move_insn (operands[0],
21505 gen_lowpart (GET_MODE (operands[0]), target));
21506 }
21507 else
21508 {
21509 t1 = gen_reg_rtx (V8SImode);
21510 t2 = gen_reg_rtx (V8SImode);
21511 emit_insn (gen_avx2_permvarv8si (t1, op0, mask));
21512 emit_insn (gen_avx2_permvarv8si (t2, op1, mask));
21513 goto merge_two;
21514 }
21515 return;
21516
21517 case V8SFmode:
21518 mask = gen_lowpart (V8SImode, mask);
21519 if (one_operand_shuffle)
21520 emit_insn (gen_avx2_permvarv8sf (target, op0, mask));
21521 else
21522 {
21523 t1 = gen_reg_rtx (V8SFmode);
21524 t2 = gen_reg_rtx (V8SFmode);
21525 emit_insn (gen_avx2_permvarv8sf (t1, op0, mask));
21526 emit_insn (gen_avx2_permvarv8sf (t2, op1, mask));
21527 goto merge_two;
21528 }
21529 return;
21530
21531 case V4SImode:
21532 /* By combining the two 128-bit input vectors into one 256-bit
21533 input vector, we can use VPERMD and VPERMPS for the full
21534 two-operand shuffle. */
21535 t1 = gen_reg_rtx (V8SImode);
21536 t2 = gen_reg_rtx (V8SImode);
21537 emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
21538 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
21539 emit_insn (gen_avx2_permvarv8si (t1, t1, t2));
21540 emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
21541 return;
21542
21543 case V4SFmode:
21544 t1 = gen_reg_rtx (V8SFmode);
21545 t2 = gen_reg_rtx (V8SImode);
21546 mask = gen_lowpart (V4SImode, mask);
21547 emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
21548 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
21549 emit_insn (gen_avx2_permvarv8sf (t1, t1, t2));
21550 emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
21551 return;
21552
21553 case V32QImode:
21554 t1 = gen_reg_rtx (V32QImode);
21555 t2 = gen_reg_rtx (V32QImode);
21556 t3 = gen_reg_rtx (V32QImode);
21557 vt2 = GEN_INT (-128);
21558 for (i = 0; i < 32; i++)
21559 vec[i] = vt2;
21560 vt = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
21561 vt = force_reg (V32QImode, vt);
21562 for (i = 0; i < 32; i++)
21563 vec[i] = i < 16 ? vt2 : const0_rtx;
21564 vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
21565 vt2 = force_reg (V32QImode, vt2);
21566 /* From mask create two adjusted masks, which contain the same
21567 bits as mask in the low 7 bits of each vector element.
21568 The first mask will have the most significant bit clear
21569 if it requests element from the same 128-bit lane
21570 and MSB set if it requests element from the other 128-bit lane.
21571 The second mask will have the opposite values of the MSB,
21572 and additionally will have its 128-bit lanes swapped.
21573 E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
21574 t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
21575 t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
21576 stands for the other 12 bytes. */
21577 /* The bit that tells whether an element is from the same lane or the other
21578 lane is bit 4, so shift it up by 3 to the MSB position. */
21579 t5 = gen_reg_rtx (V4DImode);
21580 emit_insn (gen_ashlv4di3 (t5, gen_lowpart (V4DImode, mask),
21581 GEN_INT (3)));
21582 /* Clear MSB bits from the mask just in case it had them set. */
21583 emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
21584 /* After this t1 will have MSB set for elements from other lane. */
21585 emit_insn (gen_xorv32qi3 (t1, gen_lowpart (V32QImode, t5), vt2));
21586 /* Clear bits other than MSB. */
21587 emit_insn (gen_andv32qi3 (t1, t1, vt));
21588 /* Or in the lower bits from mask into t3. */
21589 emit_insn (gen_iorv32qi3 (t3, t1, t2));
21590 /* And invert MSB bits in t1, so MSB is set for elements from the same
21591 lane. */
21592 emit_insn (gen_xorv32qi3 (t1, t1, vt));
21593 /* Swap 128-bit lanes in t3. */
21594 t6 = gen_reg_rtx (V4DImode);
21595 emit_insn (gen_avx2_permv4di_1 (t6, gen_lowpart (V4DImode, t3),
21596 const2_rtx, GEN_INT (3),
21597 const0_rtx, const1_rtx));
21598 /* And or in the lower bits from mask into t1. */
21599 emit_insn (gen_iorv32qi3 (t1, t1, t2));
21600 if (one_operand_shuffle)
21601 {
21602 /* Each of these shuffles will put 0s in places where an
21603 element from the other 128-bit lane is needed; otherwise
21604 they will shuffle in the requested value. */
21605 emit_insn (gen_avx2_pshufbv32qi3 (t3, op0,
21606 gen_lowpart (V32QImode, t6)));
21607 emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
21608 /* For t3 the 128-bit lanes are swapped again. */
21609 t7 = gen_reg_rtx (V4DImode);
21610 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t3),
21611 const2_rtx, GEN_INT (3),
21612 const0_rtx, const1_rtx));
21613 /* And oring both together leads to the result. */
21614 emit_insn (gen_iorv32qi3 (target, t1,
21615 gen_lowpart (V32QImode, t7)));
21616 if (target != operands[0])
21617 emit_move_insn (operands[0],
21618 gen_lowpart (GET_MODE (operands[0]), target));
21619 return;
21620 }
21621
21622 t4 = gen_reg_rtx (V32QImode);
21623 /* Similar to the above one_operand_shuffle code, just
21624 repeated twice for each operand. The merge_two: code
21625 will merge the two results together. */
21626 emit_insn (gen_avx2_pshufbv32qi3 (t4, op0,
21627 gen_lowpart (V32QImode, t6)));
21628 emit_insn (gen_avx2_pshufbv32qi3 (t3, op1,
21629 gen_lowpart (V32QImode, t6)));
21630 emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
21631 emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
21632 t7 = gen_reg_rtx (V4DImode);
21633 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t4),
21634 const2_rtx, GEN_INT (3),
21635 const0_rtx, const1_rtx));
21636 t8 = gen_reg_rtx (V4DImode);
21637 emit_insn (gen_avx2_permv4di_1 (t8, gen_lowpart (V4DImode, t3),
21638 const2_rtx, GEN_INT (3),
21639 const0_rtx, const1_rtx));
21640 emit_insn (gen_iorv32qi3 (t4, t2, gen_lowpart (V32QImode, t7)));
21641 emit_insn (gen_iorv32qi3 (t3, t1, gen_lowpart (V32QImode, t8)));
21642 t1 = t4;
21643 t2 = t3;
21644 goto merge_two;
21645
21646 default:
21647 gcc_assert (GET_MODE_SIZE (mode) <= 16);
21648 break;
21649 }
21650 }
21651
21652 if (TARGET_XOP)
21653 {
21654 /* The XOP VPPERM insn supports three inputs. By ignoring the
21655 one_operand_shuffle special case, we avoid creating another
21656 set of constant vectors in memory. */
21657 one_operand_shuffle = false;
21658
21659 /* mask = mask & {2*w-1, ...} */
21660 vt = GEN_INT (2*w - 1);
21661 }
21662 else
21663 {
21664 /* mask = mask & {w-1, ...} */
21665 vt = GEN_INT (w - 1);
21666 }
21667
21668 for (i = 0; i < w; i++)
21669 vec[i] = vt;
21670 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21671 mask = expand_simple_binop (maskmode, AND, mask, vt,
21672 NULL_RTX, 0, OPTAB_DIRECT);
21673
21674 /* For non-QImode operations, convert the word permutation control
21675 into a byte permutation control. */
21676 if (mode != V16QImode)
21677 {
21678 mask = expand_simple_binop (maskmode, ASHIFT, mask,
21679 GEN_INT (exact_log2 (e)),
21680 NULL_RTX, 0, OPTAB_DIRECT);
21681
21682 /* Convert mask to vector of chars. */
21683 mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));
21684
21685 /* Replicate each of the input bytes into byte positions:
21686 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
21687 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
21688 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
21689 for (i = 0; i < 16; ++i)
21690 vec[i] = GEN_INT (i/e * e);
21691 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
21692 vt = validize_mem (force_const_mem (V16QImode, vt));
21693 if (TARGET_XOP)
21694 emit_insn (gen_xop_pperm (mask, mask, mask, vt));
21695 else
21696 emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));
21697
21698 /* Convert it into the byte positions by doing
21699 mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...} */
21700 for (i = 0; i < 16; ++i)
21701 vec[i] = GEN_INT (i % e);
21702 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
21703 vt = validize_mem (force_const_mem (V16QImode, vt));
21704 emit_insn (gen_addv16qi3 (mask, mask, vt));
21705 }
21706
21707 /* The actual shuffle operations all operate on V16QImode. */
21708 op0 = gen_lowpart (V16QImode, op0);
21709 op1 = gen_lowpart (V16QImode, op1);
21710
21711 if (TARGET_XOP)
21712 {
21713 if (GET_MODE (target) != V16QImode)
21714 target = gen_reg_rtx (V16QImode);
21715 emit_insn (gen_xop_pperm (target, op0, op1, mask));
21716 if (target != operands[0])
21717 emit_move_insn (operands[0],
21718 gen_lowpart (GET_MODE (operands[0]), target));
21719 }
21720 else if (one_operand_shuffle)
21721 {
21722 if (GET_MODE (target) != V16QImode)
21723 target = gen_reg_rtx (V16QImode);
21724 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
21725 if (target != operands[0])
21726 emit_move_insn (operands[0],
21727 gen_lowpart (GET_MODE (operands[0]), target));
21728 }
21729 else
21730 {
21731 rtx xops[6];
21732 bool ok;
21733
21734 /* Shuffle the two input vectors independently. */
21735 t1 = gen_reg_rtx (V16QImode);
21736 t2 = gen_reg_rtx (V16QImode);
21737 emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
21738 emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));
21739
21740 merge_two:
21741 /* Then merge them together. The key is whether any given control
21742 element contained a bit set that indicates the second word. */
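/* Roughly: after the AND below each control element is either W (its
   "use the second operand" bit was set) or zero, and the vector
   conditional then picks, per element, the shuffle of op1 (t2) where
   that bit was set and the shuffle of op0 (t1) elsewhere.  */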
21743 mask = operands[3];
21744 vt = GEN_INT (w);
21745 if (maskmode == V2DImode && !TARGET_SSE4_1)
21746 {
21747 /* Without SSE4.1, we don't have V2DImode EQ. Perform one
21748 more shuffle to convert the V2DI input mask into a V4SI
21749 input mask. At that point the masking done by expand_int_vcond
21750 will work as desired. */
21751 rtx t3 = gen_reg_rtx (V4SImode);
21752 emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
21753 const0_rtx, const0_rtx,
21754 const2_rtx, const2_rtx));
21755 mask = t3;
21756 maskmode = V4SImode;
21757 e = w = 4;
21758 }
21759
21760 for (i = 0; i < w; i++)
21761 vec[i] = vt;
21762 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21763 vt = force_reg (maskmode, vt);
21764 mask = expand_simple_binop (maskmode, AND, mask, vt,
21765 NULL_RTX, 0, OPTAB_DIRECT);
21766
21767 if (GET_MODE (target) != mode)
21768 target = gen_reg_rtx (mode);
21769 xops[0] = target;
21770 xops[1] = gen_lowpart (mode, t2);
21771 xops[2] = gen_lowpart (mode, t1);
21772 xops[3] = gen_rtx_EQ (maskmode, mask, vt);
21773 xops[4] = mask;
21774 xops[5] = vt;
21775 ok = ix86_expand_int_vcond (xops);
21776 gcc_assert (ok);
21777 if (target != operands[0])
21778 emit_move_insn (operands[0],
21779 gen_lowpart (GET_MODE (operands[0]), target));
21780 }
21781 }
21782
21783 /* Unpack SRC into the next wider integer vector type, storing it in
21784 DEST. UNSIGNED_P is true if we should do zero extension, else sign
21785 extension. HIGH_P is true if we want the N/2 high elements, else the low elements. */
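/* For example, unpacking the low half of a V8HImode source with UNSIGNED_P
   set emits a single pmovzxwd on SSE4.1 targets, while the pre-SSE4.1 path
   below interleaves the source with a zero (or sign-mask) vector using
   punpcklwd instead.  */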
21786
21787 void
21788 ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
21789 {
21790 enum machine_mode imode = GET_MODE (src);
21791 rtx tmp;
21792
21793 if (TARGET_SSE4_1)
21794 {
21795 rtx (*unpack)(rtx, rtx);
21796 rtx (*extract)(rtx, rtx) = NULL;
21797 enum machine_mode halfmode = BLKmode;
21798
21799 switch (imode)
21800 {
21801 case V32QImode:
21802 if (unsigned_p)
21803 unpack = gen_avx2_zero_extendv16qiv16hi2;
21804 else
21805 unpack = gen_avx2_sign_extendv16qiv16hi2;
21806 halfmode = V16QImode;
21807 extract
21808 = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
21809 break;
21810 case V32HImode:
21811 if (unsigned_p)
21812 unpack = gen_avx512f_zero_extendv16hiv16si2;
21813 else
21814 unpack = gen_avx512f_sign_extendv16hiv16si2;
21815 halfmode = V16HImode;
21816 extract
21817 = high_p ? gen_vec_extract_hi_v32hi : gen_vec_extract_lo_v32hi;
21818 break;
21819 case V16HImode:
21820 if (unsigned_p)
21821 unpack = gen_avx2_zero_extendv8hiv8si2;
21822 else
21823 unpack = gen_avx2_sign_extendv8hiv8si2;
21824 halfmode = V8HImode;
21825 extract
21826 = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
21827 break;
21828 case V16SImode:
21829 if (unsigned_p)
21830 unpack = gen_avx512f_zero_extendv8siv8di2;
21831 else
21832 unpack = gen_avx512f_sign_extendv8siv8di2;
21833 halfmode = V8SImode;
21834 extract
21835 = high_p ? gen_vec_extract_hi_v16si : gen_vec_extract_lo_v16si;
21836 break;
21837 case V8SImode:
21838 if (unsigned_p)
21839 unpack = gen_avx2_zero_extendv4siv4di2;
21840 else
21841 unpack = gen_avx2_sign_extendv4siv4di2;
21842 halfmode = V4SImode;
21843 extract
21844 = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
21845 break;
21846 case V16QImode:
21847 if (unsigned_p)
21848 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
21849 else
21850 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
21851 break;
21852 case V8HImode:
21853 if (unsigned_p)
21854 unpack = gen_sse4_1_zero_extendv4hiv4si2;
21855 else
21856 unpack = gen_sse4_1_sign_extendv4hiv4si2;
21857 break;
21858 case V4SImode:
21859 if (unsigned_p)
21860 unpack = gen_sse4_1_zero_extendv2siv2di2;
21861 else
21862 unpack = gen_sse4_1_sign_extendv2siv2di2;
21863 break;
21864 default:
21865 gcc_unreachable ();
21866 }
21867
21868 if (GET_MODE_SIZE (imode) >= 32)
21869 {
21870 tmp = gen_reg_rtx (halfmode);
21871 emit_insn (extract (tmp, src));
21872 }
21873 else if (high_p)
21874 {
21875 /* Shift higher 8 bytes to lower 8 bytes. */
21876 tmp = gen_reg_rtx (V1TImode);
21877 emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, src),
21878 GEN_INT (64)));
21879 tmp = gen_lowpart (imode, tmp);
21880 }
21881 else
21882 tmp = src;
21883
21884 emit_insn (unpack (dest, tmp));
21885 }
21886 else
21887 {
21888 rtx (*unpack)(rtx, rtx, rtx);
21889
21890 switch (imode)
21891 {
21892 case V16QImode:
21893 if (high_p)
21894 unpack = gen_vec_interleave_highv16qi;
21895 else
21896 unpack = gen_vec_interleave_lowv16qi;
21897 break;
21898 case V8HImode:
21899 if (high_p)
21900 unpack = gen_vec_interleave_highv8hi;
21901 else
21902 unpack = gen_vec_interleave_lowv8hi;
21903 break;
21904 case V4SImode:
21905 if (high_p)
21906 unpack = gen_vec_interleave_highv4si;
21907 else
21908 unpack = gen_vec_interleave_lowv4si;
21909 break;
21910 default:
21911 gcc_unreachable ();
21912 }
21913
21914 if (unsigned_p)
21915 tmp = force_reg (imode, CONST0_RTX (imode));
21916 else
21917 tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
21918 src, pc_rtx, pc_rtx);
21919
21920 rtx tmp2 = gen_reg_rtx (imode);
21921 emit_insn (unpack (tmp2, src, tmp));
21922 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), tmp2));
21923 }
21924 }
21925
21926 /* Expand conditional increment or decrement using adc/sbb instructions.
21927 The default case using setcc followed by the conditional move can be
21928 done by generic code. */
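/* As a rough illustration, a conditional increment such as
     x += (a < b)   with an unsigned comparison
   can be emitted as a compare followed by an add-with-carry of zero,
   e.g. "cmp b, a; adc $0, x" in AT&T syntax, letting the carry flag
   produced by the compare feed the addition directly.  */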
21929 bool
21930 ix86_expand_int_addcc (rtx operands[])
21931 {
21932 enum rtx_code code = GET_CODE (operands[1]);
21933 rtx flags;
21934 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
21935 rtx compare_op;
21936 rtx val = const0_rtx;
21937 bool fpcmp = false;
21938 enum machine_mode mode;
21939 rtx op0 = XEXP (operands[1], 0);
21940 rtx op1 = XEXP (operands[1], 1);
21941
21942 if (operands[3] != const1_rtx
21943 && operands[3] != constm1_rtx)
21944 return false;
21945 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
21946 return false;
21947 code = GET_CODE (compare_op);
21948
21949 flags = XEXP (compare_op, 0);
21950
21951 if (GET_MODE (flags) == CCFPmode
21952 || GET_MODE (flags) == CCFPUmode)
21953 {
21954 fpcmp = true;
21955 code = ix86_fp_compare_code_to_integer (code);
21956 }
21957
21958 if (code != LTU)
21959 {
21960 val = constm1_rtx;
21961 if (fpcmp)
21962 PUT_CODE (compare_op,
21963 reverse_condition_maybe_unordered
21964 (GET_CODE (compare_op)));
21965 else
21966 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
21967 }
21968
21969 mode = GET_MODE (operands[0]);
21970
21971 /* Construct either adc or sbb insn. */
21972 if ((code == LTU) == (operands[3] == constm1_rtx))
21973 {
21974 switch (mode)
21975 {
21976 case QImode:
21977 insn = gen_subqi3_carry;
21978 break;
21979 case HImode:
21980 insn = gen_subhi3_carry;
21981 break;
21982 case SImode:
21983 insn = gen_subsi3_carry;
21984 break;
21985 case DImode:
21986 insn = gen_subdi3_carry;
21987 break;
21988 default:
21989 gcc_unreachable ();
21990 }
21991 }
21992 else
21993 {
21994 switch (mode)
21995 {
21996 case QImode:
21997 insn = gen_addqi3_carry;
21998 break;
21999 case HImode:
22000 insn = gen_addhi3_carry;
22001 break;
22002 case SImode:
22003 insn = gen_addsi3_carry;
22004 break;
22005 case DImode:
22006 insn = gen_adddi3_carry;
22007 break;
22008 default:
22009 gcc_unreachable ();
22010 }
22011 }
22012 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
22013
22014 return true;
22015 }
22016
22017
22018 /* Split operands 0 and 1 into half-mode parts. Similar to split_double_mode,
22019 but works for floating point parameters and nonoffsettable memories.
22020 For pushes, it returns just stack offsets; the values will be saved
22021 in the right order. At most four parts are generated. */
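/* For example, on a 32-bit target a DFmode operand splits into two SImode
   parts and an XFmode operand into three, while on a 64-bit target an
   XFmode operand becomes a DImode part plus an SImode part (illustrative
   summary of the cases handled below).  */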
22022
22023 static int
22024 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
22025 {
22026 int size;
22027
22028 if (!TARGET_64BIT)
22029 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
22030 else
22031 size = (GET_MODE_SIZE (mode) + 4) / 8;
22032
22033 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
22034 gcc_assert (size >= 2 && size <= 4);
22035
22036 /* Optimize constant pool reference to immediates. This is used by fp
22037 moves, that force all constants to memory to allow combining. */
22038 if (MEM_P (operand) && MEM_READONLY_P (operand))
22039 {
22040 rtx tmp = maybe_get_pool_constant (operand);
22041 if (tmp)
22042 operand = tmp;
22043 }
22044
22045 if (MEM_P (operand) && !offsettable_memref_p (operand))
22046 {
22047 /* The only non-offsettable memories we handle are pushes. */
22048 int ok = push_operand (operand, VOIDmode);
22049
22050 gcc_assert (ok);
22051
22052 operand = copy_rtx (operand);
22053 PUT_MODE (operand, word_mode);
22054 parts[0] = parts[1] = parts[2] = parts[3] = operand;
22055 return size;
22056 }
22057
22058 if (GET_CODE (operand) == CONST_VECTOR)
22059 {
22060 enum machine_mode imode = int_mode_for_mode (mode);
22061 /* Caution: if we looked through a constant pool memory above,
22062 the operand may actually have a different mode now. That's
22063 ok, since we want to pun this all the way back to an integer. */
22064 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
22065 gcc_assert (operand != NULL);
22066 mode = imode;
22067 }
22068
22069 if (!TARGET_64BIT)
22070 {
22071 if (mode == DImode)
22072 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22073 else
22074 {
22075 int i;
22076
22077 if (REG_P (operand))
22078 {
22079 gcc_assert (reload_completed);
22080 for (i = 0; i < size; i++)
22081 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
22082 }
22083 else if (offsettable_memref_p (operand))
22084 {
22085 operand = adjust_address (operand, SImode, 0);
22086 parts[0] = operand;
22087 for (i = 1; i < size; i++)
22088 parts[i] = adjust_address (operand, SImode, 4 * i);
22089 }
22090 else if (GET_CODE (operand) == CONST_DOUBLE)
22091 {
22092 REAL_VALUE_TYPE r;
22093 long l[4];
22094
22095 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22096 switch (mode)
22097 {
22098 case TFmode:
22099 real_to_target (l, &r, mode);
22100 parts[3] = gen_int_mode (l[3], SImode);
22101 parts[2] = gen_int_mode (l[2], SImode);
22102 break;
22103 case XFmode:
22104 /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since
22105 long double may not be 80-bit. */
22106 real_to_target (l, &r, mode);
22107 parts[2] = gen_int_mode (l[2], SImode);
22108 break;
22109 case DFmode:
22110 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
22111 break;
22112 default:
22113 gcc_unreachable ();
22114 }
22115 parts[1] = gen_int_mode (l[1], SImode);
22116 parts[0] = gen_int_mode (l[0], SImode);
22117 }
22118 else
22119 gcc_unreachable ();
22120 }
22121 }
22122 else
22123 {
22124 if (mode == TImode)
22125 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22126 if (mode == XFmode || mode == TFmode)
22127 {
22128 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
22129 if (REG_P (operand))
22130 {
22131 gcc_assert (reload_completed);
22132 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
22133 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
22134 }
22135 else if (offsettable_memref_p (operand))
22136 {
22137 operand = adjust_address (operand, DImode, 0);
22138 parts[0] = operand;
22139 parts[1] = adjust_address (operand, upper_mode, 8);
22140 }
22141 else if (GET_CODE (operand) == CONST_DOUBLE)
22142 {
22143 REAL_VALUE_TYPE r;
22144 long l[4];
22145
22146 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22147 real_to_target (l, &r, mode);
22148
22149 /* Do not use shift by 32 to avoid warning on 32bit systems. */
22150 if (HOST_BITS_PER_WIDE_INT >= 64)
22151 parts[0]
22152 = gen_int_mode
22153 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
22154 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
22155 DImode);
22156 else
22157 parts[0] = immed_double_const (l[0], l[1], DImode);
22158
22159 if (upper_mode == SImode)
22160 parts[1] = gen_int_mode (l[2], SImode);
22161 else if (HOST_BITS_PER_WIDE_INT >= 64)
22162 parts[1]
22163 = gen_int_mode
22164 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
22165 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
22166 DImode);
22167 else
22168 parts[1] = immed_double_const (l[2], l[3], DImode);
22169 }
22170 else
22171 gcc_unreachable ();
22172 }
22173 }
22174
22175 return size;
22176 }
22177
22178 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
22179 Return false when normal moves are needed; true when all required
22180 insns have been emitted. Operands 2-4 contain the input values
22181 in the correct order; operands 5-7 contain the output values. */
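/* For instance, a 32-bit DImode move becomes two SImode moves; the code
   below additionally orders the part moves so that a destination part
   never overwrites a register still needed to address or supply a later
   source part (rough summary).  */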
22182
22183 void
22184 ix86_split_long_move (rtx operands[])
22185 {
22186 rtx part[2][4];
22187 int nparts, i, j;
22188 int push = 0;
22189 int collisions = 0;
22190 enum machine_mode mode = GET_MODE (operands[0]);
22191 bool collisionparts[4];
22192
22193 /* The DFmode expanders may ask us to move double.
22194 For 64bit target this is a single move. By hiding the fact
22195 here we simplify i386.md splitters. */
22196 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
22197 {
22198 /* Optimize constant pool reference to immediates. This is used by
22199 fp moves, that force all constants to memory to allow combining. */
22200
22201 if (MEM_P (operands[1])
22202 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
22203 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
22204 operands[1] = get_pool_constant (XEXP (operands[1], 0));
22205 if (push_operand (operands[0], VOIDmode))
22206 {
22207 operands[0] = copy_rtx (operands[0]);
22208 PUT_MODE (operands[0], word_mode);
22209 }
22210 else
22211 operands[0] = gen_lowpart (DImode, operands[0]);
22212 operands[1] = gen_lowpart (DImode, operands[1]);
22213 emit_move_insn (operands[0], operands[1]);
22214 return;
22215 }
22216
22217 /* The only non-offsettable memory we handle is push. */
22218 if (push_operand (operands[0], VOIDmode))
22219 push = 1;
22220 else
22221 gcc_assert (!MEM_P (operands[0])
22222 || offsettable_memref_p (operands[0]));
22223
22224 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
22225 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
22226
22227 /* When emitting push, take care for source operands on the stack. */
22228 if (push && MEM_P (operands[1])
22229 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
22230 {
22231 rtx src_base = XEXP (part[1][nparts - 1], 0);
22232
22233 /* Compensate for the stack decrement by 4. */
22234 if (!TARGET_64BIT && nparts == 3
22235 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
22236 src_base = plus_constant (Pmode, src_base, 4);
22237
22238 /* src_base refers to the stack pointer and is
22239 automatically decreased by emitted push. */
22240 for (i = 0; i < nparts; i++)
22241 part[1][i] = change_address (part[1][i],
22242 GET_MODE (part[1][i]), src_base);
22243 }
22244
22245 /* We need to do copy in the right order in case an address register
22246 of the source overlaps the destination. */
22247 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
22248 {
22249 rtx tmp;
22250
22251 for (i = 0; i < nparts; i++)
22252 {
22253 collisionparts[i]
22254 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
22255 if (collisionparts[i])
22256 collisions++;
22257 }
22258
22259 /* Collision in the middle part can be handled by reordering. */
22260 if (collisions == 1 && nparts == 3 && collisionparts [1])
22261 {
22262 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
22263 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
22264 }
22265 else if (collisions == 1
22266 && nparts == 4
22267 && (collisionparts [1] || collisionparts [2]))
22268 {
22269 if (collisionparts [1])
22270 {
22271 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
22272 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
22273 }
22274 else
22275 {
22276 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
22277 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
22278 }
22279 }
22280
22281 /* If there are more collisions, we can't handle it by reordering.
22282 Do an lea to the last part and use only one colliding move. */
22283 else if (collisions > 1)
22284 {
22285 rtx base;
22286
22287 collisions = 1;
22288
22289 base = part[0][nparts - 1];
22290
22291 /* Handle the case when the last part isn't valid for lea.
22292 Happens in 64-bit mode storing the 12-byte XFmode. */
22293 if (GET_MODE (base) != Pmode)
22294 base = gen_rtx_REG (Pmode, REGNO (base));
22295
22296 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
22297 part[1][0] = replace_equiv_address (part[1][0], base);
22298 for (i = 1; i < nparts; i++)
22299 {
22300 tmp = plus_constant (Pmode, base, UNITS_PER_WORD * i);
22301 part[1][i] = replace_equiv_address (part[1][i], tmp);
22302 }
22303 }
22304 }
22305
22306 if (push)
22307 {
22308 if (!TARGET_64BIT)
22309 {
22310 if (nparts == 3)
22311 {
22312 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
22313 emit_insn (ix86_gen_add3 (stack_pointer_rtx,
22314 stack_pointer_rtx, GEN_INT (-4)));
22315 emit_move_insn (part[0][2], part[1][2]);
22316 }
22317 else if (nparts == 4)
22318 {
22319 emit_move_insn (part[0][3], part[1][3]);
22320 emit_move_insn (part[0][2], part[1][2]);
22321 }
22322 }
22323 else
22324 {
22325 /* In 64bit mode we don't have 32bit push available. In case this is a
22326 register, it is OK - we will just use the larger counterpart. We also
22327 retype memory - this comes from an attempt to avoid the REX prefix on
22328 moving the second half of a TFmode value. */
22329 if (GET_MODE (part[1][1]) == SImode)
22330 {
22331 switch (GET_CODE (part[1][1]))
22332 {
22333 case MEM:
22334 part[1][1] = adjust_address (part[1][1], DImode, 0);
22335 break;
22336
22337 case REG:
22338 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
22339 break;
22340
22341 default:
22342 gcc_unreachable ();
22343 }
22344
22345 if (GET_MODE (part[1][0]) == SImode)
22346 part[1][0] = part[1][1];
22347 }
22348 }
22349 emit_move_insn (part[0][1], part[1][1]);
22350 emit_move_insn (part[0][0], part[1][0]);
22351 return;
22352 }
22353
22354 /* Choose correct order to not overwrite the source before it is copied. */
22355 if ((REG_P (part[0][0])
22356 && REG_P (part[1][1])
22357 && (REGNO (part[0][0]) == REGNO (part[1][1])
22358 || (nparts == 3
22359 && REGNO (part[0][0]) == REGNO (part[1][2]))
22360 || (nparts == 4
22361 && REGNO (part[0][0]) == REGNO (part[1][3]))))
22362 || (collisions > 0
22363 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
22364 {
22365 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
22366 {
22367 operands[2 + i] = part[0][j];
22368 operands[6 + i] = part[1][j];
22369 }
22370 }
22371 else
22372 {
22373 for (i = 0; i < nparts; i++)
22374 {
22375 operands[2 + i] = part[0][i];
22376 operands[6 + i] = part[1][i];
22377 }
22378 }
22379
22380 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
22381 if (optimize_insn_for_size_p ())
22382 {
22383 for (j = 0; j < nparts - 1; j++)
22384 if (CONST_INT_P (operands[6 + j])
22385 && operands[6 + j] != const0_rtx
22386 && REG_P (operands[2 + j]))
22387 for (i = j; i < nparts - 1; i++)
22388 if (CONST_INT_P (operands[7 + i])
22389 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
22390 operands[7 + i] = operands[2 + j];
22391 }
22392
22393 for (i = 0; i < nparts; i++)
22394 emit_move_insn (operands[2 + i], operands[6 + i]);
22395
22396 return;
22397 }
22398
22399 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
22400 left shift by a constant, either using a single shift or
22401 a sequence of add instructions. */
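/* E.g. a left shift by 2 may be emitted as two self-additions
   ("add reg, reg" twice) when two adds are cheaper than one
   shift-by-constant on the current tuning (illustrative).  */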
22402
22403 static void
22404 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
22405 {
22406 rtx (*insn)(rtx, rtx, rtx);
22407
22408 if (count == 1
22409 || (count * ix86_cost->add <= ix86_cost->shift_const
22410 && !optimize_insn_for_size_p ()))
22411 {
22412 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
22413 while (count-- > 0)
22414 emit_insn (insn (operand, operand, operand));
22415 }
22416 else
22417 {
22418 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
22419 emit_insn (insn (operand, operand, GEN_INT (count)));
22420 }
22421 }
22422
22423 void
22424 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
22425 {
22426 rtx (*gen_ashl3)(rtx, rtx, rtx);
22427 rtx (*gen_shld)(rtx, rtx, rtx);
22428 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22429
22430 rtx low[2], high[2];
22431 int count;
22432
22433 if (CONST_INT_P (operands[2]))
22434 {
22435 split_double_mode (mode, operands, 2, low, high);
22436 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22437
22438 if (count >= half_width)
22439 {
22440 emit_move_insn (high[0], low[1]);
22441 emit_move_insn (low[0], const0_rtx);
22442
22443 if (count > half_width)
22444 ix86_expand_ashl_const (high[0], count - half_width, mode);
22445 }
22446 else
22447 {
22448 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
22449
22450 if (!rtx_equal_p (operands[0], operands[1]))
22451 emit_move_insn (operands[0], operands[1]);
22452
22453 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
22454 ix86_expand_ashl_const (low[0], count, mode);
22455 }
22456 return;
22457 }
22458
22459 split_double_mode (mode, operands, 1, low, high);
22460
22461 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
22462
22463 if (operands[1] == const1_rtx)
22464 {
22465 /* Assuming we've chosen QImode-capable registers, then 1 << N
22466 can be done with two 32/64-bit shifts, no branches, no cmoves. */
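/* Rough sketch for DImode: clear both halves, test bit 5 of the shift
   count, set the low byte of the low half when that bit is clear and of
   the high half when it is set, then shift both halves by the count;
   the hardware masking of the 32-bit shift count to 5 bits makes this
   compute 1 << N without branches.  */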
22467 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
22468 {
22469 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
22470
22471 ix86_expand_clear (low[0]);
22472 ix86_expand_clear (high[0]);
22473 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
22474
22475 d = gen_lowpart (QImode, low[0]);
22476 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
22477 s = gen_rtx_EQ (QImode, flags, const0_rtx);
22478 emit_insn (gen_rtx_SET (VOIDmode, d, s));
22479
22480 d = gen_lowpart (QImode, high[0]);
22481 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
22482 s = gen_rtx_NE (QImode, flags, const0_rtx);
22483 emit_insn (gen_rtx_SET (VOIDmode, d, s));
22484 }
22485
22486 /* Otherwise, we can get the same results by manually performing
22487 a bit extract operation on bit 5/6, and then performing the two
22488 shifts. The two methods of getting 0/1 into low/high are exactly
22489 the same size. Avoiding the shift in the bit extract case helps
22490 pentium4 a bit; no one else seems to care much either way. */
22491 else
22492 {
22493 enum machine_mode half_mode;
22494 rtx (*gen_lshr3)(rtx, rtx, rtx);
22495 rtx (*gen_and3)(rtx, rtx, rtx);
22496 rtx (*gen_xor3)(rtx, rtx, rtx);
22497 HOST_WIDE_INT bits;
22498 rtx x;
22499
22500 if (mode == DImode)
22501 {
22502 half_mode = SImode;
22503 gen_lshr3 = gen_lshrsi3;
22504 gen_and3 = gen_andsi3;
22505 gen_xor3 = gen_xorsi3;
22506 bits = 5;
22507 }
22508 else
22509 {
22510 half_mode = DImode;
22511 gen_lshr3 = gen_lshrdi3;
22512 gen_and3 = gen_anddi3;
22513 gen_xor3 = gen_xordi3;
22514 bits = 6;
22515 }
22516
22517 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
22518 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
22519 else
22520 x = gen_lowpart (half_mode, operands[2]);
22521 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
22522
22523 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
22524 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
22525 emit_move_insn (low[0], high[0]);
22526 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
22527 }
22528
22529 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
22530 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
22531 return;
22532 }
22533
22534 if (operands[1] == constm1_rtx)
22535 {
22536 /* For -1 << N, we can avoid the shld instruction, because we
22537 know that we're shifting 0...31/63 ones into a -1. */
22538 emit_move_insn (low[0], constm1_rtx);
22539 if (optimize_insn_for_size_p ())
22540 emit_move_insn (high[0], low[0]);
22541 else
22542 emit_move_insn (high[0], constm1_rtx);
22543 }
22544 else
22545 {
22546 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
22547
22548 if (!rtx_equal_p (operands[0], operands[1]))
22549 emit_move_insn (operands[0], operands[1]);
22550
22551 split_double_mode (mode, operands, 1, low, high);
22552 emit_insn (gen_shld (high[0], low[0], operands[2]));
22553 }
22554
22555 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
22556
22557 if (TARGET_CMOVE && scratch)
22558 {
22559 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
22560 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
22561
22562 ix86_expand_clear (scratch);
22563 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
22564 }
22565 else
22566 {
22567 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
22568 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
22569
22570 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
22571 }
22572 }
22573
22574 void
22575 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
22576 {
22577 rtx (*gen_ashr3)(rtx, rtx, rtx)
22578 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
22579 rtx (*gen_shrd)(rtx, rtx, rtx);
22580 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22581
22582 rtx low[2], high[2];
22583 int count;
22584
22585 if (CONST_INT_P (operands[2]))
22586 {
22587 split_double_mode (mode, operands, 2, low, high);
22588 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22589
22590 if (count == GET_MODE_BITSIZE (mode) - 1)
22591 {
22592 emit_move_insn (high[0], high[1]);
22593 emit_insn (gen_ashr3 (high[0], high[0],
22594 GEN_INT (half_width - 1)));
22595 emit_move_insn (low[0], high[0]);
22596
22597 }
22598 else if (count >= half_width)
22599 {
22600 emit_move_insn (low[0], high[1]);
22601 emit_move_insn (high[0], low[0]);
22602 emit_insn (gen_ashr3 (high[0], high[0],
22603 GEN_INT (half_width - 1)));
22604
22605 if (count > half_width)
22606 emit_insn (gen_ashr3 (low[0], low[0],
22607 GEN_INT (count - half_width)));
22608 }
22609 else
22610 {
22611 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22612
22613 if (!rtx_equal_p (operands[0], operands[1]))
22614 emit_move_insn (operands[0], operands[1]);
22615
22616 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
22617 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
22618 }
22619 }
22620 else
22621 {
22622 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22623
22624 if (!rtx_equal_p (operands[0], operands[1]))
22625 emit_move_insn (operands[0], operands[1]);
22626
22627 split_double_mode (mode, operands, 1, low, high);
22628
22629 emit_insn (gen_shrd (low[0], high[0], operands[2]));
22630 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
22631
22632 if (TARGET_CMOVE && scratch)
22633 {
22634 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
22635 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
22636
22637 emit_move_insn (scratch, high[0]);
22638 emit_insn (gen_ashr3 (scratch, scratch,
22639 GEN_INT (half_width - 1)));
22640 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
22641 scratch));
22642 }
22643 else
22644 {
22645 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
22646 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
22647
22648 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
22649 }
22650 }
22651 }
22652
22653 void
22654 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
22655 {
22656 rtx (*gen_lshr3)(rtx, rtx, rtx)
22657 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
22658 rtx (*gen_shrd)(rtx, rtx, rtx);
22659 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22660
22661 rtx low[2], high[2];
22662 int count;
22663
22664 if (CONST_INT_P (operands[2]))
22665 {
22666 split_double_mode (mode, operands, 2, low, high);
22667 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22668
22669 if (count >= half_width)
22670 {
22671 emit_move_insn (low[0], high[1]);
22672 ix86_expand_clear (high[0]);
22673
22674 if (count > half_width)
22675 emit_insn (gen_lshr3 (low[0], low[0],
22676 GEN_INT (count - half_width)));
22677 }
22678 else
22679 {
22680 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22681
22682 if (!rtx_equal_p (operands[0], operands[1]))
22683 emit_move_insn (operands[0], operands[1]);
22684
22685 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
22686 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
22687 }
22688 }
22689 else
22690 {
22691 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22692
22693 if (!rtx_equal_p (operands[0], operands[1]))
22694 emit_move_insn (operands[0], operands[1]);
22695
22696 split_double_mode (mode, operands, 1, low, high);
22697
22698 emit_insn (gen_shrd (low[0], high[0], operands[2]));
22699 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
22700
22701 if (TARGET_CMOVE && scratch)
22702 {
22703 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
22704 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
22705
22706 ix86_expand_clear (scratch);
22707 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
22708 scratch));
22709 }
22710 else
22711 {
22712 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
22713 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
22714
22715 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
22716 }
22717 }
22718 }
22719
22720 /* Predict just emitted jump instruction to be taken with probability PROB. */
22721 static void
22722 predict_jump (int prob)
22723 {
22724 rtx insn = get_last_insn ();
22725 gcc_assert (JUMP_P (insn));
22726 add_int_reg_note (insn, REG_BR_PROB, prob);
22727 }
22728
22729 /* Helper function for the string operations below. Test whether VARIABLE
22730 is aligned to VALUE bytes; if so, jump to the label. */
22731 static rtx
22732 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
22733 {
22734 rtx label = gen_label_rtx ();
22735 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
22736 if (GET_MODE (variable) == DImode)
22737 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
22738 else
22739 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
22740 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
22741 1, label);
22742 if (epilogue)
22743 predict_jump (REG_BR_PROB_BASE * 50 / 100);
22744 else
22745 predict_jump (REG_BR_PROB_BASE * 90 / 100);
22746 return label;
22747 }
22748
22749 /* Decrease COUNTREG by VALUE. */
22750 static void
22751 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
22752 {
22753 rtx (*gen_add)(rtx, rtx, rtx)
22754 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
22755
22756 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
22757 }
22758
22759 /* Zero extend possibly SImode EXP to Pmode register. */
22760 rtx
22761 ix86_zero_extend_to_Pmode (rtx exp)
22762 {
22763 return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
22764 }
22765
22766 /* Divide COUNTREG by SCALE. */
22767 static rtx
22768 scale_counter (rtx countreg, int scale)
22769 {
22770 rtx sc;
22771
22772 if (scale == 1)
22773 return countreg;
22774 if (CONST_INT_P (countreg))
22775 return GEN_INT (INTVAL (countreg) / scale);
22776 gcc_assert (REG_P (countreg));
22777
22778 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
22779 GEN_INT (exact_log2 (scale)),
22780 NULL, 1, OPTAB_DIRECT);
22781 return sc;
22782 }
22783
22784 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
22785 DImode for constant loop counts. */
22786
22787 static enum machine_mode
22788 counter_mode (rtx count_exp)
22789 {
22790 if (GET_MODE (count_exp) != VOIDmode)
22791 return GET_MODE (count_exp);
22792 if (!CONST_INT_P (count_exp))
22793 return Pmode;
22794 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
22795 return DImode;
22796 return SImode;
22797 }
22798
22799 /* Copy the address to a Pmode register. This is used for x32 to
22800 truncate DImode TLS address to a SImode register. */
22801
22802 static rtx
22803 ix86_copy_addr_to_reg (rtx addr)
22804 {
22805 if (GET_MODE (addr) == Pmode || GET_MODE (addr) == VOIDmode)
22806 return copy_addr_to_reg (addr);
22807 else
22808 {
22809 gcc_assert (GET_MODE (addr) == DImode && Pmode == SImode);
22810 return gen_rtx_SUBREG (SImode, copy_to_mode_reg (DImode, addr), 0);
22811 }
22812 }
22813
22814 /* When ISSETMEM is FALSE, output a simple loop to move memory pointed to by
22815 SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times, with overall
22816 size COUNT specified in bytes. When ISSETMEM is TRUE, output the equivalent
22817 loop to set memory by VALUE (supposed to be in MODE).
22818
22819 The size is rounded down to a whole number of chunks moved at once.
22820 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
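/* The emitted code is roughly the following (illustrative, !ISSETMEM case,
   with CHUNK standing for GET_MODE_SIZE (MODE) * UNROLL):

     size = count & -CHUNK;
     iter = 0;
   top:
     copy UNROLL pieces of MODE from SRCPTR + iter to DESTPTR + iter;
     iter += CHUNK;
     if (iter < size) goto top;
     DESTPTR += iter;  SRCPTR += iter;  */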
22821
22822
22823 static void
22824 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
22825 rtx destptr, rtx srcptr, rtx value,
22826 rtx count, enum machine_mode mode, int unroll,
22827 int expected_size, bool issetmem)
22828 {
22829 rtx out_label, top_label, iter, tmp;
22830 enum machine_mode iter_mode = counter_mode (count);
22831 int piece_size_n = GET_MODE_SIZE (mode) * unroll;
22832 rtx piece_size = GEN_INT (piece_size_n);
22833 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
22834 rtx size;
22835 int i;
22836
22837 top_label = gen_label_rtx ();
22838 out_label = gen_label_rtx ();
22839 iter = gen_reg_rtx (iter_mode);
22840
22841 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
22842 NULL, 1, OPTAB_DIRECT);
22843 /* Those two should combine. */
22844 if (piece_size == const1_rtx)
22845 {
22846 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
22847 true, out_label);
22848 predict_jump (REG_BR_PROB_BASE * 10 / 100);
22849 }
22850 emit_move_insn (iter, const0_rtx);
22851
22852 emit_label (top_label);
22853
22854 tmp = convert_modes (Pmode, iter_mode, iter, true);
22855
22856 /* This assert could be relaxed - in this case we'll need to compute
22857 the largest power of two contained in PIECE_SIZE_N and pass it to
22858 offset_address. */
22859 gcc_assert ((piece_size_n & (piece_size_n - 1)) == 0);
22860 destmem = offset_address (destmem, tmp, piece_size_n);
22861 destmem = adjust_address (destmem, mode, 0);
22862
22863 if (!issetmem)
22864 {
22865 srcmem = offset_address (srcmem, copy_rtx (tmp), piece_size_n);
22866 srcmem = adjust_address (srcmem, mode, 0);
22867
22868 /* When unrolling for chips that reorder memory reads and writes,
22869 we can save registers by using a single temporary.
22870 Also, using 4 temporaries is overkill in 32bit mode. */
22871 if (!TARGET_64BIT && 0)
22872 {
22873 for (i = 0; i < unroll; i++)
22874 {
22875 if (i)
22876 {
22877 destmem =
22878 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
22879 srcmem =
22880 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
22881 }
22882 emit_move_insn (destmem, srcmem);
22883 }
22884 }
22885 else
22886 {
22887 rtx tmpreg[4];
22888 gcc_assert (unroll <= 4);
22889 for (i = 0; i < unroll; i++)
22890 {
22891 tmpreg[i] = gen_reg_rtx (mode);
22892 if (i)
22893 {
22894 srcmem =
22895 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
22896 }
22897 emit_move_insn (tmpreg[i], srcmem);
22898 }
22899 for (i = 0; i < unroll; i++)
22900 {
22901 if (i)
22902 {
22903 destmem =
22904 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
22905 }
22906 emit_move_insn (destmem, tmpreg[i]);
22907 }
22908 }
22909 }
22910 else
22911 for (i = 0; i < unroll; i++)
22912 {
22913 if (i)
22914 destmem =
22915 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
22916 emit_move_insn (destmem, value);
22917 }
22918
22919 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
22920 true, OPTAB_LIB_WIDEN);
22921 if (tmp != iter)
22922 emit_move_insn (iter, tmp);
22923
22924 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
22925 true, top_label);
22926 if (expected_size != -1)
22927 {
22928 expected_size /= GET_MODE_SIZE (mode) * unroll;
22929 if (expected_size == 0)
22930 predict_jump (0);
22931 else if (expected_size > REG_BR_PROB_BASE)
22932 predict_jump (REG_BR_PROB_BASE - 1);
22933 else
22934 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
22935 }
22936 else
22937 predict_jump (REG_BR_PROB_BASE * 80 / 100);
22938 iter = ix86_zero_extend_to_Pmode (iter);
22939 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
22940 true, OPTAB_LIB_WIDEN);
22941 if (tmp != destptr)
22942 emit_move_insn (destptr, tmp);
22943 if (!issetmem)
22944 {
22945 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
22946 true, OPTAB_LIB_WIDEN);
22947 if (tmp != srcptr)
22948 emit_move_insn (srcptr, tmp);
22949 }
22950 emit_label (out_label);
22951 }
22952
22953 /* Output "rep; mov" or "rep; stos" instruction depending on ISSETMEM argument.
22954 When ISSETMEM is true, arguments SRCMEM and SRCPTR are ignored.
22955 When ISSETMEM is false, arguments VALUE and ORIG_VALUE are ignored.
22956 For the setmem case, VALUE is ORIG_VALUE promoted to a wider size.
22957 ORIG_VALUE is the original value passed to memset to fill the memory with.
22958 Other arguments have the same meaning as for the previous function. */
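/* For example, clearing a buffer whose length is a known multiple of 4
   bytes is emitted as "rep stosd" (SImode chunks) rather than "rep
   stosb", courtesy of the QImode -> SImode widening just below
   (illustrative).  */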
22959
22960 static void
22961 expand_set_or_movmem_via_rep (rtx destmem, rtx srcmem,
22962 rtx destptr, rtx srcptr, rtx value, rtx orig_value,
22963 rtx count,
22964 enum machine_mode mode, bool issetmem)
22965 {
22966 rtx destexp;
22967 rtx srcexp;
22968 rtx countreg;
22969 HOST_WIDE_INT rounded_count;
22970
22971 /* If possible, it is shorter to use rep movs.
22972 TODO: Maybe it is better to move this logic to decide_alg. */
22973 if (mode == QImode && CONST_INT_P (count) && !(INTVAL (count) & 3)
22974 && (!issetmem || orig_value == const0_rtx))
22975 mode = SImode;
22976
22977 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
22978 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
22979
22980 countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
22981 GET_MODE_SIZE (mode)));
22982 if (mode != QImode)
22983 {
22984 destexp = gen_rtx_ASHIFT (Pmode, countreg,
22985 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
22986 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
22987 }
22988 else
22989 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
22990 if ((!issetmem || orig_value == const0_rtx) && CONST_INT_P (count))
22991 {
22992 rounded_count = (INTVAL (count)
22993 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
22994 destmem = shallow_copy_rtx (destmem);
22995 set_mem_size (destmem, rounded_count);
22996 }
22997 else if (MEM_SIZE_KNOWN_P (destmem))
22998 clear_mem_size (destmem);
22999
23000 if (issetmem)
23001 {
23002 value = force_reg (mode, gen_lowpart (mode, value));
23003 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
23004 }
23005 else
23006 {
23007 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
23008 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
23009 if (mode != QImode)
23010 {
23011 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
23012 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
23013 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
23014 }
23015 else
23016 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
23017 if (CONST_INT_P (count))
23018 {
23019 rounded_count = (INTVAL (count)
23020 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
23021 srcmem = shallow_copy_rtx (srcmem);
23022 set_mem_size (srcmem, rounded_count);
23023 }
23024 else
23025 {
23026 if (MEM_SIZE_KNOWN_P (srcmem))
23027 clear_mem_size (srcmem);
23028 }
23029 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
23030 destexp, srcexp));
23031 }
23032 }
23033
23034 /* This function emits moves to copy SIZE_TO_MOVE bytes from SRCMEM to
23035 DESTMEM.
23036 SRCMEM is passed by pointer so it can be updated on return.
23037 The return value is the updated DESTMEM. */
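/* Roughly: a 16-byte SIZE_TO_MOVE is done as one 16-byte (vector) move
   when such a move pattern exists, and otherwise falls back to several
   word-size moves, each bounced through a fresh temporary register.  */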
23038 static rtx
23039 emit_memmov (rtx destmem, rtx *srcmem, rtx destptr, rtx srcptr,
23040 HOST_WIDE_INT size_to_move)
23041 {
23042 rtx dst = destmem, src = *srcmem, adjust, tempreg;
23043 enum insn_code code;
23044 enum machine_mode move_mode;
23045 int piece_size, i;
23046
23047 /* Find the widest mode in which we could perform moves.
23048 Start with the biggest power of 2 not larger than SIZE_TO_MOVE and halve
23049 it until a move of such size is supported. */
23050 piece_size = 1 << floor_log2 (size_to_move);
23051 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23052 code = optab_handler (mov_optab, move_mode);
23053 while (code == CODE_FOR_nothing && piece_size > 1)
23054 {
23055 piece_size >>= 1;
23056 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23057 code = optab_handler (mov_optab, move_mode);
23058 }
23059
23060 /* Find the corresponding vector mode with the same size as MOVE_MODE.
23061 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
23062 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
23063 {
23064 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
23065 move_mode = mode_for_vector (word_mode, nunits);
23066 code = optab_handler (mov_optab, move_mode);
23067 if (code == CODE_FOR_nothing)
23068 {
23069 move_mode = word_mode;
23070 piece_size = GET_MODE_SIZE (move_mode);
23071 code = optab_handler (mov_optab, move_mode);
23072 }
23073 }
23074 gcc_assert (code != CODE_FOR_nothing);
23075
23076 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23077 src = adjust_automodify_address_nv (src, move_mode, srcptr, 0);
23078
23079 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZE moves. */
23080 gcc_assert (size_to_move % piece_size == 0);
23081 adjust = GEN_INT (piece_size);
23082 for (i = 0; i < size_to_move; i += piece_size)
23083 {
23084 /* We move from memory to memory, so we'll need to do it via
23085 a temporary register. */
23086 tempreg = gen_reg_rtx (move_mode);
23087 emit_insn (GEN_FCN (code) (tempreg, src));
23088 emit_insn (GEN_FCN (code) (dst, tempreg));
23089
23090 emit_move_insn (destptr,
23091 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23092 emit_move_insn (srcptr,
23093 gen_rtx_PLUS (Pmode, copy_rtx (srcptr), adjust));
23094
23095 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23096 piece_size);
23097 src = adjust_automodify_address_nv (src, move_mode, srcptr,
23098 piece_size);
23099 }
23100
23101 /* Update DST and SRC rtx. */
23102 *srcmem = src;
23103 return dst;
23104 }
23105
23106 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
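/* For instance, with MAX_SIZE of 16 and 13 leftover bytes the constant
   path below emits an 8-byte, a 4-byte and a 1-byte move (13 = 8 + 4 + 1),
   walking the bits of the remainder from the top down.  */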
23107 static void
23108 expand_movmem_epilogue (rtx destmem, rtx srcmem,
23109 rtx destptr, rtx srcptr, rtx count, int max_size)
23110 {
23111 rtx src, dest;
23112 if (CONST_INT_P (count))
23113 {
23114 HOST_WIDE_INT countval = INTVAL (count);
23115 HOST_WIDE_INT epilogue_size = countval % max_size;
23116 int i;
23117
23118 /* For now MAX_SIZE should be a power of 2. This assert could be
23119 relaxed, but it'll require a bit more complicated epilogue
23120 expanding. */
23121 gcc_assert ((max_size & (max_size - 1)) == 0);
23122 for (i = max_size; i >= 1; i >>= 1)
23123 {
23124 if (epilogue_size & i)
23125 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
23126 }
23127 return;
23128 }
23129 if (max_size > 8)
23130 {
23131 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
23132 count, 1, OPTAB_DIRECT);
23133 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
23134 count, QImode, 1, 4, false);
23135 return;
23136 }
23137
23138 /* When there are stringops, we can cheaply increase dest and src pointers.
23139 Otherwise we save code size by maintaining offset (zero is readily
23140 available from preceding rep operation) and using x86 addressing modes.
23141 */
23142 if (TARGET_SINGLE_STRINGOP)
23143 {
23144 if (max_size > 4)
23145 {
23146 rtx label = ix86_expand_aligntest (count, 4, true);
23147 src = change_address (srcmem, SImode, srcptr);
23148 dest = change_address (destmem, SImode, destptr);
23149 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23150 emit_label (label);
23151 LABEL_NUSES (label) = 1;
23152 }
23153 if (max_size > 2)
23154 {
23155 rtx label = ix86_expand_aligntest (count, 2, true);
23156 src = change_address (srcmem, HImode, srcptr);
23157 dest = change_address (destmem, HImode, destptr);
23158 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23159 emit_label (label);
23160 LABEL_NUSES (label) = 1;
23161 }
23162 if (max_size > 1)
23163 {
23164 rtx label = ix86_expand_aligntest (count, 1, true);
23165 src = change_address (srcmem, QImode, srcptr);
23166 dest = change_address (destmem, QImode, destptr);
23167 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23168 emit_label (label);
23169 LABEL_NUSES (label) = 1;
23170 }
23171 }
23172 else
23173 {
23174 rtx offset = force_reg (Pmode, const0_rtx);
23175 rtx tmp;
23176
23177 if (max_size > 4)
23178 {
23179 rtx label = ix86_expand_aligntest (count, 4, true);
23180 src = change_address (srcmem, SImode, srcptr);
23181 dest = change_address (destmem, SImode, destptr);
23182 emit_move_insn (dest, src);
23183 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
23184 true, OPTAB_LIB_WIDEN);
23185 if (tmp != offset)
23186 emit_move_insn (offset, tmp);
23187 emit_label (label);
23188 LABEL_NUSES (label) = 1;
23189 }
23190 if (max_size > 2)
23191 {
23192 rtx label = ix86_expand_aligntest (count, 2, true);
23193 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23194 src = change_address (srcmem, HImode, tmp);
23195 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23196 dest = change_address (destmem, HImode, tmp);
23197 emit_move_insn (dest, src);
23198 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
23199 true, OPTAB_LIB_WIDEN);
23200 if (tmp != offset)
23201 emit_move_insn (offset, tmp);
23202 emit_label (label);
23203 LABEL_NUSES (label) = 1;
23204 }
23205 if (max_size > 1)
23206 {
23207 rtx label = ix86_expand_aligntest (count, 1, true);
23208 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23209 src = change_address (srcmem, QImode, tmp);
23210 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23211 dest = change_address (destmem, QImode, tmp);
23212 emit_move_insn (dest, src);
23213 emit_label (label);
23214 LABEL_NUSES (label) = 1;
23215 }
23216 }
23217 }
23218
23219 /* This function emits moves to fill SIZE_TO_MOVE bytes starting from DESTMEM
23220 with value PROMOTED_VAL.
23221 DESTPTR is advanced as the stores are emitted.
23222 The return value is the updated DESTMEM. */
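/* Roughly: with a word-size PROMOTED_VAL a 16-byte SIZE_TO_MOVE becomes
   two 8-byte strset stores on a 64-bit target, while a 16-byte vector
   PROMOTED_VAL yields a single vector store followed by a pointer
   adjustment (illustrative).  */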
23223 static rtx
23224 emit_memset (rtx destmem, rtx destptr, rtx promoted_val,
23225 HOST_WIDE_INT size_to_move)
23226 {
23227 rtx dst = destmem, adjust;
23228 enum insn_code code;
23229 enum machine_mode move_mode;
23230 int piece_size, i;
23231
23232 /* Find the widest mode in which we could perform moves.
23233 Start with the biggest power of 2 not larger than SIZE_TO_MOVE and halve
23234 it until a move of such size is supported. */
23235 move_mode = GET_MODE (promoted_val);
23236 if (move_mode == VOIDmode)
23237 move_mode = QImode;
23238 if (size_to_move < GET_MODE_SIZE (move_mode))
23239 {
23240 move_mode = mode_for_size (size_to_move * BITS_PER_UNIT, MODE_INT, 0);
23241 promoted_val = gen_lowpart (move_mode, promoted_val);
23242 }
23243 piece_size = GET_MODE_SIZE (move_mode);
23244 code = optab_handler (mov_optab, move_mode);
23245 gcc_assert (code != CODE_FOR_nothing && promoted_val != NULL_RTX);
23246
23247 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23248
23249 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZE moves. */
23250 gcc_assert (size_to_move % piece_size == 0);
23251 adjust = GEN_INT (piece_size);
23252 for (i = 0; i < size_to_move; i += piece_size)
23253 {
23254 if (piece_size <= GET_MODE_SIZE (word_mode))
23255 {
23256 emit_insn (gen_strset (destptr, dst, promoted_val));
23257 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23258 piece_size);
23259 continue;
23260 }
23261
23262 emit_insn (GEN_FCN (code) (dst, promoted_val));
23263
23264 emit_move_insn (destptr,
23265 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23266
23267 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23268 piece_size);
23269 }
23270
23271 /* Update DST rtx. */
23272 return dst;
23273 }
23274 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
23275 static void
23276 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
23277 rtx count, int max_size)
23278 {
23279 count =
23280 expand_simple_binop (counter_mode (count), AND, count,
23281 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
23282 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
23283 gen_lowpart (QImode, value), count, QImode,
23284 1, max_size / 2, true);
23285 }
23286
23287 /* Output code to set at most count & (max_size - 1) bytes starting at DESTMEM. */
23288 static void
23289 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx vec_value,
23290 rtx count, int max_size)
23291 {
23292 rtx dest;
23293
23294 if (CONST_INT_P (count))
23295 {
23296 HOST_WIDE_INT countval = INTVAL (count);
23297 HOST_WIDE_INT epilogue_size = countval % max_size;
23298 int i;
23299
23300 /* For now MAX_SIZE should be a power of 2. This assert could be
23301 relaxed, but it would require a somewhat more complicated epilogue
23302 expansion. */
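      /* Illustration (hypothetical numbers): with max_size == 16 and an
         epilogue of countval % max_size == 13 bytes, the loop below emits
         stores of 8, 4 and 1 bytes -- the bits set in 13.  */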
23303 gcc_assert ((max_size & (max_size - 1)) == 0);
23304 for (i = max_size; i >= 1; i >>= 1)
23305 {
23306 if (epilogue_size & i)
23307 {
23308 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
23309 destmem = emit_memset (destmem, destptr, vec_value, i);
23310 else
23311 destmem = emit_memset (destmem, destptr, value, i);
23312 }
23313 }
23314 return;
23315 }
23316 if (max_size > 32)
23317 {
23318 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
23319 return;
23320 }
23321 if (max_size > 16)
23322 {
23323 rtx label = ix86_expand_aligntest (count, 16, true);
23324 if (TARGET_64BIT)
23325 {
23326 dest = change_address (destmem, DImode, destptr);
23327 emit_insn (gen_strset (destptr, dest, value));
23328 dest = adjust_automodify_address_nv (dest, DImode, destptr, 8);
23329 emit_insn (gen_strset (destptr, dest, value));
23330 }
23331 else
23332 {
23333 dest = change_address (destmem, SImode, destptr);
23334 emit_insn (gen_strset (destptr, dest, value));
23335 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
23336 emit_insn (gen_strset (destptr, dest, value));
23337 dest = adjust_automodify_address_nv (dest, SImode, destptr, 8);
23338 emit_insn (gen_strset (destptr, dest, value));
23339 dest = adjust_automodify_address_nv (dest, SImode, destptr, 12);
23340 emit_insn (gen_strset (destptr, dest, value));
23341 }
23342 emit_label (label);
23343 LABEL_NUSES (label) = 1;
23344 }
23345 if (max_size > 8)
23346 {
23347 rtx label = ix86_expand_aligntest (count, 8, true);
23348 if (TARGET_64BIT)
23349 {
23350 dest = change_address (destmem, DImode, destptr);
23351 emit_insn (gen_strset (destptr, dest, value));
23352 }
23353 else
23354 {
23355 dest = change_address (destmem, SImode, destptr);
23356 emit_insn (gen_strset (destptr, dest, value));
23357 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
23358 emit_insn (gen_strset (destptr, dest, value));
23359 }
23360 emit_label (label);
23361 LABEL_NUSES (label) = 1;
23362 }
23363 if (max_size > 4)
23364 {
23365 rtx label = ix86_expand_aligntest (count, 4, true);
23366 dest = change_address (destmem, SImode, destptr);
23367 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
23368 emit_label (label);
23369 LABEL_NUSES (label) = 1;
23370 }
23371 if (max_size > 2)
23372 {
23373 rtx label = ix86_expand_aligntest (count, 2, true);
23374 dest = change_address (destmem, HImode, destptr);
23375 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
23376 emit_label (label);
23377 LABEL_NUSES (label) = 1;
23378 }
23379 if (max_size > 1)
23380 {
23381 rtx label = ix86_expand_aligntest (count, 1, true);
23382 dest = change_address (destmem, QImode, destptr);
23383 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
23384 emit_label (label);
23385 LABEL_NUSES (label) = 1;
23386 }
23387 }
23388
23389 /* Depending on ISSETMEM, either copy enough bytes from SRCMEM to DESTMEM, or
23390 store enough of VALUE into DESTMEM, to align it to DESIRED_ALIGNMENT.
23391 The original alignment is ALIGN. Depending on ISSETMEM, either the arguments
23392 SRCMEM/SRCPTR or VALUE/VEC_VALUE are ignored.
23393 Return value is the updated DESTMEM. */
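/* Sketch of the emitted sequence (assuming ALIGN == 1 and
   DESIRED_ALIGNMENT == 8; illustrative only):

       if (destptr & 1) { store/copy 1 byte;  count -= 1; }
       if (destptr & 2) { store/copy 2 bytes; count -= 2; }
       if (destptr & 4) { store/copy 4 bytes; count -= 4; }

   each test being emitted via ix86_expand_aligntest.  */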
23394 static rtx
23395 expand_set_or_movmem_prologue (rtx destmem, rtx srcmem,
23396 rtx destptr, rtx srcptr, rtx value,
23397 rtx vec_value, rtx count, int align,
23398 int desired_alignment, bool issetmem)
23399 {
23400 int i;
23401 for (i = 1; i < desired_alignment; i <<= 1)
23402 {
23403 if (align <= i)
23404 {
23405 rtx label = ix86_expand_aligntest (destptr, i, false);
23406 if (issetmem)
23407 {
23408 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
23409 destmem = emit_memset (destmem, destptr, vec_value, i);
23410 else
23411 destmem = emit_memset (destmem, destptr, value, i);
23412 }
23413 else
23414 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
23415 ix86_adjust_counter (count, i);
23416 emit_label (label);
23417 LABEL_NUSES (label) = 1;
23418 set_mem_align (destmem, i * 2 * BITS_PER_UNIT);
23419 }
23420 }
23421 return destmem;
23422 }
23423
23424 /* Test if COUNT & SIZE is nonzero and if so, expand a movmem
23425 or setmem sequence that is valid for SIZE..2*SIZE-1 bytes
23426 and jump to DONE_LABEL. */
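/* For example (illustrative only): with SIZE == 16, a block whose COUNT is in
   16..31 bytes is handled by one 16-byte move at the start of the block and
   one 16-byte move ending at DESTPTR + COUNT; the two moves may overlap in
   the middle, which is harmless for memcpy/memset semantics.  */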
23427 static void
23428 expand_small_movmem_or_setmem (rtx destmem, rtx srcmem,
23429 rtx destptr, rtx srcptr,
23430 rtx value, rtx vec_value,
23431 rtx count, int size,
23432 rtx done_label, bool issetmem)
23433 {
23434 rtx label = ix86_expand_aligntest (count, size, false);
23435 enum machine_mode mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 1);
23436 rtx modesize;
23437 int n;
23438
23439 /* If we do not have vector value to copy, we must reduce size. */
23440 if (issetmem)
23441 {
23442 if (!vec_value)
23443 {
23444 if (GET_MODE (value) == VOIDmode && size > 8)
23445 mode = Pmode;
23446 else if (GET_MODE_SIZE (mode) > GET_MODE_SIZE (GET_MODE (value)))
23447 mode = GET_MODE (value);
23448 }
23449 else
23450 mode = GET_MODE (vec_value), value = vec_value;
23451 }
23452 else
23453 {
23454 /* Choose appropriate vector mode. */
23455 if (size >= 32)
23456 mode = TARGET_AVX ? V32QImode : TARGET_SSE ? V16QImode : DImode;
23457 else if (size >= 16)
23458 mode = TARGET_SSE ? V16QImode : DImode;
23459 srcmem = change_address (srcmem, mode, srcptr);
23460 }
23461 destmem = change_address (destmem, mode, destptr);
23462 modesize = GEN_INT (GET_MODE_SIZE (mode));
23463 gcc_assert (GET_MODE_SIZE (mode) <= size);
23464 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
23465 {
23466 if (issetmem)
23467 emit_move_insn (destmem, gen_lowpart (mode, value));
23468 else
23469 {
23470 emit_move_insn (destmem, srcmem);
23471 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
23472 }
23473 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
23474 }
23475
23476 destmem = offset_address (destmem, count, 1);
23477 destmem = offset_address (destmem, GEN_INT (-2 * size),
23478 GET_MODE_SIZE (mode));
23479 if (!issetmem)
23480 {
23481 srcmem = offset_address (srcmem, count, 1);
23482 srcmem = offset_address (srcmem, GEN_INT (-2 * size),
23483 GET_MODE_SIZE (mode));
23484 }
23485 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
23486 {
23487 if (issetmem)
23488 emit_move_insn (destmem, gen_lowpart (mode, value));
23489 else
23490 {
23491 emit_move_insn (destmem, srcmem);
23492 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
23493 }
23494 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
23495 }
23496 emit_jump_insn (gen_jump (done_label));
23497 emit_barrier ();
23498
23499 emit_label (label);
23500 LABEL_NUSES (label) = 1;
23501 }
23502
23503 /* Handle a small memcpy (up to SIZE, which is supposed to be a small power of 2)
23504 and get ready for the main memcpy loop by copying the initial DESIRED_ALIGN-ALIGN
23505 bytes and the last SIZE bytes, adjusting DESTPTR/SRCPTR/COUNT in a way that we can
23506 proceed with a loop copying SIZE bytes at once. Do moves in MODE.
23507 DONE_LABEL is a label after the whole copying sequence. The label is created
23508 on demand if *DONE_LABEL is NULL.
23509 MIN_SIZE is the minimal size of the block copied. This value gets adjusted for the
23510 new bounds after the initial copies.
23511
23512 DESTMEM/SRCMEM are memory expressions pointing to the copied block,
23513 DESTPTR/SRCPTR are pointers to the block. DYNAMIC_CHECK indicates whether
23514 we will dispatch to a library call for large blocks.
23515
23516 In pseudocode we do:
23517
23518 if (COUNT < SIZE)
23519 {
23520 Assume that SIZE is 4. Bigger sizes are handled analogously
23521 if (COUNT & 4)
23522 {
23523 copy 4 bytes from SRCPTR to DESTPTR
23524 copy 4 bytes from SRCPTR + COUNT - 4 to DESTPTR + COUNT - 4
23525 goto done_label
23526 }
23527 if (!COUNT)
23528 goto done_label;
23529 copy 1 byte from SRCPTR to DESTPTR
23530 if (COUNT & 2)
23531 {
23532 copy 2 bytes from SRCPTR to DESTPTR
23533 copy 2 bytes from SRCPTR + COUNT - 2 to DESTPTR + COUNT - 2
23534 }
23535 }
23536 else
23537 {
23538 copy at least DESIRED_ALIGN-ALIGN bytes from SRCPTR to DESTPTR
23539 copy SIZE bytes from SRCPTR + COUNT - SIZE to DESTPTR + COUNT - SIZE
23540
23541 OLD_DESTPTR = DESTPTR;
23542 Align DESTPTR up to DESIRED_ALIGN
23543 SRCPTR += DESTPTR - OLD_DESTPTR
23544 COUNT -= DESTPTR - OLD_DESTPTR
23545 if (DYNAMIC_CHECK)
23546 Round COUNT down to multiple of SIZE
23547 << optional caller supplied zero size guard is here >>
23548 << optional caller supplied dynamic check is here >>
23549 << caller supplied main copy loop is here >>
23550 }
23551 done_label:
23552 */
23553 static void
23554 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves (rtx destmem, rtx srcmem,
23555 rtx *destptr, rtx *srcptr,
23556 enum machine_mode mode,
23557 rtx value, rtx vec_value,
23558 rtx *count,
23559 rtx *done_label,
23560 int size,
23561 int desired_align,
23562 int align,
23563 unsigned HOST_WIDE_INT *min_size,
23564 bool dynamic_check,
23565 bool issetmem)
23566 {
23567 rtx loop_label = NULL, label;
23568 int n;
23569 rtx modesize;
23570 int prolog_size = 0;
23571 rtx mode_value;
23572
23573 /* Choose the proper value to copy. */
23574 if (issetmem && VECTOR_MODE_P (mode))
23575 mode_value = vec_value;
23576 else
23577 mode_value = value;
23578 gcc_assert (GET_MODE_SIZE (mode) <= size);
23579
23580 /* See if block is big or small, handle small blocks. */
23581 if (!CONST_INT_P (*count) && *min_size < (unsigned HOST_WIDE_INT)size)
23582 {
23583 int size2 = size;
23584 loop_label = gen_label_rtx ();
23585
23586 if (!*done_label)
23587 *done_label = gen_label_rtx ();
23588
23589 emit_cmp_and_jump_insns (*count, GEN_INT (size2), GE, 0, GET_MODE (*count),
23590 1, loop_label);
23591 size2 >>= 1;
23592
23593 /* Handle sizes > 3. */
23594 for (;size2 > 2; size2 >>= 1)
23595 expand_small_movmem_or_setmem (destmem, srcmem,
23596 *destptr, *srcptr,
23597 value, vec_value,
23598 *count,
23599 size2, *done_label, issetmem);
23600 /* Nothing to copy? Jump to DONE_LABEL if so */
23601 emit_cmp_and_jump_insns (*count, const0_rtx, EQ, 0, GET_MODE (*count),
23602 1, *done_label);
23603
23604 /* Do a byte copy. */
23605 destmem = change_address (destmem, QImode, *destptr);
23606 if (issetmem)
23607 emit_move_insn (destmem, gen_lowpart (QImode, value));
23608 else
23609 {
23610 srcmem = change_address (srcmem, QImode, *srcptr);
23611 emit_move_insn (destmem, srcmem);
23612 }
23613
23614 /* Handle sizes 2 and 3. */
23615 label = ix86_expand_aligntest (*count, 2, false);
23616 destmem = change_address (destmem, HImode, *destptr);
23617 destmem = offset_address (destmem, *count, 1);
23618 destmem = offset_address (destmem, GEN_INT (-2), 2);
23619 if (issetmem)
23620 emit_move_insn (destmem, gen_lowpart (HImode, value));
23621 else
23622 {
23623 srcmem = change_address (srcmem, HImode, *srcptr);
23624 srcmem = offset_address (srcmem, *count, 1);
23625 srcmem = offset_address (srcmem, GEN_INT (-2), 2);
23626 emit_move_insn (destmem, srcmem);
23627 }
23628
23629 emit_label (label);
23630 LABEL_NUSES (label) = 1;
23631 emit_jump_insn (gen_jump (*done_label));
23632 emit_barrier ();
23633 }
23634 else
23635 gcc_assert (*min_size >= (unsigned HOST_WIDE_INT)size
23636 || UINTVAL (*count) >= (unsigned HOST_WIDE_INT)size);
23637
23638 /* Start memcpy for COUNT >= SIZE. */
23639 if (loop_label)
23640 {
23641 emit_label (loop_label);
23642 LABEL_NUSES (loop_label) = 1;
23643 }
23644
23645 /* Copy first desired_align bytes. */
23646 if (!issetmem)
23647 srcmem = change_address (srcmem, mode, *srcptr);
23648 destmem = change_address (destmem, mode, *destptr);
23649 modesize = GEN_INT (GET_MODE_SIZE (mode));
23650 for (n = 0; prolog_size < desired_align - align; n++)
23651 {
23652 if (issetmem)
23653 emit_move_insn (destmem, mode_value);
23654 else
23655 {
23656 emit_move_insn (destmem, srcmem);
23657 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
23658 }
23659 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
23660 prolog_size += GET_MODE_SIZE (mode);
23661 }
23662
23663
23664 /* Copy last SIZE bytes. */
23665 destmem = offset_address (destmem, *count, 1);
23666 destmem = offset_address (destmem,
23667 GEN_INT (-size - prolog_size),
23668 1);
23669 if (issetmem)
23670 emit_move_insn (destmem, mode_value);
23671 else
23672 {
23673 srcmem = offset_address (srcmem, *count, 1);
23674 srcmem = offset_address (srcmem,
23675 GEN_INT (-size - prolog_size),
23676 1);
23677 emit_move_insn (destmem, srcmem);
23678 }
23679 for (n = 1; n * GET_MODE_SIZE (mode) < size; n++)
23680 {
23681 destmem = offset_address (destmem, modesize, 1);
23682 if (issetmem)
23683 emit_move_insn (destmem, mode_value);
23684 else
23685 {
23686 srcmem = offset_address (srcmem, modesize, 1);
23687 emit_move_insn (destmem, srcmem);
23688 }
23689 }
23690
23691 /* Align destination. */
23692 if (desired_align > 1 && desired_align > align)
23693 {
23694 rtx saveddest = *destptr;
23695
23696 gcc_assert (desired_align <= size);
23697 /* Align destptr up, place it to new register. */
23698 *destptr = expand_simple_binop (GET_MODE (*destptr), PLUS, *destptr,
23699 GEN_INT (prolog_size),
23700 NULL_RTX, 1, OPTAB_DIRECT);
23701 *destptr = expand_simple_binop (GET_MODE (*destptr), AND, *destptr,
23702 GEN_INT (-desired_align),
23703 *destptr, 1, OPTAB_DIRECT);
23704 /* See how many bytes we skipped. */
23705 saveddest = expand_simple_binop (GET_MODE (*destptr), MINUS, saveddest,
23706 *destptr,
23707 saveddest, 1, OPTAB_DIRECT);
23708 /* Adjust srcptr and count. */
23709 if (!issetmem)
23710 *srcptr = expand_simple_binop (GET_MODE (*srcptr), MINUS, *srcptr, saveddest,
23711 *srcptr, 1, OPTAB_DIRECT);
23712 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
23713 saveddest, *count, 1, OPTAB_DIRECT);
23714 /* We copied at most size + prolog_size. */
23715 if (*min_size > (unsigned HOST_WIDE_INT)(size + prolog_size))
23716 *min_size = (*min_size - size) & ~(unsigned HOST_WIDE_INT)(size - 1);
23717 else
23718 *min_size = 0;
23719
23720 /* Our loops always round down the block size, but for dispatch to a library
23721 call we need the precise value. */
23722 if (dynamic_check)
23723 *count = expand_simple_binop (GET_MODE (*count), AND, *count,
23724 GEN_INT (-size), *count, 1, OPTAB_DIRECT);
23725 }
23726 else
23727 {
23728 gcc_assert (prolog_size == 0);
23729 /* Decrease count, so we won't end up copying last word twice. */
23730 if (!CONST_INT_P (*count))
23731 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
23732 constm1_rtx, *count, 1, OPTAB_DIRECT);
23733 else
23734 *count = GEN_INT ((UINTVAL (*count) - 1) & ~(unsigned HOST_WIDE_INT)(size - 1));
23735 if (*min_size)
23736 *min_size = (*min_size - 1) & ~(unsigned HOST_WIDE_INT)(size - 1);
23737 }
23738 }
23739
23740
23741 /* This function is like the previous one, except here we know how many bytes
23742 need to be copied. That allows us to update alignment not only of DST, which
23743 is returned, but also of SRC, which is passed as a pointer for that
23744 reason. */
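/* For instance (hypothetical values): with ALIGN_BYTES == 7 and
   DESIRED_ALIGN == 8, the loop below emits one 1-byte, one 2-byte and one
   4-byte store/copy -- the bits set in ALIGN_BYTES.  */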
23745 static rtx
23746 expand_set_or_movmem_constant_prologue (rtx dst, rtx *srcp, rtx destreg,
23747 rtx srcreg, rtx value, rtx vec_value,
23748 int desired_align, int align_bytes,
23749 bool issetmem)
23750 {
23751 rtx src = NULL;
23752 rtx orig_dst = dst;
23753 rtx orig_src = NULL;
23754 int piece_size = 1;
23755 int copied_bytes = 0;
23756
23757 if (!issetmem)
23758 {
23759 gcc_assert (srcp != NULL);
23760 src = *srcp;
23761 orig_src = src;
23762 }
23763
23764 for (piece_size = 1;
23765 piece_size <= desired_align && copied_bytes < align_bytes;
23766 piece_size <<= 1)
23767 {
23768 if (align_bytes & piece_size)
23769 {
23770 if (issetmem)
23771 {
23772 if (vec_value && piece_size > GET_MODE_SIZE (GET_MODE (value)))
23773 dst = emit_memset (dst, destreg, vec_value, piece_size);
23774 else
23775 dst = emit_memset (dst, destreg, value, piece_size);
23776 }
23777 else
23778 dst = emit_memmov (dst, &src, destreg, srcreg, piece_size);
23779 copied_bytes += piece_size;
23780 }
23781 }
23782 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
23783 set_mem_align (dst, desired_align * BITS_PER_UNIT);
23784 if (MEM_SIZE_KNOWN_P (orig_dst))
23785 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
23786
23787 if (!issetmem)
23788 {
23789 int src_align_bytes = get_mem_align_offset (src, desired_align
23790 * BITS_PER_UNIT);
23791 if (src_align_bytes >= 0)
23792 src_align_bytes = desired_align - src_align_bytes;
23793 if (src_align_bytes >= 0)
23794 {
23795 unsigned int src_align;
23796 for (src_align = desired_align; src_align >= 2; src_align >>= 1)
23797 {
23798 if ((src_align_bytes & (src_align - 1))
23799 == (align_bytes & (src_align - 1)))
23800 break;
23801 }
23802 if (src_align > (unsigned int) desired_align)
23803 src_align = desired_align;
23804 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
23805 set_mem_align (src, src_align * BITS_PER_UNIT);
23806 }
23807 if (MEM_SIZE_KNOWN_P (orig_src))
23808 set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
23809 *srcp = src;
23810 }
23811
23812 return dst;
23813 }
23814
23815 /* Return true if ALG can be used in current context.
23816 Assume we expand memset if MEMSET is true. */
23817 static bool
23818 alg_usable_p (enum stringop_alg alg, bool memset)
23819 {
23820 if (alg == no_stringop)
23821 return false;
23822 if (alg == vector_loop)
23823 return TARGET_SSE || TARGET_AVX;
23824 /* Algorithms using the rep prefix want at least edi and ecx;
23825 additionally, memset wants eax and memcpy wants esi. Don't
23826 consider such algorithms if the user has appropriated those
23827 registers for their own purposes. */
23828 if (alg == rep_prefix_1_byte
23829 || alg == rep_prefix_4_byte
23830 || alg == rep_prefix_8_byte)
23831 return !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
23832 || (memset ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
23833 return true;
23834 }
23835
23836 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
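/* As a sketch of how the cost tables are consulted (the entry values here
   are hypothetical): a stringop_algs descriptor such as
     {libcall, {{256, unrolled_loop, false}, {-1, libcall, false}}}
   requests an unrolled loop for blocks of at most 256 bytes and a library
   call for larger blocks or blocks of unknown size.  */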
23837 static enum stringop_alg
23838 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
23839 unsigned HOST_WIDE_INT min_size, unsigned HOST_WIDE_INT max_size,
23840 bool memset, bool zero_memset, int *dynamic_check, bool *noalign)
23841 {
23842 const struct stringop_algs * algs;
23843 bool optimize_for_speed;
23844 int max = 0;
23845 const struct processor_costs *cost;
23846 int i;
23847 bool any_alg_usable_p = false;
23848
23849 *noalign = false;
23850 *dynamic_check = -1;
23851
23852 /* Even if the string operation call is cold, we still might spend a lot
23853 of time processing large blocks. */
23854 if (optimize_function_for_size_p (cfun)
23855 || (optimize_insn_for_size_p ()
23856 && (max_size < 256
23857 || (expected_size != -1 && expected_size < 256))))
23858 optimize_for_speed = false;
23859 else
23860 optimize_for_speed = true;
23861
23862 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
23863 if (memset)
23864 algs = &cost->memset[TARGET_64BIT != 0];
23865 else
23866 algs = &cost->memcpy[TARGET_64BIT != 0];
23867
23868 /* See maximal size for user defined algorithm. */
23869 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
23870 {
23871 enum stringop_alg candidate = algs->size[i].alg;
23872 bool usable = alg_usable_p (candidate, memset);
23873 any_alg_usable_p |= usable;
23874
23875 if (candidate != libcall && candidate && usable)
23876 max = algs->size[i].max;
23877 }
23878
23879 /* If the expected size is not known but the max size is small enough
23880 so that the inline version is a win, set the expected size into
23881 the range. */
23882 if (((max > 1 && (unsigned HOST_WIDE_INT) max >= max_size) || max == -1)
23883 && expected_size == -1)
23884 expected_size = min_size / 2 + max_size / 2;
23885
23886 /* If the user specified the algorithm, honor it if possible. */
23887 if (ix86_stringop_alg != no_stringop
23888 && alg_usable_p (ix86_stringop_alg, memset))
23889 return ix86_stringop_alg;
23890 /* rep; movq or rep; movl is the smallest variant. */
23891 else if (!optimize_for_speed)
23892 {
23893 *noalign = true;
23894 if (!count || (count & 3) || (memset && !zero_memset))
23895 return alg_usable_p (rep_prefix_1_byte, memset)
23896 ? rep_prefix_1_byte : loop_1_byte;
23897 else
23898 return alg_usable_p (rep_prefix_4_byte, memset)
23899 ? rep_prefix_4_byte : loop;
23900 }
23901 /* Very tiny blocks are best handled via the loop; REP is expensive to
23902 set up. */
23903 else if (expected_size != -1 && expected_size < 4)
23904 return loop_1_byte;
23905 else if (expected_size != -1)
23906 {
23907 enum stringop_alg alg = libcall;
23908 bool alg_noalign = false;
23909 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
23910 {
23911 /* We get here if the algorithms that were not libcall-based
23912 were rep-prefix based and we are unable to use rep prefixes
23913 based on global register usage. Break out of the loop and
23914 use the heuristic below. */
23915 if (algs->size[i].max == 0)
23916 break;
23917 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
23918 {
23919 enum stringop_alg candidate = algs->size[i].alg;
23920
23921 if (candidate != libcall && alg_usable_p (candidate, memset))
23922 {
23923 alg = candidate;
23924 alg_noalign = algs->size[i].noalign;
23925 }
23926 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
23927 last non-libcall inline algorithm. */
23928 if (TARGET_INLINE_ALL_STRINGOPS)
23929 {
23930 /* When the current size is best to be copied by a libcall,
23931 but we are still forced to inline, run the heuristic below
23932 that will pick code for medium sized blocks. */
23933 if (alg != libcall)
23934 {
23935 *noalign = alg_noalign;
23936 return alg;
23937 }
23938 break;
23939 }
23940 else if (alg_usable_p (candidate, memset))
23941 {
23942 *noalign = algs->size[i].noalign;
23943 return candidate;
23944 }
23945 }
23946 }
23947 }
23948 /* When asked to inline the call anyway, try to pick a meaningful choice.
23949 We look for the maximal size of block that is faster to copy by hand and
23950 take blocks of at most that size, guessing that the average size will
23951 be roughly half of the block.
23952
23953 If this turns out to be bad, we might simply specify the preferred
23954 choice in ix86_costs. */
23955 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
23956 && (algs->unknown_size == libcall
23957 || !alg_usable_p (algs->unknown_size, memset)))
23958 {
23959 enum stringop_alg alg;
23960
23961 /* If there aren't any usable algorithms, then recursing on
23962 smaller sizes isn't going to find anything. Just return the
23963 simple byte-at-a-time copy loop. */
23964 if (!any_alg_usable_p)
23965 {
23966 /* Pick something reasonable. */
23967 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
23968 *dynamic_check = 128;
23969 return loop_1_byte;
23970 }
23971 if (max <= 0)
23972 max = 4096;
23973 alg = decide_alg (count, max / 2, min_size, max_size, memset,
23974 zero_memset, dynamic_check, noalign);
23975 gcc_assert (*dynamic_check == -1);
23976 gcc_assert (alg != libcall);
23977 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
23978 *dynamic_check = max;
23979 return alg;
23980 }
23981 return (alg_usable_p (algs->unknown_size, memset)
23982 ? algs->unknown_size : libcall);
23983 }
23984
23985 /* Decide on alignment. We know that the operand is already aligned to ALIGN
23986 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
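/* For example (assuming the modes below are available on the target): a
   vector_loop using V16QImode asks for a 16-byte aligned destination, while
   rep_prefix_4_byte on a PentiumPro asks for 8 bytes; optimizing for size
   drops the request to 1 before it is raised back to the known ALIGN.  */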
23987 static int
23988 decide_alignment (int align,
23989 enum stringop_alg alg,
23990 int expected_size,
23991 enum machine_mode move_mode)
23992 {
23993 int desired_align = 0;
23994
23995 gcc_assert (alg != no_stringop);
23996
23997 if (alg == libcall)
23998 return 0;
23999 if (move_mode == VOIDmode)
24000 return 0;
24001
24002 desired_align = GET_MODE_SIZE (move_mode);
24003 /* PentiumPro has special logic triggering for 8 byte aligned blocks,
24004 copying whole cache lines at once. */
24005 if (TARGET_PENTIUMPRO
24006 && (alg == rep_prefix_4_byte || alg == rep_prefix_1_byte))
24007 desired_align = 8;
24008
24009 if (optimize_size)
24010 desired_align = 1;
24011 if (desired_align < align)
24012 desired_align = align;
24013 if (expected_size != -1 && expected_size < 4)
24014 desired_align = align;
24015
24016 return desired_align;
24017 }
24018
24019
24020 /* Helper function for memset. For a QImode value 0xXY produce
24021 0xXYXYXYXY of the width specified by MODE. This is essentially
24022 a multiplication by 0x01010101, but we can do slightly better than
24023 synth_mult by unwinding the sequence by hand on CPUs with
24024 slow multiply. */
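/* Sketch of the unwound shift/or path for a non-constant SImode VAL
   (illustrative only; the exact insns depend on TARGET_PARTIAL_REG_STALL):

       reg  = zero_extend (val);      reg == 0x000000XY
       reg |= reg << 8;               reg == 0x0000XYXY
       reg |= reg << 16;              reg == 0xXYXYXYXY

   For DImode a further "reg |= reg << 32" step is added.  */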
24025 static rtx
24026 promote_duplicated_reg (enum machine_mode mode, rtx val)
24027 {
24028 enum machine_mode valmode = GET_MODE (val);
24029 rtx tmp;
24030 int nops = mode == DImode ? 3 : 2;
24031
24032 gcc_assert (mode == SImode || mode == DImode || val == const0_rtx);
24033 if (val == const0_rtx)
24034 return copy_to_mode_reg (mode, CONST0_RTX (mode));
24035 if (CONST_INT_P (val))
24036 {
24037 HOST_WIDE_INT v = INTVAL (val) & 255;
24038
24039 v |= v << 8;
24040 v |= v << 16;
24041 if (mode == DImode)
24042 v |= (v << 16) << 16;
24043 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
24044 }
24045
24046 if (valmode == VOIDmode)
24047 valmode = QImode;
24048 if (valmode != QImode)
24049 val = gen_lowpart (QImode, val);
24050 if (mode == QImode)
24051 return val;
24052 if (!TARGET_PARTIAL_REG_STALL)
24053 nops--;
24054 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
24055 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
24056 <= (ix86_cost->shift_const + ix86_cost->add) * nops
24057 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
24058 {
24059 rtx reg = convert_modes (mode, QImode, val, true);
24060 tmp = promote_duplicated_reg (mode, const1_rtx);
24061 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
24062 OPTAB_DIRECT);
24063 }
24064 else
24065 {
24066 rtx reg = convert_modes (mode, QImode, val, true);
24067
24068 if (!TARGET_PARTIAL_REG_STALL)
24069 if (mode == SImode)
24070 emit_insn (gen_movsi_insv_1 (reg, reg));
24071 else
24072 emit_insn (gen_movdi_insv_1 (reg, reg));
24073 else
24074 {
24075 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
24076 NULL, 1, OPTAB_DIRECT);
24077 reg =
24078 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24079 }
24080 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
24081 NULL, 1, OPTAB_DIRECT);
24082 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24083 if (mode == SImode)
24084 return reg;
24085 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
24086 NULL, 1, OPTAB_DIRECT);
24087 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24088 return reg;
24089 }
24090 }
24091
24092 /* Duplicate the value VAL using promote_duplicated_reg into the maximal size that
24093 will be needed by the main loop copying SIZE_NEEDED chunks and by the prologue
24094 getting alignment from ALIGN to DESIRED_ALIGN. */
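/* E.g. (illustrative): on a 64-bit target with SIZE_NEEDED == 8, a QImode
   VAL of 0xab is returned as a DImode register holding
   0xabababababababab.  */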
24095 static rtx
24096 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align,
24097 int align)
24098 {
24099 rtx promoted_val;
24100
24101 if (TARGET_64BIT
24102 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
24103 promoted_val = promote_duplicated_reg (DImode, val);
24104 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
24105 promoted_val = promote_duplicated_reg (SImode, val);
24106 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
24107 promoted_val = promote_duplicated_reg (HImode, val);
24108 else
24109 promoted_val = val;
24110
24111 return promoted_val;
24112 }
24113
24114 /* Expand string move (memcpy) or store (memset) operation. Use i386 string
24115 operations when profitable. The code depends upon architecture, block size
24116 and alignment, but always has one of the following overall structures:
24117
24118 Aligned move sequence:
24119
24120 1) Prologue guard: Conditional that jumps up to epilogues for small
24121 blocks that can be handled by epilogue alone. This is faster
24122 but also needed for correctness, since the prologue assumes the block
24123 is larger than the desired alignment.
24124
24125 Optional dynamic check for size and libcall for large
24126 blocks is emitted here too, with -minline-stringops-dynamically.
24127
24128 2) Prologue: copy first few bytes in order to get destination
24129 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
24130 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
24131 copied. We emit either a jump tree on power of two sized
24132 blocks, or a byte loop.
24133
24134 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24135 with specified algorithm.
24136
24137 4) Epilogue: code copying tail of the block that is too small to be
24138 handled by main body (or up to size guarded by prologue guard).
24139
24140 Misaligned move sequence
24141
24142 1) misaligned move prologue/epilogue containing:
24143 a) Prologue handling small memory blocks and jumping to done_label
24144 (skipped if blocks are known to be large enough)
24145 b) Single move copying the first DESIRED_ALIGN-ALIGN bytes if alignment is
24146 needed by single possibly misaligned move
24147 (skipped if alignment is not needed)
24148 c) Copy of last SIZE_NEEDED bytes by possibly misaligned moves
24149
24150 2) Zero size guard dispatching to done_label, if needed
24151
24152 3) dispatch to library call, if needed,
24153
24154 4) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24155 with specified algorithm. */
24156 bool
24157 ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
24158 rtx align_exp, rtx expected_align_exp,
24159 rtx expected_size_exp, rtx min_size_exp,
24160 rtx max_size_exp, rtx probable_max_size_exp,
24161 bool issetmem)
24162 {
24163 rtx destreg;
24164 rtx srcreg = NULL;
24165 rtx label = NULL;
24166 rtx tmp;
24167 rtx jump_around_label = NULL;
24168 HOST_WIDE_INT align = 1;
24169 unsigned HOST_WIDE_INT count = 0;
24170 HOST_WIDE_INT expected_size = -1;
24171 int size_needed = 0, epilogue_size_needed;
24172 int desired_align = 0, align_bytes = 0;
24173 enum stringop_alg alg;
24174 rtx promoted_val = NULL;
24175 rtx vec_promoted_val = NULL;
24176 bool force_loopy_epilogue = false;
24177 int dynamic_check;
24178 bool need_zero_guard = false;
24179 bool noalign;
24180 enum machine_mode move_mode = VOIDmode;
24181 int unroll_factor = 1;
24182 /* TODO: Once value ranges are available, fill in proper data. */
24183 unsigned HOST_WIDE_INT min_size = 0;
24184 unsigned HOST_WIDE_INT max_size = -1;
24185 unsigned HOST_WIDE_INT probable_max_size = -1;
24186 bool misaligned_prologue_used = false;
24187
24188 if (CONST_INT_P (align_exp))
24189 align = INTVAL (align_exp);
24190 /* i386 can do misaligned access at a reasonably increased cost. */
24191 if (CONST_INT_P (expected_align_exp)
24192 && INTVAL (expected_align_exp) > align)
24193 align = INTVAL (expected_align_exp);
24194 /* ALIGN is the minimum of destination and source alignment, but we care here
24195 just about destination alignment. */
24196 else if (!issetmem
24197 && MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
24198 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
24199
24200 if (CONST_INT_P (count_exp))
24201 {
24202 min_size = max_size = probable_max_size = count = expected_size
24203 = INTVAL (count_exp);
24204 /* When COUNT is 0, there is nothing to do. */
24205 if (!count)
24206 return true;
24207 }
24208 else
24209 {
24210 if (min_size_exp)
24211 min_size = INTVAL (min_size_exp);
24212 if (max_size_exp)
24213 max_size = INTVAL (max_size_exp);
24214 if (probable_max_size_exp)
24215 probable_max_size = INTVAL (probable_max_size_exp);
24216 if (CONST_INT_P (expected_size_exp))
24217 expected_size = INTVAL (expected_size_exp);
24218 }
24219
24220 /* Make sure we don't need to care about overflow later on. */
24221 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
24222 return false;
24223
24224 /* Step 0: Decide on preferred algorithm, desired alignment and
24225 size of chunks to be copied by main loop. */
24226 alg = decide_alg (count, expected_size, min_size, probable_max_size,
24227 issetmem,
24228 issetmem && val_exp == const0_rtx,
24229 &dynamic_check, &noalign);
24230 if (alg == libcall)
24231 return false;
24232 gcc_assert (alg != no_stringop);
24233
24234 /* For now the vector version of memset is generated only for memory zeroing, as
24235 creating the promoted vector value is very cheap in this case. */
24236 if (issetmem && alg == vector_loop && val_exp != const0_rtx)
24237 alg = unrolled_loop;
24238
24239 if (!count)
24240 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
24241 destreg = ix86_copy_addr_to_reg (XEXP (dst, 0));
24242 if (!issetmem)
24243 srcreg = ix86_copy_addr_to_reg (XEXP (src, 0));
24244
24245 unroll_factor = 1;
24246 move_mode = word_mode;
24247 switch (alg)
24248 {
24249 case libcall:
24250 case no_stringop:
24251 case last_alg:
24252 gcc_unreachable ();
24253 case loop_1_byte:
24254 need_zero_guard = true;
24255 move_mode = QImode;
24256 break;
24257 case loop:
24258 need_zero_guard = true;
24259 break;
24260 case unrolled_loop:
24261 need_zero_guard = true;
24262 unroll_factor = (TARGET_64BIT ? 4 : 2);
24263 break;
24264 case vector_loop:
24265 need_zero_guard = true;
24266 unroll_factor = 4;
24267 /* Find the widest supported mode. */
24268 move_mode = word_mode;
24269 while (optab_handler (mov_optab, GET_MODE_WIDER_MODE (move_mode))
24270 != CODE_FOR_nothing)
24271 move_mode = GET_MODE_WIDER_MODE (move_mode);
24272
24273 /* Find the corresponding vector mode with the same size as MOVE_MODE.
24274 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
24275 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
24276 {
24277 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
24278 move_mode = mode_for_vector (word_mode, nunits);
24279 if (optab_handler (mov_optab, move_mode) == CODE_FOR_nothing)
24280 move_mode = word_mode;
24281 }
24282 gcc_assert (optab_handler (mov_optab, move_mode) != CODE_FOR_nothing);
24283 break;
24284 case rep_prefix_8_byte:
24285 move_mode = DImode;
24286 break;
24287 case rep_prefix_4_byte:
24288 move_mode = SImode;
24289 break;
24290 case rep_prefix_1_byte:
24291 move_mode = QImode;
24292 break;
24293 }
24294 size_needed = GET_MODE_SIZE (move_mode) * unroll_factor;
24295 epilogue_size_needed = size_needed;
24296
24297 desired_align = decide_alignment (align, alg, expected_size, move_mode);
24298 if (!TARGET_ALIGN_STRINGOPS || noalign)
24299 align = desired_align;
24300
24301 /* Step 1: Prologue guard. */
24302
24303 /* Alignment code needs count to be in register. */
24304 if (CONST_INT_P (count_exp) && desired_align > align)
24305 {
24306 if (INTVAL (count_exp) > desired_align
24307 && INTVAL (count_exp) > size_needed)
24308 {
24309 align_bytes
24310 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
24311 if (align_bytes <= 0)
24312 align_bytes = 0;
24313 else
24314 align_bytes = desired_align - align_bytes;
24315 }
24316 if (align_bytes == 0)
24317 count_exp = force_reg (counter_mode (count_exp), count_exp);
24318 }
24319 gcc_assert (desired_align >= 1 && align >= 1);
24320
24321 /* Misaligned move sequences handle both prologue and epilogue at once.
24322 Default code generation results in a smaller code for large alignments
24323 and also avoids redundant work when sizes are known precisely. */
24324 misaligned_prologue_used
24325 = (TARGET_MISALIGNED_MOVE_STRING_PRO_EPILOGUES
24326 && MAX (desired_align, epilogue_size_needed) <= 32
24327 && desired_align <= epilogue_size_needed
24328 && ((desired_align > align && !align_bytes)
24329 || (!count && epilogue_size_needed > 1)));
24330
24331 /* Do the cheap promotion to allow better CSE across the
24332 main loop and epilogue (i.e. one load of the big constant in
24333 front of all code).
24334 For now the misaligned move sequences do not have a fast path
24335 without broadcasting. */
24336 if (issetmem && ((CONST_INT_P (val_exp) || misaligned_prologue_used)))
24337 {
24338 if (alg == vector_loop)
24339 {
24340 gcc_assert (val_exp == const0_rtx);
24341 vec_promoted_val = promote_duplicated_reg (move_mode, val_exp);
24342 promoted_val = promote_duplicated_reg_to_size (val_exp,
24343 GET_MODE_SIZE (word_mode),
24344 desired_align, align);
24345 }
24346 else
24347 {
24348 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
24349 desired_align, align);
24350 }
24351 }
24352 /* Misaligned move sequences handle both prologues and epilogues at once.
24353 Default code generation results in smaller code for large alignments and
24354 also avoids redundant work when sizes are known precisely. */
24355 if (misaligned_prologue_used)
24356 {
24357 /* The misaligned move prologue handles small blocks by itself. */
24358 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves
24359 (dst, src, &destreg, &srcreg,
24360 move_mode, promoted_val, vec_promoted_val,
24361 &count_exp,
24362 &jump_around_label,
24363 desired_align < align
24364 ? MAX (desired_align, epilogue_size_needed) : epilogue_size_needed,
24365 desired_align, align, &min_size, dynamic_check, issetmem);
24366 if (!issetmem)
24367 src = change_address (src, BLKmode, srcreg);
24368 dst = change_address (dst, BLKmode, destreg);
24369 set_mem_align (dst, desired_align * BITS_PER_UNIT);
24370 epilogue_size_needed = 0;
24371 if (need_zero_guard && !min_size)
24372 {
24373 /* It is possible that we copied enough so the main loop will not
24374 execute. */
24375 gcc_assert (size_needed > 1);
24376 if (jump_around_label == NULL_RTX)
24377 jump_around_label = gen_label_rtx ();
24378 emit_cmp_and_jump_insns (count_exp,
24379 GEN_INT (size_needed),
24380 LTU, 0, counter_mode (count_exp), 1, jump_around_label);
24381 if (expected_size == -1
24382 || expected_size < (desired_align - align) / 2 + size_needed)
24383 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24384 else
24385 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24386 }
24387 }
24388 /* Ensure that alignment prologue won't copy past end of block. */
24389 else if (size_needed > 1 || (desired_align > 1 && desired_align > align))
24390 {
24391 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
24392 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
24393 Make sure it is power of 2. */
24394 epilogue_size_needed = 1 << (floor_log2 (epilogue_size_needed) + 1);
24395
24396 /* To improve performance of small blocks, we jump around the VAL
24397 promoting code. This means that if the promoted VAL is not constant,
24398 we might not use it in the epilogue and have to use the byte
24399 loop variant. */
24400 if (issetmem && epilogue_size_needed > 2 && !promoted_val)
24401 force_loopy_epilogue = true;
24402 if ((count && count < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24403 || max_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24404 {
24405 /* If main algorithm works on QImode, no epilogue is needed.
24406 For small sizes just don't align anything. */
24407 if (size_needed == 1)
24408 desired_align = align;
24409 else
24410 goto epilogue;
24411 }
24412 else if (!count
24413 && min_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24414 {
24415 label = gen_label_rtx ();
24416 emit_cmp_and_jump_insns (count_exp,
24417 GEN_INT (epilogue_size_needed),
24418 LTU, 0, counter_mode (count_exp), 1, label);
24419 if (expected_size == -1 || expected_size < epilogue_size_needed)
24420 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24421 else
24422 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24423 }
24424 }
24425
24426 /* Emit code to decide on runtime whether library call or inline should be
24427 used. */
24428 if (dynamic_check != -1)
24429 {
24430 if (!issetmem && CONST_INT_P (count_exp))
24431 {
24432 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
24433 {
24434 emit_block_move_via_libcall (dst, src, count_exp, false);
24435 count_exp = const0_rtx;
24436 goto epilogue;
24437 }
24438 }
24439 else
24440 {
24441 rtx hot_label = gen_label_rtx ();
24442 if (jump_around_label == NULL_RTX)
24443 jump_around_label = gen_label_rtx ();
24444 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
24445 LEU, 0, counter_mode (count_exp),
24446 1, hot_label);
24447 predict_jump (REG_BR_PROB_BASE * 90 / 100);
24448 if (issetmem)
24449 set_storage_via_libcall (dst, count_exp, val_exp, false);
24450 else
24451 emit_block_move_via_libcall (dst, src, count_exp, false);
24452 emit_jump (jump_around_label);
24453 emit_label (hot_label);
24454 }
24455 }
24456
24457 /* Step 2: Alignment prologue. */
24458 /* Do the expensive promotion once we branched off the small blocks. */
24459 if (issetmem && !promoted_val)
24460 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
24461 desired_align, align);
24462
24463 if (desired_align > align && !misaligned_prologue_used)
24464 {
24465 if (align_bytes == 0)
24466 {
24467 /* Except for the first move in the prologue, we no longer know
24468 the constant offset in the aliasing info. It doesn't seem worth
24469 the pain to maintain it for the first move, so throw away
24470 the info early. */
24471 dst = change_address (dst, BLKmode, destreg);
24472 if (!issetmem)
24473 src = change_address (src, BLKmode, srcreg);
24474 dst = expand_set_or_movmem_prologue (dst, src, destreg, srcreg,
24475 promoted_val, vec_promoted_val,
24476 count_exp, align, desired_align,
24477 issetmem);
24478 /* At most desired_align - align bytes are copied. */
24479 if (min_size < (unsigned)(desired_align - align))
24480 min_size = 0;
24481 else
24482 min_size -= desired_align - align;
24483 }
24484 else
24485 {
24486 /* If we know how many bytes need to be stored before dst is
24487 sufficiently aligned, maintain aliasing info accurately. */
24488 dst = expand_set_or_movmem_constant_prologue (dst, &src, destreg,
24489 srcreg,
24490 promoted_val,
24491 vec_promoted_val,
24492 desired_align,
24493 align_bytes,
24494 issetmem);
24495
24496 count_exp = plus_constant (counter_mode (count_exp),
24497 count_exp, -align_bytes);
24498 count -= align_bytes;
24499 min_size -= align_bytes;
24500 max_size -= align_bytes;
24501 }
24502 if (need_zero_guard
24503 && !min_size
24504 && (count < (unsigned HOST_WIDE_INT) size_needed
24505 || (align_bytes == 0
24506 && count < ((unsigned HOST_WIDE_INT) size_needed
24507 + desired_align - align))))
24508 {
24509 /* It is possible that we copied enough so the main loop will not
24510 execute. */
24511 gcc_assert (size_needed > 1);
24512 if (label == NULL_RTX)
24513 label = gen_label_rtx ();
24514 emit_cmp_and_jump_insns (count_exp,
24515 GEN_INT (size_needed),
24516 LTU, 0, counter_mode (count_exp), 1, label);
24517 if (expected_size == -1
24518 || expected_size < (desired_align - align) / 2 + size_needed)
24519 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24520 else
24521 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24522 }
24523 }
24524 if (label && size_needed == 1)
24525 {
24526 emit_label (label);
24527 LABEL_NUSES (label) = 1;
24528 label = NULL;
24529 epilogue_size_needed = 1;
24530 if (issetmem)
24531 promoted_val = val_exp;
24532 }
24533 else if (label == NULL_RTX && !misaligned_prologue_used)
24534 epilogue_size_needed = size_needed;
24535
24536 /* Step 3: Main loop. */
24537
24538 switch (alg)
24539 {
24540 case libcall:
24541 case no_stringop:
24542 case last_alg:
24543 gcc_unreachable ();
24544 case loop_1_byte:
24545 case loop:
24546 case unrolled_loop:
24547 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, promoted_val,
24548 count_exp, move_mode, unroll_factor,
24549 expected_size, issetmem);
24550 break;
24551 case vector_loop:
24552 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg,
24553 vec_promoted_val, count_exp, move_mode,
24554 unroll_factor, expected_size, issetmem);
24555 break;
24556 case rep_prefix_8_byte:
24557 case rep_prefix_4_byte:
24558 case rep_prefix_1_byte:
24559 expand_set_or_movmem_via_rep (dst, src, destreg, srcreg, promoted_val,
24560 val_exp, count_exp, move_mode, issetmem);
24561 break;
24562 }
24563 /* Properly adjust the offsets of src and dest memory for aliasing. */
24564 if (CONST_INT_P (count_exp))
24565 {
24566 if (!issetmem)
24567 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
24568 (count / size_needed) * size_needed);
24569 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
24570 (count / size_needed) * size_needed);
24571 }
24572 else
24573 {
24574 if (!issetmem)
24575 src = change_address (src, BLKmode, srcreg);
24576 dst = change_address (dst, BLKmode, destreg);
24577 }
24578
24579 /* Step 4: Epilogue to copy the remaining bytes. */
24580 epilogue:
24581 if (label)
24582 {
24583 /* When the main loop is done, COUNT_EXP might hold original count,
24584 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
24585 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
24586 bytes. Compensate if needed. */
24587
24588 if (size_needed < epilogue_size_needed)
24589 {
24590 tmp =
24591 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
24592 GEN_INT (size_needed - 1), count_exp, 1,
24593 OPTAB_DIRECT);
24594 if (tmp != count_exp)
24595 emit_move_insn (count_exp, tmp);
24596 }
24597 emit_label (label);
24598 LABEL_NUSES (label) = 1;
24599 }
24600
24601 if (count_exp != const0_rtx && epilogue_size_needed > 1)
24602 {
24603 if (force_loopy_epilogue)
24604 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
24605 epilogue_size_needed);
24606 else
24607 {
24608 if (issetmem)
24609 expand_setmem_epilogue (dst, destreg, promoted_val,
24610 vec_promoted_val, count_exp,
24611 epilogue_size_needed);
24612 else
24613 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
24614 epilogue_size_needed);
24615 }
24616 }
24617 if (jump_around_label)
24618 emit_label (jump_around_label);
24619 return true;
24620 }
24621
24622
24623 /* Expand the appropriate insns for doing strlen if not just doing
24624 repnz; scasb
24625
24626 out = result, initialized with the start address
24627 align_rtx = alignment of the address.
24628 scratch = scratch register, initialized with the start address when
24629 not aligned, otherwise undefined
24630
24631 This is just the body. It needs the initializations mentioned above and
24632 some address computing at the end. These things are done in i386.md. */
24633
24634 static void
24635 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
24636 {
24637 int align;
24638 rtx tmp;
24639 rtx align_2_label = NULL_RTX;
24640 rtx align_3_label = NULL_RTX;
24641 rtx align_4_label = gen_label_rtx ();
24642 rtx end_0_label = gen_label_rtx ();
24643 rtx mem;
24644 rtx tmpreg = gen_reg_rtx (SImode);
24645 rtx scratch = gen_reg_rtx (SImode);
24646 rtx cmp;
24647
24648 align = 0;
24649 if (CONST_INT_P (align_rtx))
24650 align = INTVAL (align_rtx);
24651
24652 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
24653
24654 /* Is there a known alignment and is it less than 4? */
24655 if (align < 4)
24656 {
24657 rtx scratch1 = gen_reg_rtx (Pmode);
24658 emit_move_insn (scratch1, out);
24659 /* Is there a known alignment and is it not 2? */
24660 if (align != 2)
24661 {
24662 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
24663 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
24664
24665 /* Leave just the 3 lower bits. */
24666 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
24667 NULL_RTX, 0, OPTAB_WIDEN);
24668
24669 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
24670 Pmode, 1, align_4_label);
24671 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
24672 Pmode, 1, align_2_label);
24673 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
24674 Pmode, 1, align_3_label);
24675 }
24676 else
24677 {
24678 /* Since the alignment is 2, we have to check 2 or 0 bytes;
24679 check if it is aligned to 4 bytes. */
24680
24681 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
24682 NULL_RTX, 0, OPTAB_WIDEN);
24683
24684 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
24685 Pmode, 1, align_4_label);
24686 }
24687
24688 mem = change_address (src, QImode, out);
24689
24690 /* Now compare the bytes. */
24691
24692 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
24693 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
24694 QImode, 1, end_0_label);
24695
24696 /* Increment the address. */
24697 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
24698
24699 /* Not needed with an alignment of 2 */
24700 if (align != 2)
24701 {
24702 emit_label (align_2_label);
24703
24704 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
24705 end_0_label);
24706
24707 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
24708
24709 emit_label (align_3_label);
24710 }
24711
24712 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
24713 end_0_label);
24714
24715 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
24716 }
24717
24718 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
24719 align this loop; doing so only enlarges the program and does not help
24720 to speed it up. */
24721 emit_label (align_4_label);
24722
24723 mem = change_address (src, SImode, out);
24724 emit_move_insn (scratch, mem);
24725 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
24726
24727 /* This formula yields a nonzero result iff one of the bytes is zero.
24728 This saves three branches inside the loop and many cycles. */
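/* Worked example (illustrative): the insns below compute
       (x - 0x01010101) & ~x & 0x80808080.
   For x == 0x11002233 this gives 0x0fff2132 & 0xeeffddcc & 0x80808080
   == 0x00800000, which is nonzero because x contains a zero byte; for a
   word with no zero byte the result is 0.  */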
24729
24730 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
24731 emit_insn (gen_one_cmplsi2 (scratch, scratch));
24732 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
24733 emit_insn (gen_andsi3 (tmpreg, tmpreg,
24734 gen_int_mode (0x80808080, SImode)));
24735 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
24736 align_4_label);
24737
24738 if (TARGET_CMOVE)
24739 {
24740 rtx reg = gen_reg_rtx (SImode);
24741 rtx reg2 = gen_reg_rtx (Pmode);
24742 emit_move_insn (reg, tmpreg);
24743 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
24744
24745 /* If zero is not in the first two bytes, move two bytes forward. */
24746 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
24747 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24748 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
24749 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
24750 gen_rtx_IF_THEN_ELSE (SImode, tmp,
24751 reg,
24752 tmpreg)));
24753 /* Emit lea manually to avoid clobbering of flags. */
24754 emit_insn (gen_rtx_SET (SImode, reg2,
24755 gen_rtx_PLUS (Pmode, out, const2_rtx)));
24756
24757 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24758 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
24759 emit_insn (gen_rtx_SET (VOIDmode, out,
24760 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
24761 reg2,
24762 out)));
24763 }
24764 else
24765 {
24766 rtx end_2_label = gen_label_rtx ();
24767 /* Is zero in the first two bytes? */
24768
24769 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
24770 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24771 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
24772 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
24773 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
24774 pc_rtx);
24775 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
24776 JUMP_LABEL (tmp) = end_2_label;
24777
24778 /* Not in the first two. Move two bytes forward. */
24779 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
24780 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
24781
24782 emit_label (end_2_label);
24783
24784 }
24785
24786 /* Avoid branch in fixing the byte. */
24787 tmpreg = gen_lowpart (QImode, tmpreg);
24788 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
24789 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
24790 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
24791 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
24792
24793 emit_label (end_0_label);
24794 }
24795
24796 /* Expand strlen. */
24797
24798 bool
24799 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
24800 {
24801 rtx addr, scratch1, scratch2, scratch3, scratch4;
24802
24803 /* The generic case of the strlen expander is long. Avoid its
24804 expansion unless TARGET_INLINE_ALL_STRINGOPS. */
24805
24806 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
24807 && !TARGET_INLINE_ALL_STRINGOPS
24808 && !optimize_insn_for_size_p ()
24809 && (!CONST_INT_P (align) || INTVAL (align) < 4))
24810 return false;
24811
24812 addr = force_reg (Pmode, XEXP (src, 0));
24813 scratch1 = gen_reg_rtx (Pmode);
24814
24815 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
24816 && !optimize_insn_for_size_p ())
24817 {
24818 /* Well it seems that some optimizer does not combine a call like
24819 foo(strlen(bar), strlen(bar));
24820 when the move and the subtraction are done here. It does calculate
24821 the length just once when these instructions are done inside of
24822 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
24823 often used and I use one fewer register for the lifetime of
24824 output_strlen_unroll() this is better. */
24825
24826 emit_move_insn (out, addr);
24827
24828 ix86_expand_strlensi_unroll_1 (out, src, align);
24829
24830 /* strlensi_unroll_1 returns the address of the zero at the end of
24831 the string, like memchr(), so compute the length by subtracting
24832 the start address. */
24833 emit_insn (ix86_gen_sub3 (out, out, addr));
24834 }
24835 else
24836 {
24837 rtx unspec;
24838
24839 /* Can't use this if the user has appropriated eax, ecx, or edi. */
24840 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
24841 return false;
24842
24843 scratch2 = gen_reg_rtx (Pmode);
24844 scratch3 = gen_reg_rtx (Pmode);
24845 scratch4 = force_reg (Pmode, constm1_rtx);
24846
24847 emit_move_insn (scratch3, addr);
24848 eoschar = force_reg (QImode, eoschar);
24849
24850 src = replace_equiv_address_nv (src, scratch3);
24851
24852 /* If .md starts supporting :P, this can be done in .md. */
24853 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
24854 scratch4), UNSPEC_SCAS);
24855 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
24856 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
24857 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
24858 }
24859 return true;
24860 }
24861
24862 /* For a given symbol (function) construct code to compute the address of its
24863 PLT entry in the large x86-64 PIC model. */
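/* Sketch of the emitted RTL (illustrative):
       tmp = (const (unspec [symbol] UNSPEC_PLTOFF))
       tmp = tmp + pic_offset_table_rtx
   which typically assembles to a movabs of symbol@PLTOFF followed by an
   add of the PIC base register.  */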
24864 static rtx
24865 construct_plt_address (rtx symbol)
24866 {
24867 rtx tmp, unspec;
24868
24869 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
24870 gcc_assert (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF);
24871 gcc_assert (Pmode == DImode);
24872
24873 tmp = gen_reg_rtx (Pmode);
24874 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
24875
24876 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
24877 emit_insn (ix86_gen_add3 (tmp, tmp, pic_offset_table_rtx));
24878 return tmp;
24879 }
24880
24881 rtx
24882 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
24883 rtx callarg2,
24884 rtx pop, bool sibcall)
24885 {
24886 unsigned int const cregs_size
24887 = ARRAY_SIZE (x86_64_ms_sysv_extra_clobbered_registers);
24888 rtx vec[3 + cregs_size];
24889 rtx use = NULL, call;
24890 unsigned int vec_len = 0;
24891
24892 if (pop == const0_rtx)
24893 pop = NULL;
24894 gcc_assert (!TARGET_64BIT || !pop);
24895
24896 if (TARGET_MACHO && !TARGET_64BIT)
24897 {
24898 #if TARGET_MACHO
24899 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
24900 fnaddr = machopic_indirect_call_target (fnaddr);
24901 #endif
24902 }
24903 else
24904 {
24905 /* Static functions and indirect calls don't need the pic register. */
24906 if (flag_pic
24907 && (!TARGET_64BIT
24908 || (ix86_cmodel == CM_LARGE_PIC
24909 && DEFAULT_ABI != MS_ABI))
24910 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
24911 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
24912 use_reg (&use, pic_offset_table_rtx);
24913 }
24914
24915 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
24916 {
24917 rtx al = gen_rtx_REG (QImode, AX_REG);
24918 emit_move_insn (al, callarg2);
24919 use_reg (&use, al);
24920 }
24921
24922 if (ix86_cmodel == CM_LARGE_PIC
24923 && !TARGET_PECOFF
24924 && MEM_P (fnaddr)
24925 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
24926 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
24927 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
24928 else if (sibcall
24929 ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
24930 : !call_insn_operand (XEXP (fnaddr, 0), word_mode))
24931 {
24932 fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1);
24933 fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr));
24934 }
24935
24936 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
24937 if (retval)
24938 call = gen_rtx_SET (VOIDmode, retval, call);
24939 vec[vec_len++] = call;
24940
24941 if (pop)
24942 {
24943 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
24944 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
24945 vec[vec_len++] = pop;
24946 }
24947
24948 if (TARGET_64BIT_MS_ABI
24949 && (!callarg2 || INTVAL (callarg2) != -2))
24950 {
24951 unsigned i;
24952
24953 vec[vec_len++] = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
24954 UNSPEC_MS_TO_SYSV_CALL);
24955
24956 for (i = 0; i < cregs_size; i++)
24957 {
24958 int regno = x86_64_ms_sysv_extra_clobbered_registers[i];
24959 enum machine_mode mode = SSE_REGNO_P (regno) ? TImode : DImode;
24960
24961 vec[vec_len++]
24962 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (mode, regno));
24963 }
24964 }
24965
24966 if (vec_len > 1)
24967 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
24968 call = emit_call_insn (call);
24969 if (use)
24970 CALL_INSN_FUNCTION_USAGE (call) = use;
24971
24972 return call;
24973 }
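
/* Editor's note on the 64-bit MS-ABI branch above (a summary of the code,
   not an original comment): when code compiled for the Microsoft ABI calls
   a System V function, the call PARALLEL gains an UNSPEC_MS_TO_SYSV_CALL
   marker plus explicit clobbers of every register in
   x86_64_ms_sysv_extra_clobbered_registers - registers that are
   call-clobbered under the SysV ABI but call-saved under the MS ABI - so
   that values are not kept in them across the call.  */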
24974
24975 /* Output the assembly for a call instruction. */
24976
24977 const char *
24978 ix86_output_call_insn (rtx insn, rtx call_op)
24979 {
24980 bool direct_p = constant_call_address_operand (call_op, VOIDmode);
24981 bool seh_nop_p = false;
24982 const char *xasm;
24983
24984 if (SIBLING_CALL_P (insn))
24985 {
24986 if (direct_p)
24987 xasm = "jmp\t%P0";
24988 /* SEH epilogue detection requires the indirect branch case
24989 to include REX.W. */
24990 else if (TARGET_SEH)
24991 xasm = "rex.W jmp %A0";
24992 else
24993 xasm = "jmp\t%A0";
24994
24995 output_asm_insn (xasm, &call_op);
24996 return "";
24997 }
24998
24999 /* SEH unwinding can require an extra nop to be emitted in several
25000 circumstances. Determine if we have one of those. */
25001 if (TARGET_SEH)
25002 {
25003 rtx i;
25004
25005 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
25006 {
25007 /* If we get to another real insn, we don't need the nop. */
25008 if (INSN_P (i))
25009 break;
25010
25011 /* If we get to the epilogue note, prevent a catch region from
25012 being adjacent to the standard epilogue sequence. If non-
25013 call-exceptions, we'll have done this during epilogue emission. */
25014 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
25015 && !flag_non_call_exceptions
25016 && !can_throw_internal (insn))
25017 {
25018 seh_nop_p = true;
25019 break;
25020 }
25021 }
25022
25023 /* If we didn't find a real insn following the call, prevent the
25024 unwinder from looking into the next function. */
25025 if (i == NULL)
25026 seh_nop_p = true;
25027 }
25028
25029 if (direct_p)
25030 xasm = "call\t%P0";
25031 else
25032 xasm = "call\t%A0";
25033
25034 output_asm_insn (xasm, &call_op);
25035
25036 if (seh_nop_p)
25037 return "nop";
25038
25039 return "";
25040 }
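
/* Editor's examples of the output templates above (the exact operand text
   depends on the operand printers):

     direct call               "call\tfoo"
     indirect call             "call\t*%rax"
     direct sibcall            "jmp\tfoo"
     indirect sibcall, SEH     "rex.W jmp *%rax"

   On SEH targets a trailing "nop" is also returned when no real insn
   follows the call before the epilogue note (or at all), so that the
   unwinder does not attribute the return address to the next function.  */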
25041 \f
25042 /* Clear stack slot assignments remembered from previous functions.
25043 This is called from INIT_EXPANDERS once before RTL is emitted for each
25044 function. */
25045
25046 static struct machine_function *
25047 ix86_init_machine_status (void)
25048 {
25049 struct machine_function *f;
25050
25051 f = ggc_cleared_alloc<machine_function> ();
25052 f->use_fast_prologue_epilogue_nregs = -1;
25053 f->call_abi = ix86_abi;
25054
25055 return f;
25056 }
25057
25058 /* Return a MEM corresponding to a stack slot with mode MODE.
25059 Allocate a new slot if necessary.
25060
25061 The RTL for a function can have several slots available: N is
25062 which slot to use. */
25063
25064 rtx
25065 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
25066 {
25067 struct stack_local_entry *s;
25068
25069 gcc_assert (n < MAX_386_STACK_LOCALS);
25070
25071 for (s = ix86_stack_locals; s; s = s->next)
25072 if (s->mode == mode && s->n == n)
25073 return validize_mem (copy_rtx (s->rtl));
25074
25075 s = ggc_alloc<stack_local_entry> ();
25076 s->n = n;
25077 s->mode = mode;
25078 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
25079
25080 s->next = ix86_stack_locals;
25081 ix86_stack_locals = s;
25082 return validize_mem (copy_rtx (s->rtl));
25083 }
25084
25085 static void
25086 ix86_instantiate_decls (void)
25087 {
25088 struct stack_local_entry *s;
25089
25090 for (s = ix86_stack_locals; s; s = s->next)
25091 if (s->rtl != NULL_RTX)
25092 instantiate_decl_rtl (s->rtl);
25093 }
25094 \f
25095 /* Check whether x86 address PARTS is a pc-relative address. */
25096
25097 static bool
25098 rip_relative_addr_p (struct ix86_address *parts)
25099 {
25100 rtx base, index, disp;
25101
25102 base = parts->base;
25103 index = parts->index;
25104 disp = parts->disp;
25105
25106 if (disp && !base && !index)
25107 {
25108 if (TARGET_64BIT)
25109 {
25110 rtx symbol = disp;
25111
25112 if (GET_CODE (disp) == CONST)
25113 symbol = XEXP (disp, 0);
25114 if (GET_CODE (symbol) == PLUS
25115 && CONST_INT_P (XEXP (symbol, 1)))
25116 symbol = XEXP (symbol, 0);
25117
25118 if (GET_CODE (symbol) == LABEL_REF
25119 || (GET_CODE (symbol) == SYMBOL_REF
25120 && SYMBOL_REF_TLS_MODEL (symbol) == 0)
25121 || (GET_CODE (symbol) == UNSPEC
25122 && (XINT (symbol, 1) == UNSPEC_GOTPCREL
25123 || XINT (symbol, 1) == UNSPEC_PCREL
25124 || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
25125 return true;
25126 }
25127 }
25128 return false;
25129 }
25130
25131 /* Calculate the length of the memory address in the instruction encoding.
25132 Includes addr32 prefix, does not include the one-byte modrm, opcode,
25133 or other prefixes. We never generate addr32 prefix for LEA insn. */
25134
25135 int
25136 memory_address_length (rtx addr, bool lea)
25137 {
25138 struct ix86_address parts;
25139 rtx base, index, disp;
25140 int len;
25141 int ok;
25142
25143 if (GET_CODE (addr) == PRE_DEC
25144 || GET_CODE (addr) == POST_INC
25145 || GET_CODE (addr) == PRE_MODIFY
25146 || GET_CODE (addr) == POST_MODIFY)
25147 return 0;
25148
25149 ok = ix86_decompose_address (addr, &parts);
25150 gcc_assert (ok);
25151
25152 len = (parts.seg == SEG_DEFAULT) ? 0 : 1;
25153
25154 /* If this is not a LEA instruction, add the length of the addr32 prefix. */
25155 if (TARGET_64BIT && !lea
25156 && (SImode_address_operand (addr, VOIDmode)
25157 || (parts.base && GET_MODE (parts.base) == SImode)
25158 || (parts.index && GET_MODE (parts.index) == SImode)))
25159 len++;
25160
25161 base = parts.base;
25162 index = parts.index;
25163 disp = parts.disp;
25164
25165 if (base && GET_CODE (base) == SUBREG)
25166 base = SUBREG_REG (base);
25167 if (index && GET_CODE (index) == SUBREG)
25168 index = SUBREG_REG (index);
25169
25170 gcc_assert (base == NULL_RTX || REG_P (base));
25171 gcc_assert (index == NULL_RTX || REG_P (index));
25172
25173 /* Rule of thumb:
25174 - esp as the base always wants an index,
25175 - ebp as the base always wants a displacement,
25176 - r12 as the base always wants an index,
25177 - r13 as the base always wants a displacement. */
25178
25179 /* Register Indirect. */
25180 if (base && !index && !disp)
25181 {
25182 /* esp (for its index) and ebp (for its displacement) need
25183 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
25184 code. */
25185 if (base == arg_pointer_rtx
25186 || base == frame_pointer_rtx
25187 || REGNO (base) == SP_REG
25188 || REGNO (base) == BP_REG
25189 || REGNO (base) == R12_REG
25190 || REGNO (base) == R13_REG)
25191 len++;
25192 }
25193
25194 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
25195 is not disp32, but disp32(%rip), so for disp32
25196 SIB byte is needed, unless print_operand_address
25197 optimizes it into disp32(%rip) or (%rip) is implied
25198 by UNSPEC. */
25199 else if (disp && !base && !index)
25200 {
25201 len += 4;
25202 if (rip_relative_addr_p (&parts))
25203 len++;
25204 }
25205 else
25206 {
25207 /* Find the length of the displacement constant. */
25208 if (disp)
25209 {
25210 if (base && satisfies_constraint_K (disp))
25211 len += 1;
25212 else
25213 len += 4;
25214 }
25215 /* ebp always wants a displacement. Similarly r13. */
25216 else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
25217 len++;
25218
25219 /* An index requires the two-byte modrm form.... */
25220 if (index
25221 /* ...like esp (or r12), which always wants an index. */
25222 || base == arg_pointer_rtx
25223 || base == frame_pointer_rtx
25224 || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
25225 len++;
25226 }
25227
25228 return len;
25229 }
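
/* Editor's worked examples for the length computation above (byte counts
   exclude the one-byte modrm itself, as documented):

     (%ecx)            0   plain base register
     (%esp)            1   esp as the base forces the SIB form
     8(%ebp)           1   ebp as the base plus a disp8
     16(%ebx,%esi,4)   2   an index forces SIB; the displacement fits in 8 bits
     1024(%ebx)        4   the displacement needs 32 bits

   A segment override, or a 32-bit base/index in 64-bit code, adds one more
   prefix byte, as handled at the top of the function.  */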
25230
25231 /* Compute the default value for the "length_immediate" attribute. When
25232 SHORTFORM is set, expect that the insn has an 8-bit immediate alternative. */
25233 int
25234 ix86_attr_length_immediate_default (rtx insn, bool shortform)
25235 {
25236 int len = 0;
25237 int i;
25238 extract_insn_cached (insn);
25239 for (i = recog_data.n_operands - 1; i >= 0; --i)
25240 if (CONSTANT_P (recog_data.operand[i]))
25241 {
25242 enum attr_mode mode = get_attr_mode (insn);
25243
25244 gcc_assert (!len);
25245 if (shortform && CONST_INT_P (recog_data.operand[i]))
25246 {
25247 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
25248 switch (mode)
25249 {
25250 case MODE_QI:
25251 len = 1;
25252 continue;
25253 case MODE_HI:
25254 ival = trunc_int_for_mode (ival, HImode);
25255 break;
25256 case MODE_SI:
25257 ival = trunc_int_for_mode (ival, SImode);
25258 break;
25259 default:
25260 break;
25261 }
25262 if (IN_RANGE (ival, -128, 127))
25263 {
25264 len = 1;
25265 continue;
25266 }
25267 }
25268 switch (mode)
25269 {
25270 case MODE_QI:
25271 len = 1;
25272 break;
25273 case MODE_HI:
25274 len = 2;
25275 break;
25276 case MODE_SI:
25277 len = 4;
25278 break;
25279 /* Immediates for DImode instructions are encoded
25280 as 32bit sign extended values. */
25281 case MODE_DI:
25282 len = 4;
25283 break;
25284 default:
25285 fatal_insn ("unknown insn mode", insn);
25286 }
25287 }
25288 return len;
25289 }
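
/* Editor's illustrative values for the computation above (assuming the
   usual i386.md alternatives):

     addl $9, %eax      with SHORTFORM   1   sign-extended imm8 fits
     addl $1000, %eax   with SHORTFORM   4   needs the full imm32
     movw $7, %ax                        2   HImode immediate
     addq $100000, %rax                  4   DImode immediates are encoded as
                                             32-bit sign-extended values

   Insns without a constant operand get length 0.  */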
25290
25291 /* Compute default value for "length_address" attribute. */
25292 int
25293 ix86_attr_length_address_default (rtx insn)
25294 {
25295 int i;
25296
25297 if (get_attr_type (insn) == TYPE_LEA)
25298 {
25299 rtx set = PATTERN (insn), addr;
25300
25301 if (GET_CODE (set) == PARALLEL)
25302 set = XVECEXP (set, 0, 0);
25303
25304 gcc_assert (GET_CODE (set) == SET);
25305
25306 addr = SET_SRC (set);
25307
25308 return memory_address_length (addr, true);
25309 }
25310
25311 extract_insn_cached (insn);
25312 for (i = recog_data.n_operands - 1; i >= 0; --i)
25313 if (MEM_P (recog_data.operand[i]))
25314 {
25315 constrain_operands_cached (reload_completed);
25316 if (which_alternative != -1)
25317 {
25318 const char *constraints = recog_data.constraints[i];
25319 int alt = which_alternative;
25320
25321 while (*constraints == '=' || *constraints == '+')
25322 constraints++;
25323 while (alt-- > 0)
25324 while (*constraints++ != ',')
25325 ;
25326 /* Skip ignored operands. */
25327 if (*constraints == 'X')
25328 continue;
25329 }
25330 return memory_address_length (XEXP (recog_data.operand[i], 0), false);
25331 }
25332 return 0;
25333 }
25334
25335 /* Compute default value for "length_vex" attribute. It includes
25336 2 or 3 byte VEX prefix and 1 opcode byte. */
25337
25338 int
25339 ix86_attr_length_vex_default (rtx insn, bool has_0f_opcode, bool has_vex_w)
25340 {
25341 int i;
25342
25343 /* Only the 0f opcode map can use the 2-byte VEX prefix; the VEX.W bit
25344 requires the 3-byte VEX prefix. */
25345 if (!has_0f_opcode || has_vex_w)
25346 return 3 + 1;
25347
25348 /* We can always use 2 byte VEX prefix in 32bit. */
25349 if (!TARGET_64BIT)
25350 return 2 + 1;
25351
25352 extract_insn_cached (insn);
25353
25354 for (i = recog_data.n_operands - 1; i >= 0; --i)
25355 if (REG_P (recog_data.operand[i]))
25356 {
25357 /* REX.W bit uses 3 byte VEX prefix. */
25358 if (GET_MODE (recog_data.operand[i]) == DImode
25359 && GENERAL_REG_P (recog_data.operand[i]))
25360 return 3 + 1;
25361 }
25362 else
25363 {
25364 /* REX.X or REX.B bits use 3 byte VEX prefix. */
25365 if (MEM_P (recog_data.operand[i])
25366 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
25367 return 3 + 1;
25368 }
25369
25370 return 2 + 1;
25371 }
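
/* Editor's examples for the VEX length computation above (the value
   returned is the prefix plus one opcode byte):

     vaddps %xmm1, %xmm2, %xmm0    2 + 1   the 2-byte VEX form suffices
     vaddps (%r9), %xmm2, %xmm0    3 + 1   an extended base register needs
                                           REX.B, hence the 3-byte VEX

   A DImode general-register operand likewise forces REX.W and the 3-byte
   form, and non-0f opcode maps (0f38/0f3a) always need it.  */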
25372 \f
25373 /* Return the maximum number of instructions a cpu can issue. */
25374
25375 static int
25376 ix86_issue_rate (void)
25377 {
25378 switch (ix86_tune)
25379 {
25380 case PROCESSOR_PENTIUM:
25381 case PROCESSOR_BONNELL:
25382 case PROCESSOR_SILVERMONT:
25383 case PROCESSOR_INTEL:
25384 case PROCESSOR_K6:
25385 case PROCESSOR_BTVER2:
25386 case PROCESSOR_PENTIUM4:
25387 case PROCESSOR_NOCONA:
25388 return 2;
25389
25390 case PROCESSOR_PENTIUMPRO:
25391 case PROCESSOR_ATHLON:
25392 case PROCESSOR_K8:
25393 case PROCESSOR_AMDFAM10:
25394 case PROCESSOR_GENERIC:
25395 case PROCESSOR_BTVER1:
25396 return 3;
25397
25398 case PROCESSOR_BDVER1:
25399 case PROCESSOR_BDVER2:
25400 case PROCESSOR_BDVER3:
25401 case PROCESSOR_BDVER4:
25402 case PROCESSOR_CORE2:
25403 case PROCESSOR_NEHALEM:
25404 case PROCESSOR_SANDYBRIDGE:
25405 case PROCESSOR_HASWELL:
25406 return 4;
25407
25408 default:
25409 return 1;
25410 }
25411 }
25412
25413 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads flags set
25414 by DEP_INSN and nothing else set by DEP_INSN. */
25415
25416 static bool
25417 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
25418 {
25419 rtx set, set2;
25420
25421 /* Simplify the test for uninteresting insns. */
25422 if (insn_type != TYPE_SETCC
25423 && insn_type != TYPE_ICMOV
25424 && insn_type != TYPE_FCMOV
25425 && insn_type != TYPE_IBR)
25426 return false;
25427
25428 if ((set = single_set (dep_insn)) != 0)
25429 {
25430 set = SET_DEST (set);
25431 set2 = NULL_RTX;
25432 }
25433 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
25434 && XVECLEN (PATTERN (dep_insn), 0) == 2
25435 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
25436 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
25437 {
25438 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
25439 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
25440 }
25441 else
25442 return false;
25443
25444 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
25445 return false;
25446
25447 /* This test is true if the dependent insn reads the flags but
25448 not any other potentially set register. */
25449 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
25450 return false;
25451
25452 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
25453 return false;
25454
25455 return true;
25456 }
25457
25458 /* Return true iff USE_INSN has a memory address with operands set by
25459 SET_INSN. */
25460
25461 bool
25462 ix86_agi_dependent (rtx set_insn, rtx use_insn)
25463 {
25464 int i;
25465 extract_insn_cached (use_insn);
25466 for (i = recog_data.n_operands - 1; i >= 0; --i)
25467 if (MEM_P (recog_data.operand[i]))
25468 {
25469 rtx addr = XEXP (recog_data.operand[i], 0);
25470 return modified_in_p (addr, set_insn) != 0;
25471 }
25472 return false;
25473 }
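
/* Editor's note with a hypothetical example: this models the
   address-generation interlock penalized in ix86_adjust_cost on older
   in-order cores.  In a pair such as

       movl %ebx, %eax
       movl (%eax), %ecx

   the load's address register is written by the immediately preceding
   insn, so the function returns true and the Pentium tuning adds a
   cycle.  */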
25474
25475 /* Helper function for exact_store_load_dependency.
25476 Return true if addr is found in insn. */
25477 static bool
25478 exact_dependency_1 (rtx addr, rtx insn)
25479 {
25480 enum rtx_code code;
25481 const char *format_ptr;
25482 int i, j;
25483
25484 code = GET_CODE (insn);
25485 switch (code)
25486 {
25487 case MEM:
25488 if (rtx_equal_p (addr, insn))
25489 return true;
25490 break;
25491 case REG:
25492 CASE_CONST_ANY:
25493 case SYMBOL_REF:
25494 case CODE_LABEL:
25495 case PC:
25496 case CC0:
25497 case EXPR_LIST:
25498 return false;
25499 default:
25500 break;
25501 }
25502
25503 format_ptr = GET_RTX_FORMAT (code);
25504 for (i = 0; i < GET_RTX_LENGTH (code); i++)
25505 {
25506 switch (*format_ptr++)
25507 {
25508 case 'e':
25509 if (exact_dependency_1 (addr, XEXP (insn, i)))
25510 return true;
25511 break;
25512 case 'E':
25513 for (j = 0; j < XVECLEN (insn, i); j++)
25514 if (exact_dependency_1 (addr, XVECEXP (insn, i, j)))
25515 return true;
25516 break;
25517 }
25518 }
25519 return false;
25520 }
25521
25522 /* Return true if there exists exact dependency for store & load, i.e.
25523 the same memory address is used in them. */
25524 static bool
25525 exact_store_load_dependency (rtx store, rtx load)
25526 {
25527 rtx set1, set2;
25528
25529 set1 = single_set (store);
25530 if (!set1)
25531 return false;
25532 if (!MEM_P (SET_DEST (set1)))
25533 return false;
25534 set2 = single_set (load);
25535 if (!set2)
25536 return false;
25537 if (exact_dependency_1 (SET_DEST (set1), SET_SRC (set2)))
25538 return true;
25539 return false;
25540 }
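
/* Editor's example for the helper above: in a pair such as

       movw %ax, 6(%esp)        store
       movzwl 6(%esp), %ecx     load

   the store destination and the load source use the same address
   expression, so the function returns true; the Silvermont tuning in
   ix86_adjust_cost then raises the cost to account for store
   forwarding.  */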
25541
25542 static int
25543 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
25544 {
25545 enum attr_type insn_type, dep_insn_type;
25546 enum attr_memory memory;
25547 rtx set, set2;
25548 int dep_insn_code_number;
25549
25550 /* Anti and output dependencies have zero cost on all CPUs. */
25551 if (REG_NOTE_KIND (link) != 0)
25552 return 0;
25553
25554 dep_insn_code_number = recog_memoized (dep_insn);
25555
25556 /* If we can't recognize the insns, we can't really do anything. */
25557 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
25558 return cost;
25559
25560 insn_type = get_attr_type (insn);
25561 dep_insn_type = get_attr_type (dep_insn);
25562
25563 switch (ix86_tune)
25564 {
25565 case PROCESSOR_PENTIUM:
25566 /* Address Generation Interlock adds a cycle of latency. */
25567 if (insn_type == TYPE_LEA)
25568 {
25569 rtx addr = PATTERN (insn);
25570
25571 if (GET_CODE (addr) == PARALLEL)
25572 addr = XVECEXP (addr, 0, 0);
25573
25574 gcc_assert (GET_CODE (addr) == SET);
25575
25576 addr = SET_SRC (addr);
25577 if (modified_in_p (addr, dep_insn))
25578 cost += 1;
25579 }
25580 else if (ix86_agi_dependent (dep_insn, insn))
25581 cost += 1;
25582
25583 /* ??? Compares pair with jump/setcc. */
25584 if (ix86_flags_dependent (insn, dep_insn, insn_type))
25585 cost = 0;
25586
25587 /* Floating point stores require value to be ready one cycle earlier. */
25588 if (insn_type == TYPE_FMOV
25589 && get_attr_memory (insn) == MEMORY_STORE
25590 && !ix86_agi_dependent (dep_insn, insn))
25591 cost += 1;
25592 break;
25593
25594 case PROCESSOR_PENTIUMPRO:
25595 /* INT->FP conversion is expensive. */
25596 if (get_attr_fp_int_src (dep_insn))
25597 cost += 5;
25598
25599 /* There is one cycle extra latency between an FP op and a store. */
25600 if (insn_type == TYPE_FMOV
25601 && (set = single_set (dep_insn)) != NULL_RTX
25602 && (set2 = single_set (insn)) != NULL_RTX
25603 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
25604 && MEM_P (SET_DEST (set2)))
25605 cost += 1;
25606
25607 memory = get_attr_memory (insn);
25608
25609 /* The reorder buffer can hide the latency of a load by executing it in
25610 parallel with the previous instruction, provided the previous
25611 instruction is not needed to compute the address. */
25612 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25613 && !ix86_agi_dependent (dep_insn, insn))
25614 {
25615 /* Claim moves take one cycle, as the core can issue one load at a
25616 time and the next load can start a cycle later. */
25617 if (dep_insn_type == TYPE_IMOV
25618 || dep_insn_type == TYPE_FMOV)
25619 cost = 1;
25620 else if (cost > 1)
25621 cost--;
25622 }
25623 break;
25624
25625 case PROCESSOR_K6:
25626 /* The esp dependency is resolved before
25627 the instruction is really finished. */
25628 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
25629 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
25630 return 1;
25631
25632 /* INT->FP conversion is expensive. */
25633 if (get_attr_fp_int_src (dep_insn))
25634 cost += 5;
25635
25636 memory = get_attr_memory (insn);
25637
25638 /* The reorder buffer can hide the latency of a load by executing it in
25639 parallel with the previous instruction, provided the previous
25640 instruction is not needed to compute the address. */
25641 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25642 && !ix86_agi_dependent (dep_insn, insn))
25643 {
25644 /* Claim moves take one cycle, as the core can issue one load at a
25645 time and the next load can start a cycle later. */
25646 if (dep_insn_type == TYPE_IMOV
25647 || dep_insn_type == TYPE_FMOV)
25648 cost = 1;
25649 else if (cost > 2)
25650 cost -= 2;
25651 else
25652 cost = 1;
25653 }
25654 break;
25655
25656 case PROCESSOR_AMDFAM10:
25657 case PROCESSOR_BDVER1:
25658 case PROCESSOR_BDVER2:
25659 case PROCESSOR_BDVER3:
25660 case PROCESSOR_BDVER4:
25661 case PROCESSOR_BTVER1:
25662 case PROCESSOR_BTVER2:
25663 case PROCESSOR_GENERIC:
25664 /* The stack engine allows push and pop instructions to execute in parallel. */
25665 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
25666 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
25667 return 0;
25668 /* FALLTHRU */
25669
25670 case PROCESSOR_ATHLON:
25671 case PROCESSOR_K8:
25672 memory = get_attr_memory (insn);
25673
25674 /* The reorder buffer can hide the latency of a load by executing it in
25675 parallel with the previous instruction, provided the previous
25676 instruction is not needed to compute the address. */
25677 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25678 && !ix86_agi_dependent (dep_insn, insn))
25679 {
25680 enum attr_unit unit = get_attr_unit (insn);
25681 int loadcost = 3;
25682
25683 /* Because of the difference between the length of integer and
25684 floating unit pipeline preparation stages, the memory operands
25685 for floating point are cheaper.
25686
25687 ??? For Athlon the difference is most probably 2. */
25688 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
25689 loadcost = 3;
25690 else
25691 loadcost = TARGET_ATHLON ? 2 : 0;
25692
25693 if (cost >= loadcost)
25694 cost -= loadcost;
25695 else
25696 cost = 0;
25697 }
25698 break;
25699
25700 case PROCESSOR_CORE2:
25701 case PROCESSOR_NEHALEM:
25702 case PROCESSOR_SANDYBRIDGE:
25703 case PROCESSOR_HASWELL:
25704 /* The stack engine allows push and pop instructions to execute in parallel. */
25705 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
25706 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
25707 return 0;
25708
25709 memory = get_attr_memory (insn);
25710
25711 /* The reorder buffer can hide the latency of a load by executing it in
25712 parallel with the previous instruction, provided the previous
25713 instruction is not needed to compute the address. */
25714 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25715 && !ix86_agi_dependent (dep_insn, insn))
25716 {
25717 if (cost >= 4)
25718 cost -= 4;
25719 else
25720 cost = 0;
25721 }
25722 break;
25723
25724 case PROCESSOR_SILVERMONT:
25725 case PROCESSOR_INTEL:
25726 if (!reload_completed)
25727 return cost;
25728
25729 /* Increase cost of integer loads. */
25730 memory = get_attr_memory (dep_insn);
25731 if (memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25732 {
25733 enum attr_unit unit = get_attr_unit (dep_insn);
25734 if (unit == UNIT_INTEGER && cost == 1)
25735 {
25736 if (memory == MEMORY_LOAD)
25737 cost = 3;
25738 else
25739 {
25740 /* Increase cost of ld/st for short int types only
25741 because of store forwarding issue. */
25742 rtx set = single_set (dep_insn);
25743 if (set && (GET_MODE (SET_DEST (set)) == QImode
25744 || GET_MODE (SET_DEST (set)) == HImode))
25745 {
25746 /* Increase cost of store/load insn if exact
25747 dependence exists and it is load insn. */
25748 enum attr_memory insn_memory = get_attr_memory (insn);
25749 if (insn_memory == MEMORY_LOAD
25750 && exact_store_load_dependency (dep_insn, insn))
25751 cost = 3;
25752 }
25753 }
25754 }
25755 }
25756
25757 default:
25758 break;
25759 }
25760
25761 return cost;
25762 }
25763
25764 /* How many alternative schedules to try. This should be as wide as the
25765 scheduling freedom in the DFA, but no wider. Making this value too
25766 large results in extra work for the scheduler. */
25767
25768 static int
25769 ia32_multipass_dfa_lookahead (void)
25770 {
25771 switch (ix86_tune)
25772 {
25773 case PROCESSOR_PENTIUM:
25774 return 2;
25775
25776 case PROCESSOR_PENTIUMPRO:
25777 case PROCESSOR_K6:
25778 return 1;
25779
25780 case PROCESSOR_BDVER1:
25781 case PROCESSOR_BDVER2:
25782 case PROCESSOR_BDVER3:
25783 case PROCESSOR_BDVER4:
25784 /* We use lookahead value 4 for BD both before and after reload
25785 schedules. Plan is to have value 8 included for O3. */
25786 return 4;
25787
25788 case PROCESSOR_CORE2:
25789 case PROCESSOR_NEHALEM:
25790 case PROCESSOR_SANDYBRIDGE:
25791 case PROCESSOR_HASWELL:
25792 case PROCESSOR_BONNELL:
25793 case PROCESSOR_SILVERMONT:
25794 case PROCESSOR_INTEL:
25795 /* Generally, we want haifa-sched:max_issue() to look ahead as far as
25796 the number of instructions that can be executed in a cycle, i.e.,
25797 issue_rate. I wonder why tuning for many CPUs does not do this. */
25798 if (reload_completed)
25799 return ix86_issue_rate ();
25800 /* Don't use lookahead for pre-reload schedule to save compile time. */
25801 return 0;
25802
25803 default:
25804 return 0;
25805 }
25806 }
25807
25808 /* Return true if target platform supports macro-fusion. */
25809
25810 static bool
25811 ix86_macro_fusion_p ()
25812 {
25813 return TARGET_FUSE_CMP_AND_BRANCH;
25814 }
25815
25816 /* Check whether the current microarchitecture supports macro fusion
25817 for insn pair "CONDGEN + CONDJMP". Refer to
25818 "Intel Architectures Optimization Reference Manual". */
25819
25820 static bool
25821 ix86_macro_fusion_pair_p (rtx condgen, rtx condjmp)
25822 {
25823 rtx src, dest;
25824 rtx single_set = single_set (condgen);
25825 enum rtx_code ccode;
25826 rtx compare_set = NULL_RTX, test_if, cond;
25827 rtx alu_set = NULL_RTX, addr = NULL_RTX;
25828
25829 if (!any_condjump_p (condjmp))
25830 return false;
25831
25832 if (get_attr_type (condgen) != TYPE_TEST
25833 && get_attr_type (condgen) != TYPE_ICMP
25834 && get_attr_type (condgen) != TYPE_INCDEC
25835 && get_attr_type (condgen) != TYPE_ALU)
25836 return false;
25837
25838 if (single_set == NULL_RTX
25839 && !TARGET_FUSE_ALU_AND_BRANCH)
25840 return false;
25841
25842 if (single_set != NULL_RTX)
25843 compare_set = single_set;
25844 else
25845 {
25846 int i;
25847 rtx pat = PATTERN (condgen);
25848 for (i = 0; i < XVECLEN (pat, 0); i++)
25849 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
25850 {
25851 rtx set_src = SET_SRC (XVECEXP (pat, 0, i));
25852 if (GET_CODE (set_src) == COMPARE)
25853 compare_set = XVECEXP (pat, 0, i);
25854 else
25855 alu_set = XVECEXP (pat, 0, i);
25856 }
25857 }
25858 if (compare_set == NULL_RTX)
25859 return false;
25860 src = SET_SRC (compare_set);
25861 if (GET_CODE (src) != COMPARE)
25862 return false;
25863
25864 /* Macro-fusion for cmp/test MEM-IMM + conditional jmp is not
25865 supported. */
25866 if ((MEM_P (XEXP (src, 0))
25867 && CONST_INT_P (XEXP (src, 1)))
25868 || (MEM_P (XEXP (src, 1))
25869 && CONST_INT_P (XEXP (src, 0))))
25870 return false;
25871
25872 /* No fusion for RIP-relative address. */
25873 if (MEM_P (XEXP (src, 0)))
25874 addr = XEXP (XEXP (src, 0), 0);
25875 else if (MEM_P (XEXP (src, 1)))
25876 addr = XEXP (XEXP (src, 1), 0);
25877
25878 if (addr) {
25879 ix86_address parts;
25880 int ok = ix86_decompose_address (addr, &parts);
25881 gcc_assert (ok);
25882
25883 if (rip_relative_addr_p (&parts))
25884 return false;
25885 }
25886
25887 test_if = SET_SRC (pc_set (condjmp));
25888 cond = XEXP (test_if, 0);
25889 ccode = GET_CODE (cond);
25890 /* Check whether the conditional jump uses the Sign or Overflow flags. */
25891 if (!TARGET_FUSE_CMP_AND_BRANCH_SOFLAGS
25892 && (ccode == GE
25893 || ccode == GT
25894 || ccode == LE
25895 || ccode == LT))
25896 return false;
25897
25898 /* Return true for TYPE_TEST and TYPE_ICMP. */
25899 if (get_attr_type (condgen) == TYPE_TEST
25900 || get_attr_type (condgen) == TYPE_ICMP)
25901 return true;
25902
25903 /* The following handles the macro-fusion case for alu + jmp. */
25904 if (!TARGET_FUSE_ALU_AND_BRANCH || !alu_set)
25905 return false;
25906
25907 /* No fusion for alu op with memory destination operand. */
25908 dest = SET_DEST (alu_set);
25909 if (MEM_P (dest))
25910 return false;
25911
25912 /* Macro-fusion for inc/dec + unsigned conditional jump is not
25913 supported. */
25914 if (get_attr_type (condgen) == TYPE_INCDEC
25915 && (ccode == GEU
25916 || ccode == GTU
25917 || ccode == LEU
25918 || ccode == LTU))
25919 return false;
25920
25921 return true;
25922 }
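
/* Editor's summary of the checks above, with example pairs (assuming a core
   with TARGET_FUSE_CMP_AND_BRANCH; the exact outcome depends on the tuning
   flags):

     cmpl %esi, %edi  / jne .L1    fuses (compare + branch)
     testl %eax, %eax / je  .L1    fuses
     cmpl $0, (%rdi)  / je  .L1    no fusion: memory-immediate compare
     decl %ecx        / jae .L1    no fusion: inc/dec with an unsigned
                                   condition
     addl %edx, %eax  / jne .L1    fuses only if TARGET_FUSE_ALU_AND_BRANCH
                                   is set and the destination is not memory.  */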
25923
25924 /* Try to reorder ready list to take advantage of Atom pipelined IMUL
25925 execution. It is applied if
25926 (1) IMUL instruction is on the top of list;
25927 (2) There is exactly one producer of an independent IMUL instruction
25928 in the ready list.
25929 Return index of IMUL producer if it was found and -1 otherwise. */
25930 static int
25931 do_reorder_for_imul (rtx *ready, int n_ready)
25932 {
25933 rtx insn, set, insn1, insn2;
25934 sd_iterator_def sd_it;
25935 dep_t dep;
25936 int index = -1;
25937 int i;
25938
25939 if (!TARGET_BONNELL)
25940 return index;
25941
25942 /* Check that IMUL instruction is on the top of ready list. */
25943 insn = ready[n_ready - 1];
25944 set = single_set (insn);
25945 if (!set)
25946 return index;
25947 if (!(GET_CODE (SET_SRC (set)) == MULT
25948 && GET_MODE (SET_SRC (set)) == SImode))
25949 return index;
25950
25951 /* Search for producer of independent IMUL instruction. */
25952 for (i = n_ready - 2; i >= 0; i--)
25953 {
25954 insn = ready[i];
25955 if (!NONDEBUG_INSN_P (insn))
25956 continue;
25957 /* Skip IMUL instruction. */
25958 insn2 = PATTERN (insn);
25959 if (GET_CODE (insn2) == PARALLEL)
25960 insn2 = XVECEXP (insn2, 0, 0);
25961 if (GET_CODE (insn2) == SET
25962 && GET_CODE (SET_SRC (insn2)) == MULT
25963 && GET_MODE (SET_SRC (insn2)) == SImode)
25964 continue;
25965
25966 FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
25967 {
25968 rtx con;
25969 con = DEP_CON (dep);
25970 if (!NONDEBUG_INSN_P (con))
25971 continue;
25972 insn1 = PATTERN (con);
25973 if (GET_CODE (insn1) == PARALLEL)
25974 insn1 = XVECEXP (insn1, 0, 0);
25975
25976 if (GET_CODE (insn1) == SET
25977 && GET_CODE (SET_SRC (insn1)) == MULT
25978 && GET_MODE (SET_SRC (insn1)) == SImode)
25979 {
25980 sd_iterator_def sd_it1;
25981 dep_t dep1;
25982 /* Check that there is no other producer for this IMUL. */
25983 index = i;
25984 FOR_EACH_DEP (con, SD_LIST_BACK, sd_it1, dep1)
25985 {
25986 rtx pro;
25987 pro = DEP_PRO (dep1);
25988 if (!NONDEBUG_INSN_P (pro))
25989 continue;
25990 if (pro != insn)
25991 index = -1;
25992 }
25993 if (index >= 0)
25994 break;
25995 }
25996 }
25997 if (index >= 0)
25998 break;
25999 }
26000 return index;
26001 }
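
/* Editor's illustration of the reordering above (hypothetical ready list,
   top of the list shown last): if the top already holds an SImode imul and
   some other ready insn's only consumer is a second, not yet ready imul,
   that producer is moved to the top so it issues first; by the time the
   first multiply completes, the dependent one has its operand and the two
   can flow through Atom's pipelined IMUL unit back to back.  */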
26002
26003 /* Try to find the best candidate for the top of the ready list if two insns
26004 have the same priority - the best candidate is the one whose producers
26005 were scheduled earlier. Applied for Silvermont only.
26006 Return true if the top 2 insns must be interchanged. */
26007 static bool
26008 swap_top_of_ready_list (rtx *ready, int n_ready)
26009 {
26010 rtx top = ready[n_ready - 1];
26011 rtx next = ready[n_ready - 2];
26012 rtx set;
26013 sd_iterator_def sd_it;
26014 dep_t dep;
26015 int clock1 = -1;
26016 int clock2 = -1;
26017 #define INSN_TICK(INSN) (HID (INSN)->tick)
26018
26019 if (!TARGET_SILVERMONT && !TARGET_INTEL)
26020 return false;
26021
26022 if (!NONDEBUG_INSN_P (top))
26023 return false;
26024 if (!NONJUMP_INSN_P (top))
26025 return false;
26026 if (!NONDEBUG_INSN_P (next))
26027 return false;
26028 if (!NONJUMP_INSN_P (next))
26029 return false;
26030 set = single_set (top);
26031 if (!set)
26032 return false;
26033 set = single_set (next);
26034 if (!set)
26035 return false;
26036
26037 if (INSN_PRIORITY_KNOWN (top) && INSN_PRIORITY_KNOWN (next))
26038 {
26039 if (INSN_PRIORITY (top) != INSN_PRIORITY (next))
26040 return false;
26041 /* Determine the winner more precisely. */
26042 FOR_EACH_DEP (top, SD_LIST_RES_BACK, sd_it, dep)
26043 {
26044 rtx pro;
26045 pro = DEP_PRO (dep);
26046 if (!NONDEBUG_INSN_P (pro))
26047 continue;
26048 if (INSN_TICK (pro) > clock1)
26049 clock1 = INSN_TICK (pro);
26050 }
26051 FOR_EACH_DEP (next, SD_LIST_RES_BACK, sd_it, dep)
26052 {
26053 rtx pro;
26054 pro = DEP_PRO (dep);
26055 if (!NONDEBUG_INSN_P (pro))
26056 continue;
26057 if (INSN_TICK (pro) > clock2)
26058 clock2 = INSN_TICK (pro);
26059 }
26060
26061 if (clock1 == clock2)
26062 {
26063 /* Determine winner - load must win. */
26064 enum attr_memory memory1, memory2;
26065 memory1 = get_attr_memory (top);
26066 memory2 = get_attr_memory (next);
26067 if (memory2 == MEMORY_LOAD && memory1 != MEMORY_LOAD)
26068 return true;
26069 }
26070 return (bool) (clock2 < clock1);
26071 }
26072 return false;
26073 #undef INSN_TICK
26074 }
26075
26076 /* Perform possible reordering of the ready list for Atom/Silvermont only.
26077 Return the issue rate. */
26078 static int
26079 ix86_sched_reorder (FILE *dump, int sched_verbose, rtx *ready, int *pn_ready,
26080 int clock_var)
26081 {
26082 int issue_rate = -1;
26083 int n_ready = *pn_ready;
26084 int i;
26085 rtx insn;
26086 int index = -1;
26087
26088 /* Set up issue rate. */
26089 issue_rate = ix86_issue_rate ();
26090
26091 /* Do reordering for BONNELL/SILVERMONT only. */
26092 if (!TARGET_BONNELL && !TARGET_SILVERMONT && !TARGET_INTEL)
26093 return issue_rate;
26094
26095 /* Nothing to do if ready list contains only 1 instruction. */
26096 if (n_ready <= 1)
26097 return issue_rate;
26098
26099 /* Do reordering for the post-reload scheduler only. */
26100 if (!reload_completed)
26101 return issue_rate;
26102
26103 if ((index = do_reorder_for_imul (ready, n_ready)) >= 0)
26104 {
26105 if (sched_verbose > 1)
26106 fprintf (dump, ";;\tatom sched_reorder: put %d insn on top\n",
26107 INSN_UID (ready[index]));
26108
26109 /* Put IMUL producer (ready[index]) at the top of ready list. */
26110 insn = ready[index];
26111 for (i = index; i < n_ready - 1; i++)
26112 ready[i] = ready[i + 1];
26113 ready[n_ready - 1] = insn;
26114 return issue_rate;
26115 }
26116 if (clock_var != 0 && swap_top_of_ready_list (ready, n_ready))
26117 {
26118 if (sched_verbose > 1)
26119 fprintf (dump, ";;\tslm sched_reorder: swap %d and %d insns\n",
26120 INSN_UID (ready[n_ready - 1]), INSN_UID (ready[n_ready - 2]));
26121 /* Swap 2 top elements of ready list. */
26122 insn = ready[n_ready - 1];
26123 ready[n_ready - 1] = ready[n_ready - 2];
26124 ready[n_ready - 2] = insn;
26125 }
26126 return issue_rate;
26127 }
26128
26129 static bool
26130 ix86_class_likely_spilled_p (reg_class_t);
26131
26132 /* Return true if the lhs of INSN is a hard function-argument register; set
26133 *IS_SPILLED to true if it is a likely-spilled hard register. */
26134 static bool
26135 insn_is_function_arg (rtx insn, bool* is_spilled)
26136 {
26137 rtx dst;
26138
26139 if (!NONDEBUG_INSN_P (insn))
26140 return false;
26141 /* Call instructions are not movable; ignore them. */
26142 if (CALL_P (insn))
26143 return false;
26144 insn = PATTERN (insn);
26145 if (GET_CODE (insn) == PARALLEL)
26146 insn = XVECEXP (insn, 0, 0);
26147 if (GET_CODE (insn) != SET)
26148 return false;
26149 dst = SET_DEST (insn);
26150 if (REG_P (dst) && HARD_REGISTER_P (dst)
26151 && ix86_function_arg_regno_p (REGNO (dst)))
26152 {
26153 /* Is it likely spilled HW register? */
26154 if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
26155 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
26156 *is_spilled = true;
26157 return true;
26158 }
26159 return false;
26160 }
26161
26162 /* Add output dependencies for a chain of adjacent function-argument moves,
26163 but only if there is a move to a likely-spilled hard register. Return the
26164 first argument if at least one dependence was added, or NULL otherwise. */
26165 static rtx
26166 add_parameter_dependencies (rtx call, rtx head)
26167 {
26168 rtx insn;
26169 rtx last = call;
26170 rtx first_arg = NULL;
26171 bool is_spilled = false;
26172
26173 head = PREV_INSN (head);
26174
26175 /* Find the argument-passing instruction nearest to the call. */
26176 while (true)
26177 {
26178 last = PREV_INSN (last);
26179 if (last == head)
26180 return NULL;
26181 if (!NONDEBUG_INSN_P (last))
26182 continue;
26183 if (insn_is_function_arg (last, &is_spilled))
26184 break;
26185 return NULL;
26186 }
26187
26188 first_arg = last;
26189 while (true)
26190 {
26191 insn = PREV_INSN (last);
26192 if (!INSN_P (insn))
26193 break;
26194 if (insn == head)
26195 break;
26196 if (!NONDEBUG_INSN_P (insn))
26197 {
26198 last = insn;
26199 continue;
26200 }
26201 if (insn_is_function_arg (insn, &is_spilled))
26202 {
26203 /* Add an output dependence between two function arguments if the chain
26204 of output arguments contains likely-spilled hard registers. */
26205 if (is_spilled)
26206 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26207 first_arg = last = insn;
26208 }
26209 else
26210 break;
26211 }
26212 if (!is_spilled)
26213 return NULL;
26214 return first_arg;
26215 }
26216
26217 /* Add an output or anti dependency from INSN to FIRST_ARG to restrict its
26218 code motion. */
26219 static void
26220 avoid_func_arg_motion (rtx first_arg, rtx insn)
26221 {
26222 rtx set;
26223 rtx tmp;
26224
26225 set = single_set (insn);
26226 if (!set)
26227 return;
26228 tmp = SET_DEST (set);
26229 if (REG_P (tmp))
26230 {
26231 /* Add output dependency to the first function argument. */
26232 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26233 return;
26234 }
26235 /* Add anti dependency. */
26236 add_dependence (first_arg, insn, REG_DEP_ANTI);
26237 }
26238
26239 /* Avoid cross-block motion of a function argument by adding a dependency
26240 from the first non-jump instruction in BB. */
26241 static void
26242 add_dependee_for_func_arg (rtx arg, basic_block bb)
26243 {
26244 rtx insn = BB_END (bb);
26245
26246 while (insn)
26247 {
26248 if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
26249 {
26250 rtx set = single_set (insn);
26251 if (set)
26252 {
26253 avoid_func_arg_motion (arg, insn);
26254 return;
26255 }
26256 }
26257 if (insn == BB_HEAD (bb))
26258 return;
26259 insn = PREV_INSN (insn);
26260 }
26261 }
26262
26263 /* Hook for pre-reload schedule - avoid motion of function arguments
26264 passed in likely spilled HW registers. */
26265 static void
26266 ix86_dependencies_evaluation_hook (rtx head, rtx tail)
26267 {
26268 rtx insn;
26269 rtx first_arg = NULL;
26270 if (reload_completed)
26271 return;
26272 while (head != tail && DEBUG_INSN_P (head))
26273 head = NEXT_INSN (head);
26274 for (insn = tail; insn != head; insn = PREV_INSN (insn))
26275 if (INSN_P (insn) && CALL_P (insn))
26276 {
26277 first_arg = add_parameter_dependencies (insn, head);
26278 if (first_arg)
26279 {
26280 /* Add a dependence on the first argument to predecessor blocks, but only
26281 if the region contains more than one block. */
26282 basic_block bb = BLOCK_FOR_INSN (insn);
26283 int rgn = CONTAINING_RGN (bb->index);
26284 int nr_blks = RGN_NR_BLOCKS (rgn);
26285 /* Skip trivial regions and region head blocks that can have
26286 predecessors outside of region. */
26287 if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
26288 {
26289 edge e;
26290 edge_iterator ei;
26291
26292 /* Regions are SCCs with the exception of selective
26293 scheduling with pipelining of outer blocks enabled.
26294 So also check that immediate predecessors of a non-head
26295 block are in the same region. */
26296 FOR_EACH_EDGE (e, ei, bb->preds)
26297 {
26298 /* Avoid creating loop-carried dependencies by using the
26299 topological ordering of the region. */
26300 if (rgn == CONTAINING_RGN (e->src->index)
26301 && BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
26302 add_dependee_for_func_arg (first_arg, e->src);
26303 }
26304 }
26305 insn = first_arg;
26306 if (insn == head)
26307 break;
26308 }
26309 }
26310 else if (first_arg)
26311 avoid_func_arg_motion (first_arg, insn);
26312 }
26313
26314 /* Hook for pre-reload schedule - set priority of moves from likely spilled
26315 HW registers to the maximum, to schedule them as soon as possible. These are
26316 moves from function argument registers at the top of the function entry
26317 and moves from function return value registers after a call. */
26318 static int
26319 ix86_adjust_priority (rtx insn, int priority)
26320 {
26321 rtx set;
26322
26323 if (reload_completed)
26324 return priority;
26325
26326 if (!NONDEBUG_INSN_P (insn))
26327 return priority;
26328
26329 set = single_set (insn);
26330 if (set)
26331 {
26332 rtx tmp = SET_SRC (set);
26333 if (REG_P (tmp)
26334 && HARD_REGISTER_P (tmp)
26335 && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
26336 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
26337 return current_sched_info->sched_max_insns_priority;
26338 }
26339
26340 return priority;
26341 }
26342
26343 /* Model the decoder of Core 2/i7.
26344 The hooks below for multipass scheduling (see haifa-sched.c:max_issue)
26345 track the instruction fetch block boundaries and make sure that long
26346 (9+ byte) instructions are assigned to D0. */
26347
26348 /* Maximum length of an insn that can be handled by
26349 a secondary decoder unit. '8' for Core 2/i7. */
26350 static int core2i7_secondary_decoder_max_insn_size;
26351
26352 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
26353 '16' for Core 2/i7. */
26354 static int core2i7_ifetch_block_size;
26355
26356 /* Maximum number of instructions decoder can handle per cycle.
26357 '6' for Core 2/i7. */
26358 static int core2i7_ifetch_block_max_insns;
26359
26360 typedef struct ix86_first_cycle_multipass_data_ *
26361 ix86_first_cycle_multipass_data_t;
26362 typedef const struct ix86_first_cycle_multipass_data_ *
26363 const_ix86_first_cycle_multipass_data_t;
26364
26365 /* A variable to store target state across calls to max_issue within
26366 one cycle. */
26367 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
26368 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
26369
26370 /* Initialize DATA. */
26371 static void
26372 core2i7_first_cycle_multipass_init (void *_data)
26373 {
26374 ix86_first_cycle_multipass_data_t data
26375 = (ix86_first_cycle_multipass_data_t) _data;
26376
26377 data->ifetch_block_len = 0;
26378 data->ifetch_block_n_insns = 0;
26379 data->ready_try_change = NULL;
26380 data->ready_try_change_size = 0;
26381 }
26382
26383 /* Advancing the cycle; reset ifetch block counts. */
26384 static void
26385 core2i7_dfa_post_advance_cycle (void)
26386 {
26387 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
26388
26389 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
26390
26391 data->ifetch_block_len = 0;
26392 data->ifetch_block_n_insns = 0;
26393 }
26394
26395 static int min_insn_size (rtx);
26396
26397 /* Filter out insns from ready_try that the core will not be able to issue
26398 on current cycle due to decoder. */
26399 static void
26400 core2i7_first_cycle_multipass_filter_ready_try
26401 (const_ix86_first_cycle_multipass_data_t data,
26402 signed char *ready_try, int n_ready, bool first_cycle_insn_p)
26403 {
26404 while (n_ready--)
26405 {
26406 rtx insn;
26407 int insn_size;
26408
26409 if (ready_try[n_ready])
26410 continue;
26411
26412 insn = get_ready_element (n_ready);
26413 insn_size = min_insn_size (insn);
26414
26415 if (/* If this insn is too long for a secondary decoder ... */
26416 (!first_cycle_insn_p
26417 && insn_size > core2i7_secondary_decoder_max_insn_size)
26418 /* ... or it would not fit into the ifetch block ... */
26419 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
26420 /* ... or the decoder is full already ... */
26421 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
26422 /* ... mask the insn out. */
26423 {
26424 ready_try[n_ready] = 1;
26425
26426 if (data->ready_try_change)
26427 bitmap_set_bit (data->ready_try_change, n_ready);
26428 }
26429 }
26430 }
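
/* Editor's example of the filtering above, using the Core 2/i7 parameters
   set in ix86_sched_init_global (16-byte fetch block, at most 6 insns per
   block, 8-byte limit for the secondary decoders): if 13 bytes of the
   current fetch block are already used, a ready 5-byte insn is masked out
   (13 + 5 > 16); and an insn longer than 8 bytes (e.g. a movabs with a
   64-bit immediate) is masked out whenever it is not the first insn of the
   group, since only decoder 0 handles such insns.  */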
26431
26432 /* Prepare for a new round of multipass lookahead scheduling. */
26433 static void
26434 core2i7_first_cycle_multipass_begin (void *_data,
26435 signed char *ready_try, int n_ready,
26436 bool first_cycle_insn_p)
26437 {
26438 ix86_first_cycle_multipass_data_t data
26439 = (ix86_first_cycle_multipass_data_t) _data;
26440 const_ix86_first_cycle_multipass_data_t prev_data
26441 = ix86_first_cycle_multipass_data;
26442
26443 /* Restore the state from the end of the previous round. */
26444 data->ifetch_block_len = prev_data->ifetch_block_len;
26445 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
26446
26447 /* Filter instructions that cannot be issued on current cycle due to
26448 decoder restrictions. */
26449 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
26450 first_cycle_insn_p);
26451 }
26452
26453 /* INSN is being issued in current solution. Account for its impact on
26454 the decoder model. */
26455 static void
26456 core2i7_first_cycle_multipass_issue (void *_data,
26457 signed char *ready_try, int n_ready,
26458 rtx insn, const void *_prev_data)
26459 {
26460 ix86_first_cycle_multipass_data_t data
26461 = (ix86_first_cycle_multipass_data_t) _data;
26462 const_ix86_first_cycle_multipass_data_t prev_data
26463 = (const_ix86_first_cycle_multipass_data_t) _prev_data;
26464
26465 int insn_size = min_insn_size (insn);
26466
26467 data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
26468 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
26469 gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
26470 && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
26471
26472 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
26473 if (!data->ready_try_change)
26474 {
26475 data->ready_try_change = sbitmap_alloc (n_ready);
26476 data->ready_try_change_size = n_ready;
26477 }
26478 else if (data->ready_try_change_size < n_ready)
26479 {
26480 data->ready_try_change = sbitmap_resize (data->ready_try_change,
26481 n_ready, 0);
26482 data->ready_try_change_size = n_ready;
26483 }
26484 bitmap_clear (data->ready_try_change);
26485
26486 /* Filter out insns from ready_try that the core will not be able to issue
26487 on current cycle due to decoder. */
26488 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
26489 false);
26490 }
26491
26492 /* Revert the effect on ready_try. */
26493 static void
26494 core2i7_first_cycle_multipass_backtrack (const void *_data,
26495 signed char *ready_try,
26496 int n_ready ATTRIBUTE_UNUSED)
26497 {
26498 const_ix86_first_cycle_multipass_data_t data
26499 = (const_ix86_first_cycle_multipass_data_t) _data;
26500 unsigned int i = 0;
26501 sbitmap_iterator sbi;
26502
26503 gcc_assert (bitmap_last_set_bit (data->ready_try_change) < n_ready);
26504 EXECUTE_IF_SET_IN_BITMAP (data->ready_try_change, 0, i, sbi)
26505 {
26506 ready_try[i] = 0;
26507 }
26508 }
26509
26510 /* Save the result of multipass lookahead scheduling for the next round. */
26511 static void
26512 core2i7_first_cycle_multipass_end (const void *_data)
26513 {
26514 const_ix86_first_cycle_multipass_data_t data
26515 = (const_ix86_first_cycle_multipass_data_t) _data;
26516 ix86_first_cycle_multipass_data_t next_data
26517 = ix86_first_cycle_multipass_data;
26518
26519 if (data != NULL)
26520 {
26521 next_data->ifetch_block_len = data->ifetch_block_len;
26522 next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
26523 }
26524 }
26525
26526 /* Deallocate target data. */
26527 static void
26528 core2i7_first_cycle_multipass_fini (void *_data)
26529 {
26530 ix86_first_cycle_multipass_data_t data
26531 = (ix86_first_cycle_multipass_data_t) _data;
26532
26533 if (data->ready_try_change)
26534 {
26535 sbitmap_free (data->ready_try_change);
26536 data->ready_try_change = NULL;
26537 data->ready_try_change_size = 0;
26538 }
26539 }
26540
26541 /* Prepare for scheduling pass. */
26542 static void
26543 ix86_sched_init_global (FILE *, int, int)
26544 {
26545 /* Install scheduling hooks for current CPU. Some of these hooks are used
26546 in time-critical parts of the scheduler, so we only set them up when
26547 they are actually used. */
26548 switch (ix86_tune)
26549 {
26550 case PROCESSOR_CORE2:
26551 case PROCESSOR_NEHALEM:
26552 case PROCESSOR_SANDYBRIDGE:
26553 case PROCESSOR_HASWELL:
26554 /* Do not perform multipass scheduling for pre-reload schedule
26555 to save compile time. */
26556 if (reload_completed)
26557 {
26558 targetm.sched.dfa_post_advance_cycle
26559 = core2i7_dfa_post_advance_cycle;
26560 targetm.sched.first_cycle_multipass_init
26561 = core2i7_first_cycle_multipass_init;
26562 targetm.sched.first_cycle_multipass_begin
26563 = core2i7_first_cycle_multipass_begin;
26564 targetm.sched.first_cycle_multipass_issue
26565 = core2i7_first_cycle_multipass_issue;
26566 targetm.sched.first_cycle_multipass_backtrack
26567 = core2i7_first_cycle_multipass_backtrack;
26568 targetm.sched.first_cycle_multipass_end
26569 = core2i7_first_cycle_multipass_end;
26570 targetm.sched.first_cycle_multipass_fini
26571 = core2i7_first_cycle_multipass_fini;
26572
26573 /* Set decoder parameters. */
26574 core2i7_secondary_decoder_max_insn_size = 8;
26575 core2i7_ifetch_block_size = 16;
26576 core2i7_ifetch_block_max_insns = 6;
26577 break;
26578 }
26579 /* ... Fall through ... */
26580 default:
26581 targetm.sched.dfa_post_advance_cycle = NULL;
26582 targetm.sched.first_cycle_multipass_init = NULL;
26583 targetm.sched.first_cycle_multipass_begin = NULL;
26584 targetm.sched.first_cycle_multipass_issue = NULL;
26585 targetm.sched.first_cycle_multipass_backtrack = NULL;
26586 targetm.sched.first_cycle_multipass_end = NULL;
26587 targetm.sched.first_cycle_multipass_fini = NULL;
26588 break;
26589 }
26590 }
26591
26592 \f
26593 /* Compute the alignment given to a constant that is being placed in memory.
26594 EXP is the constant and ALIGN is the alignment that the object would
26595 ordinarily have.
26596 The value of this function is used instead of that alignment to align
26597 the object. */
26598
26599 int
26600 ix86_constant_alignment (tree exp, int align)
26601 {
26602 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
26603 || TREE_CODE (exp) == INTEGER_CST)
26604 {
26605 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
26606 return 64;
26607 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
26608 return 128;
26609 }
26610 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
26611 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
26612 return BITS_PER_WORD;
26613
26614 return align;
26615 }
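
/* Editor's examples for the rule above: a DFmode constant such as 3.14 in
   the constant pool is given at least 64-bit alignment, a 128-bit vector
   constant at least 128-bit alignment, and (unless optimizing for size) a
   string literal of 31 or more characters at least BITS_PER_WORD.  */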
26616
26617 /* Compute the alignment for a static variable.
26618 TYPE is the data type, and ALIGN is the alignment that
26619 the object would ordinarily have. The value of this function is used
26620 instead of that alignment to align the object. */
26621
26622 int
26623 ix86_data_alignment (tree type, int align, bool opt)
26624 {
26625 /* GCC 4.8 and earlier used to incorrectly assume this alignment even
26626 for symbols from other compilation units or symbols that don't need
26627 to bind locally. In order to preserve some ABI compatibility with
26628 those compilers, ensure we don't decrease alignment from what we
26629 used to assume. */
26630
26631 int max_align_compat
26632 = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
26633
26634 /* A data structure equal to or greater than the size of a cache line
26635 (64 bytes in the Pentium 4 and other recent Intel processors, including
26636 processors based on the Intel Core microarchitecture) should be aligned
26637 so that its base address is a multiple of the cache line size. */
26638
26639 int max_align
26640 = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);
26641
26642 if (max_align < BITS_PER_WORD)
26643 max_align = BITS_PER_WORD;
26644
26645 if (opt
26646 && AGGREGATE_TYPE_P (type)
26647 && TYPE_SIZE (type)
26648 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
26649 {
26650 if (wi::geu_p (TYPE_SIZE (type), max_align_compat)
26651 && align < max_align_compat)
26652 align = max_align_compat;
26653 if (wi::geu_p (TYPE_SIZE (type), max_align)
26654 && align < max_align)
26655 align = max_align;
26656 }
26657
26658 /* The x86-64 ABI requires arrays of 16 bytes or larger to be aligned
26659 to a 16-byte boundary. */
26660 if (TARGET_64BIT)
26661 {
26662 if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
26663 && TYPE_SIZE (type)
26664 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
26665 && wi::geu_p (TYPE_SIZE (type), 128)
26666 && align < 128)
26667 return 128;
26668 }
26669
26670 if (!opt)
26671 return align;
26672
26673 if (TREE_CODE (type) == ARRAY_TYPE)
26674 {
26675 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
26676 return 64;
26677 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
26678 return 128;
26679 }
26680 else if (TREE_CODE (type) == COMPLEX_TYPE)
26681 {
26682
26683 if (TYPE_MODE (type) == DCmode && align < 64)
26684 return 64;
26685 if ((TYPE_MODE (type) == XCmode
26686 || TYPE_MODE (type) == TCmode) && align < 128)
26687 return 128;
26688 }
26689 else if ((TREE_CODE (type) == RECORD_TYPE
26690 || TREE_CODE (type) == UNION_TYPE
26691 || TREE_CODE (type) == QUAL_UNION_TYPE)
26692 && TYPE_FIELDS (type))
26693 {
26694 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
26695 return 64;
26696 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
26697 return 128;
26698 }
26699 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
26700 || TREE_CODE (type) == INTEGER_TYPE)
26701 {
26702 if (TYPE_MODE (type) == DFmode && align < 64)
26703 return 64;
26704 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
26705 return 128;
26706 }
26707
26708 return align;
26709 }
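
/* Editor's worked examples for the function above (assuming the default
   64-byte prefetch block, so max_align is 512 bits, and OPT set): a 40-byte
   global aggregate is raised to 256-bit alignment by the GCC 4.8
   compatibility clause, a 100-byte array reaches the full 512-bit
   (cache-line) alignment, and on x86-64 any array of 16 bytes or more is
   guaranteed at least 128-bit alignment even when OPT is clear.  A plain
   double still gets 64-bit alignment from the trailing mode checks.  */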
26710
26711 /* Compute the alignment for a local variable or a stack slot. EXP is
26712 the data type or decl itself, MODE is the widest mode available and
26713 ALIGN is the alignment that the object would ordinarily have. The
26714 value of this macro is used instead of that alignment to align the
26715 object. */
26716
26717 unsigned int
26718 ix86_local_alignment (tree exp, enum machine_mode mode,
26719 unsigned int align)
26720 {
26721 tree type, decl;
26722
26723 if (exp && DECL_P (exp))
26724 {
26725 type = TREE_TYPE (exp);
26726 decl = exp;
26727 }
26728 else
26729 {
26730 type = exp;
26731 decl = NULL;
26732 }
26733
26734 /* Don't do dynamic stack realignment for long long objects with
26735 -mpreferred-stack-boundary=2. */
26736 if (!TARGET_64BIT
26737 && align == 64
26738 && ix86_preferred_stack_boundary < 64
26739 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
26740 && (!type || !TYPE_USER_ALIGN (type))
26741 && (!decl || !DECL_USER_ALIGN (decl)))
26742 align = 32;
26743
26744 /* If TYPE is NULL, we are allocating a stack slot for caller-save
26745 register in MODE. We will return the largest alignment of XF
26746 and DF. */
26747 if (!type)
26748 {
26749 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
26750 align = GET_MODE_ALIGNMENT (DFmode);
26751 return align;
26752 }
26753
26754 /* The x86-64 ABI requires arrays of 16 bytes or larger to be aligned
26755 to a 16-byte boundary. The exact wording is:
26756
26757 An array uses the same alignment as its elements, except that a local or
26758 global array variable of length at least 16 bytes or
26759 a C99 variable-length array variable always has alignment of at least 16 bytes.
26760
26761 This was added to allow use of aligned SSE instructions on arrays. The
26762 rule is meant for static storage (where the compiler cannot do the analysis
26763 by itself). We follow it for automatic variables only when convenient:
26764 we fully control everything in the function being compiled, and functions
26765 from other units cannot rely on the alignment.
26766 
26767 Exclude the va_list type. It is the common case of a local array where
26768 we cannot benefit from the alignment.
26769 
26770 TODO: Probably one should optimize for size only when the variable does not escape. */
26771 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
26772 && TARGET_SSE)
26773 {
26774 if (AGGREGATE_TYPE_P (type)
26775 && (va_list_type_node == NULL_TREE
26776 || (TYPE_MAIN_VARIANT (type)
26777 != TYPE_MAIN_VARIANT (va_list_type_node)))
26778 && TYPE_SIZE (type)
26779 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
26780 && wi::geu_p (TYPE_SIZE (type), 16)
26781 && align < 128)
26782 return 128;
26783 }
26784 if (TREE_CODE (type) == ARRAY_TYPE)
26785 {
26786 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
26787 return 64;
26788 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
26789 return 128;
26790 }
26791 else if (TREE_CODE (type) == COMPLEX_TYPE)
26792 {
26793 if (TYPE_MODE (type) == DCmode && align < 64)
26794 return 64;
26795 if ((TYPE_MODE (type) == XCmode
26796 || TYPE_MODE (type) == TCmode) && align < 128)
26797 return 128;
26798 }
26799 else if ((TREE_CODE (type) == RECORD_TYPE
26800 || TREE_CODE (type) == UNION_TYPE
26801 || TREE_CODE (type) == QUAL_UNION_TYPE)
26802 && TYPE_FIELDS (type))
26803 {
26804 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
26805 return 64;
26806 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
26807 return 128;
26808 }
26809 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
26810 || TREE_CODE (type) == INTEGER_TYPE)
26811 {
26812
26813 if (TYPE_MODE (type) == DFmode && align < 64)
26814 return 64;
26815 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
26816 return 128;
26817 }
26818 return align;
26819 }
26820
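/* A worked example of the rule above (illustrative note, with the usual
   default ABI alignments assumed): with -m64 and SSE enabled, a local
   "double buf[4]" is a 32-byte aggregate, so the hook bumps it from its
   natural 64-bit alignment up to 128 bits, allowing aligned 128-bit SSE
   accesses; a lone "double d" or a va_list object keeps its ordinary
   alignment.  */
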
26821 /* Compute the minimum required alignment for dynamic stack realignment
26822 purposes for a local variable, parameter or a stack slot. EXP is
26823 the data type or decl itself, MODE is its mode and ALIGN is the
26824 alignment that the object would ordinarily have. */
26825
26826 unsigned int
26827 ix86_minimum_alignment (tree exp, enum machine_mode mode,
26828 unsigned int align)
26829 {
26830 tree type, decl;
26831
26832 if (exp && DECL_P (exp))
26833 {
26834 type = TREE_TYPE (exp);
26835 decl = exp;
26836 }
26837 else
26838 {
26839 type = exp;
26840 decl = NULL;
26841 }
26842
26843 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
26844 return align;
26845
26846 /* Don't do dynamic stack realignment for long long objects with
26847 -mpreferred-stack-boundary=2. */
26848 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
26849 && (!type || !TYPE_USER_ALIGN (type))
26850 && (!decl || !DECL_USER_ALIGN (decl)))
26851 return 32;
26852
26853 return align;
26854 }
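
/* For instance (illustrative only): with -m32 -mpreferred-stack-boundary=2,
   a local "long long" (DImode) without a user-specified alignment gets a
   minimum alignment of 32 bits from the hook above, so it does not by
   itself force dynamic stack realignment; with -m64, or with a preferred
   stack boundary of 64 bits or more, the incoming ALIGN is returned
   unchanged.  */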
26855 \f
26856 /* Find a location for the static chain incoming to a nested function.
26857 This is a register, unless all free registers are used by arguments. */
26858
26859 static rtx
26860 ix86_static_chain (const_tree fndecl, bool incoming_p)
26861 {
26862 unsigned regno;
26863
26864 if (!DECL_STATIC_CHAIN (fndecl))
26865 return NULL;
26866
26867 if (TARGET_64BIT)
26868 {
26869 /* We always use R10 in 64-bit mode. */
26870 regno = R10_REG;
26871 }
26872 else
26873 {
26874 tree fntype;
26875 unsigned int ccvt;
26876
26877 /* By default in 32-bit mode we use ECX to pass the static chain. */
26878 regno = CX_REG;
26879
26880 fntype = TREE_TYPE (fndecl);
26881 ccvt = ix86_get_callcvt (fntype);
26882 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
26883 {
26884 /* Fastcall functions use ecx/edx for arguments, which leaves
26885 us with EAX for the static chain.
26886 Thiscall functions use ecx for arguments, which also
26887 leaves us with EAX for the static chain. */
26888 regno = AX_REG;
26889 }
26890 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
26891 {
26892 /* Thiscall functions use ecx for arguments, which leaves
26893 us with EAX and EDX for the static chain.
26894 For ABI compatibility we use EAX. */
26895 regno = AX_REG;
26896 }
26897 else if (ix86_function_regparm (fntype, fndecl) == 3)
26898 {
26899 /* For regparm 3, we have no free call-clobbered registers in
26900 which to store the static chain. In order to implement this,
26901 we have the trampoline push the static chain to the stack.
26902 However, we can't push a value below the return address when
26903 we call the nested function directly, so we have to use an
26904 alternate entry point. For this we use ESI, and have the
26905 alternate entry point push ESI, so that things appear the
26906 same once we're executing the nested function. */
26907 if (incoming_p)
26908 {
26909 if (fndecl == current_function_decl)
26910 ix86_static_chain_on_stack = true;
26911 return gen_frame_mem (SImode,
26912 plus_constant (Pmode,
26913 arg_pointer_rtx, -8));
26914 }
26915 regno = SI_REG;
26916 }
26917 }
26918
26919 return gen_rtx_REG (Pmode, regno);
26920 }
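
/* Summary of the choices made above (a convenience note): 64-bit code
   always passes the static chain in %r10; 32-bit code uses %ecx by
   default, %eax for fastcall and thiscall, and for regparm(3) functions
   the trampoline pushes the chain so that it is found at -8 from the
   incoming argument pointer, with %esi used when calling the nested
   function directly through its alternate entry point.  */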
26921
26922 /* Emit RTL insns to initialize the variable parts of a trampoline.
26923 FNDECL is the decl of the target address; M_TRAMP is a MEM for
26924 the trampoline, and CHAIN_VALUE is an RTX for the static chain
26925 to be passed to the target function. */
26926
26927 static void
26928 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
26929 {
26930 rtx mem, fnaddr;
26931 int opcode;
26932 int offset = 0;
26933
26934 fnaddr = XEXP (DECL_RTL (fndecl), 0);
26935
26936 if (TARGET_64BIT)
26937 {
26938 int size;
26939
26940 /* Load the function address into r11. Try to load the address using
26941 the shorter movl instead of movabs. We may want to support
26942 movq for kernel mode, but the kernel does not use trampolines at
26943 the moment. FNADDR is a 32-bit address and may not be in
26944 DImode when ptr_mode == SImode; always use movl in that
26945 case. */
26946 if (ptr_mode == SImode
26947 || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
26948 {
26949 fnaddr = copy_addr_to_reg (fnaddr);
26950
26951 mem = adjust_address (m_tramp, HImode, offset);
26952 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
26953
26954 mem = adjust_address (m_tramp, SImode, offset + 2);
26955 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
26956 offset += 6;
26957 }
26958 else
26959 {
26960 mem = adjust_address (m_tramp, HImode, offset);
26961 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
26962
26963 mem = adjust_address (m_tramp, DImode, offset + 2);
26964 emit_move_insn (mem, fnaddr);
26965 offset += 10;
26966 }
26967
26968 /* Load the static chain into r10 using movabs. Use the shorter movl
26969 instead of movabs when ptr_mode == SImode. */
26970 if (ptr_mode == SImode)
26971 {
26972 opcode = 0xba41;
26973 size = 6;
26974 }
26975 else
26976 {
26977 opcode = 0xba49;
26978 size = 10;
26979 }
26980
26981 mem = adjust_address (m_tramp, HImode, offset);
26982 emit_move_insn (mem, gen_int_mode (opcode, HImode));
26983
26984 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
26985 emit_move_insn (mem, chain_value);
26986 offset += size;
26987
26988 /* Jump to r11; the last (unused) byte is a nop, only there to
26989 pad the write out to a single 32-bit store. */
26990 mem = adjust_address (m_tramp, SImode, offset);
26991 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
26992 offset += 4;
26993 }
26994 else
26995 {
26996 rtx disp, chain;
26997
26998 /* Depending on the static chain location, either load a register
26999 with a constant, or push the constant to the stack. All of the
27000 instructions are the same size. */
27001 chain = ix86_static_chain (fndecl, true);
27002 if (REG_P (chain))
27003 {
27004 switch (REGNO (chain))
27005 {
27006 case AX_REG:
27007 opcode = 0xb8; break;
27008 case CX_REG:
27009 opcode = 0xb9; break;
27010 default:
27011 gcc_unreachable ();
27012 }
27013 }
27014 else
27015 opcode = 0x68;
27016
27017 mem = adjust_address (m_tramp, QImode, offset);
27018 emit_move_insn (mem, gen_int_mode (opcode, QImode));
27019
27020 mem = adjust_address (m_tramp, SImode, offset + 1);
27021 emit_move_insn (mem, chain_value);
27022 offset += 5;
27023
27024 mem = adjust_address (m_tramp, QImode, offset);
27025 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
27026
27027 mem = adjust_address (m_tramp, SImode, offset + 1);
27028
27029 /* Compute the offset from the end of the jmp to the target function.
27030 When the trampoline stores the static chain on the stack, we need
27031 to skip the function's first insn, which pushes the (call-saved)
27032 static-chain register; this push is 1 byte. */
27033 offset += 5;
27034 disp = expand_binop (SImode, sub_optab, fnaddr,
27035 plus_constant (Pmode, XEXP (m_tramp, 0),
27036 offset - (MEM_P (chain) ? 1 : 0)),
27037 NULL_RTX, 1, OPTAB_DIRECT);
27038 emit_move_insn (mem, disp);
27039 }
27040
27041 gcc_assert (offset <= TRAMPOLINE_SIZE);
27042
27043 #ifdef HAVE_ENABLE_EXECUTE_STACK
27044 #ifdef CHECK_EXECUTE_STACK_ENABLED
27045 if (CHECK_EXECUTE_STACK_ENABLED)
27046 #endif
27047 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
27048 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
27049 #endif
27050 }
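
/* Rough sketch of the byte sequences written above (assuming the 64-bit
   movabs forms; the exact opcodes come from the constants used in the
   function):

       49 bb <8-byte fnaddr>     movabs $fnaddr, %r11
       49 ba <8-byte chain>      movabs $chain,  %r10
       49 ff e3 90               jmp *%r11; nop (pads the final store)

   With ptr_mode == SImode (or a zero-extendable FNADDR) the shorter
   "41 bb" / "41 ba" movl forms with 4-byte immediates are used instead.
   The 32-bit trampoline is "mov $chain, %eax/%ecx" (b8/b9) or
   "push $chain" (68), followed by "jmp rel32" (e9) to the target.  */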
27051 \f
27052 /* The following file contains several enumerations and data structures
27053 built from the definitions in i386-builtin-types.def. */
27054
27055 #include "i386-builtin-types.inc"
27056
27057 /* Table for the ix86 builtin non-function types. */
27058 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
27059
27060 /* Retrieve an element from the above table, building some of
27061 the types lazily. */
27062
27063 static tree
27064 ix86_get_builtin_type (enum ix86_builtin_type tcode)
27065 {
27066 unsigned int index;
27067 tree type, itype;
27068
27069 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
27070
27071 type = ix86_builtin_type_tab[(int) tcode];
27072 if (type != NULL)
27073 return type;
27074
27075 gcc_assert (tcode > IX86_BT_LAST_PRIM);
27076 if (tcode <= IX86_BT_LAST_VECT)
27077 {
27078 enum machine_mode mode;
27079
27080 index = tcode - IX86_BT_LAST_PRIM - 1;
27081 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
27082 mode = ix86_builtin_type_vect_mode[index];
27083
27084 type = build_vector_type_for_mode (itype, mode);
27085 }
27086 else
27087 {
27088 int quals;
27089
27090 index = tcode - IX86_BT_LAST_VECT - 1;
27091 if (tcode <= IX86_BT_LAST_PTR)
27092 quals = TYPE_UNQUALIFIED;
27093 else
27094 quals = TYPE_QUAL_CONST;
27095
27096 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
27097 if (quals != TYPE_UNQUALIFIED)
27098 itype = build_qualified_type (itype, quals);
27099
27100 type = build_pointer_type (itype);
27101 }
27102
27103 ix86_builtin_type_tab[(int) tcode] = type;
27104 return type;
27105 }
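
/* Example of the lazy construction above (illustrative note): the first
   request for a vector type code builds its element type recursively and
   wraps it with build_vector_type_for_mode using the mode recorded in
   ix86_builtin_type_vect_mode; pointer codes are likewise built from
   their base type on demand, with codes past IX86_BT_LAST_PTR getting a
   const qualifier.  Either way the result is cached in
   ix86_builtin_type_tab, so later requests are a plain table lookup.  */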
27106
27107 /* Table for the ix86 builtin function types. */
27108 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
27109
27110 /* Retrieve an element from the above table, building some of
27111 the types lazily. */
27112
27113 static tree
27114 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
27115 {
27116 tree type;
27117
27118 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
27119
27120 type = ix86_builtin_func_type_tab[(int) tcode];
27121 if (type != NULL)
27122 return type;
27123
27124 if (tcode <= IX86_BT_LAST_FUNC)
27125 {
27126 unsigned start = ix86_builtin_func_start[(int) tcode];
27127 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
27128 tree rtype, atype, args = void_list_node;
27129 unsigned i;
27130
27131 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
27132 for (i = after - 1; i > start; --i)
27133 {
27134 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
27135 args = tree_cons (NULL, atype, args);
27136 }
27137
27138 type = build_function_type (rtype, args);
27139 }
27140 else
27141 {
27142 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
27143 enum ix86_builtin_func_type icode;
27144
27145 icode = ix86_builtin_func_alias_base[index];
27146 type = ix86_get_builtin_func_type (icode);
27147 }
27148
27149 ix86_builtin_func_type_tab[(int) tcode] = type;
27150 return type;
27151 }
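
/* Illustrative note on the table layout assumed above: for each non-alias
   function code, ix86_builtin_func_args holds the return type code at
   ix86_builtin_func_start[tcode] followed by the argument type codes;
   walking the arguments backwards while consing onto void_list_node
   yields a TREE_LIST in the original left-to-right order, which
   build_function_type then turns into the cached function type.  Alias
   codes simply reuse the type of the code they alias.  */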
27152
27153
27154 /* Codes for all the SSE/MMX builtins. */
27155 enum ix86_builtins
27156 {
27157 IX86_BUILTIN_ADDPS,
27158 IX86_BUILTIN_ADDSS,
27159 IX86_BUILTIN_DIVPS,
27160 IX86_BUILTIN_DIVSS,
27161 IX86_BUILTIN_MULPS,
27162 IX86_BUILTIN_MULSS,
27163 IX86_BUILTIN_SUBPS,
27164 IX86_BUILTIN_SUBSS,
27165
27166 IX86_BUILTIN_CMPEQPS,
27167 IX86_BUILTIN_CMPLTPS,
27168 IX86_BUILTIN_CMPLEPS,
27169 IX86_BUILTIN_CMPGTPS,
27170 IX86_BUILTIN_CMPGEPS,
27171 IX86_BUILTIN_CMPNEQPS,
27172 IX86_BUILTIN_CMPNLTPS,
27173 IX86_BUILTIN_CMPNLEPS,
27174 IX86_BUILTIN_CMPNGTPS,
27175 IX86_BUILTIN_CMPNGEPS,
27176 IX86_BUILTIN_CMPORDPS,
27177 IX86_BUILTIN_CMPUNORDPS,
27178 IX86_BUILTIN_CMPEQSS,
27179 IX86_BUILTIN_CMPLTSS,
27180 IX86_BUILTIN_CMPLESS,
27181 IX86_BUILTIN_CMPNEQSS,
27182 IX86_BUILTIN_CMPNLTSS,
27183 IX86_BUILTIN_CMPNLESS,
27184 IX86_BUILTIN_CMPORDSS,
27185 IX86_BUILTIN_CMPUNORDSS,
27186
27187 IX86_BUILTIN_COMIEQSS,
27188 IX86_BUILTIN_COMILTSS,
27189 IX86_BUILTIN_COMILESS,
27190 IX86_BUILTIN_COMIGTSS,
27191 IX86_BUILTIN_COMIGESS,
27192 IX86_BUILTIN_COMINEQSS,
27193 IX86_BUILTIN_UCOMIEQSS,
27194 IX86_BUILTIN_UCOMILTSS,
27195 IX86_BUILTIN_UCOMILESS,
27196 IX86_BUILTIN_UCOMIGTSS,
27197 IX86_BUILTIN_UCOMIGESS,
27198 IX86_BUILTIN_UCOMINEQSS,
27199
27200 IX86_BUILTIN_CVTPI2PS,
27201 IX86_BUILTIN_CVTPS2PI,
27202 IX86_BUILTIN_CVTSI2SS,
27203 IX86_BUILTIN_CVTSI642SS,
27204 IX86_BUILTIN_CVTSS2SI,
27205 IX86_BUILTIN_CVTSS2SI64,
27206 IX86_BUILTIN_CVTTPS2PI,
27207 IX86_BUILTIN_CVTTSS2SI,
27208 IX86_BUILTIN_CVTTSS2SI64,
27209
27210 IX86_BUILTIN_MAXPS,
27211 IX86_BUILTIN_MAXSS,
27212 IX86_BUILTIN_MINPS,
27213 IX86_BUILTIN_MINSS,
27214
27215 IX86_BUILTIN_LOADUPS,
27216 IX86_BUILTIN_STOREUPS,
27217 IX86_BUILTIN_MOVSS,
27218
27219 IX86_BUILTIN_MOVHLPS,
27220 IX86_BUILTIN_MOVLHPS,
27221 IX86_BUILTIN_LOADHPS,
27222 IX86_BUILTIN_LOADLPS,
27223 IX86_BUILTIN_STOREHPS,
27224 IX86_BUILTIN_STORELPS,
27225
27226 IX86_BUILTIN_MASKMOVQ,
27227 IX86_BUILTIN_MOVMSKPS,
27228 IX86_BUILTIN_PMOVMSKB,
27229
27230 IX86_BUILTIN_MOVNTPS,
27231 IX86_BUILTIN_MOVNTQ,
27232
27233 IX86_BUILTIN_LOADDQU,
27234 IX86_BUILTIN_STOREDQU,
27235
27236 IX86_BUILTIN_PACKSSWB,
27237 IX86_BUILTIN_PACKSSDW,
27238 IX86_BUILTIN_PACKUSWB,
27239
27240 IX86_BUILTIN_PADDB,
27241 IX86_BUILTIN_PADDW,
27242 IX86_BUILTIN_PADDD,
27243 IX86_BUILTIN_PADDQ,
27244 IX86_BUILTIN_PADDSB,
27245 IX86_BUILTIN_PADDSW,
27246 IX86_BUILTIN_PADDUSB,
27247 IX86_BUILTIN_PADDUSW,
27248 IX86_BUILTIN_PSUBB,
27249 IX86_BUILTIN_PSUBW,
27250 IX86_BUILTIN_PSUBD,
27251 IX86_BUILTIN_PSUBQ,
27252 IX86_BUILTIN_PSUBSB,
27253 IX86_BUILTIN_PSUBSW,
27254 IX86_BUILTIN_PSUBUSB,
27255 IX86_BUILTIN_PSUBUSW,
27256
27257 IX86_BUILTIN_PAND,
27258 IX86_BUILTIN_PANDN,
27259 IX86_BUILTIN_POR,
27260 IX86_BUILTIN_PXOR,
27261
27262 IX86_BUILTIN_PAVGB,
27263 IX86_BUILTIN_PAVGW,
27264
27265 IX86_BUILTIN_PCMPEQB,
27266 IX86_BUILTIN_PCMPEQW,
27267 IX86_BUILTIN_PCMPEQD,
27268 IX86_BUILTIN_PCMPGTB,
27269 IX86_BUILTIN_PCMPGTW,
27270 IX86_BUILTIN_PCMPGTD,
27271
27272 IX86_BUILTIN_PMADDWD,
27273
27274 IX86_BUILTIN_PMAXSW,
27275 IX86_BUILTIN_PMAXUB,
27276 IX86_BUILTIN_PMINSW,
27277 IX86_BUILTIN_PMINUB,
27278
27279 IX86_BUILTIN_PMULHUW,
27280 IX86_BUILTIN_PMULHW,
27281 IX86_BUILTIN_PMULLW,
27282
27283 IX86_BUILTIN_PSADBW,
27284 IX86_BUILTIN_PSHUFW,
27285
27286 IX86_BUILTIN_PSLLW,
27287 IX86_BUILTIN_PSLLD,
27288 IX86_BUILTIN_PSLLQ,
27289 IX86_BUILTIN_PSRAW,
27290 IX86_BUILTIN_PSRAD,
27291 IX86_BUILTIN_PSRLW,
27292 IX86_BUILTIN_PSRLD,
27293 IX86_BUILTIN_PSRLQ,
27294 IX86_BUILTIN_PSLLWI,
27295 IX86_BUILTIN_PSLLDI,
27296 IX86_BUILTIN_PSLLQI,
27297 IX86_BUILTIN_PSRAWI,
27298 IX86_BUILTIN_PSRADI,
27299 IX86_BUILTIN_PSRLWI,
27300 IX86_BUILTIN_PSRLDI,
27301 IX86_BUILTIN_PSRLQI,
27302
27303 IX86_BUILTIN_PUNPCKHBW,
27304 IX86_BUILTIN_PUNPCKHWD,
27305 IX86_BUILTIN_PUNPCKHDQ,
27306 IX86_BUILTIN_PUNPCKLBW,
27307 IX86_BUILTIN_PUNPCKLWD,
27308 IX86_BUILTIN_PUNPCKLDQ,
27309
27310 IX86_BUILTIN_SHUFPS,
27311
27312 IX86_BUILTIN_RCPPS,
27313 IX86_BUILTIN_RCPSS,
27314 IX86_BUILTIN_RSQRTPS,
27315 IX86_BUILTIN_RSQRTPS_NR,
27316 IX86_BUILTIN_RSQRTSS,
27317 IX86_BUILTIN_RSQRTF,
27318 IX86_BUILTIN_SQRTPS,
27319 IX86_BUILTIN_SQRTPS_NR,
27320 IX86_BUILTIN_SQRTSS,
27321
27322 IX86_BUILTIN_UNPCKHPS,
27323 IX86_BUILTIN_UNPCKLPS,
27324
27325 IX86_BUILTIN_ANDPS,
27326 IX86_BUILTIN_ANDNPS,
27327 IX86_BUILTIN_ORPS,
27328 IX86_BUILTIN_XORPS,
27329
27330 IX86_BUILTIN_EMMS,
27331 IX86_BUILTIN_LDMXCSR,
27332 IX86_BUILTIN_STMXCSR,
27333 IX86_BUILTIN_SFENCE,
27334
27335 IX86_BUILTIN_FXSAVE,
27336 IX86_BUILTIN_FXRSTOR,
27337 IX86_BUILTIN_FXSAVE64,
27338 IX86_BUILTIN_FXRSTOR64,
27339
27340 IX86_BUILTIN_XSAVE,
27341 IX86_BUILTIN_XRSTOR,
27342 IX86_BUILTIN_XSAVE64,
27343 IX86_BUILTIN_XRSTOR64,
27344
27345 IX86_BUILTIN_XSAVEOPT,
27346 IX86_BUILTIN_XSAVEOPT64,
27347
27348 IX86_BUILTIN_XSAVEC,
27349 IX86_BUILTIN_XSAVEC64,
27350
27351 IX86_BUILTIN_XSAVES,
27352 IX86_BUILTIN_XRSTORS,
27353 IX86_BUILTIN_XSAVES64,
27354 IX86_BUILTIN_XRSTORS64,
27355
27356 /* 3DNow! Original */
27357 IX86_BUILTIN_FEMMS,
27358 IX86_BUILTIN_PAVGUSB,
27359 IX86_BUILTIN_PF2ID,
27360 IX86_BUILTIN_PFACC,
27361 IX86_BUILTIN_PFADD,
27362 IX86_BUILTIN_PFCMPEQ,
27363 IX86_BUILTIN_PFCMPGE,
27364 IX86_BUILTIN_PFCMPGT,
27365 IX86_BUILTIN_PFMAX,
27366 IX86_BUILTIN_PFMIN,
27367 IX86_BUILTIN_PFMUL,
27368 IX86_BUILTIN_PFRCP,
27369 IX86_BUILTIN_PFRCPIT1,
27370 IX86_BUILTIN_PFRCPIT2,
27371 IX86_BUILTIN_PFRSQIT1,
27372 IX86_BUILTIN_PFRSQRT,
27373 IX86_BUILTIN_PFSUB,
27374 IX86_BUILTIN_PFSUBR,
27375 IX86_BUILTIN_PI2FD,
27376 IX86_BUILTIN_PMULHRW,
27377
27378 /* 3DNow! Athlon Extensions */
27379 IX86_BUILTIN_PF2IW,
27380 IX86_BUILTIN_PFNACC,
27381 IX86_BUILTIN_PFPNACC,
27382 IX86_BUILTIN_PI2FW,
27383 IX86_BUILTIN_PSWAPDSI,
27384 IX86_BUILTIN_PSWAPDSF,
27385
27386 /* SSE2 */
27387 IX86_BUILTIN_ADDPD,
27388 IX86_BUILTIN_ADDSD,
27389 IX86_BUILTIN_DIVPD,
27390 IX86_BUILTIN_DIVSD,
27391 IX86_BUILTIN_MULPD,
27392 IX86_BUILTIN_MULSD,
27393 IX86_BUILTIN_SUBPD,
27394 IX86_BUILTIN_SUBSD,
27395
27396 IX86_BUILTIN_CMPEQPD,
27397 IX86_BUILTIN_CMPLTPD,
27398 IX86_BUILTIN_CMPLEPD,
27399 IX86_BUILTIN_CMPGTPD,
27400 IX86_BUILTIN_CMPGEPD,
27401 IX86_BUILTIN_CMPNEQPD,
27402 IX86_BUILTIN_CMPNLTPD,
27403 IX86_BUILTIN_CMPNLEPD,
27404 IX86_BUILTIN_CMPNGTPD,
27405 IX86_BUILTIN_CMPNGEPD,
27406 IX86_BUILTIN_CMPORDPD,
27407 IX86_BUILTIN_CMPUNORDPD,
27408 IX86_BUILTIN_CMPEQSD,
27409 IX86_BUILTIN_CMPLTSD,
27410 IX86_BUILTIN_CMPLESD,
27411 IX86_BUILTIN_CMPNEQSD,
27412 IX86_BUILTIN_CMPNLTSD,
27413 IX86_BUILTIN_CMPNLESD,
27414 IX86_BUILTIN_CMPORDSD,
27415 IX86_BUILTIN_CMPUNORDSD,
27416
27417 IX86_BUILTIN_COMIEQSD,
27418 IX86_BUILTIN_COMILTSD,
27419 IX86_BUILTIN_COMILESD,
27420 IX86_BUILTIN_COMIGTSD,
27421 IX86_BUILTIN_COMIGESD,
27422 IX86_BUILTIN_COMINEQSD,
27423 IX86_BUILTIN_UCOMIEQSD,
27424 IX86_BUILTIN_UCOMILTSD,
27425 IX86_BUILTIN_UCOMILESD,
27426 IX86_BUILTIN_UCOMIGTSD,
27427 IX86_BUILTIN_UCOMIGESD,
27428 IX86_BUILTIN_UCOMINEQSD,
27429
27430 IX86_BUILTIN_MAXPD,
27431 IX86_BUILTIN_MAXSD,
27432 IX86_BUILTIN_MINPD,
27433 IX86_BUILTIN_MINSD,
27434
27435 IX86_BUILTIN_ANDPD,
27436 IX86_BUILTIN_ANDNPD,
27437 IX86_BUILTIN_ORPD,
27438 IX86_BUILTIN_XORPD,
27439
27440 IX86_BUILTIN_SQRTPD,
27441 IX86_BUILTIN_SQRTSD,
27442
27443 IX86_BUILTIN_UNPCKHPD,
27444 IX86_BUILTIN_UNPCKLPD,
27445
27446 IX86_BUILTIN_SHUFPD,
27447
27448 IX86_BUILTIN_LOADUPD,
27449 IX86_BUILTIN_STOREUPD,
27450 IX86_BUILTIN_MOVSD,
27451
27452 IX86_BUILTIN_LOADHPD,
27453 IX86_BUILTIN_LOADLPD,
27454
27455 IX86_BUILTIN_CVTDQ2PD,
27456 IX86_BUILTIN_CVTDQ2PS,
27457
27458 IX86_BUILTIN_CVTPD2DQ,
27459 IX86_BUILTIN_CVTPD2PI,
27460 IX86_BUILTIN_CVTPD2PS,
27461 IX86_BUILTIN_CVTTPD2DQ,
27462 IX86_BUILTIN_CVTTPD2PI,
27463
27464 IX86_BUILTIN_CVTPI2PD,
27465 IX86_BUILTIN_CVTSI2SD,
27466 IX86_BUILTIN_CVTSI642SD,
27467
27468 IX86_BUILTIN_CVTSD2SI,
27469 IX86_BUILTIN_CVTSD2SI64,
27470 IX86_BUILTIN_CVTSD2SS,
27471 IX86_BUILTIN_CVTSS2SD,
27472 IX86_BUILTIN_CVTTSD2SI,
27473 IX86_BUILTIN_CVTTSD2SI64,
27474
27475 IX86_BUILTIN_CVTPS2DQ,
27476 IX86_BUILTIN_CVTPS2PD,
27477 IX86_BUILTIN_CVTTPS2DQ,
27478
27479 IX86_BUILTIN_MOVNTI,
27480 IX86_BUILTIN_MOVNTI64,
27481 IX86_BUILTIN_MOVNTPD,
27482 IX86_BUILTIN_MOVNTDQ,
27483
27484 IX86_BUILTIN_MOVQ128,
27485
27486 /* SSE2 MMX */
27487 IX86_BUILTIN_MASKMOVDQU,
27488 IX86_BUILTIN_MOVMSKPD,
27489 IX86_BUILTIN_PMOVMSKB128,
27490
27491 IX86_BUILTIN_PACKSSWB128,
27492 IX86_BUILTIN_PACKSSDW128,
27493 IX86_BUILTIN_PACKUSWB128,
27494
27495 IX86_BUILTIN_PADDB128,
27496 IX86_BUILTIN_PADDW128,
27497 IX86_BUILTIN_PADDD128,
27498 IX86_BUILTIN_PADDQ128,
27499 IX86_BUILTIN_PADDSB128,
27500 IX86_BUILTIN_PADDSW128,
27501 IX86_BUILTIN_PADDUSB128,
27502 IX86_BUILTIN_PADDUSW128,
27503 IX86_BUILTIN_PSUBB128,
27504 IX86_BUILTIN_PSUBW128,
27505 IX86_BUILTIN_PSUBD128,
27506 IX86_BUILTIN_PSUBQ128,
27507 IX86_BUILTIN_PSUBSB128,
27508 IX86_BUILTIN_PSUBSW128,
27509 IX86_BUILTIN_PSUBUSB128,
27510 IX86_BUILTIN_PSUBUSW128,
27511
27512 IX86_BUILTIN_PAND128,
27513 IX86_BUILTIN_PANDN128,
27514 IX86_BUILTIN_POR128,
27515 IX86_BUILTIN_PXOR128,
27516
27517 IX86_BUILTIN_PAVGB128,
27518 IX86_BUILTIN_PAVGW128,
27519
27520 IX86_BUILTIN_PCMPEQB128,
27521 IX86_BUILTIN_PCMPEQW128,
27522 IX86_BUILTIN_PCMPEQD128,
27523 IX86_BUILTIN_PCMPGTB128,
27524 IX86_BUILTIN_PCMPGTW128,
27525 IX86_BUILTIN_PCMPGTD128,
27526
27527 IX86_BUILTIN_PMADDWD128,
27528
27529 IX86_BUILTIN_PMAXSW128,
27530 IX86_BUILTIN_PMAXUB128,
27531 IX86_BUILTIN_PMINSW128,
27532 IX86_BUILTIN_PMINUB128,
27533
27534 IX86_BUILTIN_PMULUDQ,
27535 IX86_BUILTIN_PMULUDQ128,
27536 IX86_BUILTIN_PMULHUW128,
27537 IX86_BUILTIN_PMULHW128,
27538 IX86_BUILTIN_PMULLW128,
27539
27540 IX86_BUILTIN_PSADBW128,
27541 IX86_BUILTIN_PSHUFHW,
27542 IX86_BUILTIN_PSHUFLW,
27543 IX86_BUILTIN_PSHUFD,
27544
27545 IX86_BUILTIN_PSLLDQI128,
27546 IX86_BUILTIN_PSLLWI128,
27547 IX86_BUILTIN_PSLLDI128,
27548 IX86_BUILTIN_PSLLQI128,
27549 IX86_BUILTIN_PSRAWI128,
27550 IX86_BUILTIN_PSRADI128,
27551 IX86_BUILTIN_PSRLDQI128,
27552 IX86_BUILTIN_PSRLWI128,
27553 IX86_BUILTIN_PSRLDI128,
27554 IX86_BUILTIN_PSRLQI128,
27555
27556 IX86_BUILTIN_PSLLDQ128,
27557 IX86_BUILTIN_PSLLW128,
27558 IX86_BUILTIN_PSLLD128,
27559 IX86_BUILTIN_PSLLQ128,
27560 IX86_BUILTIN_PSRAW128,
27561 IX86_BUILTIN_PSRAD128,
27562 IX86_BUILTIN_PSRLW128,
27563 IX86_BUILTIN_PSRLD128,
27564 IX86_BUILTIN_PSRLQ128,
27565
27566 IX86_BUILTIN_PUNPCKHBW128,
27567 IX86_BUILTIN_PUNPCKHWD128,
27568 IX86_BUILTIN_PUNPCKHDQ128,
27569 IX86_BUILTIN_PUNPCKHQDQ128,
27570 IX86_BUILTIN_PUNPCKLBW128,
27571 IX86_BUILTIN_PUNPCKLWD128,
27572 IX86_BUILTIN_PUNPCKLDQ128,
27573 IX86_BUILTIN_PUNPCKLQDQ128,
27574
27575 IX86_BUILTIN_CLFLUSH,
27576 IX86_BUILTIN_MFENCE,
27577 IX86_BUILTIN_LFENCE,
27578 IX86_BUILTIN_PAUSE,
27579
27580 IX86_BUILTIN_FNSTENV,
27581 IX86_BUILTIN_FLDENV,
27582 IX86_BUILTIN_FNSTSW,
27583 IX86_BUILTIN_FNCLEX,
27584
27585 IX86_BUILTIN_BSRSI,
27586 IX86_BUILTIN_BSRDI,
27587 IX86_BUILTIN_RDPMC,
27588 IX86_BUILTIN_RDTSC,
27589 IX86_BUILTIN_RDTSCP,
27590 IX86_BUILTIN_ROLQI,
27591 IX86_BUILTIN_ROLHI,
27592 IX86_BUILTIN_RORQI,
27593 IX86_BUILTIN_RORHI,
27594
27595 /* SSE3. */
27596 IX86_BUILTIN_ADDSUBPS,
27597 IX86_BUILTIN_HADDPS,
27598 IX86_BUILTIN_HSUBPS,
27599 IX86_BUILTIN_MOVSHDUP,
27600 IX86_BUILTIN_MOVSLDUP,
27601 IX86_BUILTIN_ADDSUBPD,
27602 IX86_BUILTIN_HADDPD,
27603 IX86_BUILTIN_HSUBPD,
27604 IX86_BUILTIN_LDDQU,
27605
27606 IX86_BUILTIN_MONITOR,
27607 IX86_BUILTIN_MWAIT,
27608
27609 /* SSSE3. */
27610 IX86_BUILTIN_PHADDW,
27611 IX86_BUILTIN_PHADDD,
27612 IX86_BUILTIN_PHADDSW,
27613 IX86_BUILTIN_PHSUBW,
27614 IX86_BUILTIN_PHSUBD,
27615 IX86_BUILTIN_PHSUBSW,
27616 IX86_BUILTIN_PMADDUBSW,
27617 IX86_BUILTIN_PMULHRSW,
27618 IX86_BUILTIN_PSHUFB,
27619 IX86_BUILTIN_PSIGNB,
27620 IX86_BUILTIN_PSIGNW,
27621 IX86_BUILTIN_PSIGND,
27622 IX86_BUILTIN_PALIGNR,
27623 IX86_BUILTIN_PABSB,
27624 IX86_BUILTIN_PABSW,
27625 IX86_BUILTIN_PABSD,
27626
27627 IX86_BUILTIN_PHADDW128,
27628 IX86_BUILTIN_PHADDD128,
27629 IX86_BUILTIN_PHADDSW128,
27630 IX86_BUILTIN_PHSUBW128,
27631 IX86_BUILTIN_PHSUBD128,
27632 IX86_BUILTIN_PHSUBSW128,
27633 IX86_BUILTIN_PMADDUBSW128,
27634 IX86_BUILTIN_PMULHRSW128,
27635 IX86_BUILTIN_PSHUFB128,
27636 IX86_BUILTIN_PSIGNB128,
27637 IX86_BUILTIN_PSIGNW128,
27638 IX86_BUILTIN_PSIGND128,
27639 IX86_BUILTIN_PALIGNR128,
27640 IX86_BUILTIN_PABSB128,
27641 IX86_BUILTIN_PABSW128,
27642 IX86_BUILTIN_PABSD128,
27643
27644 /* AMDFAM10 - SSE4A New Instructions. */
27645 IX86_BUILTIN_MOVNTSD,
27646 IX86_BUILTIN_MOVNTSS,
27647 IX86_BUILTIN_EXTRQI,
27648 IX86_BUILTIN_EXTRQ,
27649 IX86_BUILTIN_INSERTQI,
27650 IX86_BUILTIN_INSERTQ,
27651
27652 /* SSE4.1. */
27653 IX86_BUILTIN_BLENDPD,
27654 IX86_BUILTIN_BLENDPS,
27655 IX86_BUILTIN_BLENDVPD,
27656 IX86_BUILTIN_BLENDVPS,
27657 IX86_BUILTIN_PBLENDVB128,
27658 IX86_BUILTIN_PBLENDW128,
27659
27660 IX86_BUILTIN_DPPD,
27661 IX86_BUILTIN_DPPS,
27662
27663 IX86_BUILTIN_INSERTPS128,
27664
27665 IX86_BUILTIN_MOVNTDQA,
27666 IX86_BUILTIN_MPSADBW128,
27667 IX86_BUILTIN_PACKUSDW128,
27668 IX86_BUILTIN_PCMPEQQ,
27669 IX86_BUILTIN_PHMINPOSUW128,
27670
27671 IX86_BUILTIN_PMAXSB128,
27672 IX86_BUILTIN_PMAXSD128,
27673 IX86_BUILTIN_PMAXUD128,
27674 IX86_BUILTIN_PMAXUW128,
27675
27676 IX86_BUILTIN_PMINSB128,
27677 IX86_BUILTIN_PMINSD128,
27678 IX86_BUILTIN_PMINUD128,
27679 IX86_BUILTIN_PMINUW128,
27680
27681 IX86_BUILTIN_PMOVSXBW128,
27682 IX86_BUILTIN_PMOVSXBD128,
27683 IX86_BUILTIN_PMOVSXBQ128,
27684 IX86_BUILTIN_PMOVSXWD128,
27685 IX86_BUILTIN_PMOVSXWQ128,
27686 IX86_BUILTIN_PMOVSXDQ128,
27687
27688 IX86_BUILTIN_PMOVZXBW128,
27689 IX86_BUILTIN_PMOVZXBD128,
27690 IX86_BUILTIN_PMOVZXBQ128,
27691 IX86_BUILTIN_PMOVZXWD128,
27692 IX86_BUILTIN_PMOVZXWQ128,
27693 IX86_BUILTIN_PMOVZXDQ128,
27694
27695 IX86_BUILTIN_PMULDQ128,
27696 IX86_BUILTIN_PMULLD128,
27697
27698 IX86_BUILTIN_ROUNDSD,
27699 IX86_BUILTIN_ROUNDSS,
27700
27701 IX86_BUILTIN_ROUNDPD,
27702 IX86_BUILTIN_ROUNDPS,
27703
27704 IX86_BUILTIN_FLOORPD,
27705 IX86_BUILTIN_CEILPD,
27706 IX86_BUILTIN_TRUNCPD,
27707 IX86_BUILTIN_RINTPD,
27708 IX86_BUILTIN_ROUNDPD_AZ,
27709
27710 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX,
27711 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX,
27712 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX,
27713
27714 IX86_BUILTIN_FLOORPS,
27715 IX86_BUILTIN_CEILPS,
27716 IX86_BUILTIN_TRUNCPS,
27717 IX86_BUILTIN_RINTPS,
27718 IX86_BUILTIN_ROUNDPS_AZ,
27719
27720 IX86_BUILTIN_FLOORPS_SFIX,
27721 IX86_BUILTIN_CEILPS_SFIX,
27722 IX86_BUILTIN_ROUNDPS_AZ_SFIX,
27723
27724 IX86_BUILTIN_PTESTZ,
27725 IX86_BUILTIN_PTESTC,
27726 IX86_BUILTIN_PTESTNZC,
27727
27728 IX86_BUILTIN_VEC_INIT_V2SI,
27729 IX86_BUILTIN_VEC_INIT_V4HI,
27730 IX86_BUILTIN_VEC_INIT_V8QI,
27731 IX86_BUILTIN_VEC_EXT_V2DF,
27732 IX86_BUILTIN_VEC_EXT_V2DI,
27733 IX86_BUILTIN_VEC_EXT_V4SF,
27734 IX86_BUILTIN_VEC_EXT_V4SI,
27735 IX86_BUILTIN_VEC_EXT_V8HI,
27736 IX86_BUILTIN_VEC_EXT_V2SI,
27737 IX86_BUILTIN_VEC_EXT_V4HI,
27738 IX86_BUILTIN_VEC_EXT_V16QI,
27739 IX86_BUILTIN_VEC_SET_V2DI,
27740 IX86_BUILTIN_VEC_SET_V4SF,
27741 IX86_BUILTIN_VEC_SET_V4SI,
27742 IX86_BUILTIN_VEC_SET_V8HI,
27743 IX86_BUILTIN_VEC_SET_V4HI,
27744 IX86_BUILTIN_VEC_SET_V16QI,
27745
27746 IX86_BUILTIN_VEC_PACK_SFIX,
27747 IX86_BUILTIN_VEC_PACK_SFIX256,
27748
27749 /* SSE4.2. */
27750 IX86_BUILTIN_CRC32QI,
27751 IX86_BUILTIN_CRC32HI,
27752 IX86_BUILTIN_CRC32SI,
27753 IX86_BUILTIN_CRC32DI,
27754
27755 IX86_BUILTIN_PCMPESTRI128,
27756 IX86_BUILTIN_PCMPESTRM128,
27757 IX86_BUILTIN_PCMPESTRA128,
27758 IX86_BUILTIN_PCMPESTRC128,
27759 IX86_BUILTIN_PCMPESTRO128,
27760 IX86_BUILTIN_PCMPESTRS128,
27761 IX86_BUILTIN_PCMPESTRZ128,
27762 IX86_BUILTIN_PCMPISTRI128,
27763 IX86_BUILTIN_PCMPISTRM128,
27764 IX86_BUILTIN_PCMPISTRA128,
27765 IX86_BUILTIN_PCMPISTRC128,
27766 IX86_BUILTIN_PCMPISTRO128,
27767 IX86_BUILTIN_PCMPISTRS128,
27768 IX86_BUILTIN_PCMPISTRZ128,
27769
27770 IX86_BUILTIN_PCMPGTQ,
27771
27772 /* AES instructions */
27773 IX86_BUILTIN_AESENC128,
27774 IX86_BUILTIN_AESENCLAST128,
27775 IX86_BUILTIN_AESDEC128,
27776 IX86_BUILTIN_AESDECLAST128,
27777 IX86_BUILTIN_AESIMC128,
27778 IX86_BUILTIN_AESKEYGENASSIST128,
27779
27780 /* PCLMUL instruction */
27781 IX86_BUILTIN_PCLMULQDQ128,
27782
27783 /* AVX */
27784 IX86_BUILTIN_ADDPD256,
27785 IX86_BUILTIN_ADDPS256,
27786 IX86_BUILTIN_ADDSUBPD256,
27787 IX86_BUILTIN_ADDSUBPS256,
27788 IX86_BUILTIN_ANDPD256,
27789 IX86_BUILTIN_ANDPS256,
27790 IX86_BUILTIN_ANDNPD256,
27791 IX86_BUILTIN_ANDNPS256,
27792 IX86_BUILTIN_BLENDPD256,
27793 IX86_BUILTIN_BLENDPS256,
27794 IX86_BUILTIN_BLENDVPD256,
27795 IX86_BUILTIN_BLENDVPS256,
27796 IX86_BUILTIN_DIVPD256,
27797 IX86_BUILTIN_DIVPS256,
27798 IX86_BUILTIN_DPPS256,
27799 IX86_BUILTIN_HADDPD256,
27800 IX86_BUILTIN_HADDPS256,
27801 IX86_BUILTIN_HSUBPD256,
27802 IX86_BUILTIN_HSUBPS256,
27803 IX86_BUILTIN_MAXPD256,
27804 IX86_BUILTIN_MAXPS256,
27805 IX86_BUILTIN_MINPD256,
27806 IX86_BUILTIN_MINPS256,
27807 IX86_BUILTIN_MULPD256,
27808 IX86_BUILTIN_MULPS256,
27809 IX86_BUILTIN_ORPD256,
27810 IX86_BUILTIN_ORPS256,
27811 IX86_BUILTIN_SHUFPD256,
27812 IX86_BUILTIN_SHUFPS256,
27813 IX86_BUILTIN_SUBPD256,
27814 IX86_BUILTIN_SUBPS256,
27815 IX86_BUILTIN_XORPD256,
27816 IX86_BUILTIN_XORPS256,
27817 IX86_BUILTIN_CMPSD,
27818 IX86_BUILTIN_CMPSS,
27819 IX86_BUILTIN_CMPPD,
27820 IX86_BUILTIN_CMPPS,
27821 IX86_BUILTIN_CMPPD256,
27822 IX86_BUILTIN_CMPPS256,
27823 IX86_BUILTIN_CVTDQ2PD256,
27824 IX86_BUILTIN_CVTDQ2PS256,
27825 IX86_BUILTIN_CVTPD2PS256,
27826 IX86_BUILTIN_CVTPS2DQ256,
27827 IX86_BUILTIN_CVTPS2PD256,
27828 IX86_BUILTIN_CVTTPD2DQ256,
27829 IX86_BUILTIN_CVTPD2DQ256,
27830 IX86_BUILTIN_CVTTPS2DQ256,
27831 IX86_BUILTIN_EXTRACTF128PD256,
27832 IX86_BUILTIN_EXTRACTF128PS256,
27833 IX86_BUILTIN_EXTRACTF128SI256,
27834 IX86_BUILTIN_VZEROALL,
27835 IX86_BUILTIN_VZEROUPPER,
27836 IX86_BUILTIN_VPERMILVARPD,
27837 IX86_BUILTIN_VPERMILVARPS,
27838 IX86_BUILTIN_VPERMILVARPD256,
27839 IX86_BUILTIN_VPERMILVARPS256,
27840 IX86_BUILTIN_VPERMILPD,
27841 IX86_BUILTIN_VPERMILPS,
27842 IX86_BUILTIN_VPERMILPD256,
27843 IX86_BUILTIN_VPERMILPS256,
27844 IX86_BUILTIN_VPERMIL2PD,
27845 IX86_BUILTIN_VPERMIL2PS,
27846 IX86_BUILTIN_VPERMIL2PD256,
27847 IX86_BUILTIN_VPERMIL2PS256,
27848 IX86_BUILTIN_VPERM2F128PD256,
27849 IX86_BUILTIN_VPERM2F128PS256,
27850 IX86_BUILTIN_VPERM2F128SI256,
27851 IX86_BUILTIN_VBROADCASTSS,
27852 IX86_BUILTIN_VBROADCASTSD256,
27853 IX86_BUILTIN_VBROADCASTSS256,
27854 IX86_BUILTIN_VBROADCASTPD256,
27855 IX86_BUILTIN_VBROADCASTPS256,
27856 IX86_BUILTIN_VINSERTF128PD256,
27857 IX86_BUILTIN_VINSERTF128PS256,
27858 IX86_BUILTIN_VINSERTF128SI256,
27859 IX86_BUILTIN_LOADUPD256,
27860 IX86_BUILTIN_LOADUPS256,
27861 IX86_BUILTIN_STOREUPD256,
27862 IX86_BUILTIN_STOREUPS256,
27863 IX86_BUILTIN_LDDQU256,
27864 IX86_BUILTIN_MOVNTDQ256,
27865 IX86_BUILTIN_MOVNTPD256,
27866 IX86_BUILTIN_MOVNTPS256,
27867 IX86_BUILTIN_LOADDQU256,
27868 IX86_BUILTIN_STOREDQU256,
27869 IX86_BUILTIN_MASKLOADPD,
27870 IX86_BUILTIN_MASKLOADPS,
27871 IX86_BUILTIN_MASKSTOREPD,
27872 IX86_BUILTIN_MASKSTOREPS,
27873 IX86_BUILTIN_MASKLOADPD256,
27874 IX86_BUILTIN_MASKLOADPS256,
27875 IX86_BUILTIN_MASKSTOREPD256,
27876 IX86_BUILTIN_MASKSTOREPS256,
27877 IX86_BUILTIN_MOVSHDUP256,
27878 IX86_BUILTIN_MOVSLDUP256,
27879 IX86_BUILTIN_MOVDDUP256,
27880
27881 IX86_BUILTIN_SQRTPD256,
27882 IX86_BUILTIN_SQRTPS256,
27883 IX86_BUILTIN_SQRTPS_NR256,
27884 IX86_BUILTIN_RSQRTPS256,
27885 IX86_BUILTIN_RSQRTPS_NR256,
27886
27887 IX86_BUILTIN_RCPPS256,
27888
27889 IX86_BUILTIN_ROUNDPD256,
27890 IX86_BUILTIN_ROUNDPS256,
27891
27892 IX86_BUILTIN_FLOORPD256,
27893 IX86_BUILTIN_CEILPD256,
27894 IX86_BUILTIN_TRUNCPD256,
27895 IX86_BUILTIN_RINTPD256,
27896 IX86_BUILTIN_ROUNDPD_AZ256,
27897
27898 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256,
27899 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256,
27900 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256,
27901
27902 IX86_BUILTIN_FLOORPS256,
27903 IX86_BUILTIN_CEILPS256,
27904 IX86_BUILTIN_TRUNCPS256,
27905 IX86_BUILTIN_RINTPS256,
27906 IX86_BUILTIN_ROUNDPS_AZ256,
27907
27908 IX86_BUILTIN_FLOORPS_SFIX256,
27909 IX86_BUILTIN_CEILPS_SFIX256,
27910 IX86_BUILTIN_ROUNDPS_AZ_SFIX256,
27911
27912 IX86_BUILTIN_UNPCKHPD256,
27913 IX86_BUILTIN_UNPCKLPD256,
27914 IX86_BUILTIN_UNPCKHPS256,
27915 IX86_BUILTIN_UNPCKLPS256,
27916
27917 IX86_BUILTIN_SI256_SI,
27918 IX86_BUILTIN_PS256_PS,
27919 IX86_BUILTIN_PD256_PD,
27920 IX86_BUILTIN_SI_SI256,
27921 IX86_BUILTIN_PS_PS256,
27922 IX86_BUILTIN_PD_PD256,
27923
27924 IX86_BUILTIN_VTESTZPD,
27925 IX86_BUILTIN_VTESTCPD,
27926 IX86_BUILTIN_VTESTNZCPD,
27927 IX86_BUILTIN_VTESTZPS,
27928 IX86_BUILTIN_VTESTCPS,
27929 IX86_BUILTIN_VTESTNZCPS,
27930 IX86_BUILTIN_VTESTZPD256,
27931 IX86_BUILTIN_VTESTCPD256,
27932 IX86_BUILTIN_VTESTNZCPD256,
27933 IX86_BUILTIN_VTESTZPS256,
27934 IX86_BUILTIN_VTESTCPS256,
27935 IX86_BUILTIN_VTESTNZCPS256,
27936 IX86_BUILTIN_PTESTZ256,
27937 IX86_BUILTIN_PTESTC256,
27938 IX86_BUILTIN_PTESTNZC256,
27939
27940 IX86_BUILTIN_MOVMSKPD256,
27941 IX86_BUILTIN_MOVMSKPS256,
27942
27943 /* AVX2 */
27944 IX86_BUILTIN_MPSADBW256,
27945 IX86_BUILTIN_PABSB256,
27946 IX86_BUILTIN_PABSW256,
27947 IX86_BUILTIN_PABSD256,
27948 IX86_BUILTIN_PACKSSDW256,
27949 IX86_BUILTIN_PACKSSWB256,
27950 IX86_BUILTIN_PACKUSDW256,
27951 IX86_BUILTIN_PACKUSWB256,
27952 IX86_BUILTIN_PADDB256,
27953 IX86_BUILTIN_PADDW256,
27954 IX86_BUILTIN_PADDD256,
27955 IX86_BUILTIN_PADDQ256,
27956 IX86_BUILTIN_PADDSB256,
27957 IX86_BUILTIN_PADDSW256,
27958 IX86_BUILTIN_PADDUSB256,
27959 IX86_BUILTIN_PADDUSW256,
27960 IX86_BUILTIN_PALIGNR256,
27961 IX86_BUILTIN_AND256I,
27962 IX86_BUILTIN_ANDNOT256I,
27963 IX86_BUILTIN_PAVGB256,
27964 IX86_BUILTIN_PAVGW256,
27965 IX86_BUILTIN_PBLENDVB256,
27966 IX86_BUILTIN_PBLENDVW256,
27967 IX86_BUILTIN_PCMPEQB256,
27968 IX86_BUILTIN_PCMPEQW256,
27969 IX86_BUILTIN_PCMPEQD256,
27970 IX86_BUILTIN_PCMPEQQ256,
27971 IX86_BUILTIN_PCMPGTB256,
27972 IX86_BUILTIN_PCMPGTW256,
27973 IX86_BUILTIN_PCMPGTD256,
27974 IX86_BUILTIN_PCMPGTQ256,
27975 IX86_BUILTIN_PHADDW256,
27976 IX86_BUILTIN_PHADDD256,
27977 IX86_BUILTIN_PHADDSW256,
27978 IX86_BUILTIN_PHSUBW256,
27979 IX86_BUILTIN_PHSUBD256,
27980 IX86_BUILTIN_PHSUBSW256,
27981 IX86_BUILTIN_PMADDUBSW256,
27982 IX86_BUILTIN_PMADDWD256,
27983 IX86_BUILTIN_PMAXSB256,
27984 IX86_BUILTIN_PMAXSW256,
27985 IX86_BUILTIN_PMAXSD256,
27986 IX86_BUILTIN_PMAXUB256,
27987 IX86_BUILTIN_PMAXUW256,
27988 IX86_BUILTIN_PMAXUD256,
27989 IX86_BUILTIN_PMINSB256,
27990 IX86_BUILTIN_PMINSW256,
27991 IX86_BUILTIN_PMINSD256,
27992 IX86_BUILTIN_PMINUB256,
27993 IX86_BUILTIN_PMINUW256,
27994 IX86_BUILTIN_PMINUD256,
27995 IX86_BUILTIN_PMOVMSKB256,
27996 IX86_BUILTIN_PMOVSXBW256,
27997 IX86_BUILTIN_PMOVSXBD256,
27998 IX86_BUILTIN_PMOVSXBQ256,
27999 IX86_BUILTIN_PMOVSXWD256,
28000 IX86_BUILTIN_PMOVSXWQ256,
28001 IX86_BUILTIN_PMOVSXDQ256,
28002 IX86_BUILTIN_PMOVZXBW256,
28003 IX86_BUILTIN_PMOVZXBD256,
28004 IX86_BUILTIN_PMOVZXBQ256,
28005 IX86_BUILTIN_PMOVZXWD256,
28006 IX86_BUILTIN_PMOVZXWQ256,
28007 IX86_BUILTIN_PMOVZXDQ256,
28008 IX86_BUILTIN_PMULDQ256,
28009 IX86_BUILTIN_PMULHRSW256,
28010 IX86_BUILTIN_PMULHUW256,
28011 IX86_BUILTIN_PMULHW256,
28012 IX86_BUILTIN_PMULLW256,
28013 IX86_BUILTIN_PMULLD256,
28014 IX86_BUILTIN_PMULUDQ256,
28015 IX86_BUILTIN_POR256,
28016 IX86_BUILTIN_PSADBW256,
28017 IX86_BUILTIN_PSHUFB256,
28018 IX86_BUILTIN_PSHUFD256,
28019 IX86_BUILTIN_PSHUFHW256,
28020 IX86_BUILTIN_PSHUFLW256,
28021 IX86_BUILTIN_PSIGNB256,
28022 IX86_BUILTIN_PSIGNW256,
28023 IX86_BUILTIN_PSIGND256,
28024 IX86_BUILTIN_PSLLDQI256,
28025 IX86_BUILTIN_PSLLWI256,
28026 IX86_BUILTIN_PSLLW256,
28027 IX86_BUILTIN_PSLLDI256,
28028 IX86_BUILTIN_PSLLD256,
28029 IX86_BUILTIN_PSLLQI256,
28030 IX86_BUILTIN_PSLLQ256,
28031 IX86_BUILTIN_PSRAWI256,
28032 IX86_BUILTIN_PSRAW256,
28033 IX86_BUILTIN_PSRADI256,
28034 IX86_BUILTIN_PSRAD256,
28035 IX86_BUILTIN_PSRLDQI256,
28036 IX86_BUILTIN_PSRLWI256,
28037 IX86_BUILTIN_PSRLW256,
28038 IX86_BUILTIN_PSRLDI256,
28039 IX86_BUILTIN_PSRLD256,
28040 IX86_BUILTIN_PSRLQI256,
28041 IX86_BUILTIN_PSRLQ256,
28042 IX86_BUILTIN_PSUBB256,
28043 IX86_BUILTIN_PSUBW256,
28044 IX86_BUILTIN_PSUBD256,
28045 IX86_BUILTIN_PSUBQ256,
28046 IX86_BUILTIN_PSUBSB256,
28047 IX86_BUILTIN_PSUBSW256,
28048 IX86_BUILTIN_PSUBUSB256,
28049 IX86_BUILTIN_PSUBUSW256,
28050 IX86_BUILTIN_PUNPCKHBW256,
28051 IX86_BUILTIN_PUNPCKHWD256,
28052 IX86_BUILTIN_PUNPCKHDQ256,
28053 IX86_BUILTIN_PUNPCKHQDQ256,
28054 IX86_BUILTIN_PUNPCKLBW256,
28055 IX86_BUILTIN_PUNPCKLWD256,
28056 IX86_BUILTIN_PUNPCKLDQ256,
28057 IX86_BUILTIN_PUNPCKLQDQ256,
28058 IX86_BUILTIN_PXOR256,
28059 IX86_BUILTIN_MOVNTDQA256,
28060 IX86_BUILTIN_VBROADCASTSS_PS,
28061 IX86_BUILTIN_VBROADCASTSS_PS256,
28062 IX86_BUILTIN_VBROADCASTSD_PD256,
28063 IX86_BUILTIN_VBROADCASTSI256,
28064 IX86_BUILTIN_PBLENDD256,
28065 IX86_BUILTIN_PBLENDD128,
28066 IX86_BUILTIN_PBROADCASTB256,
28067 IX86_BUILTIN_PBROADCASTW256,
28068 IX86_BUILTIN_PBROADCASTD256,
28069 IX86_BUILTIN_PBROADCASTQ256,
28070 IX86_BUILTIN_PBROADCASTB128,
28071 IX86_BUILTIN_PBROADCASTW128,
28072 IX86_BUILTIN_PBROADCASTD128,
28073 IX86_BUILTIN_PBROADCASTQ128,
28074 IX86_BUILTIN_VPERMVARSI256,
28075 IX86_BUILTIN_VPERMDF256,
28076 IX86_BUILTIN_VPERMVARSF256,
28077 IX86_BUILTIN_VPERMDI256,
28078 IX86_BUILTIN_VPERMTI256,
28079 IX86_BUILTIN_VEXTRACT128I256,
28080 IX86_BUILTIN_VINSERT128I256,
28081 IX86_BUILTIN_MASKLOADD,
28082 IX86_BUILTIN_MASKLOADQ,
28083 IX86_BUILTIN_MASKLOADD256,
28084 IX86_BUILTIN_MASKLOADQ256,
28085 IX86_BUILTIN_MASKSTORED,
28086 IX86_BUILTIN_MASKSTOREQ,
28087 IX86_BUILTIN_MASKSTORED256,
28088 IX86_BUILTIN_MASKSTOREQ256,
28089 IX86_BUILTIN_PSLLVV4DI,
28090 IX86_BUILTIN_PSLLVV2DI,
28091 IX86_BUILTIN_PSLLVV8SI,
28092 IX86_BUILTIN_PSLLVV4SI,
28093 IX86_BUILTIN_PSRAVV8SI,
28094 IX86_BUILTIN_PSRAVV4SI,
28095 IX86_BUILTIN_PSRLVV4DI,
28096 IX86_BUILTIN_PSRLVV2DI,
28097 IX86_BUILTIN_PSRLVV8SI,
28098 IX86_BUILTIN_PSRLVV4SI,
28099
28100 IX86_BUILTIN_GATHERSIV2DF,
28101 IX86_BUILTIN_GATHERSIV4DF,
28102 IX86_BUILTIN_GATHERDIV2DF,
28103 IX86_BUILTIN_GATHERDIV4DF,
28104 IX86_BUILTIN_GATHERSIV4SF,
28105 IX86_BUILTIN_GATHERSIV8SF,
28106 IX86_BUILTIN_GATHERDIV4SF,
28107 IX86_BUILTIN_GATHERDIV8SF,
28108 IX86_BUILTIN_GATHERSIV2DI,
28109 IX86_BUILTIN_GATHERSIV4DI,
28110 IX86_BUILTIN_GATHERDIV2DI,
28111 IX86_BUILTIN_GATHERDIV4DI,
28112 IX86_BUILTIN_GATHERSIV4SI,
28113 IX86_BUILTIN_GATHERSIV8SI,
28114 IX86_BUILTIN_GATHERDIV4SI,
28115 IX86_BUILTIN_GATHERDIV8SI,
28116
28117 /* AVX512F */
28118 IX86_BUILTIN_ADDPD512,
28119 IX86_BUILTIN_ADDPS512,
28120 IX86_BUILTIN_ADDSD_ROUND,
28121 IX86_BUILTIN_ADDSS_ROUND,
28122 IX86_BUILTIN_ALIGND512,
28123 IX86_BUILTIN_ALIGNQ512,
28124 IX86_BUILTIN_BLENDMD512,
28125 IX86_BUILTIN_BLENDMPD512,
28126 IX86_BUILTIN_BLENDMPS512,
28127 IX86_BUILTIN_BLENDMQ512,
28128 IX86_BUILTIN_BROADCASTF32X4_512,
28129 IX86_BUILTIN_BROADCASTF64X4_512,
28130 IX86_BUILTIN_BROADCASTI32X4_512,
28131 IX86_BUILTIN_BROADCASTI64X4_512,
28132 IX86_BUILTIN_BROADCASTSD512,
28133 IX86_BUILTIN_BROADCASTSS512,
28134 IX86_BUILTIN_CMPD512,
28135 IX86_BUILTIN_CMPPD512,
28136 IX86_BUILTIN_CMPPS512,
28137 IX86_BUILTIN_CMPQ512,
28138 IX86_BUILTIN_CMPSD_MASK,
28139 IX86_BUILTIN_CMPSS_MASK,
28140 IX86_BUILTIN_COMIDF,
28141 IX86_BUILTIN_COMISF,
28142 IX86_BUILTIN_COMPRESSPD512,
28143 IX86_BUILTIN_COMPRESSPDSTORE512,
28144 IX86_BUILTIN_COMPRESSPS512,
28145 IX86_BUILTIN_COMPRESSPSSTORE512,
28146 IX86_BUILTIN_CVTDQ2PD512,
28147 IX86_BUILTIN_CVTDQ2PS512,
28148 IX86_BUILTIN_CVTPD2DQ512,
28149 IX86_BUILTIN_CVTPD2PS512,
28150 IX86_BUILTIN_CVTPD2UDQ512,
28151 IX86_BUILTIN_CVTPH2PS512,
28152 IX86_BUILTIN_CVTPS2DQ512,
28153 IX86_BUILTIN_CVTPS2PD512,
28154 IX86_BUILTIN_CVTPS2PH512,
28155 IX86_BUILTIN_CVTPS2UDQ512,
28156 IX86_BUILTIN_CVTSD2SS_ROUND,
28157 IX86_BUILTIN_CVTSI2SD64,
28158 IX86_BUILTIN_CVTSI2SS32,
28159 IX86_BUILTIN_CVTSI2SS64,
28160 IX86_BUILTIN_CVTSS2SD_ROUND,
28161 IX86_BUILTIN_CVTTPD2DQ512,
28162 IX86_BUILTIN_CVTTPD2UDQ512,
28163 IX86_BUILTIN_CVTTPS2DQ512,
28164 IX86_BUILTIN_CVTTPS2UDQ512,
28165 IX86_BUILTIN_CVTUDQ2PD512,
28166 IX86_BUILTIN_CVTUDQ2PS512,
28167 IX86_BUILTIN_CVTUSI2SD32,
28168 IX86_BUILTIN_CVTUSI2SD64,
28169 IX86_BUILTIN_CVTUSI2SS32,
28170 IX86_BUILTIN_CVTUSI2SS64,
28171 IX86_BUILTIN_DIVPD512,
28172 IX86_BUILTIN_DIVPS512,
28173 IX86_BUILTIN_DIVSD_ROUND,
28174 IX86_BUILTIN_DIVSS_ROUND,
28175 IX86_BUILTIN_EXPANDPD512,
28176 IX86_BUILTIN_EXPANDPD512Z,
28177 IX86_BUILTIN_EXPANDPDLOAD512,
28178 IX86_BUILTIN_EXPANDPDLOAD512Z,
28179 IX86_BUILTIN_EXPANDPS512,
28180 IX86_BUILTIN_EXPANDPS512Z,
28181 IX86_BUILTIN_EXPANDPSLOAD512,
28182 IX86_BUILTIN_EXPANDPSLOAD512Z,
28183 IX86_BUILTIN_EXTRACTF32X4,
28184 IX86_BUILTIN_EXTRACTF64X4,
28185 IX86_BUILTIN_EXTRACTI32X4,
28186 IX86_BUILTIN_EXTRACTI64X4,
28187 IX86_BUILTIN_FIXUPIMMPD512_MASK,
28188 IX86_BUILTIN_FIXUPIMMPD512_MASKZ,
28189 IX86_BUILTIN_FIXUPIMMPS512_MASK,
28190 IX86_BUILTIN_FIXUPIMMPS512_MASKZ,
28191 IX86_BUILTIN_FIXUPIMMSD128_MASK,
28192 IX86_BUILTIN_FIXUPIMMSD128_MASKZ,
28193 IX86_BUILTIN_FIXUPIMMSS128_MASK,
28194 IX86_BUILTIN_FIXUPIMMSS128_MASKZ,
28195 IX86_BUILTIN_GETEXPPD512,
28196 IX86_BUILTIN_GETEXPPS512,
28197 IX86_BUILTIN_GETEXPSD128,
28198 IX86_BUILTIN_GETEXPSS128,
28199 IX86_BUILTIN_GETMANTPD512,
28200 IX86_BUILTIN_GETMANTPS512,
28201 IX86_BUILTIN_GETMANTSD128,
28202 IX86_BUILTIN_GETMANTSS128,
28203 IX86_BUILTIN_INSERTF32X4,
28204 IX86_BUILTIN_INSERTF64X4,
28205 IX86_BUILTIN_INSERTI32X4,
28206 IX86_BUILTIN_INSERTI64X4,
28207 IX86_BUILTIN_LOADAPD512,
28208 IX86_BUILTIN_LOADAPS512,
28209 IX86_BUILTIN_LOADDQUDI512,
28210 IX86_BUILTIN_LOADDQUSI512,
28211 IX86_BUILTIN_LOADUPD512,
28212 IX86_BUILTIN_LOADUPS512,
28213 IX86_BUILTIN_MAXPD512,
28214 IX86_BUILTIN_MAXPS512,
28215 IX86_BUILTIN_MAXSD_ROUND,
28216 IX86_BUILTIN_MAXSS_ROUND,
28217 IX86_BUILTIN_MINPD512,
28218 IX86_BUILTIN_MINPS512,
28219 IX86_BUILTIN_MINSD_ROUND,
28220 IX86_BUILTIN_MINSS_ROUND,
28221 IX86_BUILTIN_MOVAPD512,
28222 IX86_BUILTIN_MOVAPS512,
28223 IX86_BUILTIN_MOVDDUP512,
28224 IX86_BUILTIN_MOVDQA32LOAD512,
28225 IX86_BUILTIN_MOVDQA32STORE512,
28226 IX86_BUILTIN_MOVDQA32_512,
28227 IX86_BUILTIN_MOVDQA64LOAD512,
28228 IX86_BUILTIN_MOVDQA64STORE512,
28229 IX86_BUILTIN_MOVDQA64_512,
28230 IX86_BUILTIN_MOVNTDQ512,
28231 IX86_BUILTIN_MOVNTDQA512,
28232 IX86_BUILTIN_MOVNTPD512,
28233 IX86_BUILTIN_MOVNTPS512,
28234 IX86_BUILTIN_MOVSHDUP512,
28235 IX86_BUILTIN_MOVSLDUP512,
28236 IX86_BUILTIN_MULPD512,
28237 IX86_BUILTIN_MULPS512,
28238 IX86_BUILTIN_MULSD_ROUND,
28239 IX86_BUILTIN_MULSS_ROUND,
28240 IX86_BUILTIN_PABSD512,
28241 IX86_BUILTIN_PABSQ512,
28242 IX86_BUILTIN_PADDD512,
28243 IX86_BUILTIN_PADDQ512,
28244 IX86_BUILTIN_PANDD512,
28245 IX86_BUILTIN_PANDND512,
28246 IX86_BUILTIN_PANDNQ512,
28247 IX86_BUILTIN_PANDQ512,
28248 IX86_BUILTIN_PBROADCASTD512,
28249 IX86_BUILTIN_PBROADCASTD512_GPR,
28250 IX86_BUILTIN_PBROADCASTMB512,
28251 IX86_BUILTIN_PBROADCASTMW512,
28252 IX86_BUILTIN_PBROADCASTQ512,
28253 IX86_BUILTIN_PBROADCASTQ512_GPR,
28254 IX86_BUILTIN_PBROADCASTQ512_MEM,
28255 IX86_BUILTIN_PCMPEQD512_MASK,
28256 IX86_BUILTIN_PCMPEQQ512_MASK,
28257 IX86_BUILTIN_PCMPGTD512_MASK,
28258 IX86_BUILTIN_PCMPGTQ512_MASK,
28259 IX86_BUILTIN_PCOMPRESSD512,
28260 IX86_BUILTIN_PCOMPRESSDSTORE512,
28261 IX86_BUILTIN_PCOMPRESSQ512,
28262 IX86_BUILTIN_PCOMPRESSQSTORE512,
28263 IX86_BUILTIN_PEXPANDD512,
28264 IX86_BUILTIN_PEXPANDD512Z,
28265 IX86_BUILTIN_PEXPANDDLOAD512,
28266 IX86_BUILTIN_PEXPANDDLOAD512Z,
28267 IX86_BUILTIN_PEXPANDQ512,
28268 IX86_BUILTIN_PEXPANDQ512Z,
28269 IX86_BUILTIN_PEXPANDQLOAD512,
28270 IX86_BUILTIN_PEXPANDQLOAD512Z,
28271 IX86_BUILTIN_PMAXSD512,
28272 IX86_BUILTIN_PMAXSQ512,
28273 IX86_BUILTIN_PMAXUD512,
28274 IX86_BUILTIN_PMAXUQ512,
28275 IX86_BUILTIN_PMINSD512,
28276 IX86_BUILTIN_PMINSQ512,
28277 IX86_BUILTIN_PMINUD512,
28278 IX86_BUILTIN_PMINUQ512,
28279 IX86_BUILTIN_PMOVDB512,
28280 IX86_BUILTIN_PMOVDB512_MEM,
28281 IX86_BUILTIN_PMOVDW512,
28282 IX86_BUILTIN_PMOVDW512_MEM,
28283 IX86_BUILTIN_PMOVQB512,
28284 IX86_BUILTIN_PMOVQB512_MEM,
28285 IX86_BUILTIN_PMOVQD512,
28286 IX86_BUILTIN_PMOVQD512_MEM,
28287 IX86_BUILTIN_PMOVQW512,
28288 IX86_BUILTIN_PMOVQW512_MEM,
28289 IX86_BUILTIN_PMOVSDB512,
28290 IX86_BUILTIN_PMOVSDB512_MEM,
28291 IX86_BUILTIN_PMOVSDW512,
28292 IX86_BUILTIN_PMOVSDW512_MEM,
28293 IX86_BUILTIN_PMOVSQB512,
28294 IX86_BUILTIN_PMOVSQB512_MEM,
28295 IX86_BUILTIN_PMOVSQD512,
28296 IX86_BUILTIN_PMOVSQD512_MEM,
28297 IX86_BUILTIN_PMOVSQW512,
28298 IX86_BUILTIN_PMOVSQW512_MEM,
28299 IX86_BUILTIN_PMOVSXBD512,
28300 IX86_BUILTIN_PMOVSXBQ512,
28301 IX86_BUILTIN_PMOVSXDQ512,
28302 IX86_BUILTIN_PMOVSXWD512,
28303 IX86_BUILTIN_PMOVSXWQ512,
28304 IX86_BUILTIN_PMOVUSDB512,
28305 IX86_BUILTIN_PMOVUSDB512_MEM,
28306 IX86_BUILTIN_PMOVUSDW512,
28307 IX86_BUILTIN_PMOVUSDW512_MEM,
28308 IX86_BUILTIN_PMOVUSQB512,
28309 IX86_BUILTIN_PMOVUSQB512_MEM,
28310 IX86_BUILTIN_PMOVUSQD512,
28311 IX86_BUILTIN_PMOVUSQD512_MEM,
28312 IX86_BUILTIN_PMOVUSQW512,
28313 IX86_BUILTIN_PMOVUSQW512_MEM,
28314 IX86_BUILTIN_PMOVZXBD512,
28315 IX86_BUILTIN_PMOVZXBQ512,
28316 IX86_BUILTIN_PMOVZXDQ512,
28317 IX86_BUILTIN_PMOVZXWD512,
28318 IX86_BUILTIN_PMOVZXWQ512,
28319 IX86_BUILTIN_PMULDQ512,
28320 IX86_BUILTIN_PMULLD512,
28321 IX86_BUILTIN_PMULUDQ512,
28322 IX86_BUILTIN_PORD512,
28323 IX86_BUILTIN_PORQ512,
28324 IX86_BUILTIN_PROLD512,
28325 IX86_BUILTIN_PROLQ512,
28326 IX86_BUILTIN_PROLVD512,
28327 IX86_BUILTIN_PROLVQ512,
28328 IX86_BUILTIN_PRORD512,
28329 IX86_BUILTIN_PRORQ512,
28330 IX86_BUILTIN_PRORVD512,
28331 IX86_BUILTIN_PRORVQ512,
28332 IX86_BUILTIN_PSHUFD512,
28333 IX86_BUILTIN_PSLLD512,
28334 IX86_BUILTIN_PSLLDI512,
28335 IX86_BUILTIN_PSLLQ512,
28336 IX86_BUILTIN_PSLLQI512,
28337 IX86_BUILTIN_PSLLVV16SI,
28338 IX86_BUILTIN_PSLLVV8DI,
28339 IX86_BUILTIN_PSRAD512,
28340 IX86_BUILTIN_PSRADI512,
28341 IX86_BUILTIN_PSRAQ512,
28342 IX86_BUILTIN_PSRAQI512,
28343 IX86_BUILTIN_PSRAVV16SI,
28344 IX86_BUILTIN_PSRAVV8DI,
28345 IX86_BUILTIN_PSRLD512,
28346 IX86_BUILTIN_PSRLDI512,
28347 IX86_BUILTIN_PSRLQ512,
28348 IX86_BUILTIN_PSRLQI512,
28349 IX86_BUILTIN_PSRLVV16SI,
28350 IX86_BUILTIN_PSRLVV8DI,
28351 IX86_BUILTIN_PSUBD512,
28352 IX86_BUILTIN_PSUBQ512,
28353 IX86_BUILTIN_PTESTMD512,
28354 IX86_BUILTIN_PTESTMQ512,
28355 IX86_BUILTIN_PTESTNMD512,
28356 IX86_BUILTIN_PTESTNMQ512,
28357 IX86_BUILTIN_PUNPCKHDQ512,
28358 IX86_BUILTIN_PUNPCKHQDQ512,
28359 IX86_BUILTIN_PUNPCKLDQ512,
28360 IX86_BUILTIN_PUNPCKLQDQ512,
28361 IX86_BUILTIN_PXORD512,
28362 IX86_BUILTIN_PXORQ512,
28363 IX86_BUILTIN_RCP14PD512,
28364 IX86_BUILTIN_RCP14PS512,
28365 IX86_BUILTIN_RCP14SD,
28366 IX86_BUILTIN_RCP14SS,
28367 IX86_BUILTIN_RNDSCALEPD,
28368 IX86_BUILTIN_RNDSCALEPS,
28369 IX86_BUILTIN_RNDSCALESD,
28370 IX86_BUILTIN_RNDSCALESS,
28371 IX86_BUILTIN_RSQRT14PD512,
28372 IX86_BUILTIN_RSQRT14PS512,
28373 IX86_BUILTIN_RSQRT14SD,
28374 IX86_BUILTIN_RSQRT14SS,
28375 IX86_BUILTIN_SCALEFPD512,
28376 IX86_BUILTIN_SCALEFPS512,
28377 IX86_BUILTIN_SCALEFSD,
28378 IX86_BUILTIN_SCALEFSS,
28379 IX86_BUILTIN_SHUFPD512,
28380 IX86_BUILTIN_SHUFPS512,
28381 IX86_BUILTIN_SHUF_F32x4,
28382 IX86_BUILTIN_SHUF_F64x2,
28383 IX86_BUILTIN_SHUF_I32x4,
28384 IX86_BUILTIN_SHUF_I64x2,
28385 IX86_BUILTIN_SQRTPD512,
28386 IX86_BUILTIN_SQRTPD512_MASK,
28387 IX86_BUILTIN_SQRTPS512_MASK,
28388 IX86_BUILTIN_SQRTPS_NR512,
28389 IX86_BUILTIN_SQRTSD_ROUND,
28390 IX86_BUILTIN_SQRTSS_ROUND,
28391 IX86_BUILTIN_STOREAPD512,
28392 IX86_BUILTIN_STOREAPS512,
28393 IX86_BUILTIN_STOREDQUDI512,
28394 IX86_BUILTIN_STOREDQUSI512,
28395 IX86_BUILTIN_STOREUPD512,
28396 IX86_BUILTIN_STOREUPS512,
28397 IX86_BUILTIN_SUBPD512,
28398 IX86_BUILTIN_SUBPS512,
28399 IX86_BUILTIN_SUBSD_ROUND,
28400 IX86_BUILTIN_SUBSS_ROUND,
28401 IX86_BUILTIN_UCMPD512,
28402 IX86_BUILTIN_UCMPQ512,
28403 IX86_BUILTIN_UNPCKHPD512,
28404 IX86_BUILTIN_UNPCKHPS512,
28405 IX86_BUILTIN_UNPCKLPD512,
28406 IX86_BUILTIN_UNPCKLPS512,
28407 IX86_BUILTIN_VCVTSD2SI32,
28408 IX86_BUILTIN_VCVTSD2SI64,
28409 IX86_BUILTIN_VCVTSD2USI32,
28410 IX86_BUILTIN_VCVTSD2USI64,
28411 IX86_BUILTIN_VCVTSS2SI32,
28412 IX86_BUILTIN_VCVTSS2SI64,
28413 IX86_BUILTIN_VCVTSS2USI32,
28414 IX86_BUILTIN_VCVTSS2USI64,
28415 IX86_BUILTIN_VCVTTSD2SI32,
28416 IX86_BUILTIN_VCVTTSD2SI64,
28417 IX86_BUILTIN_VCVTTSD2USI32,
28418 IX86_BUILTIN_VCVTTSD2USI64,
28419 IX86_BUILTIN_VCVTTSS2SI32,
28420 IX86_BUILTIN_VCVTTSS2SI64,
28421 IX86_BUILTIN_VCVTTSS2USI32,
28422 IX86_BUILTIN_VCVTTSS2USI64,
28423 IX86_BUILTIN_VFMADDPD512_MASK,
28424 IX86_BUILTIN_VFMADDPD512_MASK3,
28425 IX86_BUILTIN_VFMADDPD512_MASKZ,
28426 IX86_BUILTIN_VFMADDPS512_MASK,
28427 IX86_BUILTIN_VFMADDPS512_MASK3,
28428 IX86_BUILTIN_VFMADDPS512_MASKZ,
28429 IX86_BUILTIN_VFMADDSD3_ROUND,
28430 IX86_BUILTIN_VFMADDSS3_ROUND,
28431 IX86_BUILTIN_VFMADDSUBPD512_MASK,
28432 IX86_BUILTIN_VFMADDSUBPD512_MASK3,
28433 IX86_BUILTIN_VFMADDSUBPD512_MASKZ,
28434 IX86_BUILTIN_VFMADDSUBPS512_MASK,
28435 IX86_BUILTIN_VFMADDSUBPS512_MASK3,
28436 IX86_BUILTIN_VFMADDSUBPS512_MASKZ,
28437 IX86_BUILTIN_VFMSUBADDPD512_MASK3,
28438 IX86_BUILTIN_VFMSUBADDPS512_MASK3,
28439 IX86_BUILTIN_VFMSUBPD512_MASK3,
28440 IX86_BUILTIN_VFMSUBPS512_MASK3,
28441 IX86_BUILTIN_VFMSUBSD3_MASK3,
28442 IX86_BUILTIN_VFMSUBSS3_MASK3,
28443 IX86_BUILTIN_VFNMADDPD512_MASK,
28444 IX86_BUILTIN_VFNMADDPS512_MASK,
28445 IX86_BUILTIN_VFNMSUBPD512_MASK,
28446 IX86_BUILTIN_VFNMSUBPD512_MASK3,
28447 IX86_BUILTIN_VFNMSUBPS512_MASK,
28448 IX86_BUILTIN_VFNMSUBPS512_MASK3,
28449 IX86_BUILTIN_VPCLZCNTD512,
28450 IX86_BUILTIN_VPCLZCNTQ512,
28451 IX86_BUILTIN_VPCONFLICTD512,
28452 IX86_BUILTIN_VPCONFLICTQ512,
28453 IX86_BUILTIN_VPERMDF512,
28454 IX86_BUILTIN_VPERMDI512,
28455 IX86_BUILTIN_VPERMI2VARD512,
28456 IX86_BUILTIN_VPERMI2VARPD512,
28457 IX86_BUILTIN_VPERMI2VARPS512,
28458 IX86_BUILTIN_VPERMI2VARQ512,
28459 IX86_BUILTIN_VPERMILPD512,
28460 IX86_BUILTIN_VPERMILPS512,
28461 IX86_BUILTIN_VPERMILVARPD512,
28462 IX86_BUILTIN_VPERMILVARPS512,
28463 IX86_BUILTIN_VPERMT2VARD512,
28464 IX86_BUILTIN_VPERMT2VARD512_MASKZ,
28465 IX86_BUILTIN_VPERMT2VARPD512,
28466 IX86_BUILTIN_VPERMT2VARPD512_MASKZ,
28467 IX86_BUILTIN_VPERMT2VARPS512,
28468 IX86_BUILTIN_VPERMT2VARPS512_MASKZ,
28469 IX86_BUILTIN_VPERMT2VARQ512,
28470 IX86_BUILTIN_VPERMT2VARQ512_MASKZ,
28471 IX86_BUILTIN_VPERMVARDF512,
28472 IX86_BUILTIN_VPERMVARDI512,
28473 IX86_BUILTIN_VPERMVARSF512,
28474 IX86_BUILTIN_VPERMVARSI512,
28475 IX86_BUILTIN_VTERNLOGD512_MASK,
28476 IX86_BUILTIN_VTERNLOGD512_MASKZ,
28477 IX86_BUILTIN_VTERNLOGQ512_MASK,
28478 IX86_BUILTIN_VTERNLOGQ512_MASKZ,
28479
28480 /* Mask arithmetic operations */
28481 IX86_BUILTIN_KAND16,
28482 IX86_BUILTIN_KANDN16,
28483 IX86_BUILTIN_KNOT16,
28484 IX86_BUILTIN_KOR16,
28485 IX86_BUILTIN_KORTESTC16,
28486 IX86_BUILTIN_KORTESTZ16,
28487 IX86_BUILTIN_KUNPCKBW,
28488 IX86_BUILTIN_KXNOR16,
28489 IX86_BUILTIN_KXOR16,
28490 IX86_BUILTIN_KMOV16,
28491
28492 /* Alternate 4- and 8-element gather/scatter for the vectorizer,
28493 where all operands are 32 or 64 bytes wide respectively. */
28494 IX86_BUILTIN_GATHERALTSIV4DF,
28495 IX86_BUILTIN_GATHERALTDIV8SF,
28496 IX86_BUILTIN_GATHERALTSIV4DI,
28497 IX86_BUILTIN_GATHERALTDIV8SI,
28498 IX86_BUILTIN_GATHER3ALTDIV16SF,
28499 IX86_BUILTIN_GATHER3ALTDIV16SI,
28500 IX86_BUILTIN_GATHER3ALTSIV8DF,
28501 IX86_BUILTIN_GATHER3ALTSIV8DI,
28502 IX86_BUILTIN_GATHER3DIV16SF,
28503 IX86_BUILTIN_GATHER3DIV16SI,
28504 IX86_BUILTIN_GATHER3DIV8DF,
28505 IX86_BUILTIN_GATHER3DIV8DI,
28506 IX86_BUILTIN_GATHER3SIV16SF,
28507 IX86_BUILTIN_GATHER3SIV16SI,
28508 IX86_BUILTIN_GATHER3SIV8DF,
28509 IX86_BUILTIN_GATHER3SIV8DI,
28510 IX86_BUILTIN_SCATTERDIV16SF,
28511 IX86_BUILTIN_SCATTERDIV16SI,
28512 IX86_BUILTIN_SCATTERDIV8DF,
28513 IX86_BUILTIN_SCATTERDIV8DI,
28514 IX86_BUILTIN_SCATTERSIV16SF,
28515 IX86_BUILTIN_SCATTERSIV16SI,
28516 IX86_BUILTIN_SCATTERSIV8DF,
28517 IX86_BUILTIN_SCATTERSIV8DI,
28518
28519 /* AVX512PF */
28520 IX86_BUILTIN_GATHERPFQPD,
28521 IX86_BUILTIN_GATHERPFDPS,
28522 IX86_BUILTIN_GATHERPFDPD,
28523 IX86_BUILTIN_GATHERPFQPS,
28524 IX86_BUILTIN_SCATTERPFDPD,
28525 IX86_BUILTIN_SCATTERPFDPS,
28526 IX86_BUILTIN_SCATTERPFQPD,
28527 IX86_BUILTIN_SCATTERPFQPS,
28528
28529 /* AVX512ER */
28530 IX86_BUILTIN_EXP2PD_MASK,
28531 IX86_BUILTIN_EXP2PS_MASK,
28532 IX86_BUILTIN_EXP2PS,
28533 IX86_BUILTIN_RCP28PD,
28534 IX86_BUILTIN_RCP28PS,
28535 IX86_BUILTIN_RCP28SD,
28536 IX86_BUILTIN_RCP28SS,
28537 IX86_BUILTIN_RSQRT28PD,
28538 IX86_BUILTIN_RSQRT28PS,
28539 IX86_BUILTIN_RSQRT28SD,
28540 IX86_BUILTIN_RSQRT28SS,
28541
28542 /* SHA builtins. */
28543 IX86_BUILTIN_SHA1MSG1,
28544 IX86_BUILTIN_SHA1MSG2,
28545 IX86_BUILTIN_SHA1NEXTE,
28546 IX86_BUILTIN_SHA1RNDS4,
28547 IX86_BUILTIN_SHA256MSG1,
28548 IX86_BUILTIN_SHA256MSG2,
28549 IX86_BUILTIN_SHA256RNDS2,
28550
28551 /* CLFLUSHOPT instructions. */
28552 IX86_BUILTIN_CLFLUSHOPT,
28553
28554 /* TFmode support builtins. */
28555 IX86_BUILTIN_INFQ,
28556 IX86_BUILTIN_HUGE_VALQ,
28557 IX86_BUILTIN_FABSQ,
28558 IX86_BUILTIN_COPYSIGNQ,
28559
28560 /* Vectorizer support builtins. */
28561 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512,
28562 IX86_BUILTIN_CPYSGNPS,
28563 IX86_BUILTIN_CPYSGNPD,
28564 IX86_BUILTIN_CPYSGNPS256,
28565 IX86_BUILTIN_CPYSGNPS512,
28566 IX86_BUILTIN_CPYSGNPD256,
28567 IX86_BUILTIN_CPYSGNPD512,
28568 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512,
28569 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512,
28570
28571
28572 /* FMA4 instructions. */
28573 IX86_BUILTIN_VFMADDSS,
28574 IX86_BUILTIN_VFMADDSD,
28575 IX86_BUILTIN_VFMADDPS,
28576 IX86_BUILTIN_VFMADDPD,
28577 IX86_BUILTIN_VFMADDPS256,
28578 IX86_BUILTIN_VFMADDPD256,
28579 IX86_BUILTIN_VFMADDSUBPS,
28580 IX86_BUILTIN_VFMADDSUBPD,
28581 IX86_BUILTIN_VFMADDSUBPS256,
28582 IX86_BUILTIN_VFMADDSUBPD256,
28583
28584 /* FMA3 instructions. */
28585 IX86_BUILTIN_VFMADDSS3,
28586 IX86_BUILTIN_VFMADDSD3,
28587
28588 /* XOP instructions. */
28589 IX86_BUILTIN_VPCMOV,
28590 IX86_BUILTIN_VPCMOV_V2DI,
28591 IX86_BUILTIN_VPCMOV_V4SI,
28592 IX86_BUILTIN_VPCMOV_V8HI,
28593 IX86_BUILTIN_VPCMOV_V16QI,
28594 IX86_BUILTIN_VPCMOV_V4SF,
28595 IX86_BUILTIN_VPCMOV_V2DF,
28596 IX86_BUILTIN_VPCMOV256,
28597 IX86_BUILTIN_VPCMOV_V4DI256,
28598 IX86_BUILTIN_VPCMOV_V8SI256,
28599 IX86_BUILTIN_VPCMOV_V16HI256,
28600 IX86_BUILTIN_VPCMOV_V32QI256,
28601 IX86_BUILTIN_VPCMOV_V8SF256,
28602 IX86_BUILTIN_VPCMOV_V4DF256,
28603
28604 IX86_BUILTIN_VPPERM,
28605
28606 IX86_BUILTIN_VPMACSSWW,
28607 IX86_BUILTIN_VPMACSWW,
28608 IX86_BUILTIN_VPMACSSWD,
28609 IX86_BUILTIN_VPMACSWD,
28610 IX86_BUILTIN_VPMACSSDD,
28611 IX86_BUILTIN_VPMACSDD,
28612 IX86_BUILTIN_VPMACSSDQL,
28613 IX86_BUILTIN_VPMACSSDQH,
28614 IX86_BUILTIN_VPMACSDQL,
28615 IX86_BUILTIN_VPMACSDQH,
28616 IX86_BUILTIN_VPMADCSSWD,
28617 IX86_BUILTIN_VPMADCSWD,
28618
28619 IX86_BUILTIN_VPHADDBW,
28620 IX86_BUILTIN_VPHADDBD,
28621 IX86_BUILTIN_VPHADDBQ,
28622 IX86_BUILTIN_VPHADDWD,
28623 IX86_BUILTIN_VPHADDWQ,
28624 IX86_BUILTIN_VPHADDDQ,
28625 IX86_BUILTIN_VPHADDUBW,
28626 IX86_BUILTIN_VPHADDUBD,
28627 IX86_BUILTIN_VPHADDUBQ,
28628 IX86_BUILTIN_VPHADDUWD,
28629 IX86_BUILTIN_VPHADDUWQ,
28630 IX86_BUILTIN_VPHADDUDQ,
28631 IX86_BUILTIN_VPHSUBBW,
28632 IX86_BUILTIN_VPHSUBWD,
28633 IX86_BUILTIN_VPHSUBDQ,
28634
28635 IX86_BUILTIN_VPROTB,
28636 IX86_BUILTIN_VPROTW,
28637 IX86_BUILTIN_VPROTD,
28638 IX86_BUILTIN_VPROTQ,
28639 IX86_BUILTIN_VPROTB_IMM,
28640 IX86_BUILTIN_VPROTW_IMM,
28641 IX86_BUILTIN_VPROTD_IMM,
28642 IX86_BUILTIN_VPROTQ_IMM,
28643
28644 IX86_BUILTIN_VPSHLB,
28645 IX86_BUILTIN_VPSHLW,
28646 IX86_BUILTIN_VPSHLD,
28647 IX86_BUILTIN_VPSHLQ,
28648 IX86_BUILTIN_VPSHAB,
28649 IX86_BUILTIN_VPSHAW,
28650 IX86_BUILTIN_VPSHAD,
28651 IX86_BUILTIN_VPSHAQ,
28652
28653 IX86_BUILTIN_VFRCZSS,
28654 IX86_BUILTIN_VFRCZSD,
28655 IX86_BUILTIN_VFRCZPS,
28656 IX86_BUILTIN_VFRCZPD,
28657 IX86_BUILTIN_VFRCZPS256,
28658 IX86_BUILTIN_VFRCZPD256,
28659
28660 IX86_BUILTIN_VPCOMEQUB,
28661 IX86_BUILTIN_VPCOMNEUB,
28662 IX86_BUILTIN_VPCOMLTUB,
28663 IX86_BUILTIN_VPCOMLEUB,
28664 IX86_BUILTIN_VPCOMGTUB,
28665 IX86_BUILTIN_VPCOMGEUB,
28666 IX86_BUILTIN_VPCOMFALSEUB,
28667 IX86_BUILTIN_VPCOMTRUEUB,
28668
28669 IX86_BUILTIN_VPCOMEQUW,
28670 IX86_BUILTIN_VPCOMNEUW,
28671 IX86_BUILTIN_VPCOMLTUW,
28672 IX86_BUILTIN_VPCOMLEUW,
28673 IX86_BUILTIN_VPCOMGTUW,
28674 IX86_BUILTIN_VPCOMGEUW,
28675 IX86_BUILTIN_VPCOMFALSEUW,
28676 IX86_BUILTIN_VPCOMTRUEUW,
28677
28678 IX86_BUILTIN_VPCOMEQUD,
28679 IX86_BUILTIN_VPCOMNEUD,
28680 IX86_BUILTIN_VPCOMLTUD,
28681 IX86_BUILTIN_VPCOMLEUD,
28682 IX86_BUILTIN_VPCOMGTUD,
28683 IX86_BUILTIN_VPCOMGEUD,
28684 IX86_BUILTIN_VPCOMFALSEUD,
28685 IX86_BUILTIN_VPCOMTRUEUD,
28686
28687 IX86_BUILTIN_VPCOMEQUQ,
28688 IX86_BUILTIN_VPCOMNEUQ,
28689 IX86_BUILTIN_VPCOMLTUQ,
28690 IX86_BUILTIN_VPCOMLEUQ,
28691 IX86_BUILTIN_VPCOMGTUQ,
28692 IX86_BUILTIN_VPCOMGEUQ,
28693 IX86_BUILTIN_VPCOMFALSEUQ,
28694 IX86_BUILTIN_VPCOMTRUEUQ,
28695
28696 IX86_BUILTIN_VPCOMEQB,
28697 IX86_BUILTIN_VPCOMNEB,
28698 IX86_BUILTIN_VPCOMLTB,
28699 IX86_BUILTIN_VPCOMLEB,
28700 IX86_BUILTIN_VPCOMGTB,
28701 IX86_BUILTIN_VPCOMGEB,
28702 IX86_BUILTIN_VPCOMFALSEB,
28703 IX86_BUILTIN_VPCOMTRUEB,
28704
28705 IX86_BUILTIN_VPCOMEQW,
28706 IX86_BUILTIN_VPCOMNEW,
28707 IX86_BUILTIN_VPCOMLTW,
28708 IX86_BUILTIN_VPCOMLEW,
28709 IX86_BUILTIN_VPCOMGTW,
28710 IX86_BUILTIN_VPCOMGEW,
28711 IX86_BUILTIN_VPCOMFALSEW,
28712 IX86_BUILTIN_VPCOMTRUEW,
28713
28714 IX86_BUILTIN_VPCOMEQD,
28715 IX86_BUILTIN_VPCOMNED,
28716 IX86_BUILTIN_VPCOMLTD,
28717 IX86_BUILTIN_VPCOMLED,
28718 IX86_BUILTIN_VPCOMGTD,
28719 IX86_BUILTIN_VPCOMGED,
28720 IX86_BUILTIN_VPCOMFALSED,
28721 IX86_BUILTIN_VPCOMTRUED,
28722
28723 IX86_BUILTIN_VPCOMEQQ,
28724 IX86_BUILTIN_VPCOMNEQ,
28725 IX86_BUILTIN_VPCOMLTQ,
28726 IX86_BUILTIN_VPCOMLEQ,
28727 IX86_BUILTIN_VPCOMGTQ,
28728 IX86_BUILTIN_VPCOMGEQ,
28729 IX86_BUILTIN_VPCOMFALSEQ,
28730 IX86_BUILTIN_VPCOMTRUEQ,
28731
28732 /* LWP instructions. */
28733 IX86_BUILTIN_LLWPCB,
28734 IX86_BUILTIN_SLWPCB,
28735 IX86_BUILTIN_LWPVAL32,
28736 IX86_BUILTIN_LWPVAL64,
28737 IX86_BUILTIN_LWPINS32,
28738 IX86_BUILTIN_LWPINS64,
28739
28740 IX86_BUILTIN_CLZS,
28741
28742 /* RTM */
28743 IX86_BUILTIN_XBEGIN,
28744 IX86_BUILTIN_XEND,
28745 IX86_BUILTIN_XABORT,
28746 IX86_BUILTIN_XTEST,
28747
28748 /* BMI instructions. */
28749 IX86_BUILTIN_BEXTR32,
28750 IX86_BUILTIN_BEXTR64,
28751 IX86_BUILTIN_CTZS,
28752
28753 /* TBM instructions. */
28754 IX86_BUILTIN_BEXTRI32,
28755 IX86_BUILTIN_BEXTRI64,
28756
28757 /* BMI2 instructions. */
28758 IX86_BUILTIN_BZHI32,
28759 IX86_BUILTIN_BZHI64,
28760 IX86_BUILTIN_PDEP32,
28761 IX86_BUILTIN_PDEP64,
28762 IX86_BUILTIN_PEXT32,
28763 IX86_BUILTIN_PEXT64,
28764
28765 /* ADX instructions. */
28766 IX86_BUILTIN_ADDCARRYX32,
28767 IX86_BUILTIN_ADDCARRYX64,
28768
28769 /* FSGSBASE instructions. */
28770 IX86_BUILTIN_RDFSBASE32,
28771 IX86_BUILTIN_RDFSBASE64,
28772 IX86_BUILTIN_RDGSBASE32,
28773 IX86_BUILTIN_RDGSBASE64,
28774 IX86_BUILTIN_WRFSBASE32,
28775 IX86_BUILTIN_WRFSBASE64,
28776 IX86_BUILTIN_WRGSBASE32,
28777 IX86_BUILTIN_WRGSBASE64,
28778
28779 /* RDRND instructions. */
28780 IX86_BUILTIN_RDRAND16_STEP,
28781 IX86_BUILTIN_RDRAND32_STEP,
28782 IX86_BUILTIN_RDRAND64_STEP,
28783
28784 /* RDSEED instructions. */
28785 IX86_BUILTIN_RDSEED16_STEP,
28786 IX86_BUILTIN_RDSEED32_STEP,
28787 IX86_BUILTIN_RDSEED64_STEP,
28788
28789 /* F16C instructions. */
28790 IX86_BUILTIN_CVTPH2PS,
28791 IX86_BUILTIN_CVTPH2PS256,
28792 IX86_BUILTIN_CVTPS2PH,
28793 IX86_BUILTIN_CVTPS2PH256,
28794
28795 /* CFString built-in for Darwin. */
28796 IX86_BUILTIN_CFSTRING,
28797
28798 /* Builtins to get CPU type and supported features. */
28799 IX86_BUILTIN_CPU_INIT,
28800 IX86_BUILTIN_CPU_IS,
28801 IX86_BUILTIN_CPU_SUPPORTS,
28802
28803 /* Read/write FLAGS register built-ins. */
28804 IX86_BUILTIN_READ_FLAGS,
28805 IX86_BUILTIN_WRITE_FLAGS,
28806
28807 IX86_BUILTIN_MAX
28808 };
28809
28810 /* Table for the ix86 builtin decls. */
28811 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
28812
28813 /* Table of all of the builtin functions that are possible with different ISAs,
28814 but are waiting to be built until a function is declared to use that
28815 ISA. */
28816 struct builtin_isa {
28817 const char *name; /* function name */
28818 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
28819 HOST_WIDE_INT isa; /* isa_flags this builtin is defined for */
28820 bool const_p; /* true if the declaration is constant */
28821 bool set_and_not_built_p;
28822 };
28823
28824 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
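
/* For illustration (a rough sketch, not an exhaustive description): while an
   AVX-only builtin such as __builtin_ia32_vzeroupper is still deferred, its
   ix86_builtins_isa entry holds roughly

       { "__builtin_ia32_vzeroupper",     name
         VOID_FTYPE_VOID,                 tcode
         OPTION_MASK_ISA_AVX,             isa
         false,                           const_p
         true }                           set_and_not_built_p

   and the matching ix86_builtins slot stays NULL_TREE until the decl is
   actually built.  */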
28825
28826
28827 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
28828 of which isa_flags to use in the ix86_builtins_isa array. Stores the
28829 function decl in the ix86_builtins array. Returns the function decl or
28830 NULL_TREE, if the builtin was not added.
28831
28832 If the front end has a special hook for builtin functions, delay adding
28833 builtin functions that aren't in the current ISA until the ISA is changed
28834 with function specific optimization. Doing so can save about 300K for the
28835 default compiler. When the builtin is expanded, check at that time whether
28836 it is valid.
28837
28838 If the front end doesn't have a special hook, record all builtins, even
28839 those whose instruction set isn't in the current ISA, in case the user uses
28840 function specific options for a different ISA, so that we don't get scope
28841 errors if a builtin is added in the middle of a function scope. */
28842
28843 static inline tree
28844 def_builtin (HOST_WIDE_INT mask, const char *name,
28845 enum ix86_builtin_func_type tcode,
28846 enum ix86_builtins code)
28847 {
28848 tree decl = NULL_TREE;
28849
28850 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
28851 {
28852 ix86_builtins_isa[(int) code].isa = mask;
28853
28854 mask &= ~OPTION_MASK_ISA_64BIT;
28855 if (mask == 0
28856 || (mask & ix86_isa_flags) != 0
28857 || (lang_hooks.builtin_function
28858 == lang_hooks.builtin_function_ext_scope))
28859
28860 {
28861 tree type = ix86_get_builtin_func_type (tcode);
28862 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
28863 NULL, NULL_TREE);
28864 ix86_builtins[(int) code] = decl;
28865 ix86_builtins_isa[(int) code].set_and_not_built_p = false;
28866 }
28867 else
28868 {
28869 ix86_builtins[(int) code] = NULL_TREE;
28870 ix86_builtins_isa[(int) code].tcode = tcode;
28871 ix86_builtins_isa[(int) code].name = name;
28872 ix86_builtins_isa[(int) code].const_p = false;
28873 ix86_builtins_isa[(int) code].set_and_not_built_p = true;
28874 }
28875 }
28876
28877 return decl;
28878 }
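
/* Usage sketch (hypothetical call, mirroring the SSE4.1 movntdqa entry in
   bdesc_special_args below):

       def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_movntdqa",
		    V2DI_FTYPE_PV2DI, IX86_BUILTIN_MOVNTDQA);

   With -msse4.1 in effect the decl is created immediately; otherwise only
   the name, type code and ISA mask are recorded so that the builtin can be
   materialized later by ix86_add_new_builtins.  */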
28879
28880 /* Like def_builtin, but also marks the function decl "const". */
28881
28882 static inline tree
28883 def_builtin_const (HOST_WIDE_INT mask, const char *name,
28884 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
28885 {
28886 tree decl = def_builtin (mask, name, tcode, code);
28887 if (decl)
28888 TREE_READONLY (decl) = 1;
28889 else
28890 ix86_builtins_isa[(int) code].const_p = true;
28891
28892 return decl;
28893 }
28894
28895 /* Add any new builtin functions for a given ISA that may not have been
28896 declared. This saves a bit of space compared to adding all of the
28897 declarations to the tree up front, whether or not they end up being used. */
28898
28899 static void
28900 ix86_add_new_builtins (HOST_WIDE_INT isa)
28901 {
28902 int i;
28903
28904 for (i = 0; i < (int) IX86_BUILTIN_MAX; i++)
28905 {
28906 if ((ix86_builtins_isa[i].isa & isa) != 0
28907 && ix86_builtins_isa[i].set_and_not_built_p)
28908 {
28909 tree decl, type;
28910
28911 /* Don't define the builtin again. */
28912 ix86_builtins_isa[i].set_and_not_built_p = false;
28913
28914 type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
28915 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
28916 type, i, BUILT_IN_MD, NULL,
28917 NULL_TREE);
28918
28919 ix86_builtins[i] = decl;
28920 if (ix86_builtins_isa[i].const_p)
28921 TREE_READONLY (decl) = 1;
28922 }
28923 }
28924 }
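
/* Sketch of when this matters: compiling with plain -msse2, a function like

       __attribute__ ((target ("avx")))
       void
       use_avx (void)
       {
         ... AVX builtins used here ...
       }

   causes the AVX builtin decls that were deferred by def_builtin to be
   created at this point, once the target attribute enables the extra ISA
   bits.  */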
28925
28926 /* Bits for builtin_description.flag. */
28927
28928 /* Set when we don't support the comparison natively, and should
28929 swap the comparison operands in order to support it. */
28930 #define BUILTIN_DESC_SWAP_OPERANDS 1
28931
28932 struct builtin_description
28933 {
28934 const HOST_WIDE_INT mask;
28935 const enum insn_code icode;
28936 const char *const name;
28937 const enum ix86_builtins code;
28938 const enum rtx_code comparison;
28939 const int flag;
28940 };
28941
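/* For example, the first bdesc_comi entry below,

       { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq",
         IX86_BUILTIN_COMIEQSS, UNEQ, 0 },

   reads as: __builtin_ia32_comieq requires SSE, expands through the
   sse_comi insn pattern, is identified internally as IX86_BUILTIN_COMIEQSS,
   and carries the UNEQ rtx comparison code; its flag field is unused.  */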
28942 static const struct builtin_description bdesc_comi[] =
28943 {
28944 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
28945 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
28946 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
28947 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
28948 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
28949 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
28950 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
28951 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
28952 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
28953 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
28954 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
28955 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
28956 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
28957 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
28958 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
28959 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
28960 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
28961 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
28962 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
28963 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
28964 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
28965 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
28966 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
28967 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
28968 };
28969
28970 static const struct builtin_description bdesc_pcmpestr[] =
28971 {
28972 /* SSE4.2 */
28973 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
28974 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
28975 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
28976 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
28977 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
28978 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
28979 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
28980 };
28981
28982 static const struct builtin_description bdesc_pcmpistr[] =
28983 {
28984 /* SSE4.2 */
28985 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
28986 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
28987 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
28988 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
28989 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
28990 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
28991 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
28992 };
28993
28994 /* Special builtins with variable number of arguments. */
28995 static const struct builtin_description bdesc_special_args[] =
28996 {
28997 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
28998 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
28999 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },
29000
29001 /* 80387 (for use internally for atomic compound assignment). */
29002 { 0, CODE_FOR_fnstenv, "__builtin_ia32_fnstenv", IX86_BUILTIN_FNSTENV, UNKNOWN, (int) VOID_FTYPE_PVOID },
29003 { 0, CODE_FOR_fldenv, "__builtin_ia32_fldenv", IX86_BUILTIN_FLDENV, UNKNOWN, (int) VOID_FTYPE_PCVOID },
29004 { 0, CODE_FOR_fnstsw, "__builtin_ia32_fnstsw", IX86_BUILTIN_FNSTSW, UNKNOWN, (int) USHORT_FTYPE_VOID },
29005 { 0, CODE_FOR_fnclex, "__builtin_ia32_fnclex", IX86_BUILTIN_FNCLEX, UNKNOWN, (int) VOID_FTYPE_VOID },
29006
29007 /* MMX */
29008 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
29009
29010 /* 3DNow! */
29011 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
29012
29013 /* FXSR, XSAVE, XSAVEOPT, XSAVEC and XSAVES. */
29014 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxsave", IX86_BUILTIN_FXSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID },
29015 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxrstor", IX86_BUILTIN_FXRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID },
29016 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xsave", IX86_BUILTIN_XSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29017 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xrstor", IX86_BUILTIN_XRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29018 { OPTION_MASK_ISA_XSAVEOPT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt", IX86_BUILTIN_XSAVEOPT, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29019 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xsaves", IX86_BUILTIN_XSAVES, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29020 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xrstors", IX86_BUILTIN_XRSTORS, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29021 { OPTION_MASK_ISA_XSAVEC, CODE_FOR_nothing, "__builtin_ia32_xsavec", IX86_BUILTIN_XSAVEC, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29022
29023 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxsave64", IX86_BUILTIN_FXSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID },
29024 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxrstor64", IX86_BUILTIN_FXRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID },
29025 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsave64", IX86_BUILTIN_XSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29026 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstor64", IX86_BUILTIN_XRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29027 { OPTION_MASK_ISA_XSAVEOPT | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt64", IX86_BUILTIN_XSAVEOPT64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29028 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaves64", IX86_BUILTIN_XSAVES64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29029 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstors64", IX86_BUILTIN_XRSTORS64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29030 { OPTION_MASK_ISA_XSAVEC | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsavec64", IX86_BUILTIN_XSAVEC64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29031
29032 /* SSE */
29033 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storeups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
29034 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
29035 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
29036
29037 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
29038 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
29039 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
29040 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
29041
29042 /* SSE or 3DNow!A */
29043 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
29044 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntq, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
29045
29046 /* SSE2 */
29047 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
29048 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
29049 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storeupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
29050 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storedquv16qi, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
29051 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
29052 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
29053 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
29054 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG },
29055 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
29056 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loaddquv16qi, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
29057
29058 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
29059 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
29060
29061 /* SSE3 */
29062 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
29063
29064 /* SSE4.1 */
29065 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
29066
29067 /* SSE4A */
29068 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
29069 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
29070
29071 /* AVX */
29072 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
29073 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
29074
29075 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
29076 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
29077 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
29078 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
29079 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
29080
29081 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
29082 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
29083 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
29084 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
29085 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loaddquv32qi, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
29086 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storedquv32qi, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
29087 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
29088
29089 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
29090 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
29091 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
29092
29093 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
29094 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
29095 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
29096 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
29097 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
29098 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
29099 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
29100 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },
29101
29102 /* AVX2 */
29103 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_movntdqa, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256, UNKNOWN, (int) V4DI_FTYPE_PV4DI },
29104 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI },
29105 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI },
29106 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd256, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI },
29107 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq256, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI },
29108 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_V4SI },
29109 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_V2DI },
29110 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI },
29111 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI },
29112
29113 /* AVX512F */
29114 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16sf_mask, "__builtin_ia32_compressstoresf512_mask", IX86_BUILTIN_COMPRESSPSSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
29115 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16si_mask, "__builtin_ia32_compressstoresi512_mask", IX86_BUILTIN_PCOMPRESSDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
29116 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8df_mask, "__builtin_ia32_compressstoredf512_mask", IX86_BUILTIN_COMPRESSPDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
29117 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8di_mask, "__builtin_ia32_compressstoredi512_mask", IX86_BUILTIN_PCOMPRESSQSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
29118 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandloadsf512_mask", IX86_BUILTIN_EXPANDPSLOAD512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29119 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandloadsf512_maskz", IX86_BUILTIN_EXPANDPSLOAD512Z, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29120 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandloadsi512_mask", IX86_BUILTIN_PEXPANDDLOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29121 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandloadsi512_maskz", IX86_BUILTIN_PEXPANDDLOAD512Z, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29122 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expandloaddf512_mask", IX86_BUILTIN_EXPANDPDLOAD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29123 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expandloaddf512_maskz", IX86_BUILTIN_EXPANDPDLOAD512Z, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29124 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expandloaddi512_mask", IX86_BUILTIN_PEXPANDQLOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29125 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expandloaddi512_maskz", IX86_BUILTIN_PEXPANDQLOAD512Z, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29126 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv16si_mask, "__builtin_ia32_loaddqusi512_mask", IX86_BUILTIN_LOADDQUSI512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29127 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv8di_mask, "__builtin_ia32_loaddqudi512_mask", IX86_BUILTIN_LOADDQUDI512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29128 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadupd512_mask, "__builtin_ia32_loadupd512_mask", IX86_BUILTIN_LOADUPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29129 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadups512_mask, "__builtin_ia32_loadups512_mask", IX86_BUILTIN_LOADUPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29130 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_loadaps512_mask", IX86_BUILTIN_LOADAPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29131 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32load512_mask", IX86_BUILTIN_MOVDQA32LOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29132 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_loadapd512_mask", IX86_BUILTIN_LOADAPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29133 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64load512_mask", IX86_BUILTIN_MOVDQA64LOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29134 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv16sf, "__builtin_ia32_movntps512", IX86_BUILTIN_MOVNTPS512, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V16SF },
29135 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8df, "__builtin_ia32_movntpd512", IX86_BUILTIN_MOVNTPD512, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V8DF },
29136 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8di, "__builtin_ia32_movntdq512", IX86_BUILTIN_MOVNTDQ512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI },
29137 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntdqa, "__builtin_ia32_movntdqa512", IX86_BUILTIN_MOVNTDQA512, UNKNOWN, (int) V8DI_FTYPE_PV8DI },
29138 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv16si_mask, "__builtin_ia32_storedqusi512_mask", IX86_BUILTIN_STOREDQUSI512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
29139 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv8di_mask, "__builtin_ia32_storedqudi512_mask", IX86_BUILTIN_STOREDQUDI512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
29140 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeupd512_mask, "__builtin_ia32_storeupd512_mask", IX86_BUILTIN_STOREUPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
29141 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask_store, "__builtin_ia32_pmovusqd512mem_mask", IX86_BUILTIN_PMOVUSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
29142 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask_store, "__builtin_ia32_pmovsqd512mem_mask", IX86_BUILTIN_PMOVSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
29143 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask_store, "__builtin_ia32_pmovqd512mem_mask", IX86_BUILTIN_PMOVQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
29144 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovusqw512mem_mask", IX86_BUILTIN_PMOVUSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
29145 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovsqw512mem_mask", IX86_BUILTIN_PMOVSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
29146 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovqw512mem_mask", IX86_BUILTIN_PMOVQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
29147 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovusdw512mem_mask", IX86_BUILTIN_PMOVUSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
29148 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovsdw512mem_mask", IX86_BUILTIN_PMOVSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
29149 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovdw512mem_mask", IX86_BUILTIN_PMOVDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
29150 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovqb512mem_mask", IX86_BUILTIN_PMOVQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
29151 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovusqb512mem_mask", IX86_BUILTIN_PMOVUSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
29152 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovsqb512mem_mask", IX86_BUILTIN_PMOVSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
29153 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovusdb512mem_mask", IX86_BUILTIN_PMOVUSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
29154 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovsdb512mem_mask", IX86_BUILTIN_PMOVSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
29155 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovdb512mem_mask", IX86_BUILTIN_PMOVDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
29156 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeups512_mask, "__builtin_ia32_storeups512_mask", IX86_BUILTIN_STOREUPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
29157 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16sf_mask, "__builtin_ia32_storeaps512_mask", IX86_BUILTIN_STOREAPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
29158 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16si_mask, "__builtin_ia32_movdqa32store512_mask", IX86_BUILTIN_MOVDQA32STORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
29159 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8df_mask, "__builtin_ia32_storeapd512_mask", IX86_BUILTIN_STOREAPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
29160 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8di_mask, "__builtin_ia32_movdqa64store512_mask", IX86_BUILTIN_MOVDQA64STORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
29161
29162 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
29163 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
29164 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
29165 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
29166 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
29167 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
29168
29169 /* FSGSBASE */
29170 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
29171 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
29172 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
29173 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
29174 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
29175 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
29176 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
29177 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
29178
29179 /* RTM */
29180 { OPTION_MASK_ISA_RTM, CODE_FOR_xbegin, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
29181 { OPTION_MASK_ISA_RTM, CODE_FOR_xend, "__builtin_ia32_xend", IX86_BUILTIN_XEND, UNKNOWN, (int) VOID_FTYPE_VOID },
29182 { OPTION_MASK_ISA_RTM, CODE_FOR_xtest, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST, UNKNOWN, (int) INT_FTYPE_VOID },
29183 };
29184
29185 /* Builtins with variable number of arguments. */
29186 static const struct builtin_description bdesc_args[] =
29187 {
29188 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
29189 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
29190 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
29191 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
29192 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
29193 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
29194 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
29195
29196 /* MMX */
29197 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29198 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29199 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29200 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29201 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29202 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29203
29204 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29205 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29206 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29207 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29208 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29209 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29210 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29211 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29212
29213 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29214 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29215
29216 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29217 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29218 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29219 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29220
29221 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29222 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29223 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29224 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29225 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29226 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29227
29228 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29229 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29230 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29231 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29232 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29233 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29234
29235 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
29236 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
29237 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
29238
29239 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
29240
29241 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
29242 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
29243 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
29244 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
29245 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
29246 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
29247
29248 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
29249 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
29250 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
29251 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
29252 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
29253 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
29254
29255 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
29256 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
29257 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
29258 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
29259
29260 /* 3DNow! */
29261 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
29262 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
29263 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
29264 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
29265
29266 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29267 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29268 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29269 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
29270 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
29271 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
29272 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29273 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29274 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29275 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29276 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29277 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29278 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29279 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29280 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29281
29282 /* 3DNow!A */
29283 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
29284 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
29285 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
29286 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
29287 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29288 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29289
29290 /* SSE */
29291 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
29292 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29293 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29294 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29295 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29296 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29297 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
29298 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
29299 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
29300 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
29301 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
29302 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
29303
29304 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29305
29306 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29307 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29308 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29309 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29310 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29311 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29312 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29313 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29314
29315 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
29316 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
29317 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
29318 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29319 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29320 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29321 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
29322 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
29323 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
29324 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29325 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29326 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29327 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
29328 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
29329 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
29330 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29331 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
29332 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
29333 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
29334 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29335
29336 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29337 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29338 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29339 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29340
29341 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29342 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29343 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29344 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29345
29346 { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29347
29348 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29349 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29350 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29351 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29352 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29353
29354 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
29355 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
29356 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },
29357
29358 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
29359
29360 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
29361 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
29362 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
29363
29364 { OPTION_MASK_ISA_SSE, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
29365 { OPTION_MASK_ISA_SSE, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
29366
29367 /* SSE MMX or 3DNow!A */
29368 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29369 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29370 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29371
29372 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29373 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29374 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29375 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29376
29377 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
29378 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
29379
29380 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
29381
29382 /* SSE2 */
29383 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29384
29385 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
29386 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
29387 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
29388 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
29389 { OPTION_MASK_ISA_SSE2, CODE_FOR_floatv4siv4sf2, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
29390
29391 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
29392 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
29393 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
29394 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
29395 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
29396
29397 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
29398
29399 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
29400 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
29401 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
29402 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
29403
29404 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_fix_notruncv4sfv4si, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
29405 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
29406 { OPTION_MASK_ISA_SSE2, CODE_FOR_fix_truncv4sfv4si2, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
29407
29408 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29409 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29410 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29411 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29412 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29413 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29414 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29415 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29416
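/* Comparisons: the "gt"/"ge" builtins reuse the LT/LE compare with
   swapped operands (the _SWAP suffix), and the negated forms map to
   the unordered-or-greater codes UNGE/UNGT.  */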
29417 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
29418 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
29419 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
29420 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29421 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP},
29422 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29423 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
29424 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
29425 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
29426 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29427 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29428 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29429 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
29430 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
29431 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
29432 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29433 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
29434 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
29435 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
29436 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29437
29438 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29439 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29440 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29441 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29442
29443 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29444 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29445 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29446 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29447
29448 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29449
29450 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29451 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29452 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29453
29454 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
29455
29456 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29457 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29458 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29459 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29460 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29461 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29462 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29463 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29464
29465 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29466 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29467 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29468 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29469 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29470 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29471 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29472 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29473
29474 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29475 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29476
29477 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29478 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29479 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29480 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29481
29482 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29483 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29484
29485 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29486 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29487 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29488 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29489 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29490 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29491
29492 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29493 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29494 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29495 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29496
29497 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29498 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29499 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29500 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29501 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29502 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29503 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29504 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29505
29506 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
29507 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
29508 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
29509
29510 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29511 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
29512
29513 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
29514 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_widen_umult_even_v4si, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
29515
29516 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
29517
29518 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
29519 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
29520 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
29521 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
29522
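/* Shift builtins: the *_SI_COUNT signatures take the count as a scalar
   integer, while the *_V*_COUNT signatures take it from a vector operand;
   the pslldqi/psrldqi whole-register byte shifts go through the V1TI
   shift patterns (the _INT_CONVERT suffix marks the mode conversion).  */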
29523 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
29524 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
29525 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
29526 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
29527 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
29528 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
29529 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
29530
29531 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
29532 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
29533 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
29534 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
29535 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
29536 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
29537 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
29538
29539 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
29540 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
29541 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
29542 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
29543
29544 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
29545 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
29546 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
29547
29548 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
29549
29550 { OPTION_MASK_ISA_SSE, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
29551
29552 /* SSE2 MMX */
29553 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
29554 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
29555
29556 /* SSE3 */
29557 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29558 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29559
29560 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29561 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29562 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29563 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29564 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29565 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29566
29567 /* SSSE3 */
29568 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
29569 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
29570 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
29571 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
29572 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
29573 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
29574
29575 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29576 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29577 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29578 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29579 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29580 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29581 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29582 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29583 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29584 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29585 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29586 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29587 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
29588 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
29589 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29590 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29591 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29592 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29593 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29594 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29595 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29596 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29597 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29598 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29599
29600 /* SSSE3. */
29601 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
29602 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
29603
29604 /* SSE4.1 */
29605 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29606 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29607 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
29608 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
29609 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29610 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29611 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29612 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
29613 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
29614 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
29615
29616 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
29617 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
29618 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
29619 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
29620 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
29621 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
29622 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
29623 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
29624 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
29625 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
29626 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
29627 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
29628 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
29629
29630 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
29631 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29632 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29633 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29634 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29635 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29636 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29637 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29638 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29639 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29640 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
29641 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29642
29643 /* SSE4.1 */
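/* The floorpd/ceilpd/truncpd/rintpd entries (and the ps counterparts
   below) reuse the roundpd/roundps patterns; the rounding mode
   (ROUND_FLOOR, ROUND_CEIL, ...) is carried in the comparison-code slot
   as a cast rtx_code.  */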
29644 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
29645 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
29646 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29647 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29648
29649 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
29650 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
29651 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
29652 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },
29653
29654 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
29655 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
29656
29657 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF },
29658 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
29659
29660 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
29661 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
29662 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
29663 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },
29664
29665 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V4SF_ROUND },
29666 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V4SF_ROUND },
29667
29668 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29669 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2_sfix, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX, UNKNOWN, (int) V4SI_FTYPE_V4SF },
29670
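/* ptest: the z/c/nzc variants test ZF, CF and "neither", encoded here
   as the flag comparison codes EQ, LTU and GTU respectively.  */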
29671 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
29672 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
29673 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
29674
29675 /* SSE4.2 */
29676 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29677 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
29678 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
29679 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29680 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
29681
29682 /* SSE4A */
29683 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
29684 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
29685 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
29686 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29687
29688 /* AES */
29689 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
29690 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
29691
29692 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29693 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29694 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29695 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29696
29697 /* PCLMUL */
29698 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
29699
29700 /* AVX */
29701 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29702 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29703 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29704 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29705 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29706 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29707 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29708 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29709 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29710 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29711 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29712 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29713 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29714 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29715 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29716 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29717 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29718 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29719 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29720 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29721 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29722 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29723 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29724 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29725 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29726 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29727
29728 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
29729 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
29730 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
29731 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
29732
29733 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29734 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29735 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
29736 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
29737 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29738 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29739 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29740 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29741 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29742 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29743 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29744 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29745 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29746 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
29747 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
29748 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
29749 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv4siv4df2, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
29750 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv8siv8sf2, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
29751 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
29752 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_fix_notruncv8sfv8si, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
29753 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
29754 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv4dfv4si2, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
29755 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
29756 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv8sfv8si2, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
29757 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29758 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29759 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
29760 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
29761 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
29762 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
29763 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
29764 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
29765 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
29766 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
29767
29768 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29769 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29770 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
29771
29772 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
29773 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29774 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29775 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29776 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29777
29778 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29779
29780 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
29781 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
29782
29783 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
29784 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
29785 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
29786 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },
29787
29788 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
29789 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
29790
29791 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
29792 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
29793
29794 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
29795 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
29796 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
29797 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },
29798
29799 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V8SF_ROUND },
29800 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V8SF_ROUND },
29801
29802 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29803 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2_sfix, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
29804
29805 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29806 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29807 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29808 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29809
29810 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
29811 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
29812 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
29813 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
29814 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
29815 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
29816
29817 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
29818 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
29819 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
29820 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
29821 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
29822 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
29823 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
29824 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
29825 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
29826 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
29827 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
29828 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
29829 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
29830 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
29831 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
29832
29833 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
29834 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
29835
29836 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29837 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29838
29839 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_pack_sfix_v4df, "__builtin_ia32_vec_pack_sfix256", IX86_BUILTIN_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
29840
29841 /* AVX2 */
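  /* In the AVX2 entries the suffix on the prototype enum describes how the
     operands are handled when the builtin is expanded: the _COUNT forms are
     vector shifts whose count is either a scalar integer (..._SI_COUNT /
     ..._INT_COUNT) or a vector register (..._V8HI_COUNT and similar), while
     the _CONVERT forms are expanded through a pattern of a different mode
     (e.g. the V2TI palignr and byte-shift patterns) with the operands
     reinterpreted in that mode.  */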
29842 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT },
29843 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI },
29844 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv16hi2, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI },
29845 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv8si2, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI },
29846 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packssdw, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
29847 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packsswb, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
29848 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packusdw, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
29849 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packuswb, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
29850 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv32qi3, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29851 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv16hi3, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29852 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv8si3, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29853 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv4di3, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29854 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv32qi3, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29855 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv16hi3, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29856 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv32qi3, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29857 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv16hi3, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29858 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_palignrv2ti, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT },
29859 { OPTION_MASK_ISA_AVX2, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29860 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29861 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29862 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29863 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI },
29864 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT },
29865 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv32qi3, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29866 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv16hi3, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29867 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv8si3, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29868 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv4di3, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29869 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv32qi3, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29870 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv16hi3, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29871 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv8si3, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29872 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv4di3, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29873 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddwv16hi3, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29874 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phadddv8si3, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29875 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddswv16hi3, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29876 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubwv16hi3, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29877 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubdv8si3, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29878 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubswv16hi3, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29879 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddubsw256, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
29880 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddwd, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI },
29881 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv32qi3, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29882 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv16hi3, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29883 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv8si3 , "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29884 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv32qi3, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29885 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv16hi3, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29886 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv8si3 , "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29887 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv32qi3, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29888 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv16hi3, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29889 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv8si3 , "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29890 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv32qi3, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29891 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv16hi3, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29892 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv8si3 , "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29893 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmovmskb, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256, UNKNOWN, (int) INT_FTYPE_V32QI },
29894 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv16qiv16hi2, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
29895 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8qiv8si2 , "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
29896 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4qiv4di2 , "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
29897 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8hiv8si2 , "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
29898 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4hiv4di2 , "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
29899 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4siv4di2 , "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
29900 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv16qiv16hi2, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
29901 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8qiv8si2 , "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
29902 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4qiv4di2 , "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
29903 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8hiv8si2 , "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
29904 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2 , "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
29905 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2 , "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
29906 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_smult_even_v8si, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
29907 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmulhrswv16hi3 , "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29908 { OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256" , IX86_BUILTIN_PMULHUW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29909 { OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256" , IX86_BUILTIN_PMULHW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29910 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256" , IX86_BUILTIN_PMULLW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29911 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256" , IX86_BUILTIN_PMULLD256 , UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29912 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_umult_even_v8si, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
29913 { OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29914 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
29915 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29916 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufdv3, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT },
29917 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufhwv3, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
29918 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshuflwv3, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
29919 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv32qi3, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29920 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv16hi3, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29921 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv8si3 , "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29922 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlv2ti3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
29923 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
29924 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
29925 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
29926 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
29927 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
29928 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
29929 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
29930 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
29931 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
29932 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
29933 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrv2ti3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
29934 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
29935 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
29936 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
29937 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
29938 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
29939 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
29940 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv32qi3, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29941 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv16hi3, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29942 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv8si3, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29943 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv4di3, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29944 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv32qi3, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29945 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv16hi3, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29946 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv32qi3, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29947 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv16hi3, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29948 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv32qi, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29949 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv16hi, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29950 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv8si, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29951 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv4di, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29952 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv32qi, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29953 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv16hi, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29954 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv8si, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29955 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv4di, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29956 { OPTION_MASK_ISA_AVX2, CODE_FOR_xorv4di3, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29957 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4sf, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29958 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv8sf, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256, UNKNOWN, (int) V8SF_FTYPE_V4SF },
29959 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4df, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256, UNKNOWN, (int) V4DF_FTYPE_V2DF },
29960 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vbroadcasti128_v4di, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
29961 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv4si, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
29962 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv8si, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
29963 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv32qi, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256, UNKNOWN, (int) V32QI_FTYPE_V16QI },
29964 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16hi, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256, UNKNOWN, (int) V16HI_FTYPE_V8HI },
29965 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8si, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256, UNKNOWN, (int) V8SI_FTYPE_V4SI },
29966 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4di, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
29967 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16qi, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
29968 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8hi, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
29969 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4si, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
29970 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
29971 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29972 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
29973 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
29974 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
29975 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
29976 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_extracti128, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
29977 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_inserti128, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
29978 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29979 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29980 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29981 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4si, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29982 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv8si, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29983 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv4si, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29984 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4di, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29985 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv2di, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29986 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv8si, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29987 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4si, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29988
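  /* LZCNT */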
29989 { OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
29990
29991 /* BMI */
29992 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29993 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
29994 { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
29995
29996 /* TBM */
29997 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29998 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
29999
30000 /* F16C */
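  /* Half-precision conversions; the INT operand of the vcvtps2ph builtins
     is the rounding-control immediate of the instruction.  */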
30001 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
30002 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
30003 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
30004 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
30005
30006 /* BMI2 */
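  /* bzhi, pdep and pext in 32-bit and 64-bit scalar forms; these back the
     _bzhi_u32/_pdep_u32/_pext_u32 (and _u64) intrinsics in bmi2intrin.h.  */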
30007 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
30008 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
30009 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
30010 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
30011 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
30012 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
30013
30014 /* AVX512F */
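  /* For the AVX512F entries the _mask builtins take the value to merge into
     and a write mask (the trailing ..._V16SI_HI / ..._V8DI_QI operands of
     the prototype) as their last arguments; the _maskz variants zero the
     masked-out elements instead of merging.  */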
30015 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv16si_mask, "__builtin_ia32_alignd512_mask", IX86_BUILTIN_ALIGND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
30016 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv8di_mask, "__builtin_ia32_alignq512_mask", IX86_BUILTIN_ALIGNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
30017 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16si, "__builtin_ia32_blendmd_512_mask", IX86_BUILTIN_BLENDMD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30018 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8df, "__builtin_ia32_blendmpd_512_mask", IX86_BUILTIN_BLENDMPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30019 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16sf, "__builtin_ia32_blendmps_512_mask", IX86_BUILTIN_BLENDMPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30020 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8di, "__builtin_ia32_blendmq_512_mask", IX86_BUILTIN_BLENDMQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30021 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x4_512", IX86_BUILTIN_BROADCASTF32X4_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
30022 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8df_mask, "__builtin_ia32_broadcastf64x4_512", IX86_BUILTIN_BROADCASTF64X4_512, UNKNOWN, (int) V8DF_FTYPE_V4DF_V8DF_QI },
30023 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16si_mask, "__builtin_ia32_broadcasti32x4_512", IX86_BUILTIN_BROADCASTI32X4_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
30024 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8di_mask, "__builtin_ia32_broadcasti64x4_512", IX86_BUILTIN_BROADCASTI64X4_512, UNKNOWN, (int) V8DI_FTYPE_V4DI_V8DI_QI },
30025 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8df_mask, "__builtin_ia32_broadcastsd512", IX86_BUILTIN_BROADCASTSD512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_QI },
30026 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16sf_mask, "__builtin_ia32_broadcastss512", IX86_BUILTIN_BROADCASTSS512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
30027 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16si3_mask, "__builtin_ia32_cmpd512_mask", IX86_BUILTIN_CMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
30028 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8di3_mask, "__builtin_ia32_cmpq512_mask", IX86_BUILTIN_CMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
30029 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8df_mask, "__builtin_ia32_compressdf512_mask", IX86_BUILTIN_COMPRESSPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30030 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16sf_mask, "__builtin_ia32_compresssf512_mask", IX86_BUILTIN_COMPRESSPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30031 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv8siv8df2_mask, "__builtin_ia32_cvtdq2pd512_mask", IX86_BUILTIN_CVTDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
30032 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtps2ph512_mask, "__builtin_ia32_vcvtps2ph512_mask", IX86_BUILTIN_CVTPS2PH512, UNKNOWN, (int) V16HI_FTYPE_V16SF_INT_V16HI_HI },
30033 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv8siv8df_mask, "__builtin_ia32_cvtudq2pd512_mask", IX86_BUILTIN_CVTUDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
30034 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2sd32, "__builtin_ia32_cvtusi2sd32", IX86_BUILTIN_CVTUSI2SD32, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT },
30035 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expanddf512_mask", IX86_BUILTIN_EXPANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30036 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expanddf512_maskz", IX86_BUILTIN_EXPANDPD512Z, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30037 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandsf512_mask", IX86_BUILTIN_EXPANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30038 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandsf512_maskz", IX86_BUILTIN_EXPANDPS512Z, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30039 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf32x4_mask, "__builtin_ia32_extractf32x4_mask", IX86_BUILTIN_EXTRACTF32X4, UNKNOWN, (int) V4SF_FTYPE_V16SF_INT_V4SF_QI },
30040 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf64x4_mask, "__builtin_ia32_extractf64x4_mask", IX86_BUILTIN_EXTRACTF64X4, UNKNOWN, (int) V4DF_FTYPE_V8DF_INT_V4DF_QI },
30041 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti32x4_mask, "__builtin_ia32_extracti32x4_mask", IX86_BUILTIN_EXTRACTI32X4, UNKNOWN, (int) V4SI_FTYPE_V16SI_INT_V4SI_QI },
30042 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti64x4_mask, "__builtin_ia32_extracti64x4_mask", IX86_BUILTIN_EXTRACTI64X4, UNKNOWN, (int) V4DI_FTYPE_V8DI_INT_V4DI_QI },
30043 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf32x4_mask, "__builtin_ia32_insertf32x4_mask", IX86_BUILTIN_INSERTF32X4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI },
30044 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf64x4_mask, "__builtin_ia32_insertf64x4_mask", IX86_BUILTIN_INSERTF64X4, UNKNOWN, (int) V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI },
30045 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti32x4_mask, "__builtin_ia32_inserti32x4_mask", IX86_BUILTIN_INSERTI32X4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI },
30046 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti64x4_mask, "__builtin_ia32_inserti64x4_mask", IX86_BUILTIN_INSERTI64X4, UNKNOWN, (int) V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI },
30047 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_movapd512_mask", IX86_BUILTIN_MOVAPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30048 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_movaps512_mask", IX86_BUILTIN_MOVAPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30049 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movddup512_mask, "__builtin_ia32_movddup512_mask", IX86_BUILTIN_MOVDDUP512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30050 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32_512_mask", IX86_BUILTIN_MOVDQA32_512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30051 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64_512_mask", IX86_BUILTIN_MOVDQA64_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30052 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movshdup512_mask, "__builtin_ia32_movshdup512_mask", IX86_BUILTIN_MOVSHDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30053 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movsldup512_mask, "__builtin_ia32_movsldup512_mask", IX86_BUILTIN_MOVSLDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30054 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv16si2_mask, "__builtin_ia32_pabsd512_mask", IX86_BUILTIN_PABSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30055 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv8di2_mask, "__builtin_ia32_pabsq512_mask", IX86_BUILTIN_PABSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30056 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16si3_mask, "__builtin_ia32_paddd512_mask", IX86_BUILTIN_PADDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30057 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8di3_mask, "__builtin_ia32_paddq512_mask", IX86_BUILTIN_PADDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30058 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv16si3_mask, "__builtin_ia32_pandd512_mask", IX86_BUILTIN_PANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30059 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv16si3_mask, "__builtin_ia32_pandnd512_mask", IX86_BUILTIN_PANDND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30060 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv8di3_mask, "__builtin_ia32_pandnq512_mask", IX86_BUILTIN_PANDNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30061 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv8di3_mask, "__builtin_ia32_pandq512_mask", IX86_BUILTIN_PANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30062 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16si_mask, "__builtin_ia32_pbroadcastd512", IX86_BUILTIN_PBROADCASTD512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
30063 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv16si_mask, "__builtin_ia32_pbroadcastd512_gpr_mask", IX86_BUILTIN_PBROADCASTD512_GPR, UNKNOWN, (int) V16SI_FTYPE_SI_V16SI_HI },
30064 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv8di, "__builtin_ia32_broadcastmb512", IX86_BUILTIN_PBROADCASTMB512, UNKNOWN, (int) V8DI_FTYPE_QI },
30065 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv16si, "__builtin_ia32_broadcastmw512", IX86_BUILTIN_PBROADCASTMW512, UNKNOWN, (int) V16SI_FTYPE_HI },
30066 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8di_mask, "__builtin_ia32_pbroadcastq512", IX86_BUILTIN_PBROADCASTQ512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI },
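  /* Broadcasting a DImode general register is only possible in 64-bit mode
     (note the OPTION_MASK_ISA_64BIT in the first entry's mask); the _mem
     variant lets 32-bit code broadcast a 64-bit scalar from memory.  */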
30067 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vec_dup_gprv8di_mask, "__builtin_ia32_pbroadcastq512_gpr_mask", IX86_BUILTIN_PBROADCASTQ512_GPR, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
30068 { OPTION_MASK_ISA_AVX512F & ~OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vec_dup_memv8di_mask, "__builtin_ia32_pbroadcastq512_mem_mask", IX86_BUILTIN_PBROADCASTQ512_MEM, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
30069 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv16si3_mask, "__builtin_ia32_pcmpeqd512_mask", IX86_BUILTIN_PCMPEQD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
30070 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv8di3_mask, "__builtin_ia32_pcmpeqq512_mask", IX86_BUILTIN_PCMPEQQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
30071 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv16si3_mask, "__builtin_ia32_pcmpgtd512_mask", IX86_BUILTIN_PCMPGTD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
30072 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv8di3_mask, "__builtin_ia32_pcmpgtq512_mask", IX86_BUILTIN_PCMPGTQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
30073 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16si_mask, "__builtin_ia32_compresssi512_mask", IX86_BUILTIN_PCOMPRESSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30074 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8di_mask, "__builtin_ia32_compressdi512_mask", IX86_BUILTIN_PCOMPRESSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30075 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandsi512_mask", IX86_BUILTIN_PEXPANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30076 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandsi512_maskz", IX86_BUILTIN_PEXPANDD512Z, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30077 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expanddi512_mask", IX86_BUILTIN_PEXPANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30078 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expanddi512_maskz", IX86_BUILTIN_PEXPANDQ512Z, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30079 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16si3_mask, "__builtin_ia32_pmaxsd512_mask", IX86_BUILTIN_PMAXSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30080 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8di3_mask, "__builtin_ia32_pmaxsq512_mask", IX86_BUILTIN_PMAXSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30081 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv16si3_mask, "__builtin_ia32_pmaxud512_mask", IX86_BUILTIN_PMAXUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30082 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv8di3_mask, "__builtin_ia32_pmaxuq512_mask", IX86_BUILTIN_PMAXUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30083 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16si3_mask, "__builtin_ia32_pminsd512_mask", IX86_BUILTIN_PMINSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30084 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8di3_mask, "__builtin_ia32_pminsq512_mask", IX86_BUILTIN_PMINSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30085 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv16si3_mask, "__builtin_ia32_pminud512_mask", IX86_BUILTIN_PMINUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30086 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv8di3_mask, "__builtin_ia32_pminuq512_mask", IX86_BUILTIN_PMINUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30087 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask, "__builtin_ia32_pmovdb512_mask", IX86_BUILTIN_PMOVDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
30088 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask, "__builtin_ia32_pmovdw512_mask", IX86_BUILTIN_PMOVDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
30089 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask, "__builtin_ia32_pmovqb512_mask", IX86_BUILTIN_PMOVQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
30090 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask, "__builtin_ia32_pmovqd512_mask", IX86_BUILTIN_PMOVQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
30091 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask, "__builtin_ia32_pmovqw512_mask", IX86_BUILTIN_PMOVQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
30092 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask, "__builtin_ia32_pmovsdb512_mask", IX86_BUILTIN_PMOVSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
30093 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask, "__builtin_ia32_pmovsdw512_mask", IX86_BUILTIN_PMOVSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
30094 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask, "__builtin_ia32_pmovsqb512_mask", IX86_BUILTIN_PMOVSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
30095 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask, "__builtin_ia32_pmovsqd512_mask", IX86_BUILTIN_PMOVSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
30096 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask, "__builtin_ia32_pmovsqw512_mask", IX86_BUILTIN_PMOVSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
30097 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16qiv16si2_mask, "__builtin_ia32_pmovsxbd512_mask", IX86_BUILTIN_PMOVSXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
30098 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8qiv8di2_mask, "__builtin_ia32_pmovsxbq512_mask", IX86_BUILTIN_PMOVSXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
30099 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8siv8di2_mask, "__builtin_ia32_pmovsxdq512_mask", IX86_BUILTIN_PMOVSXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
30100 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16hiv16si2_mask, "__builtin_ia32_pmovsxwd512_mask", IX86_BUILTIN_PMOVSXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
30101 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8hiv8di2_mask, "__builtin_ia32_pmovsxwq512_mask", IX86_BUILTIN_PMOVSXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
30102 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask, "__builtin_ia32_pmovusdb512_mask", IX86_BUILTIN_PMOVUSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
30103 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask, "__builtin_ia32_pmovusdw512_mask", IX86_BUILTIN_PMOVUSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
30104 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask, "__builtin_ia32_pmovusqb512_mask", IX86_BUILTIN_PMOVUSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
30105 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask, "__builtin_ia32_pmovusqd512_mask", IX86_BUILTIN_PMOVUSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
30106 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask, "__builtin_ia32_pmovusqw512_mask", IX86_BUILTIN_PMOVUSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
30107 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16qiv16si2_mask, "__builtin_ia32_pmovzxbd512_mask", IX86_BUILTIN_PMOVZXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
30108 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8qiv8di2_mask, "__builtin_ia32_pmovzxbq512_mask", IX86_BUILTIN_PMOVZXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
30109 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8siv8di2_mask, "__builtin_ia32_pmovzxdq512_mask", IX86_BUILTIN_PMOVZXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
30110 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16hiv16si2_mask, "__builtin_ia32_pmovzxwd512_mask", IX86_BUILTIN_PMOVZXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
30111 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8hiv8di2_mask, "__builtin_ia32_pmovzxwq512_mask", IX86_BUILTIN_PMOVZXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
30112 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_smult_even_v16si_mask, "__builtin_ia32_pmuldq512_mask", IX86_BUILTIN_PMULDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
30113 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16si3_mask, "__builtin_ia32_pmulld512_mask" , IX86_BUILTIN_PMULLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30114 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_umult_even_v16si_mask, "__builtin_ia32_pmuludq512_mask", IX86_BUILTIN_PMULUDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
30115 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv16si3_mask, "__builtin_ia32_pord512_mask", IX86_BUILTIN_PORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30116 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv8di3_mask, "__builtin_ia32_porq512_mask", IX86_BUILTIN_PORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30117 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv16si_mask, "__builtin_ia32_prold512_mask", IX86_BUILTIN_PROLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30118 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv8di_mask, "__builtin_ia32_prolq512_mask", IX86_BUILTIN_PROLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30119 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv16si_mask, "__builtin_ia32_prolvd512_mask", IX86_BUILTIN_PROLVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30120 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv8di_mask, "__builtin_ia32_prolvq512_mask", IX86_BUILTIN_PROLVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30121 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv16si_mask, "__builtin_ia32_prord512_mask", IX86_BUILTIN_PRORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30122 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv8di_mask, "__builtin_ia32_prorq512_mask", IX86_BUILTIN_PRORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30123 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv16si_mask, "__builtin_ia32_prorvd512_mask", IX86_BUILTIN_PRORVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30124 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv8di_mask, "__builtin_ia32_prorvq512_mask", IX86_BUILTIN_PRORVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30125 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pshufdv3_mask, "__builtin_ia32_pshufd512_mask", IX86_BUILTIN_PSHUFD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30126 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslld512_mask", IX86_BUILTIN_PSLLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
30127 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslldi512_mask", IX86_BUILTIN_PSLLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30128 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllq512_mask", IX86_BUILTIN_PSLLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
30129 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllqi512_mask", IX86_BUILTIN_PSLLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30130 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv16si_mask, "__builtin_ia32_psllv16si_mask", IX86_BUILTIN_PSLLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30131 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv8di_mask, "__builtin_ia32_psllv8di_mask", IX86_BUILTIN_PSLLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30132 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psrad512_mask", IX86_BUILTIN_PSRAD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
30133 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psradi512_mask", IX86_BUILTIN_PSRADI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30134 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraq512_mask", IX86_BUILTIN_PSRAQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
30135 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraqi512_mask", IX86_BUILTIN_PSRAQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30136 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv16si_mask, "__builtin_ia32_psrav16si_mask", IX86_BUILTIN_PSRAVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30137 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv8di_mask, "__builtin_ia32_psrav8di_mask", IX86_BUILTIN_PSRAVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30138 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrld512_mask", IX86_BUILTIN_PSRLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
30139 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrldi512_mask", IX86_BUILTIN_PSRLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30140 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlq512_mask", IX86_BUILTIN_PSRLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
30141 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlqi512_mask", IX86_BUILTIN_PSRLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30142 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv16si_mask, "__builtin_ia32_psrlv16si_mask", IX86_BUILTIN_PSRLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30143 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv8di_mask, "__builtin_ia32_psrlv8di_mask", IX86_BUILTIN_PSRLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30144 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16si3_mask, "__builtin_ia32_psubd512_mask", IX86_BUILTIN_PSUBD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30145 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8di3_mask, "__builtin_ia32_psubq512_mask", IX86_BUILTIN_PSUBQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30146 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv16si3_mask, "__builtin_ia32_ptestmd512", IX86_BUILTIN_PTESTMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
30147 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv8di3_mask, "__builtin_ia32_ptestmq512", IX86_BUILTIN_PTESTMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
30148 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv16si3_mask, "__builtin_ia32_ptestnmd512", IX86_BUILTIN_PTESTNMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
30149 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv8di3_mask, "__builtin_ia32_ptestnmq512", IX86_BUILTIN_PTESTNMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
30150 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv16si_mask, "__builtin_ia32_punpckhdq512_mask", IX86_BUILTIN_PUNPCKHDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30151 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv8di_mask, "__builtin_ia32_punpckhqdq512_mask", IX86_BUILTIN_PUNPCKHQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30152 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv16si_mask, "__builtin_ia32_punpckldq512_mask", IX86_BUILTIN_PUNPCKLDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30153 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv8di_mask, "__builtin_ia32_punpcklqdq512_mask", IX86_BUILTIN_PUNPCKLQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30154 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv16si3_mask, "__builtin_ia32_pxord512_mask", IX86_BUILTIN_PXORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30155 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv8di3_mask, "__builtin_ia32_pxorq512_mask", IX86_BUILTIN_PXORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30156 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v8df_mask, "__builtin_ia32_rcp14pd512_mask", IX86_BUILTIN_RCP14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30157 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v16sf_mask, "__builtin_ia32_rcp14ps512_mask", IX86_BUILTIN_RCP14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30158 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v2df, "__builtin_ia32_rcp14sd", IX86_BUILTIN_RCP14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
30159 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v4sf, "__builtin_ia32_rcp14ss", IX86_BUILTIN_RCP14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30160 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v8df_mask, "__builtin_ia32_rsqrt14pd512_mask", IX86_BUILTIN_RSQRT14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30161 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v16sf_mask, "__builtin_ia32_rsqrt14ps512_mask", IX86_BUILTIN_RSQRT14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30162 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v2df, "__builtin_ia32_rsqrt14sd", IX86_BUILTIN_RSQRT14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
30163 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v4sf, "__builtin_ia32_rsqrt14ss", IX86_BUILTIN_RSQRT14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30164 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufpd512_mask, "__builtin_ia32_shufpd512_mask", IX86_BUILTIN_SHUFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
30165 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufps512_mask, "__builtin_ia32_shufps512_mask", IX86_BUILTIN_SHUFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
30166 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_mask", IX86_BUILTIN_SHUF_F32x4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
30167 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_mask", IX86_BUILTIN_SHUF_F64x2, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
30168 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_mask", IX86_BUILTIN_SHUF_I32x4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
30169 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_mask", IX86_BUILTIN_SHUF_I64x2, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
30170 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv16si3_mask, "__builtin_ia32_ucmpd512_mask", IX86_BUILTIN_UCMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
30171 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv8di3_mask, "__builtin_ia32_ucmpq512_mask", IX86_BUILTIN_UCMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
30172 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhpd512_mask, "__builtin_ia32_unpckhpd512_mask", IX86_BUILTIN_UNPCKHPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
30173 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhps512_mask, "__builtin_ia32_unpckhps512_mask", IX86_BUILTIN_UNPCKHPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
30174 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklpd512_mask, "__builtin_ia32_unpcklpd512_mask", IX86_BUILTIN_UNPCKLPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
30175 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklps512_mask, "__builtin_ia32_unpcklps512_mask", IX86_BUILTIN_UNPCKLPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
30176 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv16si2_mask, "__builtin_ia32_vplzcntd_512_mask", IX86_BUILTIN_VPCLZCNTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30177 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv8di2_mask, "__builtin_ia32_vplzcntq_512_mask", IX86_BUILTIN_VPCLZCNTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30178 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv16si_mask, "__builtin_ia32_vpconflictsi_512_mask", IX86_BUILTIN_VPCONFLICTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30179 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv8di_mask, "__builtin_ia32_vpconflictdi_512_mask", IX86_BUILTIN_VPCONFLICTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30180 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8df_mask, "__builtin_ia32_permdf512_mask", IX86_BUILTIN_VPERMDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
30181 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8di_mask, "__builtin_ia32_permdi512_mask", IX86_BUILTIN_VPERMDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30182 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16si3_mask, "__builtin_ia32_vpermi2vard512_mask", IX86_BUILTIN_VPERMI2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30183 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8df3_mask, "__builtin_ia32_vpermi2varpd512_mask", IX86_BUILTIN_VPERMI2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
30184 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16sf3_mask, "__builtin_ia32_vpermi2varps512_mask", IX86_BUILTIN_VPERMI2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
30185 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8di3_mask, "__builtin_ia32_vpermi2varq512_mask", IX86_BUILTIN_VPERMI2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30186 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv8df_mask, "__builtin_ia32_vpermilpd512_mask", IX86_BUILTIN_VPERMILPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
30187 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv16sf_mask, "__builtin_ia32_vpermilps512_mask", IX86_BUILTIN_VPERMILPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI },
30188 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv8df3_mask, "__builtin_ia32_vpermilvarpd512_mask", IX86_BUILTIN_VPERMILVARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
30189 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv16sf3_mask, "__builtin_ia32_vpermilvarps512_mask", IX86_BUILTIN_VPERMILVARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
30190 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_mask, "__builtin_ia32_vpermt2vard512_mask", IX86_BUILTIN_VPERMT2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30191 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_maskz, "__builtin_ia32_vpermt2vard512_maskz", IX86_BUILTIN_VPERMT2VARD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30192 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_mask, "__builtin_ia32_vpermt2varpd512_mask", IX86_BUILTIN_VPERMT2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
30193 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_maskz, "__builtin_ia32_vpermt2varpd512_maskz", IX86_BUILTIN_VPERMT2VARPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
30194 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_mask, "__builtin_ia32_vpermt2varps512_mask", IX86_BUILTIN_VPERMT2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
30195 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_maskz, "__builtin_ia32_vpermt2varps512_maskz", IX86_BUILTIN_VPERMT2VARPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
30196 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_mask, "__builtin_ia32_vpermt2varq512_mask", IX86_BUILTIN_VPERMT2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30197 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_maskz, "__builtin_ia32_vpermt2varq512_maskz", IX86_BUILTIN_VPERMT2VARQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30198 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8df_mask, "__builtin_ia32_permvardf512_mask", IX86_BUILTIN_VPERMVARDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
30199 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8di_mask, "__builtin_ia32_permvardi512_mask", IX86_BUILTIN_VPERMVARDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30200 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16sf_mask, "__builtin_ia32_permvarsf512_mask", IX86_BUILTIN_VPERMVARSF512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
30201 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16si_mask, "__builtin_ia32_permvarsi512_mask", IX86_BUILTIN_VPERMVARSI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30202 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_mask, "__builtin_ia32_pternlogd512_mask", IX86_BUILTIN_VTERNLOGD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
30203 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_maskz, "__builtin_ia32_pternlogd512_maskz", IX86_BUILTIN_VTERNLOGD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
30204 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_mask, "__builtin_ia32_pternlogq512_mask", IX86_BUILTIN_VTERNLOGQ512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
30205 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_maskz, "__builtin_ia32_pternlogq512_maskz", IX86_BUILTIN_VTERNLOGQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
30206
30207 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv16sf3, "__builtin_ia32_copysignps512", IX86_BUILTIN_CPYSGNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF },
30208 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv8df3, "__builtin_ia32_copysignpd512", IX86_BUILTIN_CPYSGNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF },
30209 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2, "__builtin_ia32_sqrtpd512", IX86_BUILTIN_SQRTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF },
30210 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sqrtv16sf2, "__builtin_ia32_sqrtps512", IX86_BUILTIN_SQRTPS_NR512, UNKNOWN, (int) V16SF_FTYPE_V16SF },
30211 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf, "__builtin_ia32_exp2ps", IX86_BUILTIN_EXP2PS, UNKNOWN, (int) V16SF_FTYPE_V16SF },
30212 { OPTION_MASK_ISA_AVX512F, CODE_FOR_roundv8df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix512", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF },
30213 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_floorpd_vec_pack_sfix512", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
30214 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_ceilpd_vec_pack_sfix512", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
30215
30216 /* Mask arithmetic operations */
30217 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andhi3, "__builtin_ia32_kandhi", IX86_BUILTIN_KAND16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30218 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kandnhi, "__builtin_ia32_kandnhi", IX86_BUILTIN_KANDN16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30219 { OPTION_MASK_ISA_AVX512F, CODE_FOR_one_cmplhi2, "__builtin_ia32_knothi", IX86_BUILTIN_KNOT16, UNKNOWN, (int) HI_FTYPE_HI },
30220 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorhi3, "__builtin_ia32_korhi", IX86_BUILTIN_KOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30221 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestchi, "__builtin_ia32_kortestchi", IX86_BUILTIN_KORTESTC16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30222 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestzhi, "__builtin_ia32_kortestzhi", IX86_BUILTIN_KORTESTZ16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30223 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kunpckhi, "__builtin_ia32_kunpckhi", IX86_BUILTIN_KUNPCKBW, UNKNOWN, (int) HI_FTYPE_HI_HI },
30224 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kxnorhi, "__builtin_ia32_kxnorhi", IX86_BUILTIN_KXNOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30225 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorhi3, "__builtin_ia32_kxorhi", IX86_BUILTIN_KXOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30226 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kmovw, "__builtin_ia32_kmov16", IX86_BUILTIN_KMOV16, UNKNOWN, (int) HI_FTYPE_HI },
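  /* Illustrative sketch, assuming the avx512fintrin.h wrappers: each entry
     above binds a __builtin_ia32_k* builtin to an ordinary HImode logic
     pattern, so a 16-bit mask intrinsic reduces to plain integer RTL.  E.g.
     _mm512_kand (a, b) is (__mmask16) __builtin_ia32_kandhi (a, b), which
     expands through the andhi3 pattern named by CODE_FOR_andhi3 above.  */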
30227
30228 /* SHA */
30229 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg1, 0, IX86_BUILTIN_SHA1MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30230 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg2, 0, IX86_BUILTIN_SHA1MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30231 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1nexte, 0, IX86_BUILTIN_SHA1NEXTE, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30232 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1rnds4, 0, IX86_BUILTIN_SHA1RNDS4, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
30233 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg1, 0, IX86_BUILTIN_SHA256MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30234 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg2, 0, IX86_BUILTIN_SHA256MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30235 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256rnds2, 0, IX86_BUILTIN_SHA256RNDS2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
30236 };
30237
30238 /* Builtins with rounding support. */
30239 static const struct builtin_description bdesc_round_args[] =
30240 {
30241 /* AVX512F */
30242 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8df3_mask_round, "__builtin_ia32_addpd512_mask", IX86_BUILTIN_ADDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30243 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16sf3_mask_round, "__builtin_ia32_addps512_mask", IX86_BUILTIN_ADDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30244 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmaddv2df3_round, "__builtin_ia32_addsd_round", IX86_BUILTIN_ADDSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30245 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmaddv4sf3_round, "__builtin_ia32_addss_round", IX86_BUILTIN_ADDSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30246 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8df3_mask_round, "__builtin_ia32_cmppd512_mask", IX86_BUILTIN_CMPPD512, UNKNOWN, (int) QI_FTYPE_V8DF_V8DF_INT_QI_INT },
30247 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16sf3_mask_round, "__builtin_ia32_cmpps512_mask", IX86_BUILTIN_CMPPS512, UNKNOWN, (int) HI_FTYPE_V16SF_V16SF_INT_HI_INT },
30248 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv2df3_mask_round, "__builtin_ia32_cmpsd_mask", IX86_BUILTIN_CMPSD_MASK, UNKNOWN, (int) QI_FTYPE_V2DF_V2DF_INT_QI_INT },
30249 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv4sf3_mask_round, "__builtin_ia32_cmpss_mask", IX86_BUILTIN_CMPSS_MASK, UNKNOWN, (int) QI_FTYPE_V4SF_V4SF_INT_QI_INT },
30250 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_comi_round, "__builtin_ia32_vcomisd", IX86_BUILTIN_COMIDF, UNKNOWN, (int) INT_FTYPE_V2DF_V2DF_INT_INT },
30251 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_comi_round, "__builtin_ia32_vcomiss", IX86_BUILTIN_COMISF, UNKNOWN, (int) INT_FTYPE_V4SF_V4SF_INT_INT },
30252 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv16siv16sf2_mask_round, "__builtin_ia32_cvtdq2ps512_mask", IX86_BUILTIN_CVTDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
30253 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2dq512_mask_round, "__builtin_ia32_cvtpd2dq512_mask", IX86_BUILTIN_CVTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30254 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2ps512_mask_round, "__builtin_ia32_cvtpd2ps512_mask", IX86_BUILTIN_CVTPD2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DF_V8SF_QI_INT },
30255 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv8dfv8si_mask_round, "__builtin_ia32_cvtpd2udq512_mask", IX86_BUILTIN_CVTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30256 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtph2ps512_mask_round, "__builtin_ia32_vcvtph2ps512_mask", IX86_BUILTIN_CVTPH2PS512, UNKNOWN, (int) V16SF_FTYPE_V16HI_V16SF_HI_INT },
30257 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2dq512_mask", IX86_BUILTIN_CVTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30258 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtps2pd512_mask_round, "__builtin_ia32_cvtps2pd512_mask", IX86_BUILTIN_CVTPS2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SF_V8DF_QI_INT },
30259 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2udq512_mask", IX86_BUILTIN_CVTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30260 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2ss_round, "__builtin_ia32_cvtsd2ss_round", IX86_BUILTIN_CVTSD2SS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF_INT },
30261 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq_round, "__builtin_ia32_cvtsi2sd64", IX86_BUILTIN_CVTSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT64_INT },
30262 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtsi2ss_round, "__builtin_ia32_cvtsi2ss32", IX86_BUILTIN_CVTSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_INT },
30263 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq_round, "__builtin_ia32_cvtsi2ss64", IX86_BUILTIN_CVTSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT64_INT },
30264 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtss2sd_round, "__builtin_ia32_cvtss2sd_round", IX86_BUILTIN_CVTSS2SD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF_INT },
30265 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2dq512_mask", IX86_BUILTIN_CVTTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30266 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2udq512_mask", IX86_BUILTIN_CVTTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30267 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2dq512_mask", IX86_BUILTIN_CVTTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30268 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2udq512_mask", IX86_BUILTIN_CVTTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30269 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv16siv16sf2_mask_round, "__builtin_ia32_cvtudq2ps512_mask", IX86_BUILTIN_CVTUDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
30270 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2sd64_round, "__builtin_ia32_cvtusi2sd64", IX86_BUILTIN_CVTUSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT64_INT },
30271 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2ss32_round, "__builtin_ia32_cvtusi2ss32", IX86_BUILTIN_CVTUSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT_INT },
30272 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2ss64_round, "__builtin_ia32_cvtusi2ss64", IX86_BUILTIN_CVTUSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT64_INT },
30273 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv8df3_mask_round, "__builtin_ia32_divpd512_mask", IX86_BUILTIN_DIVPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30274 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv16sf3_mask_round, "__builtin_ia32_divps512_mask", IX86_BUILTIN_DIVPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30275 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmdivv2df3_round, "__builtin_ia32_divsd_round", IX86_BUILTIN_DIVSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30276 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmdivv4sf3_round, "__builtin_ia32_divss_round", IX86_BUILTIN_DIVSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30277 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_mask_round, "__builtin_ia32_fixupimmpd512_mask", IX86_BUILTIN_FIXUPIMMPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
30278 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_maskz_round, "__builtin_ia32_fixupimmpd512_maskz", IX86_BUILTIN_FIXUPIMMPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
30279 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_mask_round, "__builtin_ia32_fixupimmps512_mask", IX86_BUILTIN_FIXUPIMMPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
30280 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_maskz_round, "__builtin_ia32_fixupimmps512_maskz", IX86_BUILTIN_FIXUPIMMPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
30281 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_mask_round, "__builtin_ia32_fixupimmsd_mask", IX86_BUILTIN_FIXUPIMMSD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
30282 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_maskz_round, "__builtin_ia32_fixupimmsd_maskz", IX86_BUILTIN_FIXUPIMMSD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
30283 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_mask_round, "__builtin_ia32_fixupimmss_mask", IX86_BUILTIN_FIXUPIMMSS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
30284 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_maskz_round, "__builtin_ia32_fixupimmss_maskz", IX86_BUILTIN_FIXUPIMMSS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
30285 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv8df_mask_round, "__builtin_ia32_getexppd512_mask", IX86_BUILTIN_GETEXPPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30286 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv16sf_mask_round, "__builtin_ia32_getexpps512_mask", IX86_BUILTIN_GETEXPPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30287 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv2df_round, "__builtin_ia32_getexpsd128_round", IX86_BUILTIN_GETEXPSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30288 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv4sf_round, "__builtin_ia32_getexpss128_round", IX86_BUILTIN_GETEXPSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30289 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv8df_mask_round, "__builtin_ia32_getmantpd512_mask", IX86_BUILTIN_GETMANTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
30290 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv16sf_mask_round, "__builtin_ia32_getmantps512_mask", IX86_BUILTIN_GETMANTPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
30291 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv2df_round, "__builtin_ia32_getmantsd_round", IX86_BUILTIN_GETMANTSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
30292 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv4sf_round, "__builtin_ia32_getmantss_round", IX86_BUILTIN_GETMANTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
30293 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8df3_mask_round, "__builtin_ia32_maxpd512_mask", IX86_BUILTIN_MAXPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30294 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16sf3_mask_round, "__builtin_ia32_maxps512_mask", IX86_BUILTIN_MAXPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30295 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsmaxv2df3_round, "__builtin_ia32_maxsd_round", IX86_BUILTIN_MAXSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30296 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsmaxv4sf3_round, "__builtin_ia32_maxss_round", IX86_BUILTIN_MAXSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30297 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8df3_mask_round, "__builtin_ia32_minpd512_mask", IX86_BUILTIN_MINPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30298 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16sf3_mask_round, "__builtin_ia32_minps512_mask", IX86_BUILTIN_MINPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30299 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsminv2df3_round, "__builtin_ia32_minsd_round", IX86_BUILTIN_MINSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30300 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsminv4sf3_round, "__builtin_ia32_minss_round", IX86_BUILTIN_MINSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30301 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv8df3_mask_round, "__builtin_ia32_mulpd512_mask", IX86_BUILTIN_MULPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30302 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16sf3_mask_round, "__builtin_ia32_mulps512_mask", IX86_BUILTIN_MULPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30303 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmmulv2df3_round, "__builtin_ia32_mulsd_round", IX86_BUILTIN_MULSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30304 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmmulv4sf3_round, "__builtin_ia32_mulss_round", IX86_BUILTIN_MULSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30305 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev8df_mask_round, "__builtin_ia32_rndscalepd_mask", IX86_BUILTIN_RNDSCALEPD, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
30306 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev16sf_mask_round, "__builtin_ia32_rndscaleps_mask", IX86_BUILTIN_RNDSCALEPS, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
30307 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev2df_round, "__builtin_ia32_rndscalesd_round", IX86_BUILTIN_RNDSCALESD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
30308 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev4sf_round, "__builtin_ia32_rndscaless_round", IX86_BUILTIN_RNDSCALESS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
30309 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv8df_mask_round, "__builtin_ia32_scalefpd512_mask", IX86_BUILTIN_SCALEFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30310 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv16sf_mask_round, "__builtin_ia32_scalefps512_mask", IX86_BUILTIN_SCALEFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30311 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv2df_round, "__builtin_ia32_scalefsd_round", IX86_BUILTIN_SCALEFSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30312 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv4sf_round, "__builtin_ia32_scalefss_round", IX86_BUILTIN_SCALEFSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30313 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2_mask_round, "__builtin_ia32_sqrtpd512_mask", IX86_BUILTIN_SQRTPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30314 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv16sf2_mask_round, "__builtin_ia32_sqrtps512_mask", IX86_BUILTIN_SQRTPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30315 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsqrtv2df2_round, "__builtin_ia32_sqrtsd_round", IX86_BUILTIN_SQRTSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30316 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsqrtv4sf2_round, "__builtin_ia32_sqrtss_round", IX86_BUILTIN_SQRTSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30317 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8df3_mask_round, "__builtin_ia32_subpd512_mask", IX86_BUILTIN_SUBPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30318 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16sf3_mask_round, "__builtin_ia32_subps512_mask", IX86_BUILTIN_SUBPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30319 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsubv2df3_round, "__builtin_ia32_subsd_round", IX86_BUILTIN_SUBSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30320 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsubv4sf3_round, "__builtin_ia32_subss_round", IX86_BUILTIN_SUBSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30321 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2si_round, "__builtin_ia32_vcvtsd2si32", IX86_BUILTIN_VCVTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
30322 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq_round, "__builtin_ia32_vcvtsd2si64", IX86_BUILTIN_VCVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
30323 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtsd2usi_round, "__builtin_ia32_vcvtsd2usi32", IX86_BUILTIN_VCVTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
30324 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtsd2usiq_round, "__builtin_ia32_vcvtsd2usi64", IX86_BUILTIN_VCVTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
30325 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtss2si_round, "__builtin_ia32_vcvtss2si32", IX86_BUILTIN_VCVTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
30326 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq_round, "__builtin_ia32_vcvtss2si64", IX86_BUILTIN_VCVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
30327 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtss2usi_round, "__builtin_ia32_vcvtss2usi32", IX86_BUILTIN_VCVTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
30328 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtss2usiq_round, "__builtin_ia32_vcvtss2usi64", IX86_BUILTIN_VCVTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
30329 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvttsd2si_round, "__builtin_ia32_vcvttsd2si32", IX86_BUILTIN_VCVTTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
30330 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq_round, "__builtin_ia32_vcvttsd2si64", IX86_BUILTIN_VCVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
30331 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttsd2usi_round, "__builtin_ia32_vcvttsd2usi32", IX86_BUILTIN_VCVTTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
30332 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttsd2usiq_round, "__builtin_ia32_vcvttsd2usi64", IX86_BUILTIN_VCVTTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
30333 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvttss2si_round, "__builtin_ia32_vcvttss2si32", IX86_BUILTIN_VCVTTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
30334 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq_round, "__builtin_ia32_vcvttss2si64", IX86_BUILTIN_VCVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
30335 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttss2usi_round, "__builtin_ia32_vcvttss2usi32", IX86_BUILTIN_VCVTTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
30336 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttss2usiq_round, "__builtin_ia32_vcvttss2usi64", IX86_BUILTIN_VCVTTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
30337 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask_round, "__builtin_ia32_vfmaddpd512_mask", IX86_BUILTIN_VFMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30338 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask3_round, "__builtin_ia32_vfmaddpd512_mask3", IX86_BUILTIN_VFMADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30339 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_maskz_round, "__builtin_ia32_vfmaddpd512_maskz", IX86_BUILTIN_VFMADDPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30340 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask_round, "__builtin_ia32_vfmaddps512_mask", IX86_BUILTIN_VFMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30341 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask3_round, "__builtin_ia32_vfmaddps512_mask3", IX86_BUILTIN_VFMADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30342 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_maskz_round, "__builtin_ia32_vfmaddps512_maskz", IX86_BUILTIN_VFMADDPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30343 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v2df_round, "__builtin_ia32_vfmaddsd3_round", IX86_BUILTIN_VFMADDSD3_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_INT },
30344 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v4sf_round, "__builtin_ia32_vfmaddss3_round", IX86_BUILTIN_VFMADDSS3_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_INT },
30345 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask_round, "__builtin_ia32_vfmaddsubpd512_mask", IX86_BUILTIN_VFMADDSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30346 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask3_round, "__builtin_ia32_vfmaddsubpd512_mask3", IX86_BUILTIN_VFMADDSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30347 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_maskz_round, "__builtin_ia32_vfmaddsubpd512_maskz", IX86_BUILTIN_VFMADDSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30348 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask_round, "__builtin_ia32_vfmaddsubps512_mask", IX86_BUILTIN_VFMADDSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30349 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask3_round, "__builtin_ia32_vfmaddsubps512_mask3", IX86_BUILTIN_VFMADDSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30350 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_maskz_round, "__builtin_ia32_vfmaddsubps512_maskz", IX86_BUILTIN_VFMADDSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30351 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v8df_mask3_round, "__builtin_ia32_vfmsubaddpd512_mask3", IX86_BUILTIN_VFMSUBADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30352 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v16sf_mask3_round, "__builtin_ia32_vfmsubaddps512_mask3", IX86_BUILTIN_VFMSUBADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30353 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v8df_mask3_round, "__builtin_ia32_vfmsubpd512_mask3", IX86_BUILTIN_VFMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30354 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v16sf_mask3_round, "__builtin_ia32_vfmsubps512_mask3", IX86_BUILTIN_VFMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30355 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v8df_mask_round, "__builtin_ia32_vfnmaddpd512_mask", IX86_BUILTIN_VFNMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30356 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v16sf_mask_round, "__builtin_ia32_vfnmaddps512_mask", IX86_BUILTIN_VFNMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30357 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask_round, "__builtin_ia32_vfnmsubpd512_mask", IX86_BUILTIN_VFNMSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30358 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask3_round, "__builtin_ia32_vfnmsubpd512_mask3", IX86_BUILTIN_VFNMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30359 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask_round, "__builtin_ia32_vfnmsubps512_mask", IX86_BUILTIN_VFNMSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30360 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask3_round, "__builtin_ia32_vfnmsubps512_mask3", IX86_BUILTIN_VFNMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30361
30362 /* AVX512ER */
30363 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v8df_mask_round, "__builtin_ia32_exp2pd_mask", IX86_BUILTIN_EXP2PD_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30364 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf_mask_round, "__builtin_ia32_exp2ps_mask", IX86_BUILTIN_EXP2PS_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30365 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v8df_mask_round, "__builtin_ia32_rcp28pd_mask", IX86_BUILTIN_RCP28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30366 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v16sf_mask_round, "__builtin_ia32_rcp28ps_mask", IX86_BUILTIN_RCP28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30367 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v2df_round, "__builtin_ia32_rcp28sd_round", IX86_BUILTIN_RCP28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30368 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v4sf_round, "__builtin_ia32_rcp28ss_round", IX86_BUILTIN_RCP28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30369 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v8df_mask_round, "__builtin_ia32_rsqrt28pd_mask", IX86_BUILTIN_RSQRT28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30370 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v16sf_mask_round, "__builtin_ia32_rsqrt28ps_mask", IX86_BUILTIN_RSQRT28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30371 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v2df_round, "__builtin_ia32_rsqrt28sd_round", IX86_BUILTIN_RSQRT28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30372 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v4sf_round, "__builtin_ia32_rsqrt28ss_round", IX86_BUILTIN_RSQRT28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30373 };
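
/* Illustrative sketch, assuming the _MM_FROUND_* macros from immintrin.h:
   the trailing INT operand in every prototype above is the embedded
   rounding-mode immediate, e.g.

     __m512d r = (__m512d) __builtin_ia32_addpd512_mask
                   ((__v8df) a, (__v8df) b, (__v8df) src, (__mmask8) k,
                    _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);  */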
30374
30375 /* FMA4 and XOP. */
30376 #define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
30377 #define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
30378 #define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
30379 #define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
30380 #define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
30381 #define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
30382 #define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
30383 #define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
30384 #define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
30385 #define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
30386 #define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
30387 #define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
30388 #define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
30389 #define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
30390 #define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
30391 #define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
30392 #define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
30393 #define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
30394 #define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
30395 #define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
30396 #define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
30397 #define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
30398 #define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
30399 #define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
30400 #define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
30401 #define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
30402 #define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
30403 #define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
30404 #define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
30405 #define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
30406 #define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
30407 #define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
30408 #define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
30409 #define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
30410 #define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
30411 #define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
30412 #define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
30413 #define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
30414 #define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
30415 #define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
30416 #define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
30417 #define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
30418 #define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
30419 #define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
30420 #define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
30421 #define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
30422 #define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
30423 #define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
30424 #define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
30425 #define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
30426 #define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
30427 #define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
30428
30429 static const struct builtin_description bdesc_multi_arg[] =
30430 {
30431 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
30432 "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
30433 UNKNOWN, (int)MULTI_ARG_3_SF },
30434 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
30435 "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
30436 UNKNOWN, (int)MULTI_ARG_3_DF },
30437
30438 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf,
30439 "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3,
30440 UNKNOWN, (int)MULTI_ARG_3_SF },
30441 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df,
30442 "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3,
30443 UNKNOWN, (int)MULTI_ARG_3_DF },
30444
30445 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
30446 "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
30447 UNKNOWN, (int)MULTI_ARG_3_SF },
30448 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
30449 "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
30450 UNKNOWN, (int)MULTI_ARG_3_DF },
30451 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
30452 "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
30453 UNKNOWN, (int)MULTI_ARG_3_SF2 },
30454 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
30455 "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
30456 UNKNOWN, (int)MULTI_ARG_3_DF2 },
30457
30458 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
30459 "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
30460 UNKNOWN, (int)MULTI_ARG_3_SF },
30461 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
30462 "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
30463 UNKNOWN, (int)MULTI_ARG_3_DF },
30464 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
30465 "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
30466 UNKNOWN, (int)MULTI_ARG_3_SF2 },
30467 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
30468 "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
30469 UNKNOWN, (int)MULTI_ARG_3_DF2 },
30470
30471 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
30472 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
30473 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
30474 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
30475 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi",IX86_BUILTIN_VPCMOV_V16QI,UNKNOWN, (int)MULTI_ARG_3_QI },
30476 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
30477 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
30478
30479 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
30480 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
30481 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
30482 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
30483 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
30484 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
30485 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
30486
30487 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
30488
30489 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
30490 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
30491 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30492 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30493 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
30494 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
30495 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30496 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30497 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30498 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30499 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30500 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30501
30502 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
30503 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
30504 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
30505 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
30506 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
30507 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
30508 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
30509 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
30510 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
30511 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
30512 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
30513 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
30514 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
30515 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
30516 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
30517 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
30518
30519 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_1_SF },
30520 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_1_DF },
30521 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
30522 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
30523 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
30524 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
30525
30526 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
30527 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
30528 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
30529 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
30530 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
30531 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
30532 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
30533 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
30534 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
30535 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
30536 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
30537 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
30538 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
30539 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
30540 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
30541
30542 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
30543 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
30544 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
30545 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
30546 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
30547 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
30548 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
30549
30550 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
30551 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
30552 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
30553 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
30554 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
30555 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
30556 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
30557
30558 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
30559 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
30560 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
30561 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
30562 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
30563 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
30564 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
30565
30566 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
30567 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
30568 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
30569 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
30570 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
30571 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
30572 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
30573
30574 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
30575 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
30576 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
30577 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
30578 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
30579 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
30580 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
30581
30582 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
30583 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
30584 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
30585 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
30586 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
30587 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
30588 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
30589
30590 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
30591 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
30592 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
30593 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
30594 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
30595 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
30596 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
30597
30598 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
30599 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
30600 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
30601 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
30602 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
30603 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
30604 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
30605
30606 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
30607 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
30608 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
30609 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
30610 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
30611 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
30612 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
30613 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
30614
30615 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
30616 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
30617 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
30618 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
30619 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
30620 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
30621 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
30622 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
30623
30624 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
30625 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
30626 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
30627 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
30628
30629 };
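
/* Illustrative note on the table above: several builtins share one insn
   pattern and differ only in the fifth field, the rtx comparison code
   (EQ, NE, LT, ..., LTU, GEU, or the PCOM_FALSE/PCOM_TRUE pseudo-codes),
   which the multi-arg expander folds into the generated comparison.  E.g.
   __builtin_ia32_vpcomltb and __builtin_ia32_vpcomgeb both expand through
   CODE_FOR_xop_maskcmpv16qi3, with LT versus GE selecting the condition.  */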
30630 \f
30631 /* TM vector builtins. */
30632
30633 /* Reuse the existing x86-specific `struct builtin_description' because
30634 we're lazy. Add casts to make them fit. */
30635 static const struct builtin_description bdesc_tm[] =
30636 {
30637 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
30638 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
30639 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
30640 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30641 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30642 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30643 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30644
30645 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
30646 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
30647 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
30648 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30649 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30650 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30651 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30652
30653 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
30654 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
30655 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
30656 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30657 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30658 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30659 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30660
30661 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
30662 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
30663 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
30664 };
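
/* Illustrative note: the generic fields are repurposed here.  The insn code
   is CODE_FOR_nothing because these builtins map to libitm entry points
   rather than instructions, and the builtin code is a built_in_function
   value cast to ix86_builtins so that set_builtin_decl (below) can register
   it under the generic BUILT_IN_TM_* slot.  */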
30665
30666 /* TM callbacks. */
30667
30668 /* Return the builtin decl needed to load a vector of TYPE. */
30669
30670 static tree
30671 ix86_builtin_tm_load (tree type)
30672 {
30673 if (TREE_CODE (type) == VECTOR_TYPE)
30674 {
30675 switch (tree_to_uhwi (TYPE_SIZE (type)))
30676 {
30677 case 64:
30678 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64);
30679 case 128:
30680 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128);
30681 case 256:
30682 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256);
30683 }
30684 }
30685 return NULL_TREE;
30686 }
30687
30688 /* Return the builtin decl needed to store a vector of TYPE. */
30689
30690 static tree
30691 ix86_builtin_tm_store (tree type)
30692 {
30693 if (TREE_CODE (type) == VECTOR_TYPE)
30694 {
30695 switch (tree_to_uhwi (TYPE_SIZE (type)))
30696 {
30697 case 64:
30698 return builtin_decl_explicit (BUILT_IN_TM_STORE_M64);
30699 case 128:
30700 return builtin_decl_explicit (BUILT_IN_TM_STORE_M128);
30701 case 256:
30702 return builtin_decl_explicit (BUILT_IN_TM_STORE_M256);
30703 }
30704 }
30705 return NULL_TREE;
30706 }
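/* For illustration: given a 128-bit vector type such as V4SF (__m128),
   ix86_builtin_tm_load returns the decl registered above for
   "__builtin__ITM_RM128" (BUILT_IN_TM_LOAD_M128, type V4SF_FTYPE_PCV4SF)
   and ix86_builtin_tm_store returns the "__builtin__ITM_WM128" decl, so a
   transactional vector copy can be lowered roughly as

     tmp = __builtin__ITM_RM128 (&src);
     __builtin__ITM_WM128 (&dst, tmp);
*/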
30707 \f
30708 /* Initialize the transactional memory vector load/store builtins. */
30709
30710 static void
30711 ix86_init_tm_builtins (void)
30712 {
30713 enum ix86_builtin_func_type ftype;
30714 const struct builtin_description *d;
30715 size_t i;
30716 tree decl;
30717 tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
30718 tree attrs_log, attrs_type_log;
30719
30720 if (!flag_tm)
30721 return;
30722
30723 /* If there are no builtins defined, we must be compiling in a
30724 language without trans-mem support. */
30725 if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
30726 return;
30727
30728 /* Use whatever attributes a normal TM load has. */
30729 decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
30730 attrs_load = DECL_ATTRIBUTES (decl);
30731 attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
30732 /* Use whatever attributes a normal TM store has. */
30733 decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
30734 attrs_store = DECL_ATTRIBUTES (decl);
30735 attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
30736 /* Use whatever attributes a normal TM log has. */
30737 decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
30738 attrs_log = DECL_ATTRIBUTES (decl);
30739 attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));
30740
30741 for (i = 0, d = bdesc_tm;
30742 i < ARRAY_SIZE (bdesc_tm);
30743 i++, d++)
30744 {
30745 if ((d->mask & ix86_isa_flags) != 0
30746 || (lang_hooks.builtin_function
30747 == lang_hooks.builtin_function_ext_scope))
30748 {
30749 tree type, attrs, attrs_type;
30750 enum built_in_function code = (enum built_in_function) d->code;
30751
30752 ftype = (enum ix86_builtin_func_type) d->flag;
30753 type = ix86_get_builtin_func_type (ftype);
30754
30755 if (BUILTIN_TM_LOAD_P (code))
30756 {
30757 attrs = attrs_load;
30758 attrs_type = attrs_type_load;
30759 }
30760 else if (BUILTIN_TM_STORE_P (code))
30761 {
30762 attrs = attrs_store;
30763 attrs_type = attrs_type_store;
30764 }
30765 else
30766 {
30767 attrs = attrs_log;
30768 attrs_type = attrs_type_log;
30769 }
30770 decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
30771 /* The builtin without the prefix for
30772 calling it directly. */
30773 d->name + strlen ("__builtin_"),
30774 attrs);
30775 /* add_builtin_function() will set the DECL_ATTRIBUTES, now
30776 set the TYPE_ATTRIBUTES. */
30777 decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);
30778
30779 set_builtin_decl (code, decl, false);
30780 }
30781 }
30782 }
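/* Note that add_builtin_function is also passed the name with the
   "__builtin_" prefix stripped as the library name, so a call to e.g.
   BUILT_IN_TM_LOAD_M128 ultimately resolves to the "_ITM_RM128" entry
   point provided by the libitm runtime.  */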
30783
30784 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
30785 in the current target ISA, to allow the user to compile particular modules
30786 with target-specific options that differ from the command-line
30787 options. */
30788 static void
30789 ix86_init_mmx_sse_builtins (void)
30790 {
30791 const struct builtin_description * d;
30792 enum ix86_builtin_func_type ftype;
30793 size_t i;
30794
30795 /* Add all special builtins with variable number of operands. */
30796 for (i = 0, d = bdesc_special_args;
30797 i < ARRAY_SIZE (bdesc_special_args);
30798 i++, d++)
30799 {
30800 if (d->name == 0)
30801 continue;
30802
30803 ftype = (enum ix86_builtin_func_type) d->flag;
30804 def_builtin (d->mask, d->name, ftype, d->code);
30805 }
30806
30807 /* Add all builtins with variable number of operands. */
30808 for (i = 0, d = bdesc_args;
30809 i < ARRAY_SIZE (bdesc_args);
30810 i++, d++)
30811 {
30812 if (d->name == 0)
30813 continue;
30814
30815 ftype = (enum ix86_builtin_func_type) d->flag;
30816 def_builtin_const (d->mask, d->name, ftype, d->code);
30817 }
30818
30819 /* Add all builtins with rounding. */
30820 for (i = 0, d = bdesc_round_args;
30821 i < ARRAY_SIZE (bdesc_round_args);
30822 i++, d++)
30823 {
30824 if (d->name == 0)
30825 continue;
30826
30827 ftype = (enum ix86_builtin_func_type) d->flag;
30828 def_builtin_const (d->mask, d->name, ftype, d->code);
30829 }
30830
30831 /* pcmpestr[im] insns. */
30832 for (i = 0, d = bdesc_pcmpestr;
30833 i < ARRAY_SIZE (bdesc_pcmpestr);
30834 i++, d++)
30835 {
30836 if (d->code == IX86_BUILTIN_PCMPESTRM128)
30837 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
30838 else
30839 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
30840 def_builtin_const (d->mask, d->name, ftype, d->code);
30841 }
30842
30843 /* pcmpistr[im] insns. */
30844 for (i = 0, d = bdesc_pcmpistr;
30845 i < ARRAY_SIZE (bdesc_pcmpistr);
30846 i++, d++)
30847 {
30848 if (d->code == IX86_BUILTIN_PCMPISTRM128)
30849 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
30850 else
30851 ftype = INT_FTYPE_V16QI_V16QI_INT;
30852 def_builtin_const (d->mask, d->name, ftype, d->code);
30853 }
30854
30855 /* comi/ucomi insns. */
30856 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
30857 {
30858 if (d->mask == OPTION_MASK_ISA_SSE2)
30859 ftype = INT_FTYPE_V2DF_V2DF;
30860 else
30861 ftype = INT_FTYPE_V4SF_V4SF;
30862 def_builtin_const (d->mask, d->name, ftype, d->code);
30863 }
30864
30865 /* SSE */
30866 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
30867 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
30868 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
30869 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
30870
30871 /* SSE or 3DNow!A */
30872 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
30873 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
30874 IX86_BUILTIN_MASKMOVQ);
30875
30876 /* SSE2 */
30877 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
30878 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
30879
30880 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
30881 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
30882 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
30883 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
30884
30885 /* SSE3. */
30886 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
30887 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
30888 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
30889 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
30890
30891 /* AES */
30892 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
30893 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
30894 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
30895 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
30896 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
30897 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
30898 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
30899 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
30900 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
30901 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
30902 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
30903 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
30904
30905 /* PCLMUL */
30906 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
30907 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
30908
30909 /* RDRND */
30910 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
30911 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
30912 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
30913 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
30914 def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
30915 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
30916 IX86_BUILTIN_RDRAND64_STEP);
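/* Usage sketch for the *_step builtins defined above: each stores a
   hardware random value through its pointer argument and returns nonzero
   on success (the carry flag of the RDRAND instruction), e.g.

     unsigned int r;
     if (__builtin_ia32_rdrand32_step (&r))
       use (r);

   where use () is just a placeholder for the consumer of the value.  */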
30917
30918 /* AVX2 */
30919 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
30920 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
30921 IX86_BUILTIN_GATHERSIV2DF);
30922
30923 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
30924 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
30925 IX86_BUILTIN_GATHERSIV4DF);
30926
30927 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
30928 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
30929 IX86_BUILTIN_GATHERDIV2DF);
30930
30931 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
30932 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
30933 IX86_BUILTIN_GATHERDIV4DF);
30934
30935 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
30936 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
30937 IX86_BUILTIN_GATHERSIV4SF);
30938
30939 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
30940 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
30941 IX86_BUILTIN_GATHERSIV8SF);
30942
30943 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
30944 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
30945 IX86_BUILTIN_GATHERDIV4SF);
30946
30947 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
30948 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
30949 IX86_BUILTIN_GATHERDIV8SF);
30950
30951 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
30952 V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
30953 IX86_BUILTIN_GATHERSIV2DI);
30954
30955 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
30956 V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
30957 IX86_BUILTIN_GATHERSIV4DI);
30958
30959 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
30960 V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
30961 IX86_BUILTIN_GATHERDIV2DI);
30962
30963 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
30964 V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
30965 IX86_BUILTIN_GATHERDIV4DI);
30966
30967 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
30968 V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
30969 IX86_BUILTIN_GATHERSIV4SI);
30970
30971 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
30972 V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
30973 IX86_BUILTIN_GATHERSIV8SI);
30974
30975 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
30976 V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
30977 IX86_BUILTIN_GATHERDIV4SI);
30978
30979 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
30980 V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
30981 IX86_BUILTIN_GATHERDIV8SI);
30982
30983 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df",
30984 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
30985 IX86_BUILTIN_GATHERALTSIV4DF);
30986
30987 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256",
30988 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
30989 IX86_BUILTIN_GATHERALTDIV8SF);
30990
30991 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di",
30992 V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
30993 IX86_BUILTIN_GATHERALTSIV4DI);
30994
30995 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256",
30996 V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
30997 IX86_BUILTIN_GATHERALTDIV8SI);
30998
30999 /* AVX512F */
31000 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16sf",
31001 V16SF_FTYPE_V16SF_PCFLOAT_V16SI_HI_INT,
31002 IX86_BUILTIN_GATHER3SIV16SF);
31003
31004 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8df",
31005 V8DF_FTYPE_V8DF_PCDOUBLE_V8SI_QI_INT,
31006 IX86_BUILTIN_GATHER3SIV8DF);
31007
31008 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16sf",
31009 V8SF_FTYPE_V8SF_PCFLOAT_V8DI_QI_INT,
31010 IX86_BUILTIN_GATHER3DIV16SF);
31011
31012 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8df",
31013 V8DF_FTYPE_V8DF_PCDOUBLE_V8DI_QI_INT,
31014 IX86_BUILTIN_GATHER3DIV8DF);
31015
31016 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16si",
31017 V16SI_FTYPE_V16SI_PCINT_V16SI_HI_INT,
31018 IX86_BUILTIN_GATHER3SIV16SI);
31019
31020 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8di",
31021 V8DI_FTYPE_V8DI_PCINT64_V8SI_QI_INT,
31022 IX86_BUILTIN_GATHER3SIV8DI);
31023
31024 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16si",
31025 V8SI_FTYPE_V8SI_PCINT_V8DI_QI_INT,
31026 IX86_BUILTIN_GATHER3DIV16SI);
31027
31028 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8di",
31029 V8DI_FTYPE_V8DI_PCINT64_V8DI_QI_INT,
31030 IX86_BUILTIN_GATHER3DIV8DI);
31031
31032 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8df",
31033 V8DF_FTYPE_V8DF_PCDOUBLE_V16SI_QI_INT,
31034 IX86_BUILTIN_GATHER3ALTSIV8DF);
31035
31036 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8sf",
31037 V16SF_FTYPE_V16SF_PCFLOAT_V8DI_HI_INT,
31038 IX86_BUILTIN_GATHER3ALTDIV16SF);
31039
31040 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8di",
31041 V8DI_FTYPE_V8DI_PCINT64_V16SI_QI_INT,
31042 IX86_BUILTIN_GATHER3ALTSIV8DI);
31043
31044 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8si",
31045 V16SI_FTYPE_V16SI_PCINT_V8DI_HI_INT,
31046 IX86_BUILTIN_GATHER3ALTDIV16SI);
31047
31048 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16sf",
31049 VOID_FTYPE_PFLOAT_HI_V16SI_V16SF_INT,
31050 IX86_BUILTIN_SCATTERSIV16SF);
31051
31052 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8df",
31053 VOID_FTYPE_PDOUBLE_QI_V8SI_V8DF_INT,
31054 IX86_BUILTIN_SCATTERSIV8DF);
31055
31056 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16sf",
31057 VOID_FTYPE_PFLOAT_QI_V8DI_V8SF_INT,
31058 IX86_BUILTIN_SCATTERDIV16SF);
31059
31060 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8df",
31061 VOID_FTYPE_PDOUBLE_QI_V8DI_V8DF_INT,
31062 IX86_BUILTIN_SCATTERDIV8DF);
31063
31064 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16si",
31065 VOID_FTYPE_PINT_HI_V16SI_V16SI_INT,
31066 IX86_BUILTIN_SCATTERSIV16SI);
31067
31068 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8di",
31069 VOID_FTYPE_PLONGLONG_QI_V8SI_V8DI_INT,
31070 IX86_BUILTIN_SCATTERSIV8DI);
31071
31072 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16si",
31073 VOID_FTYPE_PINT_QI_V8DI_V8SI_INT,
31074 IX86_BUILTIN_SCATTERDIV16SI);
31075
31076 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8di",
31077 VOID_FTYPE_PLONGLONG_QI_V8DI_V8DI_INT,
31078 IX86_BUILTIN_SCATTERDIV8DI);
31079
31080 /* AVX512PF */
31081 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdpd",
31082 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
31083 IX86_BUILTIN_GATHERPFDPD);
31084 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdps",
31085 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
31086 IX86_BUILTIN_GATHERPFDPS);
31087 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqpd",
31088 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
31089 IX86_BUILTIN_GATHERPFQPD);
31090 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqps",
31091 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
31092 IX86_BUILTIN_GATHERPFQPS);
31093 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdpd",
31094 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
31095 IX86_BUILTIN_SCATTERPFDPD);
31096 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdps",
31097 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
31098 IX86_BUILTIN_SCATTERPFDPS);
31099 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqpd",
31100 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
31101 IX86_BUILTIN_SCATTERPFQPD);
31102 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqps",
31103 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
31104 IX86_BUILTIN_SCATTERPFQPS);
31105
31106 /* SHA */
31107 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg1",
31108 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG1);
31109 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg2",
31110 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG2);
31111 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1nexte",
31112 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1NEXTE);
31113 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1rnds4",
31114 V4SI_FTYPE_V4SI_V4SI_INT, IX86_BUILTIN_SHA1RNDS4);
31115 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg1",
31116 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG1);
31117 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg2",
31118 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG2);
31119 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256rnds2",
31120 V4SI_FTYPE_V4SI_V4SI_V4SI, IX86_BUILTIN_SHA256RNDS2);
31121
31122 /* RTM. */
31123 def_builtin (OPTION_MASK_ISA_RTM, "__builtin_ia32_xabort",
31124 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT);
31125
31126 /* MMX access to the vec_init patterns. */
31127 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
31128 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
31129
31130 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
31131 V4HI_FTYPE_HI_HI_HI_HI,
31132 IX86_BUILTIN_VEC_INIT_V4HI);
31133
31134 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
31135 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
31136 IX86_BUILTIN_VEC_INIT_V8QI);
31137
31138 /* Access to the vec_extract patterns. */
31139 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
31140 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
31141 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
31142 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
31143 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
31144 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
31145 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
31146 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
31147 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
31148 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
31149
31150 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
31151 "__builtin_ia32_vec_ext_v4hi",
31152 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
31153
31154 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
31155 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
31156
31157 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
31158 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
31159
31160 /* Access to the vec_set patterns. */
31161 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
31162 "__builtin_ia32_vec_set_v2di",
31163 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
31164
31165 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
31166 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
31167
31168 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
31169 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
31170
31171 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
31172 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
31173
31174 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
31175 "__builtin_ia32_vec_set_v4hi",
31176 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
31177
31178 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
31179 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
31180
31181 /* RDSEED */
31182 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_hi_step",
31183 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDSEED16_STEP);
31184 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_si_step",
31185 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDSEED32_STEP);
31186 def_builtin (OPTION_MASK_ISA_RDSEED | OPTION_MASK_ISA_64BIT,
31187 "__builtin_ia32_rdseed_di_step",
31188 INT_FTYPE_PULONGLONG, IX86_BUILTIN_RDSEED64_STEP);
31189
31190 /* ADCX */
31191 def_builtin (0, "__builtin_ia32_addcarryx_u32",
31192 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_ADDCARRYX32);
31193 def_builtin (OPTION_MASK_ISA_64BIT,
31194 "__builtin_ia32_addcarryx_u64",
31195 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
31196 IX86_BUILTIN_ADDCARRYX64);
31197
31198 /* Read/write FLAGS. */
31199 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u32",
31200 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
31201 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u64",
31202 UINT64_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
31203 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u32",
31204 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_WRITE_FLAGS);
31205 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u64",
31206 VOID_FTYPE_UINT64, IX86_BUILTIN_WRITE_FLAGS);
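/* Usage sketch for the FLAGS builtins above (32-bit forms shown; the _u64
   forms are the 64-bit equivalents):

     unsigned int flags = __builtin_ia32_readeflags_u32 ();
     __builtin_ia32_writeeflags_u32 (flags);
*/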
31207
31208 /* CLFLUSHOPT. */
31209 def_builtin (OPTION_MASK_ISA_CLFLUSHOPT, "__builtin_ia32_clflushopt",
31210 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSHOPT);
31211
31212 /* Add FMA4 multi-arg argument instructions */
31213 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
31214 {
31215 if (d->name == 0)
31216 continue;
31217
31218 ftype = (enum ix86_builtin_func_type) d->flag;
31219 def_builtin_const (d->mask, d->name, ftype, d->code);
31220 }
31221 }
31222
31223 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
31224 to return a pointer to VERSION_DECL if the outcome of the expression
31225 formed by PREDICATE_CHAIN is true. This function will be called during
31226 version dispatch to decide which function version to execute. It returns
31227 the basic block at the end, to which more conditions can be added. */
31228
31229 static basic_block
31230 add_condition_to_bb (tree function_decl, tree version_decl,
31231 tree predicate_chain, basic_block new_bb)
31232 {
31233 gimple return_stmt;
31234 tree convert_expr, result_var;
31235 gimple convert_stmt;
31236 gimple call_cond_stmt;
31237 gimple if_else_stmt;
31238
31239 basic_block bb1, bb2, bb3;
31240 edge e12, e23;
31241
31242 tree cond_var, and_expr_var = NULL_TREE;
31243 gimple_seq gseq;
31244
31245 tree predicate_decl, predicate_arg;
31246
31247 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
31248
31249 gcc_assert (new_bb != NULL);
31250 gseq = bb_seq (new_bb);
31251
31252
31253 convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
31254 build_fold_addr_expr (version_decl));
31255 result_var = create_tmp_var (ptr_type_node, NULL);
31256 convert_stmt = gimple_build_assign (result_var, convert_expr);
31257 return_stmt = gimple_build_return (result_var);
31258
31259 if (predicate_chain == NULL_TREE)
31260 {
31261 gimple_seq_add_stmt (&gseq, convert_stmt);
31262 gimple_seq_add_stmt (&gseq, return_stmt);
31263 set_bb_seq (new_bb, gseq);
31264 gimple_set_bb (convert_stmt, new_bb);
31265 gimple_set_bb (return_stmt, new_bb);
31266 pop_cfun ();
31267 return new_bb;
31268 }
31269
31270 while (predicate_chain != NULL)
31271 {
31272 cond_var = create_tmp_var (integer_type_node, NULL);
31273 predicate_decl = TREE_PURPOSE (predicate_chain);
31274 predicate_arg = TREE_VALUE (predicate_chain);
31275 call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
31276 gimple_call_set_lhs (call_cond_stmt, cond_var);
31277
31278 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
31279 gimple_set_bb (call_cond_stmt, new_bb);
31280 gimple_seq_add_stmt (&gseq, call_cond_stmt);
31281
31282 predicate_chain = TREE_CHAIN (predicate_chain);
31283
31284 if (and_expr_var == NULL)
31285 and_expr_var = cond_var;
31286 else
31287 {
31288 gimple assign_stmt;
31289 /* Use MIN_EXPR to check whether any of the integers is zero:
31290 and_expr_var = MIN_EXPR <cond_var, and_expr_var>. */
31291 assign_stmt = gimple_build_assign (and_expr_var,
31292 build2 (MIN_EXPR, integer_type_node,
31293 cond_var, and_expr_var));
31294
31295 gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
31296 gimple_set_bb (assign_stmt, new_bb);
31297 gimple_seq_add_stmt (&gseq, assign_stmt);
31298 }
31299 }
31300
31301 if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
31302 integer_zero_node,
31303 NULL_TREE, NULL_TREE);
31304 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
31305 gimple_set_bb (if_else_stmt, new_bb);
31306 gimple_seq_add_stmt (&gseq, if_else_stmt);
31307
31308 gimple_seq_add_stmt (&gseq, convert_stmt);
31309 gimple_seq_add_stmt (&gseq, return_stmt);
31310 set_bb_seq (new_bb, gseq);
31311
31312 bb1 = new_bb;
31313 e12 = split_block (bb1, if_else_stmt);
31314 bb2 = e12->dest;
31315 e12->flags &= ~EDGE_FALLTHRU;
31316 e12->flags |= EDGE_TRUE_VALUE;
31317
31318 e23 = split_block (bb2, return_stmt);
31319
31320 gimple_set_bb (convert_stmt, bb2);
31321 gimple_set_bb (return_stmt, bb2);
31322
31323 bb3 = e23->dest;
31324 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
31325
31326 remove_edge (e23);
31327 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
31328
31329 pop_cfun ();
31330
31331 return bb3;
31332 }
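/* Roughly, each call to add_condition_to_bb extends the resolver CFG as

     bb1:  cond = predicate (arg);            <- one call per chain entry
           and_expr = MIN_EXPR <cond, ...>;   <- combined via MIN_EXPR
           if (and_expr > 0) goto bb2; else goto bb3;
     bb2:  return (void *) &VERSION_DECL;
     bb3:  <returned; the next condition is appended here>

   where each predicate is the __builtin_cpu_is or __builtin_cpu_supports
   decl recorded in PREDICATE_CHAIN.  */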
31333
31334 /* This parses the attribute arguments to target in DECL and determines
31335 the right builtin to use to match the platform specification.
31336 It returns the priority value for this version decl. If PREDICATE_LIST
31337 is not NULL, it stores the list of cpu features that need to be checked
31338 before dispatching this function. */
31339
31340 static unsigned int
31341 get_builtin_code_for_version (tree decl, tree *predicate_list)
31342 {
31343 tree attrs;
31344 struct cl_target_option cur_target;
31345 tree target_node;
31346 struct cl_target_option *new_target;
31347 const char *arg_str = NULL;
31348 const char *attrs_str = NULL;
31349 char *tok_str = NULL;
31350 char *token;
31351
31352 /* Priority of i386 features, greater value is higher priority. This is
31353 used to decide the order in which function dispatch must happen. For
31354 instance, a version specialized for SSE4.2 should be checked for dispatch
31355 before a version for SSE3, as SSE4.2 implies SSE3. */
31356 enum feature_priority
31357 {
31358 P_ZERO = 0,
31359 P_MMX,
31360 P_SSE,
31361 P_SSE2,
31362 P_SSE3,
31363 P_SSSE3,
31364 P_PROC_SSSE3,
31365 P_SSE4_A,
31366 P_PROC_SSE4_A,
31367 P_SSE4_1,
31368 P_SSE4_2,
31369 P_PROC_SSE4_2,
31370 P_POPCNT,
31371 P_AVX,
31372 P_PROC_AVX,
31373 P_FMA4,
31374 P_XOP,
31375 P_PROC_XOP,
31376 P_FMA,
31377 P_PROC_FMA,
31378 P_AVX2,
31379 P_PROC_AVX2
31380 };
31381
31382 enum feature_priority priority = P_ZERO;
31383
31384 /* These are the target attribute strings for which a dispatcher is
31385 available, from fold_builtin_cpu. */
31386
31387 static struct _feature_list
31388 {
31389 const char *const name;
31390 const enum feature_priority priority;
31391 }
31392 const feature_list[] =
31393 {
31394 {"mmx", P_MMX},
31395 {"sse", P_SSE},
31396 {"sse2", P_SSE2},
31397 {"sse3", P_SSE3},
31398 {"sse4a", P_SSE4_A},
31399 {"ssse3", P_SSSE3},
31400 {"sse4.1", P_SSE4_1},
31401 {"sse4.2", P_SSE4_2},
31402 {"popcnt", P_POPCNT},
31403 {"avx", P_AVX},
31404 {"fma4", P_FMA4},
31405 {"xop", P_XOP},
31406 {"fma", P_FMA},
31407 {"avx2", P_AVX2}
31408 };
31409
31410
31411 static unsigned int NUM_FEATURES
31412 = sizeof (feature_list) / sizeof (struct _feature_list);
31413
31414 unsigned int i;
31415
31416 tree predicate_chain = NULL_TREE;
31417 tree predicate_decl, predicate_arg;
31418
31419 attrs = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
31420 gcc_assert (attrs != NULL);
31421
31422 attrs = TREE_VALUE (TREE_VALUE (attrs));
31423
31424 gcc_assert (TREE_CODE (attrs) == STRING_CST);
31425 attrs_str = TREE_STRING_POINTER (attrs);
31426
31427 /* Return priority zero for default function. */
31428 if (strcmp (attrs_str, "default") == 0)
31429 return 0;
31430
31431 /* Handle arch= if specified. For priority, set it to be 1 more than
31432 the best instruction set the processor can handle. For instance, if
31433 there is a version for atom and a version for ssse3 (the highest ISA
31434 priority for atom), the atom version must be checked for dispatch
31435 before the ssse3 version. */
31436 if (strstr (attrs_str, "arch=") != NULL)
31437 {
31438 cl_target_option_save (&cur_target, &global_options);
31439 target_node = ix86_valid_target_attribute_tree (attrs, &global_options,
31440 &global_options_set);
31441
31442 gcc_assert (target_node);
31443 new_target = TREE_TARGET_OPTION (target_node);
31444 gcc_assert (new_target);
31445
31446 if (new_target->arch_specified && new_target->arch > 0)
31447 {
31448 switch (new_target->arch)
31449 {
31450 case PROCESSOR_CORE2:
31451 arg_str = "core2";
31452 priority = P_PROC_SSSE3;
31453 break;
31454 case PROCESSOR_NEHALEM:
31455 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AES)
31456 arg_str = "westmere";
31457 else
31458 /* We translate "arch=corei7" and "arch=nehalem" to
31459 "corei7" so that it will be mapped to M_INTEL_COREI7
31460 as cpu type to cover all M_INTEL_COREI7_XXXs. */
31461 arg_str = "corei7";
31462 priority = P_PROC_SSE4_2;
31463 break;
31464 case PROCESSOR_SANDYBRIDGE:
31465 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_F16C)
31466 arg_str = "ivybridge";
31467 else
31468 arg_str = "sandybridge";
31469 priority = P_PROC_AVX;
31470 break;
31471 case PROCESSOR_HASWELL:
31472 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_ADX)
31473 arg_str = "broadwell";
31474 else
31475 arg_str = "haswell";
31476 priority = P_PROC_AVX2;
31477 break;
31478 case PROCESSOR_BONNELL:
31479 arg_str = "bonnell";
31480 priority = P_PROC_SSSE3;
31481 break;
31482 case PROCESSOR_SILVERMONT:
31483 arg_str = "silvermont";
31484 priority = P_PROC_SSE4_2;
31485 break;
31486 case PROCESSOR_AMDFAM10:
31487 arg_str = "amdfam10h";
31488 priority = P_PROC_SSE4_A;
31489 break;
31490 case PROCESSOR_BTVER1:
31491 arg_str = "btver1";
31492 priority = P_PROC_SSE4_A;
31493 break;
31494 case PROCESSOR_BTVER2:
31495 arg_str = "btver2";
31496 priority = P_PROC_AVX;
31497 break;
31498 case PROCESSOR_BDVER1:
31499 arg_str = "bdver1";
31500 priority = P_PROC_XOP;
31501 break;
31502 case PROCESSOR_BDVER2:
31503 arg_str = "bdver2";
31504 priority = P_PROC_FMA;
31505 break;
31506 case PROCESSOR_BDVER3:
31507 arg_str = "bdver3";
31508 priority = P_PROC_FMA;
31509 break;
31510 case PROCESSOR_BDVER4:
31511 arg_str = "bdver4";
31512 priority = P_PROC_AVX2;
31513 break;
31514 }
31515 }
31516
31517 cl_target_option_restore (&global_options, &cur_target);
31518
31519 if (predicate_list && arg_str == NULL)
31520 {
31521 error_at (DECL_SOURCE_LOCATION (decl),
31522 "No dispatcher found for the versioning attributes");
31523 return 0;
31524 }
31525
31526 if (predicate_list)
31527 {
31528 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_IS];
31529 /* For a C string literal the length includes the trailing NULL. */
31530 predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
31531 predicate_chain = tree_cons (predicate_decl, predicate_arg,
31532 predicate_chain);
31533 }
31534 }
31535
31536 /* Process feature name. */
31537 tok_str = (char *) xmalloc (strlen (attrs_str) + 1);
31538 strcpy (tok_str, attrs_str);
31539 token = strtok (tok_str, ",");
31540 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_SUPPORTS];
31541
31542 while (token != NULL)
31543 {
31544 /* Do not process "arch="; it was handled above. */
31545 if (strncmp (token, "arch=", 5) == 0)
31546 {
31547 token = strtok (NULL, ",");
31548 continue;
31549 }
31550 for (i = 0; i < NUM_FEATURES; ++i)
31551 {
31552 if (strcmp (token, feature_list[i].name) == 0)
31553 {
31554 if (predicate_list)
31555 {
31556 predicate_arg = build_string_literal (
31557 strlen (feature_list[i].name) + 1,
31558 feature_list[i].name);
31559 predicate_chain = tree_cons (predicate_decl, predicate_arg,
31560 predicate_chain);
31561 }
31562 /* Find the maximum priority feature. */
31563 if (feature_list[i].priority > priority)
31564 priority = feature_list[i].priority;
31565
31566 break;
31567 }
31568 }
31569 if (predicate_list && i == NUM_FEATURES)
31570 {
31571 error_at (DECL_SOURCE_LOCATION (decl),
31572 "No dispatcher found for %s", token);
31573 return 0;
31574 }
31575 token = strtok (NULL, ",");
31576 }
31577 free (tok_str);
31578
31579 if (predicate_list && predicate_chain == NULL_TREE)
31580 {
31581 error_at (DECL_SOURCE_LOCATION (decl),
31582 "No dispatcher found for the versioning attributes : %s",
31583 attrs_str);
31584 return 0;
31585 }
31586 else if (predicate_list)
31587 {
31588 predicate_chain = nreverse (predicate_chain);
31589 *predicate_list = predicate_chain;
31590 }
31591
31592 return priority;
31593 }
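/* For example, a version declared with
   __attribute__ ((target ("arch=core2"))) gets priority P_PROC_SSSE3 and
   the predicate __builtin_cpu_is ("core2"), while target ("avx2") gets
   priority P_AVX2 and the predicate __builtin_cpu_supports ("avx2").  */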
31594
31595 /* This compares the priority of target features in function DECL1
31596 and DECL2. It returns positive value if DECL1 is higher priority,
31597 negative value if DECL2 is higher priority and 0 if they are the
31598 same. */
31599
31600 static int
31601 ix86_compare_version_priority (tree decl1, tree decl2)
31602 {
31603 unsigned int priority1 = get_builtin_code_for_version (decl1, NULL);
31604 unsigned int priority2 = get_builtin_code_for_version (decl2, NULL);
31605
31606 return (int)priority1 - (int)priority2;
31607 }
31608
31609 /* V1 and V2 point to function versions with different priorities
31610 based on the target ISA. This function compares their priorities. */
31611
31612 static int
31613 feature_compare (const void *v1, const void *v2)
31614 {
31615 typedef struct _function_version_info
31616 {
31617 tree version_decl;
31618 tree predicate_chain;
31619 unsigned int dispatch_priority;
31620 } function_version_info;
31621
31622 const function_version_info c1 = *(const function_version_info *)v1;
31623 const function_version_info c2 = *(const function_version_info *)v2;
31624 return (c2.dispatch_priority - c1.dispatch_priority);
31625 }
31626
31627 /* This function generates the dispatch function for
31628 multi-versioned functions. DISPATCH_DECL is the function which will
31629 contain the dispatch logic. FNDECLS are the function choices for
31630 dispatch, and is a tree chain. EMPTY_BB is the basic block pointer
31631 in DISPATCH_DECL in which the dispatch code is generated. */
31632
31633 static int
31634 dispatch_function_versions (tree dispatch_decl,
31635 void *fndecls_p,
31636 basic_block *empty_bb)
31637 {
31638 tree default_decl;
31639 gimple ifunc_cpu_init_stmt;
31640 gimple_seq gseq;
31641 int ix;
31642 tree ele;
31643 vec<tree> *fndecls;
31644 unsigned int num_versions = 0;
31645 unsigned int actual_versions = 0;
31646 unsigned int i;
31647
31648 struct _function_version_info
31649 {
31650 tree version_decl;
31651 tree predicate_chain;
31652 unsigned int dispatch_priority;
31653 }*function_version_info;
31654
31655 gcc_assert (dispatch_decl != NULL
31656 && fndecls_p != NULL
31657 && empty_bb != NULL);
31658
31659 /* fndecls_p is actually a vector. */
31660 fndecls = static_cast<vec<tree> *> (fndecls_p);
31661
31662 /* At least one more version other than the default. */
31663 num_versions = fndecls->length ();
31664 gcc_assert (num_versions >= 2);
31665
31666 function_version_info = (struct _function_version_info *)
31667 XNEWVEC (struct _function_version_info, (num_versions - 1));
31668
31669 /* The first version in the vector is the default decl. */
31670 default_decl = (*fndecls)[0];
31671
31672 push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));
31673
31674 gseq = bb_seq (*empty_bb);
31675 /* Function version dispatch is via IFUNC. IFUNC resolvers fire before
31676 constructors, so explicitly call __builtin_cpu_init here. */
31677 ifunc_cpu_init_stmt = gimple_build_call_vec (
31678 ix86_builtins [(int) IX86_BUILTIN_CPU_INIT], vNULL);
31679 gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
31680 gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
31681 set_bb_seq (*empty_bb, gseq);
31682
31683 pop_cfun ();
31684
31685
31686 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
31687 {
31688 tree version_decl = ele;
31689 tree predicate_chain = NULL_TREE;
31690 unsigned int priority;
31691 /* Get attribute string, parse it and find the right predicate decl.
31692 The predicate function could be a lengthy combination of many
31693 features, like arch-type and various isa-variants. */
31694 priority = get_builtin_code_for_version (version_decl,
31695 &predicate_chain);
31696
31697 if (predicate_chain == NULL_TREE)
31698 continue;
31699
31700 function_version_info [actual_versions].version_decl = version_decl;
31701 function_version_info [actual_versions].predicate_chain
31702 = predicate_chain;
31703 function_version_info [actual_versions].dispatch_priority = priority;
31704 actual_versions++;
31705 }
31706
31707 /* Sort the versions according to descending order of dispatch priority. The
31708 priority is based on the ISA. This is not a perfect solution; there
31709 could still be ambiguity: if more than one function version is suitable
31710 to execute, which one should be dispatched? In the future, allow the user
31711 to specify a dispatch priority next to the version. */
31712 qsort (function_version_info, actual_versions,
31713 sizeof (struct _function_version_info), feature_compare);
31714
31715 for (i = 0; i < actual_versions; ++i)
31716 *empty_bb = add_condition_to_bb (dispatch_decl,
31717 function_version_info[i].version_decl,
31718 function_version_info[i].predicate_chain,
31719 *empty_bb);
31720
31721 /* Dispatch the default version at the end. */
31722 *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
31723 NULL, *empty_bb);
31724
31725 free (function_version_info);
31726 return 0;
31727 }
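/* The resolver body built here is therefore roughly, for a hypothetical
   function foo with avx2 and sse4.2 versions plus a default:

     __builtin_cpu_init ();
     if (__builtin_cpu_supports ("avx2"))    return <foo.avx2>;
     if (__builtin_cpu_supports ("sse4.2"))  return <foo.sse4.2>;
     return <foo>;                           <- default, added last

   with the tests emitted in decreasing dispatch priority and <...> standing
   for the address of the corresponding version.  */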
31728
31729 /* Comparator function to be used in qsort routine to sort attribute
31730 specification strings to "target". */
31731
31732 static int
31733 attr_strcmp (const void *v1, const void *v2)
31734 {
31735 const char *c1 = *(char *const*)v1;
31736 const char *c2 = *(char *const*)v2;
31737 return strcmp (c1, c2);
31738 }
31739
31740 /* ARGLIST is the argument to target attribute. This function tokenizes
31741 the comma separated arguments, sorts them and returns a string which
31742 is a unique identifier for the comma separated arguments. It also
31743 replaces non-identifier characters "=,-" with "_". */
31744
31745 static char *
31746 sorted_attr_string (tree arglist)
31747 {
31748 tree arg;
31749 size_t str_len_sum = 0;
31750 char **args = NULL;
31751 char *attr_str, *ret_str;
31752 char *attr = NULL;
31753 unsigned int argnum = 1;
31754 unsigned int i;
31755
31756 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
31757 {
31758 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
31759 size_t len = strlen (str);
31760 str_len_sum += len + 1;
31761 if (arg != arglist)
31762 argnum++;
31763 for (i = 0; i < strlen (str); i++)
31764 if (str[i] == ',')
31765 argnum++;
31766 }
31767
31768 attr_str = XNEWVEC (char, str_len_sum);
31769 str_len_sum = 0;
31770 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
31771 {
31772 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
31773 size_t len = strlen (str);
31774 memcpy (attr_str + str_len_sum, str, len);
31775 attr_str[str_len_sum + len] = TREE_CHAIN (arg) ? ',' : '\0';
31776 str_len_sum += len + 1;
31777 }
31778
31779 /* Replace "=,-" with "_". */
31780 for (i = 0; i < strlen (attr_str); i++)
31781 if (attr_str[i] == '=' || attr_str[i]== '-')
31782 attr_str[i] = '_';
31783
31784 if (argnum == 1)
31785 return attr_str;
31786
31787 args = XNEWVEC (char *, argnum);
31788
31789 i = 0;
31790 attr = strtok (attr_str, ",");
31791 while (attr != NULL)
31792 {
31793 args[i] = attr;
31794 i++;
31795 attr = strtok (NULL, ",");
31796 }
31797
31798 qsort (args, argnum, sizeof (char *), attr_strcmp);
31799
31800 ret_str = XNEWVEC (char, str_len_sum);
31801 str_len_sum = 0;
31802 for (i = 0; i < argnum; i++)
31803 {
31804 size_t len = strlen (args[i]);
31805 memcpy (ret_str + str_len_sum, args[i], len);
31806 ret_str[str_len_sum + len] = i < argnum - 1 ? '_' : '\0';
31807 str_len_sum += len + 1;
31808 }
31809
31810 XDELETEVEC (args);
31811 XDELETEVEC (attr_str);
31812 return ret_str;
31813 }
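/* For example, the argument list of
   __attribute__ ((target ("sse4.2,arch=core2"))) becomes
   "arch_core2_sse4.2": '=' is rewritten to '_', the tokens are sorted with
   strcmp, and they are rejoined with '_'.  */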
31814
31815 /* This function changes the assembler name for functions that are
31816 versions. If DECL is a function version and has a "target"
31817 attribute, it appends the attribute string to its assembler name. */
31818
31819 static tree
31820 ix86_mangle_function_version_assembler_name (tree decl, tree id)
31821 {
31822 tree version_attr;
31823 const char *orig_name, *version_string;
31824 char *attr_str, *assembler_name;
31825
31826 if (DECL_DECLARED_INLINE_P (decl)
31827 && lookup_attribute ("gnu_inline",
31828 DECL_ATTRIBUTES (decl)))
31829 error_at (DECL_SOURCE_LOCATION (decl),
31830 "Function versions cannot be marked as gnu_inline,"
31831 " bodies have to be generated");
31832
31833 if (DECL_VIRTUAL_P (decl)
31834 || DECL_VINDEX (decl))
31835 sorry ("Virtual function multiversioning not supported");
31836
31837 version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
31838
31839 /* The target attribute string cannot be NULL. */
31840 gcc_assert (version_attr != NULL_TREE);
31841
31842 orig_name = IDENTIFIER_POINTER (id);
31843 version_string
31844 = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr)));
31845
31846 if (strcmp (version_string, "default") == 0)
31847 return id;
31848
31849 attr_str = sorted_attr_string (TREE_VALUE (version_attr));
31850 assembler_name = XNEWVEC (char, strlen (orig_name) + strlen (attr_str) + 2);
31851
31852 sprintf (assembler_name, "%s.%s", orig_name, attr_str);
31853
31854 /* Allow assembler name to be modified if already set. */
31855 if (DECL_ASSEMBLER_NAME_SET_P (decl))
31856 SET_DECL_RTL (decl, NULL);
31857
31858 tree ret = get_identifier (assembler_name);
31859 XDELETEVEC (attr_str);
31860 XDELETEVEC (assembler_name);
31861 return ret;
31862 }
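/* E.g. a version of foo declared with target ("avx") is emitted as
   "foo.avx" and one with target ("sse4.2,arch=core2") as
   "foo.arch_core2_sse4.2", while the default version keeps its original
   assembler name.  */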
31863
31864 /* This function returns true if FN1 and FN2 are versions of the same function,
31865 that is, the target strings of the function decls are different. This assumes
31866 that FN1 and FN2 have the same signature. */
31867
31868 static bool
31869 ix86_function_versions (tree fn1, tree fn2)
31870 {
31871 tree attr1, attr2;
31872 char *target1, *target2;
31873 bool result;
31874
31875 if (TREE_CODE (fn1) != FUNCTION_DECL
31876 || TREE_CODE (fn2) != FUNCTION_DECL)
31877 return false;
31878
31879 attr1 = lookup_attribute ("target", DECL_ATTRIBUTES (fn1));
31880 attr2 = lookup_attribute ("target", DECL_ATTRIBUTES (fn2));
31881
31882 /* At least one function decl should have the target attribute specified. */
31883 if (attr1 == NULL_TREE && attr2 == NULL_TREE)
31884 return false;
31885
31886 /* Diagnose missing target attribute if one of the decls is already
31887 multi-versioned. */
31888 if (attr1 == NULL_TREE || attr2 == NULL_TREE)
31889 {
31890 if (DECL_FUNCTION_VERSIONED (fn1) || DECL_FUNCTION_VERSIONED (fn2))
31891 {
31892 if (attr2 != NULL_TREE)
31893 {
31894 tree tem = fn1;
31895 fn1 = fn2;
31896 fn2 = tem;
31897 attr1 = attr2;
31898 }
31899 error_at (DECL_SOURCE_LOCATION (fn2),
31900 "missing %<target%> attribute for multi-versioned %D",
31901 fn2);
31902 inform (DECL_SOURCE_LOCATION (fn1),
31903 "previous declaration of %D", fn1);
31904 /* Prevent diagnosing of the same error multiple times. */
31905 DECL_ATTRIBUTES (fn2)
31906 = tree_cons (get_identifier ("target"),
31907 copy_node (TREE_VALUE (attr1)),
31908 DECL_ATTRIBUTES (fn2));
31909 }
31910 return false;
31911 }
31912
31913 target1 = sorted_attr_string (TREE_VALUE (attr1));
31914 target2 = sorted_attr_string (TREE_VALUE (attr2));
31915
31916 /* The sorted target strings must be different for fn1 and fn2
31917 to be versions. */
31918 if (strcmp (target1, target2) == 0)
31919 result = false;
31920 else
31921 result = true;
31922
31923 XDELETEVEC (target1);
31924 XDELETEVEC (target2);
31925
31926 return result;
31927 }
31928
31929 static tree
31930 ix86_mangle_decl_assembler_name (tree decl, tree id)
31931 {
31932 /* For function version, add the target suffix to the assembler name. */
31933 if (TREE_CODE (decl) == FUNCTION_DECL
31934 && DECL_FUNCTION_VERSIONED (decl))
31935 id = ix86_mangle_function_version_assembler_name (decl, id);
31936 #ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
31937 id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id);
31938 #endif
31939
31940 return id;
31941 }
31942
31943 /* Return a new name by appending SUFFIX to the DECL name. If make_unique
31944 is true, append the full path name of the source file. */
31945
31946 static char *
31947 make_name (tree decl, const char *suffix, bool make_unique)
31948 {
31949 char *global_var_name;
31950 int name_len;
31951 const char *name;
31952 const char *unique_name = NULL;
31953
31954 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
31955
31956 /* Get a unique name that can be used globally without any chances
31957 of collision at link time. */
31958 if (make_unique)
31959 unique_name = IDENTIFIER_POINTER (get_file_function_name ("\0"));
31960
31961 name_len = strlen (name) + strlen (suffix) + 2;
31962
31963 if (make_unique)
31964 name_len += strlen (unique_name) + 1;
31965 global_var_name = XNEWVEC (char, name_len);
31966
31967 /* Use '.' to concatenate names as it is demangler friendly. */
31968 if (make_unique)
31969 snprintf (global_var_name, name_len, "%s.%s.%s", name, unique_name,
31970 suffix);
31971 else
31972 snprintf (global_var_name, name_len, "%s.%s", name, suffix);
31973
31974 return global_var_name;
31975 }
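/* E.g. for a decl whose assembler name is "foo", make_name (decl,
   "resolver", false) yields "foo.resolver"; with MAKE_UNIQUE the
   file-unique string from get_file_function_name is spliced in between,
   giving "foo.<unique>.resolver".  */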
31976
31977 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
31978
31979 /* Make a dispatcher declaration for the multi-versioned function DECL.
31980 Calls to DECL function will be replaced with calls to the dispatcher
31981 by the front-end. Return the decl created. */
31982
31983 static tree
31984 make_dispatcher_decl (const tree decl)
31985 {
31986 tree func_decl;
31987 char *func_name;
31988 tree fn_type, func_type;
31989 bool is_uniq = false;
31990
31991 if (TREE_PUBLIC (decl) == 0)
31992 is_uniq = true;
31993
31994 func_name = make_name (decl, "ifunc", is_uniq);
31995
31996 fn_type = TREE_TYPE (decl);
31997 func_type = build_function_type (TREE_TYPE (fn_type),
31998 TYPE_ARG_TYPES (fn_type));
31999
32000 func_decl = build_fn_decl (func_name, func_type);
32001 XDELETEVEC (func_name);
32002 TREE_USED (func_decl) = 1;
32003 DECL_CONTEXT (func_decl) = NULL_TREE;
32004 DECL_INITIAL (func_decl) = error_mark_node;
32005 DECL_ARTIFICIAL (func_decl) = 1;
32006 /* Mark this function as external; the resolver will flip it again if
32007 it gets generated. */
32008 DECL_EXTERNAL (func_decl) = 1;
32009 /* This will be an IFUNC; IFUNCs have to be externally visible. */
32010 TREE_PUBLIC (func_decl) = 1;
32011
32012 return func_decl;
32013 }
32014
32015 #endif
32016
32017 /* Returns true if DECL is multi-versioned and is the default function,
32018 that is, it is not tagged with a target-specific optimization. */
32019
32020 static bool
32021 is_function_default_version (const tree decl)
32022 {
32023 if (TREE_CODE (decl) != FUNCTION_DECL
32024 || !DECL_FUNCTION_VERSIONED (decl))
32025 return false;
32026 tree attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
32027 gcc_assert (attr);
32028 attr = TREE_VALUE (TREE_VALUE (attr));
32029 return (TREE_CODE (attr) == STRING_CST
32030 && strcmp (TREE_STRING_POINTER (attr), "default") == 0);
32031 }
32032
32033 /* Make a dispatcher declaration for the multi-versioned function DECL.
32034 Calls to DECL function will be replaced with calls to the dispatcher
32035 by the front-end. Returns the decl of the dispatcher function. */
32036
32037 static tree
32038 ix86_get_function_versions_dispatcher (void *decl)
32039 {
32040 tree fn = (tree) decl;
32041 struct cgraph_node *node = NULL;
32042 struct cgraph_node *default_node = NULL;
32043 struct cgraph_function_version_info *node_v = NULL;
32044 struct cgraph_function_version_info *first_v = NULL;
32045
32046 tree dispatch_decl = NULL;
32047
32048 struct cgraph_function_version_info *default_version_info = NULL;
32049
32050 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
32051
32052 node = cgraph_node::get (fn);
32053 gcc_assert (node != NULL);
32054
32055 node_v = node->function_version ();
32056 gcc_assert (node_v != NULL);
32057
32058 if (node_v->dispatcher_resolver != NULL)
32059 return node_v->dispatcher_resolver;
32060
32061 /* Find the default version and make it the first node. */
32062 first_v = node_v;
32063 /* Go to the beginning of the chain. */
32064 while (first_v->prev != NULL)
32065 first_v = first_v->prev;
32066 default_version_info = first_v;
32067 while (default_version_info != NULL)
32068 {
32069 if (is_function_default_version
32070 (default_version_info->this_node->decl))
32071 break;
32072 default_version_info = default_version_info->next;
32073 }
32074
32075 /* If there is no default node, just return NULL. */
32076 if (default_version_info == NULL)
32077 return NULL;
32078
32079 /* Make default info the first node. */
32080 if (first_v != default_version_info)
32081 {
32082 default_version_info->prev->next = default_version_info->next;
32083 if (default_version_info->next)
32084 default_version_info->next->prev = default_version_info->prev;
32085 first_v->prev = default_version_info;
32086 default_version_info->next = first_v;
32087 default_version_info->prev = NULL;
32088 }
32089
32090 default_node = default_version_info->this_node;
32091
32092 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
32093 if (targetm.has_ifunc_p ())
32094 {
32095 struct cgraph_function_version_info *it_v = NULL;
32096 struct cgraph_node *dispatcher_node = NULL;
32097 struct cgraph_function_version_info *dispatcher_version_info = NULL;
32098
32099 /* Right now, the dispatching is done via ifunc. */
32100 dispatch_decl = make_dispatcher_decl (default_node->decl);
32101
32102 dispatcher_node = cgraph_node::get_create (dispatch_decl);
32103 gcc_assert (dispatcher_node != NULL);
32104 dispatcher_node->dispatcher_function = 1;
32105 dispatcher_version_info
32106 = dispatcher_node->insert_new_function_version ();
32107 dispatcher_version_info->next = default_version_info;
32108 dispatcher_node->definition = 1;
32109
32110 /* Set the dispatcher for all the versions. */
32111 it_v = default_version_info;
32112 while (it_v != NULL)
32113 {
32114 it_v->dispatcher_resolver = dispatch_decl;
32115 it_v = it_v->next;
32116 }
32117 }
32118 else
32119 #endif
32120 {
32121 error_at (DECL_SOURCE_LOCATION (default_node->decl),
32122 "multiversioning needs ifunc which is not supported "
32123 "on this target");
32124 }
32125
32126 return dispatch_decl;
32127 }
32128
32129 /* Makes a function attribute of the form NAME(ARG_NAME) and chains
32130 it to CHAIN. */
32131
32132 static tree
32133 make_attribute (const char *name, const char *arg_name, tree chain)
32134 {
32135 tree attr_name;
32136 tree attr_arg_name;
32137 tree attr_args;
32138 tree attr;
32139
32140 attr_name = get_identifier (name);
32141 attr_arg_name = build_string (strlen (arg_name), arg_name);
32142 attr_args = tree_cons (NULL_TREE, attr_arg_name, NULL_TREE);
32143 attr = tree_cons (attr_name, attr_args, chain);
32144 return attr;
32145 }
32146
32147 /* Make the resolver function decl to dispatch the versions of
32148 a multi-versioned function, DEFAULT_DECL. Create an
32149 empty basic block in the resolver and store the pointer in
32150 EMPTY_BB. Return the decl of the resolver function. */
32151
32152 static tree
32153 make_resolver_func (const tree default_decl,
32154 const tree dispatch_decl,
32155 basic_block *empty_bb)
32156 {
32157 char *resolver_name;
32158 tree decl, type, decl_name, t;
32159 bool is_uniq = false;
32160
32161 /* IFUNCs have to be globally visible. So, if the default_decl is
32162 not, then the name of the IFUNC should be made unique. */
32163 if (TREE_PUBLIC (default_decl) == 0)
32164 is_uniq = true;
32165
32166 /* Append the filename to the resolver function if the versions are
32167 not externally visible. This is because the resolver function has
32168 to be externally visible for the loader to find it. So, appending
32169 the filename will prevent conflicts with a resolver function from
32170 another module which is based on the same version name. */
32171 resolver_name = make_name (default_decl, "resolver", is_uniq);
32172
32173 /* The resolver function should return a (void *). */
32174 type = build_function_type_list (ptr_type_node, NULL_TREE);
32175
32176 decl = build_fn_decl (resolver_name, type);
32177 decl_name = get_identifier (resolver_name);
32178 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
32179
32180 DECL_NAME (decl) = decl_name;
32181 TREE_USED (decl) = 1;
32182 DECL_ARTIFICIAL (decl) = 1;
32183 DECL_IGNORED_P (decl) = 0;
32184 /* IFUNC resolvers have to be externally visible. */
32185 TREE_PUBLIC (decl) = 1;
32186 DECL_UNINLINABLE (decl) = 1;
32187
32188 /* Resolver is not external, body is generated. */
32189 DECL_EXTERNAL (decl) = 0;
32190 DECL_EXTERNAL (dispatch_decl) = 0;
32191
32192 DECL_CONTEXT (decl) = NULL_TREE;
32193 DECL_INITIAL (decl) = make_node (BLOCK);
32194 DECL_STATIC_CONSTRUCTOR (decl) = 0;
32195
32196 if (DECL_COMDAT_GROUP (default_decl)
32197 || TREE_PUBLIC (default_decl))
32198 {
32199 /* In this case, each translation unit with a call to this
32200 versioned function will put out a resolver. Ensure it
32201 is comdat to keep just one copy. */
32202 DECL_COMDAT (decl) = 1;
32203 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
32204 }
32205 /* Build result decl and add to function_decl. */
32206 t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
32207 DECL_ARTIFICIAL (t) = 1;
32208 DECL_IGNORED_P (t) = 1;
32209 DECL_RESULT (decl) = t;
32210
32211 gimplify_function_tree (decl);
32212 push_cfun (DECL_STRUCT_FUNCTION (decl));
32213 *empty_bb = init_lowered_empty_function (decl, false);
32214
32215 cgraph_node::add_new_function (decl, true);
32216 cgraph_node::get_create (decl)->call_function_insertion_hooks ();
32217
32218 pop_cfun ();
32219
32220 gcc_assert (dispatch_decl != NULL);
32221 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
32222 DECL_ATTRIBUTES (dispatch_decl)
32223 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
32224
32225 /* Create the alias for dispatch to resolver here. */
32226 /*cgraph_create_function_alias (dispatch_decl, decl);*/
32227 cgraph_node::create_same_body_alias (dispatch_decl, decl);
32228 XDELETEVEC (resolver_name);
32229 return decl;
32230 }
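/* After this, the dispatcher for a hypothetical versioned function foo is
   roughly equivalent to having declared

     void foo (void) __attribute__ ((ifunc ("foo.resolver")));

   so the dynamic loader runs the resolver built here once and binds foo to
   the void * it returns, i.e. to the selected version.  */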
32231
32232 /* Generate the dispatching code body to dispatch multi-versioned function
32233 DECL. The target hook is called to process the "target" attributes and
32234 provide the code to dispatch the right function at run-time. NODE points
32235 to the dispatcher decl whose body will be created. */
32236
32237 static tree
32238 ix86_generate_version_dispatcher_body (void *node_p)
32239 {
32240 tree resolver_decl;
32241 basic_block empty_bb;
32242 tree default_ver_decl;
32243 struct cgraph_node *versn;
32244 struct cgraph_node *node;
32245
32246 struct cgraph_function_version_info *node_version_info = NULL;
32247 struct cgraph_function_version_info *versn_info = NULL;
32248
32249 node = (cgraph_node *)node_p;
32250
32251 node_version_info = node->function_version ();
32252 gcc_assert (node->dispatcher_function
32253 && node_version_info != NULL);
32254
32255 if (node_version_info->dispatcher_resolver)
32256 return node_version_info->dispatcher_resolver;
32257
32258 /* The first version in the chain corresponds to the default version. */
32259 default_ver_decl = node_version_info->next->this_node->decl;
32260
32261 /* node is going to be an alias, so remove the finalized bit. */
32262 node->definition = false;
32263
32264 resolver_decl = make_resolver_func (default_ver_decl,
32265 node->decl, &empty_bb);
32266
32267 node_version_info->dispatcher_resolver = resolver_decl;
32268
32269 push_cfun (DECL_STRUCT_FUNCTION (resolver_decl));
32270
32271 auto_vec<tree, 2> fn_ver_vec;
32272
32273 for (versn_info = node_version_info->next; versn_info;
32274 versn_info = versn_info->next)
32275 {
32276 versn = versn_info->this_node;
32277 /* Check for virtual functions here again, as by this time it should
32278 have been determined if this function needs a vtable index or
32279 not. This happens for methods in derived classes that override
32280 virtual methods in base classes but are not explicitly marked as
32281 virtual. */
32282 if (DECL_VINDEX (versn->decl))
32283 sorry ("Virtual function multiversioning not supported");
32284
32285 fn_ver_vec.safe_push (versn->decl);
32286 }
32287
32288 dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
32289 rebuild_cgraph_edges ();
32290 pop_cfun ();
32291 return resolver_decl;
32292 }
32293 /* This builds the processor_model struct type defined in
32294 libgcc/config/i386/cpuinfo.c */
32295
32296 static tree
32297 build_processor_model_struct (void)
32298 {
32299 const char *field_name[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
32300 "__cpu_features"};
32301 tree field = NULL_TREE, field_chain = NULL_TREE;
32302 int i;
32303 tree type = make_node (RECORD_TYPE);
32304
32305 /* The first 3 fields are unsigned int. */
32306 for (i = 0; i < 3; ++i)
32307 {
32308 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
32309 get_identifier (field_name[i]), unsigned_type_node);
32310 if (field_chain != NULL_TREE)
32311 DECL_CHAIN (field) = field_chain;
32312 field_chain = field;
32313 }
32314
32315 /* The last field is an array of unsigned integers of size one. */
32316 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
32317 get_identifier (field_name[3]),
32318 build_array_type (unsigned_type_node,
32319 build_index_type (size_one_node)));
32320 if (field_chain != NULL_TREE)
32321 DECL_CHAIN (field) = field_chain;
32322 field_chain = field;
32323
32324 finish_builtin_struct (type, "__processor_model", field_chain, NULL_TREE);
32325 return type;
32326 }
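
/* Editorial sketch (not part of the original source): the record built
   above is intended to mirror the definition in
   libgcc/config/i386/cpuinfo.c, roughly

     struct __processor_model
     {
       unsigned int __cpu_vendor;
       unsigned int __cpu_type;
       unsigned int __cpu_subtype;
       unsigned int __cpu_features[1];
     };

   __builtin_cpu_is is folded against __cpu_type / __cpu_subtype and
   __builtin_cpu_supports against bits of __cpu_features[0].  */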
32327
32328 /* Returns an extern, comdat VAR_DECL of type TYPE and name NAME. */
32329
32330 static tree
32331 make_var_decl (tree type, const char *name)
32332 {
32333 tree new_decl;
32334
32335 new_decl = build_decl (UNKNOWN_LOCATION,
32336 VAR_DECL,
32337 		       get_identifier (name),
32338 type);
32339
32340 DECL_EXTERNAL (new_decl) = 1;
32341 TREE_STATIC (new_decl) = 1;
32342 TREE_PUBLIC (new_decl) = 1;
32343 DECL_INITIAL (new_decl) = 0;
32344 DECL_ARTIFICIAL (new_decl) = 0;
32345 DECL_PRESERVE_P (new_decl) = 1;
32346
32347 make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl));
32348 assemble_variable (new_decl, 0, 0, 0);
32349
32350 return new_decl;
32351 }
32352
32353 /* FNDECL is a call to __builtin_cpu_is or __builtin_cpu_supports; fold it
32354    into a check against the __cpu_model variable defined in libgcc/config/i386/cpuinfo.c.  */
32355
32356 static tree
32357 fold_builtin_cpu (tree fndecl, tree *args)
32358 {
32359 unsigned int i;
32360 enum ix86_builtins fn_code = (enum ix86_builtins)
32361 DECL_FUNCTION_CODE (fndecl);
32362 tree param_string_cst = NULL;
32363
32364 /* This is the order of bit-fields in __processor_features in cpuinfo.c */
32365 enum processor_features
32366 {
32367 F_CMOV = 0,
32368 F_MMX,
32369 F_POPCNT,
32370 F_SSE,
32371 F_SSE2,
32372 F_SSE3,
32373 F_SSSE3,
32374 F_SSE4_1,
32375 F_SSE4_2,
32376 F_AVX,
32377 F_AVX2,
32378 F_SSE4_A,
32379 F_FMA4,
32380 F_XOP,
32381 F_FMA,
32382 F_MAX
32383 };
32384
32385 /* These are the values for vendor types and CPU types and subtypes
32386    in cpuinfo.c.  CPU types and subtypes must have the corresponding
32387    start value subtracted before they are compared.  */
32388 enum processor_model
32389 {
32390 M_INTEL = 1,
32391 M_AMD,
32392 M_CPU_TYPE_START,
32393 M_INTEL_BONNELL,
32394 M_INTEL_CORE2,
32395 M_INTEL_COREI7,
32396 M_AMDFAM10H,
32397 M_AMDFAM15H,
32398 M_INTEL_SILVERMONT,
32399 M_AMD_BTVER1,
32400 M_AMD_BTVER2,
32401 M_CPU_SUBTYPE_START,
32402 M_INTEL_COREI7_NEHALEM,
32403 M_INTEL_COREI7_WESTMERE,
32404 M_INTEL_COREI7_SANDYBRIDGE,
32405 M_AMDFAM10H_BARCELONA,
32406 M_AMDFAM10H_SHANGHAI,
32407 M_AMDFAM10H_ISTANBUL,
32408 M_AMDFAM15H_BDVER1,
32409 M_AMDFAM15H_BDVER2,
32410 M_AMDFAM15H_BDVER3,
32411 M_AMDFAM15H_BDVER4,
32412 M_INTEL_COREI7_IVYBRIDGE,
32413 M_INTEL_COREI7_HASWELL
32414 };
32415
32416 static struct _arch_names_table
32417 {
32418 const char *const name;
32419 const enum processor_model model;
32420 }
32421 const arch_names_table[] =
32422 {
32423 {"amd", M_AMD},
32424 {"intel", M_INTEL},
32425 {"atom", M_INTEL_BONNELL},
32426 {"slm", M_INTEL_SILVERMONT},
32427 {"core2", M_INTEL_CORE2},
32428 {"corei7", M_INTEL_COREI7},
32429 {"nehalem", M_INTEL_COREI7_NEHALEM},
32430 {"westmere", M_INTEL_COREI7_WESTMERE},
32431 {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE},
32432 {"ivybridge", M_INTEL_COREI7_IVYBRIDGE},
32433 {"haswell", M_INTEL_COREI7_HASWELL},
32434 {"bonnell", M_INTEL_BONNELL},
32435 {"silvermont", M_INTEL_SILVERMONT},
32436 {"amdfam10h", M_AMDFAM10H},
32437 {"barcelona", M_AMDFAM10H_BARCELONA},
32438 {"shanghai", M_AMDFAM10H_SHANGHAI},
32439 {"istanbul", M_AMDFAM10H_ISTANBUL},
32440 {"btver1", M_AMD_BTVER1},
32441 {"amdfam15h", M_AMDFAM15H},
32442 {"bdver1", M_AMDFAM15H_BDVER1},
32443 {"bdver2", M_AMDFAM15H_BDVER2},
32444 {"bdver3", M_AMDFAM15H_BDVER3},
32445 {"bdver4", M_AMDFAM15H_BDVER4},
32446 {"btver2", M_AMD_BTVER2},
32447 };
32448
32449 static struct _isa_names_table
32450 {
32451 const char *const name;
32452 const enum processor_features feature;
32453 }
32454 const isa_names_table[] =
32455 {
32456 {"cmov", F_CMOV},
32457 {"mmx", F_MMX},
32458 {"popcnt", F_POPCNT},
32459 {"sse", F_SSE},
32460 {"sse2", F_SSE2},
32461 {"sse3", F_SSE3},
32462 {"ssse3", F_SSSE3},
32463 {"sse4a", F_SSE4_A},
32464 {"sse4.1", F_SSE4_1},
32465 {"sse4.2", F_SSE4_2},
32466 {"avx", F_AVX},
32467 {"fma4", F_FMA4},
32468 {"xop", F_XOP},
32469 {"fma", F_FMA},
32470 {"avx2", F_AVX2}
32471 };
32472
32473 tree __processor_model_type = build_processor_model_struct ();
32474 tree __cpu_model_var = make_var_decl (__processor_model_type,
32475 "__cpu_model");
32476
32477
32478 varpool_add_new_variable (__cpu_model_var);
32479
32480 gcc_assert ((args != NULL) && (*args != NULL));
32481
32482 param_string_cst = *args;
32483 while (param_string_cst
32484 && TREE_CODE (param_string_cst) != STRING_CST)
32485 {
32486 /* *args must be an expression that may contain other EXPRs leading to a
32487    STRING_CST.  */
32488 if (!EXPR_P (param_string_cst))
32489 {
32490 error ("Parameter to builtin must be a string constant or literal");
32491 return integer_zero_node;
32492 }
32493 param_string_cst = TREE_OPERAND (EXPR_CHECK (param_string_cst), 0);
32494 }
32495
32496 gcc_assert (param_string_cst);
32497
32498 if (fn_code == IX86_BUILTIN_CPU_IS)
32499 {
32500 tree ref;
32501 tree field;
32502 tree final;
32503
32504 unsigned int field_val = 0;
32505 unsigned int NUM_ARCH_NAMES
32506 = sizeof (arch_names_table) / sizeof (struct _arch_names_table);
32507
32508 for (i = 0; i < NUM_ARCH_NAMES; i++)
32509 if (strcmp (arch_names_table[i].name,
32510 TREE_STRING_POINTER (param_string_cst)) == 0)
32511 break;
32512
32513 if (i == NUM_ARCH_NAMES)
32514 {
32515 error ("Parameter to builtin not valid: %s",
32516 TREE_STRING_POINTER (param_string_cst));
32517 return integer_zero_node;
32518 }
32519
32520 field = TYPE_FIELDS (__processor_model_type);
32521 field_val = arch_names_table[i].model;
32522
32523 /* CPU types are stored in the next field. */
32524 if (field_val > M_CPU_TYPE_START
32525 && field_val < M_CPU_SUBTYPE_START)
32526 {
32527 field = DECL_CHAIN (field);
32528 field_val -= M_CPU_TYPE_START;
32529 }
32530
32531 /* CPU subtypes are stored in the next field. */
32532 if (field_val > M_CPU_SUBTYPE_START)
32533 {
32534 	  field = DECL_CHAIN (DECL_CHAIN (field));
32535 field_val -= M_CPU_SUBTYPE_START;
32536 }
32537
32538 /* Get the appropriate field in __cpu_model. */
32539 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
32540 field, NULL_TREE);
32541
32542 /* Check the value. */
32543 final = build2 (EQ_EXPR, unsigned_type_node, ref,
32544 build_int_cstu (unsigned_type_node, field_val));
32545 return build1 (CONVERT_EXPR, integer_type_node, final);
32546 }
32547 else if (fn_code == IX86_BUILTIN_CPU_SUPPORTS)
32548 {
32549 tree ref;
32550 tree array_elt;
32551 tree field;
32552 tree final;
32553
32554 unsigned int field_val = 0;
32555 unsigned int NUM_ISA_NAMES
32556 = sizeof (isa_names_table) / sizeof (struct _isa_names_table);
32557
32558 for (i = 0; i < NUM_ISA_NAMES; i++)
32559 if (strcmp (isa_names_table[i].name,
32560 TREE_STRING_POINTER (param_string_cst)) == 0)
32561 break;
32562
32563 if (i == NUM_ISA_NAMES)
32564 {
32565 error ("Parameter to builtin not valid: %s",
32566 TREE_STRING_POINTER (param_string_cst));
32567 return integer_zero_node;
32568 }
32569
32570 field = TYPE_FIELDS (__processor_model_type);
32571 /* Get the last field, which is __cpu_features. */
32572 while (DECL_CHAIN (field))
32573 field = DECL_CHAIN (field);
32574
32575 /* Get the appropriate field: __cpu_model.__cpu_features */
32576 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
32577 field, NULL_TREE);
32578
32579 /* Access the 0th element of __cpu_features array. */
32580 array_elt = build4 (ARRAY_REF, unsigned_type_node, ref,
32581 integer_zero_node, NULL_TREE, NULL_TREE);
32582
32583 field_val = (1 << isa_names_table[i].feature);
32584 /* Return __cpu_model.__cpu_features[0] & field_val */
32585 final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
32586 build_int_cstu (unsigned_type_node, field_val));
32587 return build1 (CONVERT_EXPR, integer_type_node, final);
32588 }
32589 gcc_unreachable ();
32590 }
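
/* Editorial sketch (not part of the original source): examples of the
   folding performed above.  __builtin_cpu_is ("nehalem") becomes roughly

     (int) (__cpu_model.__cpu_subtype
	    == M_INTEL_COREI7_NEHALEM - M_CPU_SUBTYPE_START)

   and __builtin_cpu_supports ("avx2") becomes roughly

     (int) (__cpu_model.__cpu_features[0] & (1 << F_AVX2))

   where __cpu_model is the variable provided by libgcc and the enumerators
   are the local ones declared in fold_builtin_cpu.  */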
32591
32592 static tree
32593 ix86_fold_builtin (tree fndecl, int n_args,
32594 tree *args, bool ignore ATTRIBUTE_UNUSED)
32595 {
32596 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
32597 {
32598 enum ix86_builtins fn_code = (enum ix86_builtins)
32599 DECL_FUNCTION_CODE (fndecl);
32600 if (fn_code == IX86_BUILTIN_CPU_IS
32601 || fn_code == IX86_BUILTIN_CPU_SUPPORTS)
32602 {
32603 gcc_assert (n_args == 1);
32604 return fold_builtin_cpu (fndecl, args);
32605 }
32606 }
32607
32608 #ifdef SUBTARGET_FOLD_BUILTIN
32609 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
32610 #endif
32611
32612 return NULL_TREE;
32613 }
32614
32615 /* Make builtins to detect cpu type and features supported. NAME is
32616 the builtin name, CODE is the builtin code, and FTYPE is the function
32617 type of the builtin. */
32618
32619 static void
32620 make_cpu_type_builtin (const char* name, int code,
32621 enum ix86_builtin_func_type ftype, bool is_const)
32622 {
32623 tree decl;
32624 tree type;
32625
32626 type = ix86_get_builtin_func_type (ftype);
32627 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
32628 NULL, NULL_TREE);
32629 gcc_assert (decl != NULL_TREE);
32630 ix86_builtins[(int) code] = decl;
32631 TREE_READONLY (decl) = is_const;
32632 }
32633
32634 /* Make builtins to get CPU type and features supported. The created
32635    builtins are:
32636
32637 __builtin_cpu_init (), to detect cpu type and features,
32638 __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
32639 __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>
32640 */
32641
32642 static void
32643 ix86_init_platform_type_builtins (void)
32644 {
32645 make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT,
32646 INT_FTYPE_VOID, false);
32647 make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS,
32648 INT_FTYPE_PCCHAR, true);
32649 make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS,
32650 INT_FTYPE_PCCHAR, true);
32651 }
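
/* Editorial sketch (not part of the original source): typical use of the
   builtins registered above, with hypothetical helpers crc32_hw/crc32_sw:

     if (__builtin_cpu_supports ("sse4.2"))
       checksum = crc32_hw (buf, len);
     else
       checksum = crc32_sw (buf, len);

     if (__builtin_cpu_is ("bdver2"))
       use_bdver2_tuning ();

   Per the GCC documentation, __builtin_cpu_init () should be called first
   when these are used in constructors that may run before the libgcc CPU
   detection constructor.  */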
32652
32653 /* Internal method for ix86_init_builtins. */
32654
32655 static void
32656 ix86_init_builtins_va_builtins_abi (void)
32657 {
32658 tree ms_va_ref, sysv_va_ref;
32659 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
32660 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
32661 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
32662 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
32663
32664 if (!TARGET_64BIT)
32665 return;
32666 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
32667 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
32668 ms_va_ref = build_reference_type (ms_va_list_type_node);
32669 sysv_va_ref =
32670 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
32671
32672 fnvoid_va_end_ms =
32673 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
32674 fnvoid_va_start_ms =
32675 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
32676 fnvoid_va_end_sysv =
32677 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
32678 fnvoid_va_start_sysv =
32679 build_varargs_function_type_list (void_type_node, sysv_va_ref,
32680 NULL_TREE);
32681 fnvoid_va_copy_ms =
32682 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
32683 NULL_TREE);
32684 fnvoid_va_copy_sysv =
32685 build_function_type_list (void_type_node, sysv_va_ref,
32686 sysv_va_ref, NULL_TREE);
32687
32688 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
32689 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
32690 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
32691 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
32692 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
32693 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
32694 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
32695 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
32696 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
32697 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
32698 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
32699 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
32700 }
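
/* Editorial sketch (not part of the original source): how the ABI-specific
   varargs builtins registered above are used.  A 64-bit SysV translation
   unit can define an MS-ABI variadic function as

     void __attribute__ ((ms_abi)) f (int n, ...)
     {
       __builtin_ms_va_list ap;
       __builtin_ms_va_start (ap, n);
       int v = __builtin_va_arg (ap, int);
       __builtin_ms_va_end (ap);
     }

   and symmetrically the __builtin_sysv_va_* entry points for sysv_abi
   functions compiled for the MS ABI.  */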
32701
32702 static void
32703 ix86_init_builtin_types (void)
32704 {
32705 tree float128_type_node, float80_type_node;
32706
32707 /* The __float80 type. */
32708 float80_type_node = long_double_type_node;
32709 if (TYPE_MODE (float80_type_node) != XFmode)
32710 {
32711 /* The __float80 type. */
32712 float80_type_node = make_node (REAL_TYPE);
32713
32714 TYPE_PRECISION (float80_type_node) = 80;
32715 layout_type (float80_type_node);
32716 }
32717 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
32718
32719 /* The __float128 type. */
32720 float128_type_node = make_node (REAL_TYPE);
32721 TYPE_PRECISION (float128_type_node) = 128;
32722 layout_type (float128_type_node);
32723 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
32724
32725 /* This macro is built by i386-builtin-types.awk. */
32726 DEFINE_BUILTIN_PRIMITIVE_TYPES;
32727 }
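
/* Editorial sketch (not part of the original source): the types registered
   above are visible to users as __float80 and __float128, e.g.

     __float80  e = 1.5w;
     __float128 q = 1.5q;

   using the 'w'/'W' and 'q'/'Q' constant suffixes documented for these
   types on x86.  */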
32728
32729 static void
32730 ix86_init_builtins (void)
32731 {
32732 tree t;
32733
32734 ix86_init_builtin_types ();
32735
32736 /* Builtins to get CPU type and features. */
32737 ix86_init_platform_type_builtins ();
32738
32739 /* TFmode support builtins. */
32740 def_builtin_const (0, "__builtin_infq",
32741 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
32742 def_builtin_const (0, "__builtin_huge_valq",
32743 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
32744
32745 /* We will expand them to a normal call if SSE isn't available since
32746    they are used by libgcc.  */
32747 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
32748 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
32749 BUILT_IN_MD, "__fabstf2", NULL_TREE);
32750 TREE_READONLY (t) = 1;
32751 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
32752
32753 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
32754 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
32755 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
32756 TREE_READONLY (t) = 1;
32757 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
32758
32759 ix86_init_tm_builtins ();
32760 ix86_init_mmx_sse_builtins ();
32761
32762 if (TARGET_LP64)
32763 ix86_init_builtins_va_builtins_abi ();
32764
32765 #ifdef SUBTARGET_INIT_BUILTINS
32766 SUBTARGET_INIT_BUILTINS;
32767 #endif
32768 }
32769
32770 /* Return the ix86 builtin for CODE. */
32771
32772 static tree
32773 ix86_builtin_decl (unsigned code, bool)
32774 {
32775 if (code >= IX86_BUILTIN_MAX)
32776 return error_mark_node;
32777
32778 return ix86_builtins[code];
32779 }
32780
32781 /* Errors in the source file can cause expand_expr to return const0_rtx
32782 where we expect a vector. To avoid crashing, use one of the vector
32783 clear instructions. */
32784 static rtx
32785 safe_vector_operand (rtx x, enum machine_mode mode)
32786 {
32787 if (x == const0_rtx)
32788 x = CONST0_RTX (mode);
32789 return x;
32790 }
32791
32792 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
32793
32794 static rtx
32795 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
32796 {
32797 rtx pat;
32798 tree arg0 = CALL_EXPR_ARG (exp, 0);
32799 tree arg1 = CALL_EXPR_ARG (exp, 1);
32800 rtx op0 = expand_normal (arg0);
32801 rtx op1 = expand_normal (arg1);
32802 enum machine_mode tmode = insn_data[icode].operand[0].mode;
32803 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
32804 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
32805
32806 if (VECTOR_MODE_P (mode0))
32807 op0 = safe_vector_operand (op0, mode0);
32808 if (VECTOR_MODE_P (mode1))
32809 op1 = safe_vector_operand (op1, mode1);
32810
32811 if (optimize || !target
32812 || GET_MODE (target) != tmode
32813 || !insn_data[icode].operand[0].predicate (target, tmode))
32814 target = gen_reg_rtx (tmode);
32815
32816 if (GET_MODE (op1) == SImode && mode1 == TImode)
32817 {
32818 rtx x = gen_reg_rtx (V4SImode);
32819 emit_insn (gen_sse2_loadd (x, op1));
32820 op1 = gen_lowpart (TImode, x);
32821 }
32822
32823 if (!insn_data[icode].operand[1].predicate (op0, mode0))
32824 op0 = copy_to_mode_reg (mode0, op0);
32825 if (!insn_data[icode].operand[2].predicate (op1, mode1))
32826 op1 = copy_to_mode_reg (mode1, op1);
32827
32828 pat = GEN_FCN (icode) (target, op0, op1);
32829 if (! pat)
32830 return 0;
32831
32832 emit_insn (pat);
32833
32834 return target;
32835 }
32836
32837 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
32838
32839 static rtx
32840 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
32841 enum ix86_builtin_func_type m_type,
32842 enum rtx_code sub_code)
32843 {
32844 rtx pat;
32845 int i;
32846 int nargs;
32847 bool comparison_p = false;
32848 bool tf_p = false;
32849 bool last_arg_constant = false;
32850 int num_memory = 0;
32851 struct {
32852 rtx op;
32853 enum machine_mode mode;
32854 } args[4];
32855
32856 enum machine_mode tmode = insn_data[icode].operand[0].mode;
32857
32858 switch (m_type)
32859 {
32860 case MULTI_ARG_4_DF2_DI_I:
32861 case MULTI_ARG_4_DF2_DI_I1:
32862 case MULTI_ARG_4_SF2_SI_I:
32863 case MULTI_ARG_4_SF2_SI_I1:
32864 nargs = 4;
32865 last_arg_constant = true;
32866 break;
32867
32868 case MULTI_ARG_3_SF:
32869 case MULTI_ARG_3_DF:
32870 case MULTI_ARG_3_SF2:
32871 case MULTI_ARG_3_DF2:
32872 case MULTI_ARG_3_DI:
32873 case MULTI_ARG_3_SI:
32874 case MULTI_ARG_3_SI_DI:
32875 case MULTI_ARG_3_HI:
32876 case MULTI_ARG_3_HI_SI:
32877 case MULTI_ARG_3_QI:
32878 case MULTI_ARG_3_DI2:
32879 case MULTI_ARG_3_SI2:
32880 case MULTI_ARG_3_HI2:
32881 case MULTI_ARG_3_QI2:
32882 nargs = 3;
32883 break;
32884
32885 case MULTI_ARG_2_SF:
32886 case MULTI_ARG_2_DF:
32887 case MULTI_ARG_2_DI:
32888 case MULTI_ARG_2_SI:
32889 case MULTI_ARG_2_HI:
32890 case MULTI_ARG_2_QI:
32891 nargs = 2;
32892 break;
32893
32894 case MULTI_ARG_2_DI_IMM:
32895 case MULTI_ARG_2_SI_IMM:
32896 case MULTI_ARG_2_HI_IMM:
32897 case MULTI_ARG_2_QI_IMM:
32898 nargs = 2;
32899 last_arg_constant = true;
32900 break;
32901
32902 case MULTI_ARG_1_SF:
32903 case MULTI_ARG_1_DF:
32904 case MULTI_ARG_1_SF2:
32905 case MULTI_ARG_1_DF2:
32906 case MULTI_ARG_1_DI:
32907 case MULTI_ARG_1_SI:
32908 case MULTI_ARG_1_HI:
32909 case MULTI_ARG_1_QI:
32910 case MULTI_ARG_1_SI_DI:
32911 case MULTI_ARG_1_HI_DI:
32912 case MULTI_ARG_1_HI_SI:
32913 case MULTI_ARG_1_QI_DI:
32914 case MULTI_ARG_1_QI_SI:
32915 case MULTI_ARG_1_QI_HI:
32916 nargs = 1;
32917 break;
32918
32919 case MULTI_ARG_2_DI_CMP:
32920 case MULTI_ARG_2_SI_CMP:
32921 case MULTI_ARG_2_HI_CMP:
32922 case MULTI_ARG_2_QI_CMP:
32923 nargs = 2;
32924 comparison_p = true;
32925 break;
32926
32927 case MULTI_ARG_2_SF_TF:
32928 case MULTI_ARG_2_DF_TF:
32929 case MULTI_ARG_2_DI_TF:
32930 case MULTI_ARG_2_SI_TF:
32931 case MULTI_ARG_2_HI_TF:
32932 case MULTI_ARG_2_QI_TF:
32933 nargs = 2;
32934 tf_p = true;
32935 break;
32936
32937 default:
32938 gcc_unreachable ();
32939 }
32940
32941 if (optimize || !target
32942 || GET_MODE (target) != tmode
32943 || !insn_data[icode].operand[0].predicate (target, tmode))
32944 target = gen_reg_rtx (tmode);
32945
32946 gcc_assert (nargs <= 4);
32947
32948 for (i = 0; i < nargs; i++)
32949 {
32950 tree arg = CALL_EXPR_ARG (exp, i);
32951 rtx op = expand_normal (arg);
32952 int adjust = (comparison_p) ? 1 : 0;
32953 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
32954
32955 if (last_arg_constant && i == nargs - 1)
32956 {
32957 if (!insn_data[icode].operand[i + 1].predicate (op, mode))
32958 {
32959 enum insn_code new_icode = icode;
32960 switch (icode)
32961 {
32962 case CODE_FOR_xop_vpermil2v2df3:
32963 case CODE_FOR_xop_vpermil2v4sf3:
32964 case CODE_FOR_xop_vpermil2v4df3:
32965 case CODE_FOR_xop_vpermil2v8sf3:
32966 error ("the last argument must be a 2-bit immediate");
32967 return gen_reg_rtx (tmode);
32968 case CODE_FOR_xop_rotlv2di3:
32969 new_icode = CODE_FOR_rotlv2di3;
32970 goto xop_rotl;
32971 case CODE_FOR_xop_rotlv4si3:
32972 new_icode = CODE_FOR_rotlv4si3;
32973 goto xop_rotl;
32974 case CODE_FOR_xop_rotlv8hi3:
32975 new_icode = CODE_FOR_rotlv8hi3;
32976 goto xop_rotl;
32977 case CODE_FOR_xop_rotlv16qi3:
32978 new_icode = CODE_FOR_rotlv16qi3;
32979 xop_rotl:
32980 if (CONST_INT_P (op))
32981 {
32982 int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
32983 op = GEN_INT (INTVAL (op) & mask);
32984 gcc_checking_assert
32985 (insn_data[icode].operand[i + 1].predicate (op, mode));
32986 }
32987 else
32988 {
32989 gcc_checking_assert
32990 (nargs == 2
32991 && insn_data[new_icode].operand[0].mode == tmode
32992 && insn_data[new_icode].operand[1].mode == tmode
32993 && insn_data[new_icode].operand[2].mode == mode
32994 && insn_data[new_icode].operand[0].predicate
32995 == insn_data[icode].operand[0].predicate
32996 && insn_data[new_icode].operand[1].predicate
32997 == insn_data[icode].operand[1].predicate);
32998 icode = new_icode;
32999 goto non_constant;
33000 }
33001 break;
33002 default:
33003 gcc_unreachable ();
33004 }
33005 }
33006 }
33007 else
33008 {
33009 non_constant:
33010 if (VECTOR_MODE_P (mode))
33011 op = safe_vector_operand (op, mode);
33012
33013 /* If we aren't optimizing, only allow one memory operand to be
33014 generated. */
33015 if (memory_operand (op, mode))
33016 num_memory++;
33017
33018 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
33019
33020 if (optimize
33021 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
33022 || num_memory > 1)
33023 op = force_reg (mode, op);
33024 }
33025
33026 args[i].op = op;
33027 args[i].mode = mode;
33028 }
33029
33030 switch (nargs)
33031 {
33032 case 1:
33033 pat = GEN_FCN (icode) (target, args[0].op);
33034 break;
33035
33036 case 2:
33037 if (tf_p)
33038 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
33039 GEN_INT ((int)sub_code));
33040 else if (! comparison_p)
33041 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
33042 else
33043 {
33044 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
33045 args[0].op,
33046 args[1].op);
33047
33048 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
33049 }
33050 break;
33051
33052 case 3:
33053 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
33054 break;
33055
33056 case 4:
33057 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
33058 break;
33059
33060 default:
33061 gcc_unreachable ();
33062 }
33063
33064 if (! pat)
33065 return 0;
33066
33067 emit_insn (pat);
33068 return target;
33069 }
33070
33071 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
33072 insns with vec_merge. */
33073
33074 static rtx
33075 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
33076 rtx target)
33077 {
33078 rtx pat;
33079 tree arg0 = CALL_EXPR_ARG (exp, 0);
33080 rtx op1, op0 = expand_normal (arg0);
33081 enum machine_mode tmode = insn_data[icode].operand[0].mode;
33082 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
33083
33084 if (optimize || !target
33085 || GET_MODE (target) != tmode
33086 || !insn_data[icode].operand[0].predicate (target, tmode))
33087 target = gen_reg_rtx (tmode);
33088
33089 if (VECTOR_MODE_P (mode0))
33090 op0 = safe_vector_operand (op0, mode0);
33091
33092 if ((optimize && !register_operand (op0, mode0))
33093 || !insn_data[icode].operand[1].predicate (op0, mode0))
33094 op0 = copy_to_mode_reg (mode0, op0);
33095
33096 op1 = op0;
33097 if (!insn_data[icode].operand[2].predicate (op1, mode0))
33098 op1 = copy_to_mode_reg (mode0, op1);
33099
33100 pat = GEN_FCN (icode) (target, op0, op1);
33101 if (! pat)
33102 return 0;
33103 emit_insn (pat);
33104 return target;
33105 }
33106
33107 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
33108
33109 static rtx
33110 ix86_expand_sse_compare (const struct builtin_description *d,
33111 tree exp, rtx target, bool swap)
33112 {
33113 rtx pat;
33114 tree arg0 = CALL_EXPR_ARG (exp, 0);
33115 tree arg1 = CALL_EXPR_ARG (exp, 1);
33116 rtx op0 = expand_normal (arg0);
33117 rtx op1 = expand_normal (arg1);
33118 rtx op2;
33119 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
33120 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
33121 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
33122 enum rtx_code comparison = d->comparison;
33123
33124 if (VECTOR_MODE_P (mode0))
33125 op0 = safe_vector_operand (op0, mode0);
33126 if (VECTOR_MODE_P (mode1))
33127 op1 = safe_vector_operand (op1, mode1);
33128
33129 /* Swap operands if we have a comparison that isn't available in
33130 hardware. */
33131 if (swap)
33132 {
33133 rtx tmp = gen_reg_rtx (mode1);
33134 emit_move_insn (tmp, op1);
33135 op1 = op0;
33136 op0 = tmp;
33137 }
33138
33139 if (optimize || !target
33140 || GET_MODE (target) != tmode
33141 || !insn_data[d->icode].operand[0].predicate (target, tmode))
33142 target = gen_reg_rtx (tmode);
33143
33144 if ((optimize && !register_operand (op0, mode0))
33145 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
33146 op0 = copy_to_mode_reg (mode0, op0);
33147 if ((optimize && !register_operand (op1, mode1))
33148 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
33149 op1 = copy_to_mode_reg (mode1, op1);
33150
33151 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
33152 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
33153 if (! pat)
33154 return 0;
33155 emit_insn (pat);
33156 return target;
33157 }
33158
33159 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
33160
33161 static rtx
33162 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
33163 rtx target)
33164 {
33165 rtx pat;
33166 tree arg0 = CALL_EXPR_ARG (exp, 0);
33167 tree arg1 = CALL_EXPR_ARG (exp, 1);
33168 rtx op0 = expand_normal (arg0);
33169 rtx op1 = expand_normal (arg1);
33170 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
33171 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
33172 enum rtx_code comparison = d->comparison;
33173
33174 if (VECTOR_MODE_P (mode0))
33175 op0 = safe_vector_operand (op0, mode0);
33176 if (VECTOR_MODE_P (mode1))
33177 op1 = safe_vector_operand (op1, mode1);
33178
33179 /* Swap operands if we have a comparison that isn't available in
33180 hardware. */
33181 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
33182 {
33183 rtx tmp = op1;
33184 op1 = op0;
33185 op0 = tmp;
33186 }
33187
33188 target = gen_reg_rtx (SImode);
33189 emit_move_insn (target, const0_rtx);
33190 target = gen_rtx_SUBREG (QImode, target, 0);
33191
33192 if ((optimize && !register_operand (op0, mode0))
33193 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33194 op0 = copy_to_mode_reg (mode0, op0);
33195 if ((optimize && !register_operand (op1, mode1))
33196 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
33197 op1 = copy_to_mode_reg (mode1, op1);
33198
33199 pat = GEN_FCN (d->icode) (op0, op1);
33200 if (! pat)
33201 return 0;
33202 emit_insn (pat);
33203 emit_insn (gen_rtx_SET (VOIDmode,
33204 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33205 gen_rtx_fmt_ee (comparison, QImode,
33206 SET_DEST (pat),
33207 const0_rtx)));
33208
33209 return SUBREG_REG (target);
33210 }
33211
33212 /* Subroutines of ix86_expand_args_builtin to take care of round insns. */
33213
33214 static rtx
33215 ix86_expand_sse_round (const struct builtin_description *d, tree exp,
33216 rtx target)
33217 {
33218 rtx pat;
33219 tree arg0 = CALL_EXPR_ARG (exp, 0);
33220 rtx op1, op0 = expand_normal (arg0);
33221 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
33222 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
33223
33224 if (optimize || target == 0
33225 || GET_MODE (target) != tmode
33226 || !insn_data[d->icode].operand[0].predicate (target, tmode))
33227 target = gen_reg_rtx (tmode);
33228
33229 if (VECTOR_MODE_P (mode0))
33230 op0 = safe_vector_operand (op0, mode0);
33231
33232 if ((optimize && !register_operand (op0, mode0))
33233 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33234 op0 = copy_to_mode_reg (mode0, op0);
33235
33236 op1 = GEN_INT (d->comparison);
33237
33238 pat = GEN_FCN (d->icode) (target, op0, op1);
33239 if (! pat)
33240 return 0;
33241 emit_insn (pat);
33242 return target;
33243 }
33244
33245 static rtx
33246 ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d,
33247 tree exp, rtx target)
33248 {
33249 rtx pat;
33250 tree arg0 = CALL_EXPR_ARG (exp, 0);
33251 tree arg1 = CALL_EXPR_ARG (exp, 1);
33252 rtx op0 = expand_normal (arg0);
33253 rtx op1 = expand_normal (arg1);
33254 rtx op2;
33255 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
33256 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
33257 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
33258
33259 if (optimize || target == 0
33260 || GET_MODE (target) != tmode
33261 || !insn_data[d->icode].operand[0].predicate (target, tmode))
33262 target = gen_reg_rtx (tmode);
33263
33264 op0 = safe_vector_operand (op0, mode0);
33265 op1 = safe_vector_operand (op1, mode1);
33266
33267 if ((optimize && !register_operand (op0, mode0))
33268 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33269 op0 = copy_to_mode_reg (mode0, op0);
33270 if ((optimize && !register_operand (op1, mode1))
33271 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
33272 op1 = copy_to_mode_reg (mode1, op1);
33273
33274 op2 = GEN_INT (d->comparison);
33275
33276 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
33277 if (! pat)
33278 return 0;
33279 emit_insn (pat);
33280 return target;
33281 }
33282
33283 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
33284
33285 static rtx
33286 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
33287 rtx target)
33288 {
33289 rtx pat;
33290 tree arg0 = CALL_EXPR_ARG (exp, 0);
33291 tree arg1 = CALL_EXPR_ARG (exp, 1);
33292 rtx op0 = expand_normal (arg0);
33293 rtx op1 = expand_normal (arg1);
33294 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
33295 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
33296 enum rtx_code comparison = d->comparison;
33297
33298 if (VECTOR_MODE_P (mode0))
33299 op0 = safe_vector_operand (op0, mode0);
33300 if (VECTOR_MODE_P (mode1))
33301 op1 = safe_vector_operand (op1, mode1);
33302
33303 target = gen_reg_rtx (SImode);
33304 emit_move_insn (target, const0_rtx);
33305 target = gen_rtx_SUBREG (QImode, target, 0);
33306
33307 if ((optimize && !register_operand (op0, mode0))
33308 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33309 op0 = copy_to_mode_reg (mode0, op0);
33310 if ((optimize && !register_operand (op1, mode1))
33311 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
33312 op1 = copy_to_mode_reg (mode1, op1);
33313
33314 pat = GEN_FCN (d->icode) (op0, op1);
33315 if (! pat)
33316 return 0;
33317 emit_insn (pat);
33318 emit_insn (gen_rtx_SET (VOIDmode,
33319 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33320 gen_rtx_fmt_ee (comparison, QImode,
33321 SET_DEST (pat),
33322 const0_rtx)));
33323
33324 return SUBREG_REG (target);
33325 }
33326
33327 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
33328
33329 static rtx
33330 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
33331 tree exp, rtx target)
33332 {
33333 rtx pat;
33334 tree arg0 = CALL_EXPR_ARG (exp, 0);
33335 tree arg1 = CALL_EXPR_ARG (exp, 1);
33336 tree arg2 = CALL_EXPR_ARG (exp, 2);
33337 tree arg3 = CALL_EXPR_ARG (exp, 3);
33338 tree arg4 = CALL_EXPR_ARG (exp, 4);
33339 rtx scratch0, scratch1;
33340 rtx op0 = expand_normal (arg0);
33341 rtx op1 = expand_normal (arg1);
33342 rtx op2 = expand_normal (arg2);
33343 rtx op3 = expand_normal (arg3);
33344 rtx op4 = expand_normal (arg4);
33345 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
33346
33347 tmode0 = insn_data[d->icode].operand[0].mode;
33348 tmode1 = insn_data[d->icode].operand[1].mode;
33349 modev2 = insn_data[d->icode].operand[2].mode;
33350 modei3 = insn_data[d->icode].operand[3].mode;
33351 modev4 = insn_data[d->icode].operand[4].mode;
33352 modei5 = insn_data[d->icode].operand[5].mode;
33353 modeimm = insn_data[d->icode].operand[6].mode;
33354
33355 if (VECTOR_MODE_P (modev2))
33356 op0 = safe_vector_operand (op0, modev2);
33357 if (VECTOR_MODE_P (modev4))
33358 op2 = safe_vector_operand (op2, modev4);
33359
33360 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
33361 op0 = copy_to_mode_reg (modev2, op0);
33362 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
33363 op1 = copy_to_mode_reg (modei3, op1);
33364 if ((optimize && !register_operand (op2, modev4))
33365 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
33366 op2 = copy_to_mode_reg (modev4, op2);
33367 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
33368 op3 = copy_to_mode_reg (modei5, op3);
33369
33370 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
33371 {
33372 error ("the fifth argument must be an 8-bit immediate");
33373 return const0_rtx;
33374 }
33375
33376 if (d->code == IX86_BUILTIN_PCMPESTRI128)
33377 {
33378 if (optimize || !target
33379 || GET_MODE (target) != tmode0
33380 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
33381 target = gen_reg_rtx (tmode0);
33382
33383 scratch1 = gen_reg_rtx (tmode1);
33384
33385 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
33386 }
33387 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
33388 {
33389 if (optimize || !target
33390 || GET_MODE (target) != tmode1
33391 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
33392 target = gen_reg_rtx (tmode1);
33393
33394 scratch0 = gen_reg_rtx (tmode0);
33395
33396 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
33397 }
33398 else
33399 {
33400 gcc_assert (d->flag);
33401
33402 scratch0 = gen_reg_rtx (tmode0);
33403 scratch1 = gen_reg_rtx (tmode1);
33404
33405 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
33406 }
33407
33408 if (! pat)
33409 return 0;
33410
33411 emit_insn (pat);
33412
33413 if (d->flag)
33414 {
33415 target = gen_reg_rtx (SImode);
33416 emit_move_insn (target, const0_rtx);
33417 target = gen_rtx_SUBREG (QImode, target, 0);
33418
33419 emit_insn
33420 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33421 gen_rtx_fmt_ee (EQ, QImode,
33422 gen_rtx_REG ((enum machine_mode) d->flag,
33423 FLAGS_REG),
33424 const0_rtx)));
33425 return SUBREG_REG (target);
33426 }
33427 else
33428 return target;
33429 }
33430
33431
33432 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
33433
33434 static rtx
33435 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
33436 tree exp, rtx target)
33437 {
33438 rtx pat;
33439 tree arg0 = CALL_EXPR_ARG (exp, 0);
33440 tree arg1 = CALL_EXPR_ARG (exp, 1);
33441 tree arg2 = CALL_EXPR_ARG (exp, 2);
33442 rtx scratch0, scratch1;
33443 rtx op0 = expand_normal (arg0);
33444 rtx op1 = expand_normal (arg1);
33445 rtx op2 = expand_normal (arg2);
33446 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
33447
33448 tmode0 = insn_data[d->icode].operand[0].mode;
33449 tmode1 = insn_data[d->icode].operand[1].mode;
33450 modev2 = insn_data[d->icode].operand[2].mode;
33451 modev3 = insn_data[d->icode].operand[3].mode;
33452 modeimm = insn_data[d->icode].operand[4].mode;
33453
33454 if (VECTOR_MODE_P (modev2))
33455 op0 = safe_vector_operand (op0, modev2);
33456 if (VECTOR_MODE_P (modev3))
33457 op1 = safe_vector_operand (op1, modev3);
33458
33459 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
33460 op0 = copy_to_mode_reg (modev2, op0);
33461 if ((optimize && !register_operand (op1, modev3))
33462 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
33463 op1 = copy_to_mode_reg (modev3, op1);
33464
33465 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
33466 {
33467 error ("the third argument must be an 8-bit immediate");
33468 return const0_rtx;
33469 }
33470
33471 if (d->code == IX86_BUILTIN_PCMPISTRI128)
33472 {
33473 if (optimize || !target
33474 || GET_MODE (target) != tmode0
33475 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
33476 target = gen_reg_rtx (tmode0);
33477
33478 scratch1 = gen_reg_rtx (tmode1);
33479
33480 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
33481 }
33482 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
33483 {
33484 if (optimize || !target
33485 || GET_MODE (target) != tmode1
33486 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
33487 target = gen_reg_rtx (tmode1);
33488
33489 scratch0 = gen_reg_rtx (tmode0);
33490
33491 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
33492 }
33493 else
33494 {
33495 gcc_assert (d->flag);
33496
33497 scratch0 = gen_reg_rtx (tmode0);
33498 scratch1 = gen_reg_rtx (tmode1);
33499
33500 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
33501 }
33502
33503 if (! pat)
33504 return 0;
33505
33506 emit_insn (pat);
33507
33508 if (d->flag)
33509 {
33510 target = gen_reg_rtx (SImode);
33511 emit_move_insn (target, const0_rtx);
33512 target = gen_rtx_SUBREG (QImode, target, 0);
33513
33514 emit_insn
33515 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33516 gen_rtx_fmt_ee (EQ, QImode,
33517 gen_rtx_REG ((enum machine_mode) d->flag,
33518 FLAGS_REG),
33519 const0_rtx)));
33520 return SUBREG_REG (target);
33521 }
33522 else
33523 return target;
33524 }
33525
33526 /* Subroutine of ix86_expand_builtin to take care of insns with
33527 variable number of operands. */
33528
33529 static rtx
33530 ix86_expand_args_builtin (const struct builtin_description *d,
33531 tree exp, rtx target)
33532 {
33533 rtx pat, real_target;
33534 unsigned int i, nargs;
33535 unsigned int nargs_constant = 0;
33536 unsigned int mask_pos = 0;
33537 int num_memory = 0;
33538 struct
33539 {
33540 rtx op;
33541 enum machine_mode mode;
33542 } args[6];
33543 bool last_arg_count = false;
33544 enum insn_code icode = d->icode;
33545 const struct insn_data_d *insn_p = &insn_data[icode];
33546 enum machine_mode tmode = insn_p->operand[0].mode;
33547 enum machine_mode rmode = VOIDmode;
33548 bool swap = false;
33549 enum rtx_code comparison = d->comparison;
33550
33551 switch ((enum ix86_builtin_func_type) d->flag)
33552 {
33553 case V2DF_FTYPE_V2DF_ROUND:
33554 case V4DF_FTYPE_V4DF_ROUND:
33555 case V4SF_FTYPE_V4SF_ROUND:
33556 case V8SF_FTYPE_V8SF_ROUND:
33557 case V4SI_FTYPE_V4SF_ROUND:
33558 case V8SI_FTYPE_V8SF_ROUND:
33559 return ix86_expand_sse_round (d, exp, target);
33560 case V4SI_FTYPE_V2DF_V2DF_ROUND:
33561 case V8SI_FTYPE_V4DF_V4DF_ROUND:
33562 case V16SI_FTYPE_V8DF_V8DF_ROUND:
33563 return ix86_expand_sse_round_vec_pack_sfix (d, exp, target);
33564 case INT_FTYPE_V8SF_V8SF_PTEST:
33565 case INT_FTYPE_V4DI_V4DI_PTEST:
33566 case INT_FTYPE_V4DF_V4DF_PTEST:
33567 case INT_FTYPE_V4SF_V4SF_PTEST:
33568 case INT_FTYPE_V2DI_V2DI_PTEST:
33569 case INT_FTYPE_V2DF_V2DF_PTEST:
33570 return ix86_expand_sse_ptest (d, exp, target);
33571 case FLOAT128_FTYPE_FLOAT128:
33572 case FLOAT_FTYPE_FLOAT:
33573 case INT_FTYPE_INT:
33574 case UINT64_FTYPE_INT:
33575 case UINT16_FTYPE_UINT16:
33576 case INT64_FTYPE_INT64:
33577 case INT64_FTYPE_V4SF:
33578 case INT64_FTYPE_V2DF:
33579 case INT_FTYPE_V16QI:
33580 case INT_FTYPE_V8QI:
33581 case INT_FTYPE_V8SF:
33582 case INT_FTYPE_V4DF:
33583 case INT_FTYPE_V4SF:
33584 case INT_FTYPE_V2DF:
33585 case INT_FTYPE_V32QI:
33586 case V16QI_FTYPE_V16QI:
33587 case V8SI_FTYPE_V8SF:
33588 case V8SI_FTYPE_V4SI:
33589 case V8HI_FTYPE_V8HI:
33590 case V8HI_FTYPE_V16QI:
33591 case V8QI_FTYPE_V8QI:
33592 case V8SF_FTYPE_V8SF:
33593 case V8SF_FTYPE_V8SI:
33594 case V8SF_FTYPE_V4SF:
33595 case V8SF_FTYPE_V8HI:
33596 case V4SI_FTYPE_V4SI:
33597 case V4SI_FTYPE_V16QI:
33598 case V4SI_FTYPE_V4SF:
33599 case V4SI_FTYPE_V8SI:
33600 case V4SI_FTYPE_V8HI:
33601 case V4SI_FTYPE_V4DF:
33602 case V4SI_FTYPE_V2DF:
33603 case V4HI_FTYPE_V4HI:
33604 case V4DF_FTYPE_V4DF:
33605 case V4DF_FTYPE_V4SI:
33606 case V4DF_FTYPE_V4SF:
33607 case V4DF_FTYPE_V2DF:
33608 case V4SF_FTYPE_V4SF:
33609 case V4SF_FTYPE_V4SI:
33610 case V4SF_FTYPE_V8SF:
33611 case V4SF_FTYPE_V4DF:
33612 case V4SF_FTYPE_V8HI:
33613 case V4SF_FTYPE_V2DF:
33614 case V2DI_FTYPE_V2DI:
33615 case V2DI_FTYPE_V16QI:
33616 case V2DI_FTYPE_V8HI:
33617 case V2DI_FTYPE_V4SI:
33618 case V2DF_FTYPE_V2DF:
33619 case V2DF_FTYPE_V4SI:
33620 case V2DF_FTYPE_V4DF:
33621 case V2DF_FTYPE_V4SF:
33622 case V2DF_FTYPE_V2SI:
33623 case V2SI_FTYPE_V2SI:
33624 case V2SI_FTYPE_V4SF:
33625 case V2SI_FTYPE_V2SF:
33626 case V2SI_FTYPE_V2DF:
33627 case V2SF_FTYPE_V2SF:
33628 case V2SF_FTYPE_V2SI:
33629 case V32QI_FTYPE_V32QI:
33630 case V32QI_FTYPE_V16QI:
33631 case V16HI_FTYPE_V16HI:
33632 case V16HI_FTYPE_V8HI:
33633 case V8SI_FTYPE_V8SI:
33634 case V16HI_FTYPE_V16QI:
33635 case V8SI_FTYPE_V16QI:
33636 case V4DI_FTYPE_V16QI:
33637 case V8SI_FTYPE_V8HI:
33638 case V4DI_FTYPE_V8HI:
33639 case V4DI_FTYPE_V4SI:
33640 case V4DI_FTYPE_V2DI:
33641 case HI_FTYPE_HI:
33642 case UINT_FTYPE_V2DF:
33643 case UINT_FTYPE_V4SF:
33644 case UINT64_FTYPE_V2DF:
33645 case UINT64_FTYPE_V4SF:
33646 case V16QI_FTYPE_V8DI:
33647 case V16HI_FTYPE_V16SI:
33648 case V16SI_FTYPE_HI:
33649 case V16SI_FTYPE_V16SI:
33650 case V16SI_FTYPE_INT:
33651 case V16SF_FTYPE_FLOAT:
33652 case V16SF_FTYPE_V4SF:
33653 case V16SF_FTYPE_V16SF:
33654 case V8HI_FTYPE_V8DI:
33655 case V8UHI_FTYPE_V8UHI:
33656 case V8SI_FTYPE_V8DI:
33657 case V8USI_FTYPE_V8USI:
33658 case V8SF_FTYPE_V8DF:
33659 case V8DI_FTYPE_QI:
33660 case V8DI_FTYPE_INT64:
33661 case V8DI_FTYPE_V4DI:
33662 case V8DI_FTYPE_V8DI:
33663 case V8DF_FTYPE_DOUBLE:
33664 case V8DF_FTYPE_V4DF:
33665 case V8DF_FTYPE_V8DF:
33666 case V8DF_FTYPE_V8SI:
33667 nargs = 1;
33668 break;
33669 case V4SF_FTYPE_V4SF_VEC_MERGE:
33670 case V2DF_FTYPE_V2DF_VEC_MERGE:
33671 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
33672 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
33673 case V16QI_FTYPE_V16QI_V16QI:
33674 case V16QI_FTYPE_V8HI_V8HI:
33675 case V16SI_FTYPE_V16SI_V16SI:
33676 case V16SF_FTYPE_V16SF_V16SF:
33677 case V16SF_FTYPE_V16SF_V16SI:
33678 case V8QI_FTYPE_V8QI_V8QI:
33679 case V8QI_FTYPE_V4HI_V4HI:
33680 case V8HI_FTYPE_V8HI_V8HI:
33681 case V8HI_FTYPE_V16QI_V16QI:
33682 case V8HI_FTYPE_V4SI_V4SI:
33683 case V8SF_FTYPE_V8SF_V8SF:
33684 case V8SF_FTYPE_V8SF_V8SI:
33685 case V8DI_FTYPE_V8DI_V8DI:
33686 case V8DF_FTYPE_V8DF_V8DF:
33687 case V8DF_FTYPE_V8DF_V8DI:
33688 case V4SI_FTYPE_V4SI_V4SI:
33689 case V4SI_FTYPE_V8HI_V8HI:
33690 case V4SI_FTYPE_V4SF_V4SF:
33691 case V4SI_FTYPE_V2DF_V2DF:
33692 case V4HI_FTYPE_V4HI_V4HI:
33693 case V4HI_FTYPE_V8QI_V8QI:
33694 case V4HI_FTYPE_V2SI_V2SI:
33695 case V4DF_FTYPE_V4DF_V4DF:
33696 case V4DF_FTYPE_V4DF_V4DI:
33697 case V4SF_FTYPE_V4SF_V4SF:
33698 case V4SF_FTYPE_V4SF_V4SI:
33699 case V4SF_FTYPE_V4SF_V2SI:
33700 case V4SF_FTYPE_V4SF_V2DF:
33701 case V4SF_FTYPE_V4SF_UINT:
33702 case V4SF_FTYPE_V4SF_UINT64:
33703 case V4SF_FTYPE_V4SF_DI:
33704 case V4SF_FTYPE_V4SF_SI:
33705 case V2DI_FTYPE_V2DI_V2DI:
33706 case V2DI_FTYPE_V16QI_V16QI:
33707 case V2DI_FTYPE_V4SI_V4SI:
33708 case V2UDI_FTYPE_V4USI_V4USI:
33709 case V2DI_FTYPE_V2DI_V16QI:
33710 case V2DI_FTYPE_V2DF_V2DF:
33711 case V2SI_FTYPE_V2SI_V2SI:
33712 case V2SI_FTYPE_V4HI_V4HI:
33713 case V2SI_FTYPE_V2SF_V2SF:
33714 case V2DF_FTYPE_V2DF_V2DF:
33715 case V2DF_FTYPE_V2DF_V4SF:
33716 case V2DF_FTYPE_V2DF_V2DI:
33717 case V2DF_FTYPE_V2DF_DI:
33718 case V2DF_FTYPE_V2DF_SI:
33719 case V2DF_FTYPE_V2DF_UINT:
33720 case V2DF_FTYPE_V2DF_UINT64:
33721 case V2SF_FTYPE_V2SF_V2SF:
33722 case V1DI_FTYPE_V1DI_V1DI:
33723 case V1DI_FTYPE_V8QI_V8QI:
33724 case V1DI_FTYPE_V2SI_V2SI:
33725 case V32QI_FTYPE_V16HI_V16HI:
33726 case V16HI_FTYPE_V8SI_V8SI:
33727 case V32QI_FTYPE_V32QI_V32QI:
33728 case V16HI_FTYPE_V32QI_V32QI:
33729 case V16HI_FTYPE_V16HI_V16HI:
33730 case V8SI_FTYPE_V4DF_V4DF:
33731 case V8SI_FTYPE_V8SI_V8SI:
33732 case V8SI_FTYPE_V16HI_V16HI:
33733 case V4DI_FTYPE_V4DI_V4DI:
33734 case V4DI_FTYPE_V8SI_V8SI:
33735 case V4UDI_FTYPE_V8USI_V8USI:
33736 case QI_FTYPE_V8DI_V8DI:
33737 case HI_FTYPE_V16SI_V16SI:
33738 if (comparison == UNKNOWN)
33739 return ix86_expand_binop_builtin (icode, exp, target);
33740 nargs = 2;
33741 break;
33742 case V4SF_FTYPE_V4SF_V4SF_SWAP:
33743 case V2DF_FTYPE_V2DF_V2DF_SWAP:
33744 gcc_assert (comparison != UNKNOWN);
33745 nargs = 2;
33746 swap = true;
33747 break;
33748 case V16HI_FTYPE_V16HI_V8HI_COUNT:
33749 case V16HI_FTYPE_V16HI_SI_COUNT:
33750 case V8SI_FTYPE_V8SI_V4SI_COUNT:
33751 case V8SI_FTYPE_V8SI_SI_COUNT:
33752 case V4DI_FTYPE_V4DI_V2DI_COUNT:
33753 case V4DI_FTYPE_V4DI_INT_COUNT:
33754 case V8HI_FTYPE_V8HI_V8HI_COUNT:
33755 case V8HI_FTYPE_V8HI_SI_COUNT:
33756 case V4SI_FTYPE_V4SI_V4SI_COUNT:
33757 case V4SI_FTYPE_V4SI_SI_COUNT:
33758 case V4HI_FTYPE_V4HI_V4HI_COUNT:
33759 case V4HI_FTYPE_V4HI_SI_COUNT:
33760 case V2DI_FTYPE_V2DI_V2DI_COUNT:
33761 case V2DI_FTYPE_V2DI_SI_COUNT:
33762 case V2SI_FTYPE_V2SI_V2SI_COUNT:
33763 case V2SI_FTYPE_V2SI_SI_COUNT:
33764 case V1DI_FTYPE_V1DI_V1DI_COUNT:
33765 case V1DI_FTYPE_V1DI_SI_COUNT:
33766 nargs = 2;
33767 last_arg_count = true;
33768 break;
33769 case UINT64_FTYPE_UINT64_UINT64:
33770 case UINT_FTYPE_UINT_UINT:
33771 case UINT_FTYPE_UINT_USHORT:
33772 case UINT_FTYPE_UINT_UCHAR:
33773 case UINT16_FTYPE_UINT16_INT:
33774 case UINT8_FTYPE_UINT8_INT:
33775 case HI_FTYPE_HI_HI:
33776 case V16SI_FTYPE_V8DF_V8DF:
33777 nargs = 2;
33778 break;
33779 case V2DI_FTYPE_V2DI_INT_CONVERT:
33780 nargs = 2;
33781 rmode = V1TImode;
33782 nargs_constant = 1;
33783 break;
33784 case V4DI_FTYPE_V4DI_INT_CONVERT:
33785 nargs = 2;
33786 rmode = V2TImode;
33787 nargs_constant = 1;
33788 break;
33789 case V8HI_FTYPE_V8HI_INT:
33790 case V8HI_FTYPE_V8SF_INT:
33791 case V16HI_FTYPE_V16SF_INT:
33792 case V8HI_FTYPE_V4SF_INT:
33793 case V8SF_FTYPE_V8SF_INT:
33794 case V4SF_FTYPE_V16SF_INT:
33795 case V16SF_FTYPE_V16SF_INT:
33796 case V4SI_FTYPE_V4SI_INT:
33797 case V4SI_FTYPE_V8SI_INT:
33798 case V4HI_FTYPE_V4HI_INT:
33799 case V4DF_FTYPE_V4DF_INT:
33800 case V4DF_FTYPE_V8DF_INT:
33801 case V4SF_FTYPE_V4SF_INT:
33802 case V4SF_FTYPE_V8SF_INT:
33803 case V2DI_FTYPE_V2DI_INT:
33804 case V2DF_FTYPE_V2DF_INT:
33805 case V2DF_FTYPE_V4DF_INT:
33806 case V16HI_FTYPE_V16HI_INT:
33807 case V8SI_FTYPE_V8SI_INT:
33808 case V16SI_FTYPE_V16SI_INT:
33809 case V4SI_FTYPE_V16SI_INT:
33810 case V4DI_FTYPE_V4DI_INT:
33811 case V2DI_FTYPE_V4DI_INT:
33812 case V4DI_FTYPE_V8DI_INT:
33813 case HI_FTYPE_HI_INT:
33814 nargs = 2;
33815 nargs_constant = 1;
33816 break;
33817 case V16QI_FTYPE_V16QI_V16QI_V16QI:
33818 case V8SF_FTYPE_V8SF_V8SF_V8SF:
33819 case V4DF_FTYPE_V4DF_V4DF_V4DF:
33820 case V4SF_FTYPE_V4SF_V4SF_V4SF:
33821 case V2DF_FTYPE_V2DF_V2DF_V2DF:
33822 case V32QI_FTYPE_V32QI_V32QI_V32QI:
33823 case HI_FTYPE_V16SI_V16SI_HI:
33824 case QI_FTYPE_V8DI_V8DI_QI:
33825 case V16HI_FTYPE_V16SI_V16HI_HI:
33826 case V16QI_FTYPE_V16SI_V16QI_HI:
33827 case V16QI_FTYPE_V8DI_V16QI_QI:
33828 case V16SF_FTYPE_V16SF_V16SF_HI:
33829 case V16SF_FTYPE_V16SF_V16SF_V16SF:
33830 case V16SF_FTYPE_V16SF_V16SI_V16SF:
33831 case V16SF_FTYPE_V16SI_V16SF_HI:
33832 case V16SF_FTYPE_V16SI_V16SF_V16SF:
33833 case V16SF_FTYPE_V4SF_V16SF_HI:
33834 case V16SI_FTYPE_SI_V16SI_HI:
33835 case V16SI_FTYPE_V16HI_V16SI_HI:
33836 case V16SI_FTYPE_V16QI_V16SI_HI:
33837 case V16SI_FTYPE_V16SF_V16SI_HI:
33838 case V16SI_FTYPE_V16SI_V16SI_HI:
33839 case V16SI_FTYPE_V16SI_V16SI_V16SI:
33840 case V16SI_FTYPE_V4SI_V16SI_HI:
33841 case V2DI_FTYPE_V2DI_V2DI_V2DI:
33842 case V4DI_FTYPE_V4DI_V4DI_V4DI:
33843 case V8DF_FTYPE_V2DF_V8DF_QI:
33844 case V8DF_FTYPE_V4DF_V8DF_QI:
33845 case V8DF_FTYPE_V8DF_V8DF_QI:
33846 case V8DF_FTYPE_V8DF_V8DF_V8DF:
33847 case V8DF_FTYPE_V8DF_V8DI_V8DF:
33848 case V8DF_FTYPE_V8DI_V8DF_V8DF:
33849 case V8DF_FTYPE_V8SF_V8DF_QI:
33850 case V8DF_FTYPE_V8SI_V8DF_QI:
33851 case V8DI_FTYPE_DI_V8DI_QI:
33852 case V8DI_FTYPE_V16QI_V8DI_QI:
33853 case V8DI_FTYPE_V2DI_V8DI_QI:
33854 case V8DI_FTYPE_V4DI_V8DI_QI:
33855 case V8DI_FTYPE_V8DI_V8DI_QI:
33856 case V8DI_FTYPE_V8DI_V8DI_V8DI:
33857 case V8DI_FTYPE_V8HI_V8DI_QI:
33858 case V8DI_FTYPE_V8SI_V8DI_QI:
33859 case V8HI_FTYPE_V8DI_V8HI_QI:
33860 case V8SF_FTYPE_V8DF_V8SF_QI:
33861 case V8SI_FTYPE_V8DF_V8SI_QI:
33862 case V8SI_FTYPE_V8DI_V8SI_QI:
33863 case V4SI_FTYPE_V4SI_V4SI_V4SI:
33864 nargs = 3;
33865 break;
33866 case V32QI_FTYPE_V32QI_V32QI_INT:
33867 case V16HI_FTYPE_V16HI_V16HI_INT:
33868 case V16QI_FTYPE_V16QI_V16QI_INT:
33869 case V4DI_FTYPE_V4DI_V4DI_INT:
33870 case V8HI_FTYPE_V8HI_V8HI_INT:
33871 case V8SI_FTYPE_V8SI_V8SI_INT:
33872 case V8SI_FTYPE_V8SI_V4SI_INT:
33873 case V8SF_FTYPE_V8SF_V8SF_INT:
33874 case V8SF_FTYPE_V8SF_V4SF_INT:
33875 case V4SI_FTYPE_V4SI_V4SI_INT:
33876 case V4DF_FTYPE_V4DF_V4DF_INT:
33877 case V16SF_FTYPE_V16SF_V16SF_INT:
33878 case V16SF_FTYPE_V16SF_V4SF_INT:
33879 case V16SI_FTYPE_V16SI_V4SI_INT:
33880 case V4DF_FTYPE_V4DF_V2DF_INT:
33881 case V4SF_FTYPE_V4SF_V4SF_INT:
33882 case V2DI_FTYPE_V2DI_V2DI_INT:
33883 case V4DI_FTYPE_V4DI_V2DI_INT:
33884 case V2DF_FTYPE_V2DF_V2DF_INT:
33885 case QI_FTYPE_V8DI_V8DI_INT:
33886 case QI_FTYPE_V8DF_V8DF_INT:
33887 case QI_FTYPE_V2DF_V2DF_INT:
33888 case QI_FTYPE_V4SF_V4SF_INT:
33889 case HI_FTYPE_V16SI_V16SI_INT:
33890 case HI_FTYPE_V16SF_V16SF_INT:
33891 nargs = 3;
33892 nargs_constant = 1;
33893 break;
33894 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
33895 nargs = 3;
33896 rmode = V4DImode;
33897 nargs_constant = 1;
33898 break;
33899 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
33900 nargs = 3;
33901 rmode = V2DImode;
33902 nargs_constant = 1;
33903 break;
33904 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
33905 nargs = 3;
33906 rmode = DImode;
33907 nargs_constant = 1;
33908 break;
33909 case V2DI_FTYPE_V2DI_UINT_UINT:
33910 nargs = 3;
33911 nargs_constant = 2;
33912 break;
33913 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI:
33914 case V16SF_FTYPE_V16SF_V16SI_V16SF_HI:
33915 case V16SF_FTYPE_V16SI_V16SF_V16SF_HI:
33916 case V16SI_FTYPE_V16SI_V16SI_V16SI_HI:
33917 case V16SI_FTYPE_V16SI_V4SI_V16SI_HI:
33918 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI:
33919 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI:
33920 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI:
33921 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI:
33922 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI:
33923 case V8DF_FTYPE_V8DF_V8DI_V8DF_QI:
33924 case V8DF_FTYPE_V8DI_V8DF_V8DF_QI:
33925 case V8DI_FTYPE_V16SI_V16SI_V8DI_QI:
33926 case V8DI_FTYPE_V8DI_SI_V8DI_V8DI:
33927 case V8DI_FTYPE_V8DI_V2DI_V8DI_QI:
33928 case V8DI_FTYPE_V8DI_V8DI_V8DI_QI:
33929 nargs = 4;
33930 break;
33931 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
33932 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
33933 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
33934 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
33935 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT:
33936 nargs = 4;
33937 nargs_constant = 1;
33938 break;
33939 case QI_FTYPE_V2DF_V2DF_INT_QI:
33940 case QI_FTYPE_V4SF_V4SF_INT_QI:
33941 nargs = 4;
33942 mask_pos = 1;
33943 nargs_constant = 1;
33944 break;
33945 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
33946 nargs = 4;
33947 nargs_constant = 2;
33948 break;
33949 case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED:
33950 case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG:
33951 nargs = 4;
33952 break;
33953 case QI_FTYPE_V8DI_V8DI_INT_QI:
33954 case HI_FTYPE_V16SI_V16SI_INT_HI:
33955 case QI_FTYPE_V8DF_V8DF_INT_QI:
33956 case HI_FTYPE_V16SF_V16SF_INT_HI:
33957 mask_pos = 1;
33958 nargs = 4;
33959 nargs_constant = 1;
33960 break;
33961 case V8DF_FTYPE_V8DF_INT_V8DF_QI:
33962 case V16SF_FTYPE_V16SF_INT_V16SF_HI:
33963 case V16HI_FTYPE_V16SF_INT_V16HI_HI:
33964 case V16SI_FTYPE_V16SI_INT_V16SI_HI:
33965 case V4SI_FTYPE_V16SI_INT_V4SI_QI:
33966 case V4DI_FTYPE_V8DI_INT_V4DI_QI:
33967 case V4DF_FTYPE_V8DF_INT_V4DF_QI:
33968 case V4SF_FTYPE_V16SF_INT_V4SF_QI:
33969 case V8DI_FTYPE_V8DI_INT_V8DI_QI:
33970 nargs = 4;
33971 mask_pos = 2;
33972 nargs_constant = 1;
33973 break;
33974 case V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI:
33975 case V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI:
33976 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI:
33977 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI:
33978 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI:
33979 case V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI:
33980 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI:
33981 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI:
33982 case V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI:
33983 case V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI:
33984 nargs = 5;
33985 mask_pos = 2;
33986 nargs_constant = 1;
33987 break;
33988 case V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI:
33989 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI:
33990 case V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI:
33991 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI:
33992 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI:
33993 nargs = 5;
33994 mask_pos = 1;
33995 nargs_constant = 1;
33996 break;
33997
33998 default:
33999 gcc_unreachable ();
34000 }
34001
34002 gcc_assert (nargs <= ARRAY_SIZE (args));
34003
34004 if (comparison != UNKNOWN)
34005 {
34006 gcc_assert (nargs == 2);
34007 return ix86_expand_sse_compare (d, exp, target, swap);
34008 }
34009
34010 if (rmode == VOIDmode || rmode == tmode)
34011 {
34012 if (optimize
34013 || target == 0
34014 || GET_MODE (target) != tmode
34015 || !insn_p->operand[0].predicate (target, tmode))
34016 target = gen_reg_rtx (tmode);
34017 real_target = target;
34018 }
34019 else
34020 {
34021 real_target = gen_reg_rtx (tmode);
34022 target = simplify_gen_subreg (rmode, real_target, tmode, 0);
34023 }
34024
34025 for (i = 0; i < nargs; i++)
34026 {
34027 tree arg = CALL_EXPR_ARG (exp, i);
34028 rtx op = expand_normal (arg);
34029 enum machine_mode mode = insn_p->operand[i + 1].mode;
34030 bool match = insn_p->operand[i + 1].predicate (op, mode);
34031
34032 if (last_arg_count && (i + 1) == nargs)
34033 {
34034 	  /* SIMD shift insns take either an 8-bit immediate or a
34035 	     register as count.  But builtin functions take int as
34036 	     count.  If count doesn't match, we put it in a register.  */
34037 if (!match)
34038 {
34039 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
34040 if (!insn_p->operand[i + 1].predicate (op, mode))
34041 op = copy_to_reg (op);
34042 }
34043 }
34044 else if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
34045 (!mask_pos && (nargs - i) <= nargs_constant))
34046 {
34047 if (!match)
34048 switch (icode)
34049 {
34050 case CODE_FOR_avx2_inserti128:
34051 case CODE_FOR_avx2_extracti128:
34052 error ("the last argument must be an 1-bit immediate");
34053 return const0_rtx;
34054
34055 case CODE_FOR_avx512f_cmpv8di3_mask:
34056 case CODE_FOR_avx512f_cmpv16si3_mask:
34057 case CODE_FOR_avx512f_ucmpv8di3_mask:
34058 case CODE_FOR_avx512f_ucmpv16si3_mask:
34059 error ("the last argument must be a 3-bit immediate");
34060 return const0_rtx;
34061
34062 case CODE_FOR_sse4_1_roundsd:
34063 case CODE_FOR_sse4_1_roundss:
34064
34065 case CODE_FOR_sse4_1_roundpd:
34066 case CODE_FOR_sse4_1_roundps:
34067 case CODE_FOR_avx_roundpd256:
34068 case CODE_FOR_avx_roundps256:
34069
34070 case CODE_FOR_sse4_1_roundpd_vec_pack_sfix:
34071 case CODE_FOR_sse4_1_roundps_sfix:
34072 case CODE_FOR_avx_roundpd_vec_pack_sfix256:
34073 case CODE_FOR_avx_roundps_sfix256:
34074
34075 case CODE_FOR_sse4_1_blendps:
34076 case CODE_FOR_avx_blendpd256:
34077 case CODE_FOR_avx_vpermilv4df:
34078 case CODE_FOR_avx512f_getmantv8df_mask:
34079 case CODE_FOR_avx512f_getmantv16sf_mask:
34080 error ("the last argument must be a 4-bit immediate");
34081 return const0_rtx;
34082
34083 case CODE_FOR_sha1rnds4:
34084 case CODE_FOR_sse4_1_blendpd:
34085 case CODE_FOR_avx_vpermilv2df:
34086 case CODE_FOR_xop_vpermil2v2df3:
34087 case CODE_FOR_xop_vpermil2v4sf3:
34088 case CODE_FOR_xop_vpermil2v4df3:
34089 case CODE_FOR_xop_vpermil2v8sf3:
34090 case CODE_FOR_avx512f_vinsertf32x4_mask:
34091 case CODE_FOR_avx512f_vinserti32x4_mask:
34092 case CODE_FOR_avx512f_vextractf32x4_mask:
34093 case CODE_FOR_avx512f_vextracti32x4_mask:
34094 error ("the last argument must be a 2-bit immediate");
34095 return const0_rtx;
34096
34097 case CODE_FOR_avx_vextractf128v4df:
34098 case CODE_FOR_avx_vextractf128v8sf:
34099 case CODE_FOR_avx_vextractf128v8si:
34100 case CODE_FOR_avx_vinsertf128v4df:
34101 case CODE_FOR_avx_vinsertf128v8sf:
34102 case CODE_FOR_avx_vinsertf128v8si:
34103 case CODE_FOR_avx512f_vinsertf64x4_mask:
34104 case CODE_FOR_avx512f_vinserti64x4_mask:
34105 case CODE_FOR_avx512f_vextractf64x4_mask:
34106 case CODE_FOR_avx512f_vextracti64x4_mask:
34107 error ("the last argument must be a 1-bit immediate");
34108 return const0_rtx;
34109
34110 case CODE_FOR_avx_vmcmpv2df3:
34111 case CODE_FOR_avx_vmcmpv4sf3:
34112 case CODE_FOR_avx_cmpv2df3:
34113 case CODE_FOR_avx_cmpv4sf3:
34114 case CODE_FOR_avx_cmpv4df3:
34115 case CODE_FOR_avx_cmpv8sf3:
34116 case CODE_FOR_avx512f_cmpv8df3_mask:
34117 case CODE_FOR_avx512f_cmpv16sf3_mask:
34118 case CODE_FOR_avx512f_vmcmpv2df3_mask:
34119 case CODE_FOR_avx512f_vmcmpv4sf3_mask:
34120 error ("the last argument must be a 5-bit immediate");
34121 return const0_rtx;
34122
34123 default:
34124 switch (nargs_constant)
34125 {
34126 case 2:
34127 if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
34128 (!mask_pos && (nargs - i) == nargs_constant))
34129 {
34130 error ("the next to last argument must be an 8-bit immediate");
34131 break;
34132 }
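/* FALLTHRU */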
34133 case 1:
34134 error ("the last argument must be an 8-bit immediate");
34135 break;
34136 default:
34137 gcc_unreachable ();
34138 }
34139 return const0_rtx;
34140 }
34141 }
34142 else
34143 {
34144 if (VECTOR_MODE_P (mode))
34145 op = safe_vector_operand (op, mode);
34146
34147 /* If we aren't optimizing, only allow one memory operand to
34148 be generated. */
34149 if (memory_operand (op, mode))
34150 num_memory++;
34151
34152 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
34153 {
34154 if (optimize || !match || num_memory > 1)
34155 op = copy_to_mode_reg (mode, op);
34156 }
34157 else
34158 {
34159 op = copy_to_reg (op);
34160 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
34161 }
34162 }
34163
34164 args[i].op = op;
34165 args[i].mode = mode;
34166 }
34167
34168 switch (nargs)
34169 {
34170 case 1:
34171 pat = GEN_FCN (icode) (real_target, args[0].op);
34172 break;
34173 case 2:
34174 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
34175 break;
34176 case 3:
34177 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34178 args[2].op);
34179 break;
34180 case 4:
34181 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34182 args[2].op, args[3].op);
34183 break;
34184 case 5:
34185 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34186 args[2].op, args[3].op, args[4].op);
break;
34187 case 6:
34188 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34189 args[2].op, args[3].op, args[4].op,
34190 args[5].op);
34191 break;
34192 default:
34193 gcc_unreachable ();
34194 }
34195
34196 if (! pat)
34197 return 0;
34198
34199 emit_insn (pat);
34200 return target;
34201 }
34202
34203 /* Transform a pattern of the following layout:
34204 (parallel [
34205 (set (A B))
34206 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)
34207 ])
34208 into:
34209 (set (A B))
34210
34211 Or:
34212 (parallel [ A B
34213 ...
34214 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)
34215 ...
34216 ])
34217 into:
34218 (parallel [ A B ... ]) */
34219
34220 static rtx
34221 ix86_erase_embedded_rounding (rtx pat)
34222 {
34223 if (GET_CODE (pat) == INSN)
34224 pat = PATTERN (pat);
34225
34226 gcc_assert (GET_CODE (pat) == PARALLEL);
34227
34228 if (XVECLEN (pat, 0) == 2)
34229 {
34230 rtx p0 = XVECEXP (pat, 0, 0);
34231 rtx p1 = XVECEXP (pat, 0, 1);
34232
34233 gcc_assert (GET_CODE (p0) == SET
34234 && GET_CODE (p1) == UNSPEC
34235 && XINT (p1, 1) == UNSPEC_EMBEDDED_ROUNDING);
34236
34237 return p0;
34238 }
34239 else
34240 {
34241 rtx *res = XALLOCAVEC (rtx, XVECLEN (pat, 0));
34242 int i = 0;
34243 int j = 0;
34244
34245 for (; i < XVECLEN (pat, 0); ++i)
34246 {
34247 rtx elem = XVECEXP (pat, 0, i);
34248 if (GET_CODE (elem) != UNSPEC
34249 || XINT (elem, 1) != UNSPEC_EMBEDDED_ROUNDING)
34250 res [j++] = elem;
34251 }
34252
34253 /* No more than 1 occurrence was removed. */
34254 gcc_assert (j >= XVECLEN (pat, 0) - 1);
34255
34256 return gen_rtx_PARALLEL (GET_MODE (pat), gen_rtvec_v (j, res));
34257 }
34258 }
34259
34260 /* Subroutine of ix86_expand_round_builtin to take care of comi insns
34261 with rounding. */
34262 static rtx
34263 ix86_expand_sse_comi_round (const struct builtin_description *d,
34264 tree exp, rtx target)
34265 {
34266 rtx pat, set_dst;
34267 tree arg0 = CALL_EXPR_ARG (exp, 0);
34268 tree arg1 = CALL_EXPR_ARG (exp, 1);
34269 tree arg2 = CALL_EXPR_ARG (exp, 2);
34270 tree arg3 = CALL_EXPR_ARG (exp, 3);
34271 rtx op0 = expand_normal (arg0);
34272 rtx op1 = expand_normal (arg1);
34273 rtx op2 = expand_normal (arg2);
34274 rtx op3 = expand_normal (arg3);
34275 enum insn_code icode = d->icode;
34276 const struct insn_data_d *insn_p = &insn_data[icode];
34277 enum machine_mode mode0 = insn_p->operand[0].mode;
34278 enum machine_mode mode1 = insn_p->operand[1].mode;
34279 enum rtx_code comparison = UNEQ;
34280 bool need_ucomi = false;
34281
34282 /* See avxintrin.h for values. */
34283 enum rtx_code comi_comparisons[32] =
34284 {
34285 UNEQ, GT, GE, UNORDERED, LTGT, UNLE, UNLT, ORDERED, UNEQ, UNLT,
34286 UNLE, LT, LTGT, GE, GT, LT, UNEQ, GT, GE, UNORDERED, LTGT, UNLE,
34287 UNLT, ORDERED, UNEQ, UNLT, UNLE, LT, LTGT, GE, GT, LT
34288 };
34289 bool need_ucomi_values[32] =
34290 {
34291 true, false, false, true, true, false, false, true,
34292 true, false, false, true, true, false, false, true,
34293 false, true, true, false, false, true, true, false,
34294 false, true, true, false, false, true, true, false
34295 };
34296
34297 if (!CONST_INT_P (op2))
34298 {
34299 error ("the third argument must be comparison constant");
34300 return const0_rtx;
34301 }
34302 if (INTVAL (op2) < 0 || INTVAL (op2) >= 32)
34303 {
34304 error ("incorect comparison mode");
34305 return const0_rtx;
34306 }
34307
34308 if (!insn_p->operand[2].predicate (op3, SImode))
34309 {
34310 error ("incorrect rounding operand");
34311 return const0_rtx;
34312 }
34313
34314 comparison = comi_comparisons[INTVAL (op2)];
34315 need_ucomi = need_ucomi_values[INTVAL (op2)];
34316
34317 if (VECTOR_MODE_P (mode0))
34318 op0 = safe_vector_operand (op0, mode0);
34319 if (VECTOR_MODE_P (mode1))
34320 op1 = safe_vector_operand (op1, mode1);
34321
34322 target = gen_reg_rtx (SImode);
34323 emit_move_insn (target, const0_rtx);
34324 target = gen_rtx_SUBREG (QImode, target, 0);
34325
34326 if ((optimize && !register_operand (op0, mode0))
34327 || !insn_p->operand[0].predicate (op0, mode0))
34328 op0 = copy_to_mode_reg (mode0, op0);
34329 if ((optimize && !register_operand (op1, mode1))
34330 || !insn_p->operand[1].predicate (op1, mode1))
34331 op1 = copy_to_mode_reg (mode1, op1);
34332
34333 if (need_ucomi)
34334 icode = icode == CODE_FOR_sse_comi_round
34335 ? CODE_FOR_sse_ucomi_round
34336 : CODE_FOR_sse2_ucomi_round;
34337
34338 pat = GEN_FCN (icode) (op0, op1, op3);
34339 if (! pat)
34340 return 0;
34341
34342 /* Rounding operand can be either NO_ROUND or ROUND_SAE at this point. */
34343 if (INTVAL (op3) == NO_ROUND)
34344 {
34345 pat = ix86_erase_embedded_rounding (pat);
34346 if (! pat)
34347 return 0;
34348
34349 set_dst = SET_DEST (pat);
34350 }
34351 else
34352 {
34353 gcc_assert (GET_CODE (XVECEXP (pat, 0, 0)) == SET);
34354 set_dst = SET_DEST (XVECEXP (pat, 0, 0));
34355 }
34356
34357 emit_insn (pat);
34358 emit_insn (gen_rtx_SET (VOIDmode,
34359 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
34360 gen_rtx_fmt_ee (comparison, QImode,
34361 set_dst,
34362 const0_rtx)));
34363
34364 return SUBREG_REG (target);
34365 }
34366
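/* Subroutine of ix86_expand_builtin to take care of insns with rounding:
builtins whose trailing argument is an explicit rounding/SAE control that
maps onto an embedded-rounding operand of the insn pattern. */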
34367 static rtx
34368 ix86_expand_round_builtin (const struct builtin_description *d,
34369 tree exp, rtx target)
34370 {
34371 rtx pat;
34372 unsigned int i, nargs;
34373 struct
34374 {
34375 rtx op;
34376 enum machine_mode mode;
34377 } args[6];
34378 enum insn_code icode = d->icode;
34379 const struct insn_data_d *insn_p = &insn_data[icode];
34380 enum machine_mode tmode = insn_p->operand[0].mode;
34381 unsigned int nargs_constant = 0;
34382 unsigned int redundant_embed_rnd = 0;
34383
34384 switch ((enum ix86_builtin_func_type) d->flag)
34385 {
34386 case UINT64_FTYPE_V2DF_INT:
34387 case UINT64_FTYPE_V4SF_INT:
34388 case UINT_FTYPE_V2DF_INT:
34389 case UINT_FTYPE_V4SF_INT:
34390 case INT64_FTYPE_V2DF_INT:
34391 case INT64_FTYPE_V4SF_INT:
34392 case INT_FTYPE_V2DF_INT:
34393 case INT_FTYPE_V4SF_INT:
34394 nargs = 2;
34395 break;
34396 case V4SF_FTYPE_V4SF_UINT_INT:
34397 case V4SF_FTYPE_V4SF_UINT64_INT:
34398 case V2DF_FTYPE_V2DF_UINT64_INT:
34399 case V4SF_FTYPE_V4SF_INT_INT:
34400 case V4SF_FTYPE_V4SF_INT64_INT:
34401 case V2DF_FTYPE_V2DF_INT64_INT:
34402 case V4SF_FTYPE_V4SF_V4SF_INT:
34403 case V2DF_FTYPE_V2DF_V2DF_INT:
34404 case V4SF_FTYPE_V4SF_V2DF_INT:
34405 case V2DF_FTYPE_V2DF_V4SF_INT:
34406 nargs = 3;
34407 break;
34408 case V8SF_FTYPE_V8DF_V8SF_QI_INT:
34409 case V8DF_FTYPE_V8DF_V8DF_QI_INT:
34410 case V8SI_FTYPE_V8DF_V8SI_QI_INT:
34411 case V16SF_FTYPE_V16SF_V16SF_HI_INT:
34412 case V16SF_FTYPE_V16SI_V16SF_HI_INT:
34413 case V16SI_FTYPE_V16SF_V16SI_HI_INT:
34414 case V8DF_FTYPE_V8SF_V8DF_QI_INT:
34415 case V16SF_FTYPE_V16HI_V16SF_HI_INT:
34416 case V2DF_FTYPE_V2DF_V2DF_V2DF_INT:
34417 case V4SF_FTYPE_V4SF_V4SF_V4SF_INT:
34418 nargs = 4;
34419 break;
34420 case V4SF_FTYPE_V4SF_V4SF_INT_INT:
34421 case V2DF_FTYPE_V2DF_V2DF_INT_INT:
34422 nargs_constant = 2;
34423 nargs = 4;
34424 break;
34425 case INT_FTYPE_V4SF_V4SF_INT_INT:
34426 case INT_FTYPE_V2DF_V2DF_INT_INT:
34427 return ix86_expand_sse_comi_round (d, exp, target);
34428 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT:
34429 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT:
34430 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT:
34431 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI_INT:
34432 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT:
34433 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI_INT:
34434 nargs = 5;
34435 break;
34436 case V16SF_FTYPE_V16SF_INT_V16SF_HI_INT:
34437 case V8DF_FTYPE_V8DF_INT_V8DF_QI_INT:
34438 nargs_constant = 4;
34439 nargs = 5;
34440 break;
34441 case QI_FTYPE_V8DF_V8DF_INT_QI_INT:
34442 case QI_FTYPE_V2DF_V2DF_INT_QI_INT:
34443 case HI_FTYPE_V16SF_V16SF_INT_HI_INT:
34444 case QI_FTYPE_V4SF_V4SF_INT_QI_INT:
34445 nargs_constant = 3;
34446 nargs = 5;
34447 break;
34448 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI_INT:
34449 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI_INT:
34450 nargs = 6;
34451 nargs_constant = 4;
34452 break;
34453 case V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT:
34454 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT:
34455 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT:
34456 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT:
34457 nargs = 6;
34458 nargs_constant = 3;
34459 break;
34460 default:
34461 gcc_unreachable ();
34462 }
34463 gcc_assert (nargs <= ARRAY_SIZE (args));
34464
34465 if (optimize
34466 || target == 0
34467 || GET_MODE (target) != tmode
34468 || !insn_p->operand[0].predicate (target, tmode))
34469 target = gen_reg_rtx (tmode);
34470
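/* Expand the call arguments. The constant immediate and the trailing
rounding operand get extra validation below. */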
34471 for (i = 0; i < nargs; i++)
34472 {
34473 tree arg = CALL_EXPR_ARG (exp, i);
34474 rtx op = expand_normal (arg);
34475 enum machine_mode mode = insn_p->operand[i + 1].mode;
34476 bool match = insn_p->operand[i + 1].predicate (op, mode);
34477
34478 if (i == nargs - nargs_constant)
34479 {
34480 if (!match)
34481 {
34482 switch (icode)
34483 {
34484 case CODE_FOR_avx512f_getmantv8df_mask_round:
34485 case CODE_FOR_avx512f_getmantv16sf_mask_round:
34486 case CODE_FOR_avx512f_getmantv2df_round:
34487 case CODE_FOR_avx512f_getmantv4sf_round:
34488 error ("the immediate argument must be a 4-bit immediate");
34489 return const0_rtx;
34490 case CODE_FOR_avx512f_cmpv8df3_mask_round:
34491 case CODE_FOR_avx512f_cmpv16sf3_mask_round:
34492 case CODE_FOR_avx512f_vmcmpv2df3_mask_round:
34493 case CODE_FOR_avx512f_vmcmpv4sf3_mask_round:
34494 error ("the immediate argument must be a 5-bit immediate");
34495 return const0_rtx;
34496 default:
34497 error ("the immediate argument must be an 8-bit immediate");
34498 return const0_rtx;
34499 }
34500 }
34501 }
34502 else if (i == nargs-1)
34503 {
34504 if (!insn_p->operand[nargs].predicate (op, SImode))
34505 {
34506 error ("incorrect rounding operand");
34507 return const0_rtx;
34508 }
34509
34510 /* If there is no rounding, use the normal version of the pattern. */
34511 if (INTVAL (op) == NO_ROUND)
34512 redundant_embed_rnd = 1;
34513 }
34514 else
34515 {
34516 if (VECTOR_MODE_P (mode))
34517 op = safe_vector_operand (op, mode);
34518
34519 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
34520 {
34521 if (optimize || !match)
34522 op = copy_to_mode_reg (mode, op);
34523 }
34524 else
34525 {
34526 op = copy_to_reg (op);
34527 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
34528 }
34529 }
34530
34531 args[i].op = op;
34532 args[i].mode = mode;
34533 }
34534
34535 switch (nargs)
34536 {
34537 case 1:
34538 pat = GEN_FCN (icode) (target, args[0].op);
34539 break;
34540 case 2:
34541 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
34542 break;
34543 case 3:
34544 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34545 args[2].op);
34546 break;
34547 case 4:
34548 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34549 args[2].op, args[3].op);
34550 break;
34551 case 5:
34552 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34553 args[2].op, args[3].op, args[4].op);
break;
34554 case 6:
34555 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34556 args[2].op, args[3].op, args[4].op,
34557 args[5].op);
34558 break;
34559 default:
34560 gcc_unreachable ();
34561 }
34562
34563 if (!pat)
34564 return 0;
34565
34566 if (redundant_embed_rnd)
34567 pat = ix86_erase_embedded_rounding (pat);
34568
34569 emit_insn (pat);
34570 return target;
34571 }
34572
34573 /* Subroutine of ix86_expand_builtin to take care of special insns
34574 with variable number of operands. */
34575
34576 static rtx
34577 ix86_expand_special_args_builtin (const struct builtin_description *d,
34578 tree exp, rtx target)
34579 {
34580 tree arg;
34581 rtx pat, op;
34582 unsigned int i, nargs, arg_adjust, memory;
34583 bool aligned_mem = false;
34584 struct
34585 {
34586 rtx op;
34587 enum machine_mode mode;
34588 } args[3];
34589 enum insn_code icode = d->icode;
34590 bool last_arg_constant = false;
34591 const struct insn_data_d *insn_p = &insn_data[icode];
34592 enum machine_mode tmode = insn_p->operand[0].mode;
34593 enum { load, store } klass;
34594
34595 switch ((enum ix86_builtin_func_type) d->flag)
34596 {
34597 case VOID_FTYPE_VOID:
34598 emit_insn (GEN_FCN (icode) (target));
34599 return 0;
34600 case VOID_FTYPE_UINT64:
34601 case VOID_FTYPE_UNSIGNED:
34602 nargs = 0;
34603 klass = store;
34604 memory = 0;
34605 break;
34606
34607 case INT_FTYPE_VOID:
34608 case USHORT_FTYPE_VOID:
34609 case UINT64_FTYPE_VOID:
34610 case UNSIGNED_FTYPE_VOID:
34611 nargs = 0;
34612 klass = load;
34613 memory = 0;
34614 break;
34615 case UINT64_FTYPE_PUNSIGNED:
34616 case V2DI_FTYPE_PV2DI:
34617 case V4DI_FTYPE_PV4DI:
34618 case V32QI_FTYPE_PCCHAR:
34619 case V16QI_FTYPE_PCCHAR:
34620 case V8SF_FTYPE_PCV4SF:
34621 case V8SF_FTYPE_PCFLOAT:
34622 case V4SF_FTYPE_PCFLOAT:
34623 case V4DF_FTYPE_PCV2DF:
34624 case V4DF_FTYPE_PCDOUBLE:
34625 case V2DF_FTYPE_PCDOUBLE:
34626 case VOID_FTYPE_PVOID:
34627 case V16SI_FTYPE_PV4SI:
34628 case V16SF_FTYPE_PV4SF:
34629 case V8DI_FTYPE_PV4DI:
34630 case V8DI_FTYPE_PV8DI:
34631 case V8DF_FTYPE_PV4DF:
34632 nargs = 1;
34633 klass = load;
34634 memory = 0;
34635 switch (icode)
34636 {
34637 case CODE_FOR_sse4_1_movntdqa:
34638 case CODE_FOR_avx2_movntdqa:
34639 case CODE_FOR_avx512f_movntdqa:
34640 aligned_mem = true;
34641 break;
34642 default:
34643 break;
34644 }
34645 break;
34646 case VOID_FTYPE_PV2SF_V4SF:
34647 case VOID_FTYPE_PV8DI_V8DI:
34648 case VOID_FTYPE_PV4DI_V4DI:
34649 case VOID_FTYPE_PV2DI_V2DI:
34650 case VOID_FTYPE_PCHAR_V32QI:
34651 case VOID_FTYPE_PCHAR_V16QI:
34652 case VOID_FTYPE_PFLOAT_V16SF:
34653 case VOID_FTYPE_PFLOAT_V8SF:
34654 case VOID_FTYPE_PFLOAT_V4SF:
34655 case VOID_FTYPE_PDOUBLE_V8DF:
34656 case VOID_FTYPE_PDOUBLE_V4DF:
34657 case VOID_FTYPE_PDOUBLE_V2DF:
34658 case VOID_FTYPE_PLONGLONG_LONGLONG:
34659 case VOID_FTYPE_PULONGLONG_ULONGLONG:
34660 case VOID_FTYPE_PINT_INT:
34661 nargs = 1;
34662 klass = store;
34663 /* Reserve memory operand for target. */
34664 memory = ARRAY_SIZE (args);
34665 switch (icode)
34666 {
34667 /* These builtins and instructions require the memory
34668 to be properly aligned. */
34669 case CODE_FOR_avx_movntv4di:
34670 case CODE_FOR_sse2_movntv2di:
34671 case CODE_FOR_avx_movntv8sf:
34672 case CODE_FOR_sse_movntv4sf:
34673 case CODE_FOR_sse4a_vmmovntv4sf:
34674 case CODE_FOR_avx_movntv4df:
34675 case CODE_FOR_sse2_movntv2df:
34676 case CODE_FOR_sse4a_vmmovntv2df:
34677 case CODE_FOR_sse2_movntidi:
34678 case CODE_FOR_sse_movntq:
34679 case CODE_FOR_sse2_movntisi:
34680 case CODE_FOR_avx512f_movntv16sf:
34681 case CODE_FOR_avx512f_movntv8df:
34682 case CODE_FOR_avx512f_movntv8di:
34683 aligned_mem = true;
34684 break;
34685 default:
34686 break;
34687 }
34688 break;
34689 case V4SF_FTYPE_V4SF_PCV2SF:
34690 case V2DF_FTYPE_V2DF_PCDOUBLE:
34691 nargs = 2;
34692 klass = load;
34693 memory = 1;
34694 break;
34695 case V8SF_FTYPE_PCV8SF_V8SI:
34696 case V4DF_FTYPE_PCV4DF_V4DI:
34697 case V4SF_FTYPE_PCV4SF_V4SI:
34698 case V2DF_FTYPE_PCV2DF_V2DI:
34699 case V8SI_FTYPE_PCV8SI_V8SI:
34700 case V4DI_FTYPE_PCV4DI_V4DI:
34701 case V4SI_FTYPE_PCV4SI_V4SI:
34702 case V2DI_FTYPE_PCV2DI_V2DI:
34703 nargs = 2;
34704 klass = load;
34705 memory = 0;
34706 break;
34707 case VOID_FTYPE_PV8DF_V8DF_QI:
34708 case VOID_FTYPE_PV16SF_V16SF_HI:
34709 case VOID_FTYPE_PV8DI_V8DI_QI:
34710 case VOID_FTYPE_PV16SI_V16SI_HI:
34711 switch (icode)
34712 {
34713 /* These builtins and instructions require the memory
34714 to be properly aligned. */
34715 case CODE_FOR_avx512f_storev16sf_mask:
34716 case CODE_FOR_avx512f_storev16si_mask:
34717 case CODE_FOR_avx512f_storev8df_mask:
34718 case CODE_FOR_avx512f_storev8di_mask:
34719 aligned_mem = true;
34720 break;
34721 default:
34722 break;
34723 }
34724 /* FALLTHRU */
34725 case VOID_FTYPE_PV8SF_V8SI_V8SF:
34726 case VOID_FTYPE_PV4DF_V4DI_V4DF:
34727 case VOID_FTYPE_PV4SF_V4SI_V4SF:
34728 case VOID_FTYPE_PV2DF_V2DI_V2DF:
34729 case VOID_FTYPE_PV8SI_V8SI_V8SI:
34730 case VOID_FTYPE_PV4DI_V4DI_V4DI:
34731 case VOID_FTYPE_PV4SI_V4SI_V4SI:
34732 case VOID_FTYPE_PV2DI_V2DI_V2DI:
34733 case VOID_FTYPE_PDOUBLE_V2DF_QI:
34734 case VOID_FTYPE_PFLOAT_V4SF_QI:
34735 case VOID_FTYPE_PV8SI_V8DI_QI:
34736 case VOID_FTYPE_PV8HI_V8DI_QI:
34737 case VOID_FTYPE_PV16HI_V16SI_HI:
34738 case VOID_FTYPE_PV16QI_V8DI_QI:
34739 case VOID_FTYPE_PV16QI_V16SI_HI:
34740 nargs = 2;
34741 klass = store;
34742 /* Reserve memory operand for target. */
34743 memory = ARRAY_SIZE (args);
34744 break;
34745 case V16SF_FTYPE_PCV16SF_V16SF_HI:
34746 case V16SI_FTYPE_PCV16SI_V16SI_HI:
34747 case V8DF_FTYPE_PCV8DF_V8DF_QI:
34748 case V8DI_FTYPE_PCV8DI_V8DI_QI:
34749 case V2DF_FTYPE_PCDOUBLE_V2DF_QI:
34750 case V4SF_FTYPE_PCFLOAT_V4SF_QI:
34751 nargs = 3;
34752 klass = load;
34753 memory = 0;
34754 switch (icode)
34755 {
34756 /* These builtins and instructions require the memory
34757 to be properly aligned. */
34758 case CODE_FOR_avx512f_loadv16sf_mask:
34759 case CODE_FOR_avx512f_loadv16si_mask:
34760 case CODE_FOR_avx512f_loadv8df_mask:
34761 case CODE_FOR_avx512f_loadv8di_mask:
34762 aligned_mem = true;
34763 break;
34764 default:
34765 break;
34766 }
34767 break;
34768 case VOID_FTYPE_UINT_UINT_UINT:
34769 case VOID_FTYPE_UINT64_UINT_UINT:
34770 case UCHAR_FTYPE_UINT_UINT_UINT:
34771 case UCHAR_FTYPE_UINT64_UINT_UINT:
34772 nargs = 3;
34773 klass = load;
34774 memory = ARRAY_SIZE (args);
34775 last_arg_constant = true;
34776 break;
34777 default:
34778 gcc_unreachable ();
34779 }
34780
34781 gcc_assert (nargs <= ARRAY_SIZE (args));
34782
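/* For store builtins the first call argument is the destination (memory
or register); the remaining arguments are expanded in the loop below. */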
34783 if (klass == store)
34784 {
34785 arg = CALL_EXPR_ARG (exp, 0);
34786 op = expand_normal (arg);
34787 gcc_assert (target == 0);
34788 if (memory)
34789 {
34790 op = ix86_zero_extend_to_Pmode (op);
34791 target = gen_rtx_MEM (tmode, op);
34792 /* target at this point has just BITS_PER_UNIT MEM_ALIGN
34793 on it. Try to improve it using get_pointer_alignment,
34794 and if the special builtin is one that requires strict
34795 mode alignment, also from its GET_MODE_ALIGNMENT.
34796 Failure to do so could lead to ix86_legitimate_combined_insn
34797 rejecting all changes to such insns. */
34798 unsigned int align = get_pointer_alignment (arg);
34799 if (aligned_mem && align < GET_MODE_ALIGNMENT (tmode))
34800 align = GET_MODE_ALIGNMENT (tmode);
34801 if (MEM_ALIGN (target) < align)
34802 set_mem_align (target, align);
34803 }
34804 else
34805 target = force_reg (tmode, op);
34806 arg_adjust = 1;
34807 }
34808 else
34809 {
34810 arg_adjust = 0;
34811 if (optimize
34812 || target == 0
34813 || !register_operand (target, tmode)
34814 || GET_MODE (target) != tmode)
34815 target = gen_reg_rtx (tmode);
34816 }
34817
34818 for (i = 0; i < nargs; i++)
34819 {
34820 enum machine_mode mode = insn_p->operand[i + 1].mode;
34821 bool match;
34822
34823 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
34824 op = expand_normal (arg);
34825 match = insn_p->operand[i + 1].predicate (op, mode);
34826
34827 if (last_arg_constant && (i + 1) == nargs)
34828 {
34829 if (!match)
34830 {
34831 if (icode == CODE_FOR_lwp_lwpvalsi3
34832 || icode == CODE_FOR_lwp_lwpinssi3
34833 || icode == CODE_FOR_lwp_lwpvaldi3
34834 || icode == CODE_FOR_lwp_lwpinsdi3)
34835 error ("the last argument must be a 32-bit immediate");
34836 else
34837 error ("the last argument must be an 8-bit immediate");
34838 return const0_rtx;
34839 }
34840 }
34841 else
34842 {
34843 if (i == memory)
34844 {
34845 /* This must be the memory operand. */
34846 op = ix86_zero_extend_to_Pmode (op);
34847 op = gen_rtx_MEM (mode, op);
34848 /* op at this point has just BITS_PER_UNIT MEM_ALIGN
34849 on it. Try to improve it using get_pointer_alignment,
34850 and if the special builtin is one that requires strict
34851 mode alignment, also from its GET_MODE_ALIGNMENT.
34852 Failure to do so could lead to ix86_legitimate_combined_insn
34853 rejecting all changes to such insns. */
34854 unsigned int align = get_pointer_alignment (arg);
34855 if (aligned_mem && align < GET_MODE_ALIGNMENT (mode))
34856 align = GET_MODE_ALIGNMENT (mode);
34857 if (MEM_ALIGN (op) < align)
34858 set_mem_align (op, align);
34859 }
34860 else
34861 {
34862 /* This must be a register. */
34863 if (VECTOR_MODE_P (mode))
34864 op = safe_vector_operand (op, mode);
34865
34866 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
34867 op = copy_to_mode_reg (mode, op);
34868 else
34869 {
34870 op = copy_to_reg (op);
34871 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
34872 }
34873 }
34874 }
34875
34876 args[i].op = op;
34877 args[i].mode = mode;
34878 }
34879
34880 switch (nargs)
34881 {
34882 case 0:
34883 pat = GEN_FCN (icode) (target);
34884 break;
34885 case 1:
34886 pat = GEN_FCN (icode) (target, args[0].op);
34887 break;
34888 case 2:
34889 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
34890 break;
34891 case 3:
34892 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
34893 break;
34894 default:
34895 gcc_unreachable ();
34896 }
34897
34898 if (! pat)
34899 return 0;
34900 emit_insn (pat);
34901 return klass == store ? 0 : target;
34902 }
34903
34904 /* Return the integer constant in ARG. Constrain it to be in the range
34905 of the subparts of VEC_TYPE; issue an error if not. */
34906
34907 static int
34908 get_element_number (tree vec_type, tree arg)
34909 {
34910 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
34911
34912 if (!tree_fits_uhwi_p (arg)
34913 || (elt = tree_to_uhwi (arg), elt > max))
34914 {
34915 error ("selector must be an integer constant in the range 0..%wi", max);
34916 return 0;
34917 }
34918
34919 return elt;
34920 }
34921
34922 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
34923 ix86_expand_vector_init. We DO have language-level syntax for this, in
34924 the form of (type){ init-list }. Except that since we can't place emms
34925 instructions from inside the compiler, we can't allow the use of MMX
34926 registers unless the user explicitly asks for it. So we do *not* define
34927 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
34928 we have builtins invoked by mmintrin.h that give us license to emit
34929 these sorts of instructions. */
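/* For example, _mm_set_pi32 in mmintrin.h is implemented in terms of
__builtin_ia32_vec_init_v2si. */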
34930
34931 static rtx
34932 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
34933 {
34934 enum machine_mode tmode = TYPE_MODE (type);
34935 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
34936 int i, n_elt = GET_MODE_NUNITS (tmode);
34937 rtvec v = rtvec_alloc (n_elt);
34938
34939 gcc_assert (VECTOR_MODE_P (tmode));
34940 gcc_assert (call_expr_nargs (exp) == n_elt);
34941
34942 for (i = 0; i < n_elt; ++i)
34943 {
34944 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
34945 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
34946 }
34947
34948 if (!target || !register_operand (target, tmode))
34949 target = gen_reg_rtx (tmode);
34950
34951 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
34952 return target;
34953 }
34954
34955 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
34956 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
34957 had a language-level syntax for referencing vector elements. */
34958
34959 static rtx
34960 ix86_expand_vec_ext_builtin (tree exp, rtx target)
34961 {
34962 enum machine_mode tmode, mode0;
34963 tree arg0, arg1;
34964 int elt;
34965 rtx op0;
34966
34967 arg0 = CALL_EXPR_ARG (exp, 0);
34968 arg1 = CALL_EXPR_ARG (exp, 1);
34969
34970 op0 = expand_normal (arg0);
34971 elt = get_element_number (TREE_TYPE (arg0), arg1);
34972
34973 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
34974 mode0 = TYPE_MODE (TREE_TYPE (arg0));
34975 gcc_assert (VECTOR_MODE_P (mode0));
34976
34977 op0 = force_reg (mode0, op0);
34978
34979 if (optimize || !target || !register_operand (target, tmode))
34980 target = gen_reg_rtx (tmode);
34981
34982 ix86_expand_vector_extract (true, target, op0, elt);
34983
34984 return target;
34985 }
34986
34987 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
34988 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
34989 a language-level syntax for referencing vector elements. */
34990
34991 static rtx
34992 ix86_expand_vec_set_builtin (tree exp)
34993 {
34994 enum machine_mode tmode, mode1;
34995 tree arg0, arg1, arg2;
34996 int elt;
34997 rtx op0, op1, target;
34998
34999 arg0 = CALL_EXPR_ARG (exp, 0);
35000 arg1 = CALL_EXPR_ARG (exp, 1);
35001 arg2 = CALL_EXPR_ARG (exp, 2);
35002
35003 tmode = TYPE_MODE (TREE_TYPE (arg0));
35004 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
35005 gcc_assert (VECTOR_MODE_P (tmode));
35006
35007 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
35008 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
35009 elt = get_element_number (TREE_TYPE (arg0), arg2);
35010
35011 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
35012 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
35013
35014 op0 = force_reg (tmode, op0);
35015 op1 = force_reg (mode1, op1);
35016
35017 /* OP0 is the source of these builtin functions and shouldn't be
35018 modified. Create a copy, use it and return it as target. */
35019 target = gen_reg_rtx (tmode);
35020 emit_move_insn (target, op0);
35021 ix86_expand_vector_set (true, target, op1, elt);
35022
35023 return target;
35024 }
35025
35026 /* Expand an expression EXP that calls a built-in function,
35027 with result going to TARGET if that's convenient
35028 (and in mode MODE if that's convenient).
35029 SUBTARGET may be used as the target for computing one of EXP's operands.
35030 IGNORE is nonzero if the value is to be ignored. */
35031
35032 static rtx
35033 ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
35034 enum machine_mode mode, int ignore)
35035 {
35036 const struct builtin_description *d;
35037 size_t i;
35038 enum insn_code icode;
35039 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
35040 tree arg0, arg1, arg2, arg3, arg4;
35041 rtx op0, op1, op2, op3, op4, pat, insn;
35042 enum machine_mode mode0, mode1, mode2, mode3, mode4;
35043 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
35044
35045 /* For CPU builtins that can be folded, fold first and expand the fold. */
35046 switch (fcode)
35047 {
35048 case IX86_BUILTIN_CPU_INIT:
35049 {
35050 /* Make it call __cpu_indicator_init in libgcc. */
35051 tree call_expr, fndecl, type;
35052 type = build_function_type_list (integer_type_node, NULL_TREE);
35053 fndecl = build_fn_decl ("__cpu_indicator_init", type);
35054 call_expr = build_call_expr (fndecl, 0);
35055 return expand_expr (call_expr, target, mode, EXPAND_NORMAL);
35056 }
35057 case IX86_BUILTIN_CPU_IS:
35058 case IX86_BUILTIN_CPU_SUPPORTS:
35059 {
35060 tree arg0 = CALL_EXPR_ARG (exp, 0);
35061 tree fold_expr = fold_builtin_cpu (fndecl, &arg0);
35062 gcc_assert (fold_expr != NULL_TREE);
35063 return expand_expr (fold_expr, target, mode, EXPAND_NORMAL);
35064 }
35065 }
35066
35067 /* Determine whether the builtin function is available under the current ISA.
35068 Originally the builtin was not created if it wasn't applicable to the
35069 current ISA based on the command line switches. With function specific
35070 options, we need to check in the context of the function making the call
35071 whether it is supported. */
35072 if (ix86_builtins_isa[fcode].isa
35073 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
35074 {
35075 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
35076 NULL, (enum fpmath_unit) 0, false);
35077
35078 if (!opts)
35079 error ("%qE needs unknown isa option", fndecl);
35080 else
35081 {
35082 gcc_assert (opts != NULL);
35083 error ("%qE needs isa option %s", fndecl, opts);
35084 free (opts);
35085 }
35086 return const0_rtx;
35087 }
35088
35089 switch (fcode)
35090 {
35091 case IX86_BUILTIN_MASKMOVQ:
35092 case IX86_BUILTIN_MASKMOVDQU:
35093 icode = (fcode == IX86_BUILTIN_MASKMOVQ
35094 ? CODE_FOR_mmx_maskmovq
35095 : CODE_FOR_sse2_maskmovdqu);
35096 /* Note the arg order is different from the operand order. */
35097 arg1 = CALL_EXPR_ARG (exp, 0);
35098 arg2 = CALL_EXPR_ARG (exp, 1);
35099 arg0 = CALL_EXPR_ARG (exp, 2);
35100 op0 = expand_normal (arg0);
35101 op1 = expand_normal (arg1);
35102 op2 = expand_normal (arg2);
35103 mode0 = insn_data[icode].operand[0].mode;
35104 mode1 = insn_data[icode].operand[1].mode;
35105 mode2 = insn_data[icode].operand[2].mode;
35106
35107 op0 = ix86_zero_extend_to_Pmode (op0);
35108 op0 = gen_rtx_MEM (mode1, op0);
35109
35110 if (!insn_data[icode].operand[0].predicate (op0, mode0))
35111 op0 = copy_to_mode_reg (mode0, op0);
35112 if (!insn_data[icode].operand[1].predicate (op1, mode1))
35113 op1 = copy_to_mode_reg (mode1, op1);
35114 if (!insn_data[icode].operand[2].predicate (op2, mode2))
35115 op2 = copy_to_mode_reg (mode2, op2);
35116 pat = GEN_FCN (icode) (op0, op1, op2);
35117 if (! pat)
35118 return 0;
35119 emit_insn (pat);
35120 return 0;
35121
35122 case IX86_BUILTIN_LDMXCSR:
35123 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
35124 target = assign_386_stack_local (SImode, SLOT_TEMP);
35125 emit_move_insn (target, op0);
35126 emit_insn (gen_sse_ldmxcsr (target));
35127 return 0;
35128
35129 case IX86_BUILTIN_STMXCSR:
35130 target = assign_386_stack_local (SImode, SLOT_TEMP);
35131 emit_insn (gen_sse_stmxcsr (target));
35132 return copy_to_mode_reg (SImode, target);
35133
35134 case IX86_BUILTIN_CLFLUSH:
35135 arg0 = CALL_EXPR_ARG (exp, 0);
35136 op0 = expand_normal (arg0);
35137 icode = CODE_FOR_sse2_clflush;
35138 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
35139 op0 = ix86_zero_extend_to_Pmode (op0);
35140
35141 emit_insn (gen_sse2_clflush (op0));
35142 return 0;
35143
35144 case IX86_BUILTIN_CLFLUSHOPT:
35145 arg0 = CALL_EXPR_ARG (exp, 0);
35146 op0 = expand_normal (arg0);
35147 icode = CODE_FOR_clflushopt;
35148 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
35149 op0 = ix86_zero_extend_to_Pmode (op0);
35150
35151 emit_insn (gen_clflushopt (op0));
35152 return 0;
35153
35154 case IX86_BUILTIN_MONITOR:
35155 arg0 = CALL_EXPR_ARG (exp, 0);
35156 arg1 = CALL_EXPR_ARG (exp, 1);
35157 arg2 = CALL_EXPR_ARG (exp, 2);
35158 op0 = expand_normal (arg0);
35159 op1 = expand_normal (arg1);
35160 op2 = expand_normal (arg2);
35161 if (!REG_P (op0))
35162 op0 = ix86_zero_extend_to_Pmode (op0);
35163 if (!REG_P (op1))
35164 op1 = copy_to_mode_reg (SImode, op1);
35165 if (!REG_P (op2))
35166 op2 = copy_to_mode_reg (SImode, op2);
35167 emit_insn (ix86_gen_monitor (op0, op1, op2));
35168 return 0;
35169
35170 case IX86_BUILTIN_MWAIT:
35171 arg0 = CALL_EXPR_ARG (exp, 0);
35172 arg1 = CALL_EXPR_ARG (exp, 1);
35173 op0 = expand_normal (arg0);
35174 op1 = expand_normal (arg1);
35175 if (!REG_P (op0))
35176 op0 = copy_to_mode_reg (SImode, op0);
35177 if (!REG_P (op1))
35178 op1 = copy_to_mode_reg (SImode, op1);
35179 emit_insn (gen_sse3_mwait (op0, op1));
35180 return 0;
35181
35182 case IX86_BUILTIN_VEC_INIT_V2SI:
35183 case IX86_BUILTIN_VEC_INIT_V4HI:
35184 case IX86_BUILTIN_VEC_INIT_V8QI:
35185 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
35186
35187 case IX86_BUILTIN_VEC_EXT_V2DF:
35188 case IX86_BUILTIN_VEC_EXT_V2DI:
35189 case IX86_BUILTIN_VEC_EXT_V4SF:
35190 case IX86_BUILTIN_VEC_EXT_V4SI:
35191 case IX86_BUILTIN_VEC_EXT_V8HI:
35192 case IX86_BUILTIN_VEC_EXT_V2SI:
35193 case IX86_BUILTIN_VEC_EXT_V4HI:
35194 case IX86_BUILTIN_VEC_EXT_V16QI:
35195 return ix86_expand_vec_ext_builtin (exp, target);
35196
35197 case IX86_BUILTIN_VEC_SET_V2DI:
35198 case IX86_BUILTIN_VEC_SET_V4SF:
35199 case IX86_BUILTIN_VEC_SET_V4SI:
35200 case IX86_BUILTIN_VEC_SET_V8HI:
35201 case IX86_BUILTIN_VEC_SET_V4HI:
35202 case IX86_BUILTIN_VEC_SET_V16QI:
35203 return ix86_expand_vec_set_builtin (exp);
35204
35205 case IX86_BUILTIN_INFQ:
35206 case IX86_BUILTIN_HUGE_VALQ:
35207 {
35208 REAL_VALUE_TYPE inf;
35209 rtx tmp;
35210
35211 real_inf (&inf);
35212 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
35213
35214 tmp = validize_mem (force_const_mem (mode, tmp));
35215
35216 if (target == 0)
35217 target = gen_reg_rtx (mode);
35218
35219 emit_move_insn (target, tmp);
35220 return target;
35221 }
35222
35223 case IX86_BUILTIN_RDPMC:
35224 case IX86_BUILTIN_RDTSC:
35225 case IX86_BUILTIN_RDTSCP:
35226
35227 op0 = gen_reg_rtx (DImode);
35228 op1 = gen_reg_rtx (DImode);
35229
35230 if (fcode == IX86_BUILTIN_RDPMC)
35231 {
35232 arg0 = CALL_EXPR_ARG (exp, 0);
35233 op2 = expand_normal (arg0);
35234 if (!register_operand (op2, SImode))
35235 op2 = copy_to_mode_reg (SImode, op2);
35236
35237 insn = (TARGET_64BIT
35238 ? gen_rdpmc_rex64 (op0, op1, op2)
35239 : gen_rdpmc (op0, op2));
35240 emit_insn (insn);
35241 }
35242 else if (fcode == IX86_BUILTIN_RDTSC)
35243 {
35244 insn = (TARGET_64BIT
35245 ? gen_rdtsc_rex64 (op0, op1)
35246 : gen_rdtsc (op0));
35247 emit_insn (insn);
35248 }
35249 else
35250 {
35251 op2 = gen_reg_rtx (SImode);
35252
35253 insn = (TARGET_64BIT
35254 ? gen_rdtscp_rex64 (op0, op1, op2)
35255 : gen_rdtscp (op0, op2));
35256 emit_insn (insn);
35257
35258 arg0 = CALL_EXPR_ARG (exp, 0);
35259 op4 = expand_normal (arg0);
35260 if (!address_operand (op4, VOIDmode))
35261 {
35262 op4 = convert_memory_address (Pmode, op4);
35263 op4 = copy_addr_to_reg (op4);
35264 }
35265 emit_move_insn (gen_rtx_MEM (SImode, op4), op2);
35266 }
35267
35268 if (target == 0)
35269 {
35270 /* mode is VOIDmode if __builtin_rd* has been called
35271 without lhs. */
35272 if (mode == VOIDmode)
35273 return target;
35274 target = gen_reg_rtx (mode);
35275 }
35276
35277 if (TARGET_64BIT)
35278 {
35279 op1 = expand_simple_binop (DImode, ASHIFT, op1, GEN_INT (32),
35280 op1, 1, OPTAB_DIRECT);
35281 op0 = expand_simple_binop (DImode, IOR, op0, op1,
35282 op0, 1, OPTAB_DIRECT);
35283 }
35284
35285 emit_move_insn (target, op0);
35286 return target;
35287
35288 case IX86_BUILTIN_FXSAVE:
35289 case IX86_BUILTIN_FXRSTOR:
35290 case IX86_BUILTIN_FXSAVE64:
35291 case IX86_BUILTIN_FXRSTOR64:
35292 case IX86_BUILTIN_FNSTENV:
35293 case IX86_BUILTIN_FLDENV:
35294 mode0 = BLKmode;
35295 switch (fcode)
35296 {
35297 case IX86_BUILTIN_FXSAVE:
35298 icode = CODE_FOR_fxsave;
35299 break;
35300 case IX86_BUILTIN_FXRSTOR:
35301 icode = CODE_FOR_fxrstor;
35302 break;
35303 case IX86_BUILTIN_FXSAVE64:
35304 icode = CODE_FOR_fxsave64;
35305 break;
35306 case IX86_BUILTIN_FXRSTOR64:
35307 icode = CODE_FOR_fxrstor64;
35308 break;
35309 case IX86_BUILTIN_FNSTENV:
35310 icode = CODE_FOR_fnstenv;
35311 break;
35312 case IX86_BUILTIN_FLDENV:
35313 icode = CODE_FOR_fldenv;
35314 break;
35315 default:
35316 gcc_unreachable ();
35317 }
35318
35319 arg0 = CALL_EXPR_ARG (exp, 0);
35320 op0 = expand_normal (arg0);
35321
35322 if (!address_operand (op0, VOIDmode))
35323 {
35324 op0 = convert_memory_address (Pmode, op0);
35325 op0 = copy_addr_to_reg (op0);
35326 }
35327 op0 = gen_rtx_MEM (mode0, op0);
35328
35329 pat = GEN_FCN (icode) (op0);
35330 if (pat)
35331 emit_insn (pat);
35332 return 0;
35333
35334 case IX86_BUILTIN_XSAVE:
35335 case IX86_BUILTIN_XRSTOR:
35336 case IX86_BUILTIN_XSAVE64:
35337 case IX86_BUILTIN_XRSTOR64:
35338 case IX86_BUILTIN_XSAVEOPT:
35339 case IX86_BUILTIN_XSAVEOPT64:
35340 case IX86_BUILTIN_XSAVES:
35341 case IX86_BUILTIN_XRSTORS:
35342 case IX86_BUILTIN_XSAVES64:
35343 case IX86_BUILTIN_XRSTORS64:
35344 case IX86_BUILTIN_XSAVEC:
35345 case IX86_BUILTIN_XSAVEC64:
35346 arg0 = CALL_EXPR_ARG (exp, 0);
35347 arg1 = CALL_EXPR_ARG (exp, 1);
35348 op0 = expand_normal (arg0);
35349 op1 = expand_normal (arg1);
35350
35351 if (!address_operand (op0, VOIDmode))
35352 {
35353 op0 = convert_memory_address (Pmode, op0);
35354 op0 = copy_addr_to_reg (op0);
35355 }
35356 op0 = gen_rtx_MEM (BLKmode, op0);
35357
35358 op1 = force_reg (DImode, op1);
35359
35360 if (TARGET_64BIT)
35361 {
35362 op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32),
35363 NULL, 1, OPTAB_DIRECT);
35364 switch (fcode)
35365 {
35366 case IX86_BUILTIN_XSAVE:
35367 icode = CODE_FOR_xsave_rex64;
35368 break;
35369 case IX86_BUILTIN_XRSTOR:
35370 icode = CODE_FOR_xrstor_rex64;
35371 break;
35372 case IX86_BUILTIN_XSAVE64:
35373 icode = CODE_FOR_xsave64;
35374 break;
35375 case IX86_BUILTIN_XRSTOR64:
35376 icode = CODE_FOR_xrstor64;
35377 break;
35378 case IX86_BUILTIN_XSAVEOPT:
35379 icode = CODE_FOR_xsaveopt_rex64;
35380 break;
35381 case IX86_BUILTIN_XSAVEOPT64:
35382 icode = CODE_FOR_xsaveopt64;
35383 break;
35384 case IX86_BUILTIN_XSAVES:
35385 icode = CODE_FOR_xsaves_rex64;
35386 break;
35387 case IX86_BUILTIN_XRSTORS:
35388 icode = CODE_FOR_xrstors_rex64;
35389 break;
35390 case IX86_BUILTIN_XSAVES64:
35391 icode = CODE_FOR_xsaves64;
35392 break;
35393 case IX86_BUILTIN_XRSTORS64:
35394 icode = CODE_FOR_xrstors64;
35395 break;
35396 case IX86_BUILTIN_XSAVEC:
35397 icode = CODE_FOR_xsavec_rex64;
35398 break;
35399 case IX86_BUILTIN_XSAVEC64:
35400 icode = CODE_FOR_xsavec64;
35401 break;
35402 default:
35403 gcc_unreachable ();
35404 }
35405
35406 op2 = gen_lowpart (SImode, op2);
35407 op1 = gen_lowpart (SImode, op1);
35408 pat = GEN_FCN (icode) (op0, op1, op2);
35409 }
35410 else
35411 {
35412 switch (fcode)
35413 {
35414 case IX86_BUILTIN_XSAVE:
35415 icode = CODE_FOR_xsave;
35416 break;
35417 case IX86_BUILTIN_XRSTOR:
35418 icode = CODE_FOR_xrstor;
35419 break;
35420 case IX86_BUILTIN_XSAVEOPT:
35421 icode = CODE_FOR_xsaveopt;
35422 break;
35423 case IX86_BUILTIN_XSAVES:
35424 icode = CODE_FOR_xsaves;
35425 break;
35426 case IX86_BUILTIN_XRSTORS:
35427 icode = CODE_FOR_xrstors;
35428 break;
35429 case IX86_BUILTIN_XSAVEC:
35430 icode = CODE_FOR_xsavec;
35431 break;
35432 default:
35433 gcc_unreachable ();
35434 }
35435 pat = GEN_FCN (icode) (op0, op1);
35436 }
35437
35438 if (pat)
35439 emit_insn (pat);
35440 return 0;
35441
35442 case IX86_BUILTIN_LLWPCB:
35443 arg0 = CALL_EXPR_ARG (exp, 0);
35444 op0 = expand_normal (arg0);
35445 icode = CODE_FOR_lwp_llwpcb;
35446 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
35447 op0 = ix86_zero_extend_to_Pmode (op0);
35448 emit_insn (gen_lwp_llwpcb (op0));
35449 return 0;
35450
35451 case IX86_BUILTIN_SLWPCB:
35452 icode = CODE_FOR_lwp_slwpcb;
35453 if (!target
35454 || !insn_data[icode].operand[0].predicate (target, Pmode))
35455 target = gen_reg_rtx (Pmode);
35456 emit_insn (gen_lwp_slwpcb (target));
35457 return target;
35458
35459 case IX86_BUILTIN_BEXTRI32:
35460 case IX86_BUILTIN_BEXTRI64:
35461 arg0 = CALL_EXPR_ARG (exp, 0);
35462 arg1 = CALL_EXPR_ARG (exp, 1);
35463 op0 = expand_normal (arg0);
35464 op1 = expand_normal (arg1);
35465 icode = (fcode == IX86_BUILTIN_BEXTRI32
35466 ? CODE_FOR_tbm_bextri_si
35467 : CODE_FOR_tbm_bextri_di);
35468 if (!CONST_INT_P (op1))
35469 {
35470 error ("last argument must be an immediate");
35471 return const0_rtx;
35472 }
35473 else
35474 {
35475 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
35476 unsigned char lsb_index = INTVAL (op1) & 0xFF;
35477 op1 = GEN_INT (length);
35478 op2 = GEN_INT (lsb_index);
35479 pat = GEN_FCN (icode) (target, op0, op1, op2);
35480 if (pat)
35481 emit_insn (pat);
35482 return target;
35483 }
35484
35485 case IX86_BUILTIN_RDRAND16_STEP:
35486 icode = CODE_FOR_rdrandhi_1;
35487 mode0 = HImode;
35488 goto rdrand_step;
35489
35490 case IX86_BUILTIN_RDRAND32_STEP:
35491 icode = CODE_FOR_rdrandsi_1;
35492 mode0 = SImode;
35493 goto rdrand_step;
35494
35495 case IX86_BUILTIN_RDRAND64_STEP:
35496 icode = CODE_FOR_rdranddi_1;
35497 mode0 = DImode;
35498
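/* Common code for the rdrand*_step builtins: emit the rdrand, store the
random value through the pointer argument, and derive the return value
from the carry flag. */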
35499 rdrand_step:
35500 op0 = gen_reg_rtx (mode0);
35501 emit_insn (GEN_FCN (icode) (op0));
35502
35503 arg0 = CALL_EXPR_ARG (exp, 0);
35504 op1 = expand_normal (arg0);
35505 if (!address_operand (op1, VOIDmode))
35506 {
35507 op1 = convert_memory_address (Pmode, op1);
35508 op1 = copy_addr_to_reg (op1);
35509 }
35510 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
35511
35512 op1 = gen_reg_rtx (SImode);
35513 emit_move_insn (op1, CONST1_RTX (SImode));
35514
35515 /* Emit SImode conditional move. */
35516 if (mode0 == HImode)
35517 {
35518 op2 = gen_reg_rtx (SImode);
35519 emit_insn (gen_zero_extendhisi2 (op2, op0));
35520 }
35521 else if (mode0 == SImode)
35522 op2 = op0;
35523 else
35524 op2 = gen_rtx_SUBREG (SImode, op0, 0);
35525
35526 if (target == 0
35527 || !register_operand (target, SImode))
35528 target = gen_reg_rtx (SImode);
35529
35530 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
35531 const0_rtx);
35532 emit_insn (gen_rtx_SET (VOIDmode, target,
35533 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
35534 return target;
35535
35536 case IX86_BUILTIN_RDSEED16_STEP:
35537 icode = CODE_FOR_rdseedhi_1;
35538 mode0 = HImode;
35539 goto rdseed_step;
35540
35541 case IX86_BUILTIN_RDSEED32_STEP:
35542 icode = CODE_FOR_rdseedsi_1;
35543 mode0 = SImode;
35544 goto rdseed_step;
35545
35546 case IX86_BUILTIN_RDSEED64_STEP:
35547 icode = CODE_FOR_rdseeddi_1;
35548 mode0 = DImode;
35549
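/* Common code for the rdseed*_step builtins: emit the rdseed, store the
value through the pointer argument, and return the carry flag
zero-extended to SImode. */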
35550 rdseed_step:
35551 op0 = gen_reg_rtx (mode0);
35552 emit_insn (GEN_FCN (icode) (op0));
35553
35554 arg0 = CALL_EXPR_ARG (exp, 0);
35555 op1 = expand_normal (arg0);
35556 if (!address_operand (op1, VOIDmode))
35557 {
35558 op1 = convert_memory_address (Pmode, op1);
35559 op1 = copy_addr_to_reg (op1);
35560 }
35561 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
35562
35563 op2 = gen_reg_rtx (QImode);
35564
35565 pat = gen_rtx_LTU (QImode, gen_rtx_REG (CCCmode, FLAGS_REG),
35566 const0_rtx);
35567 emit_insn (gen_rtx_SET (VOIDmode, op2, pat));
35568
35569 if (target == 0
35570 || !register_operand (target, SImode))
35571 target = gen_reg_rtx (SImode);
35572
35573 emit_insn (gen_zero_extendqisi2 (target, op2));
35574 return target;
35575
35576 case IX86_BUILTIN_ADDCARRYX32:
35577 icode = TARGET_ADX ? CODE_FOR_adcxsi3 : CODE_FOR_addsi3_carry;
35578 mode0 = SImode;
35579 goto addcarryx;
35580
35581 case IX86_BUILTIN_ADDCARRYX64:
35582 icode = TARGET_ADX ? CODE_FOR_adcxdi3 : CODE_FOR_adddi3_carry;
35583 mode0 = DImode;
35584
35585 addcarryx:
35586 arg0 = CALL_EXPR_ARG (exp, 0); /* unsigned char c_in. */
35587 arg1 = CALL_EXPR_ARG (exp, 1); /* unsigned int src1. */
35588 arg2 = CALL_EXPR_ARG (exp, 2); /* unsigned int src2. */
35589 arg3 = CALL_EXPR_ARG (exp, 3); /* unsigned int *sum_out. */
35590
35591 op0 = gen_reg_rtx (QImode);
35592
35593 /* Generate CF from input operand. */
35594 op1 = expand_normal (arg0);
35595 op1 = copy_to_mode_reg (QImode, convert_to_mode (QImode, op1, 1));
35596 emit_insn (gen_addqi3_cc (op0, op1, constm1_rtx));
35597
35598 /* Generate the ADCX (or ADC) instruction to compute X + Y + CF. */
35599 op2 = expand_normal (arg1);
35600 op3 = expand_normal (arg2);
35601
35602 if (!REG_P (op2))
35603 op2 = copy_to_mode_reg (mode0, op2);
35604 if (!REG_P (op3))
35605 op3 = copy_to_mode_reg (mode0, op3);
35606
35607 op0 = gen_reg_rtx (mode0);
35608
35609 op4 = gen_rtx_REG (CCCmode, FLAGS_REG);
35610 pat = gen_rtx_LTU (VOIDmode, op4, const0_rtx);
35611 emit_insn (GEN_FCN (icode) (op0, op2, op3, op4, pat));
35612
35613 /* Store the result. */
35614 op4 = expand_normal (arg3);
35615 if (!address_operand (op4, VOIDmode))
35616 {
35617 op4 = convert_memory_address (Pmode, op4);
35618 op4 = copy_addr_to_reg (op4);
35619 }
35620 emit_move_insn (gen_rtx_MEM (mode0, op4), op0);
35621
35622 /* Return current CF value. */
35623 if (target == 0)
35624 target = gen_reg_rtx (QImode);
35625
35626 PUT_MODE (pat, QImode);
35627 emit_insn (gen_rtx_SET (VOIDmode, target, pat));
35628 return target;
35629
35630 case IX86_BUILTIN_READ_FLAGS:
35631 emit_insn (gen_push (gen_rtx_REG (word_mode, FLAGS_REG)));
35632
35633 if (optimize
35634 || target == NULL_RTX
35635 || !nonimmediate_operand (target, word_mode)
35636 || GET_MODE (target) != word_mode)
35637 target = gen_reg_rtx (word_mode);
35638
35639 emit_insn (gen_pop (target));
35640 return target;
35641
35642 case IX86_BUILTIN_WRITE_FLAGS:
35643
35644 arg0 = CALL_EXPR_ARG (exp, 0);
35645 op0 = expand_normal (arg0);
35646 if (!general_no_elim_operand (op0, word_mode))
35647 op0 = copy_to_mode_reg (word_mode, op0);
35648
35649 emit_insn (gen_push (op0));
35650 emit_insn (gen_pop (gen_rtx_REG (word_mode, FLAGS_REG)));
35651 return 0;
35652
35653 case IX86_BUILTIN_KORTESTC16:
35654 icode = CODE_FOR_kortestchi;
35655 mode0 = HImode;
35656 mode1 = CCCmode;
35657 goto kortest;
35658
35659 case IX86_BUILTIN_KORTESTZ16:
35660 icode = CODE_FOR_kortestzhi;
35661 mode0 = HImode;
35662 mode1 = CCZmode;
35663
35664 kortest:
35665 arg0 = CALL_EXPR_ARG (exp, 0); /* Mask reg src1. */
35666 arg1 = CALL_EXPR_ARG (exp, 1); /* Mask reg src2. */
35667 op0 = expand_normal (arg0);
35668 op1 = expand_normal (arg1);
35669
35670 op0 = copy_to_reg (op0);
35671 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
35672 op1 = copy_to_reg (op1);
35673 op1 = simplify_gen_subreg (mode0, op1, GET_MODE (op1), 0);
35674
35675 target = gen_reg_rtx (QImode);
35676 emit_insn (gen_rtx_SET (mode0, target, const0_rtx));
35677
35678 /* Emit kortest. */
35679 emit_insn (GEN_FCN (icode) (op0, op1));
35680 /* And use setcc to return result from flags. */
35681 ix86_expand_setcc (target, EQ,
35682 gen_rtx_REG (mode1, FLAGS_REG), const0_rtx);
35683 return target;
35684
35685 case IX86_BUILTIN_GATHERSIV2DF:
35686 icode = CODE_FOR_avx2_gathersiv2df;
35687 goto gather_gen;
35688 case IX86_BUILTIN_GATHERSIV4DF:
35689 icode = CODE_FOR_avx2_gathersiv4df;
35690 goto gather_gen;
35691 case IX86_BUILTIN_GATHERDIV2DF:
35692 icode = CODE_FOR_avx2_gatherdiv2df;
35693 goto gather_gen;
35694 case IX86_BUILTIN_GATHERDIV4DF:
35695 icode = CODE_FOR_avx2_gatherdiv4df;
35696 goto gather_gen;
35697 case IX86_BUILTIN_GATHERSIV4SF:
35698 icode = CODE_FOR_avx2_gathersiv4sf;
35699 goto gather_gen;
35700 case IX86_BUILTIN_GATHERSIV8SF:
35701 icode = CODE_FOR_avx2_gathersiv8sf;
35702 goto gather_gen;
35703 case IX86_BUILTIN_GATHERDIV4SF:
35704 icode = CODE_FOR_avx2_gatherdiv4sf;
35705 goto gather_gen;
35706 case IX86_BUILTIN_GATHERDIV8SF:
35707 icode = CODE_FOR_avx2_gatherdiv8sf;
35708 goto gather_gen;
35709 case IX86_BUILTIN_GATHERSIV2DI:
35710 icode = CODE_FOR_avx2_gathersiv2di;
35711 goto gather_gen;
35712 case IX86_BUILTIN_GATHERSIV4DI:
35713 icode = CODE_FOR_avx2_gathersiv4di;
35714 goto gather_gen;
35715 case IX86_BUILTIN_GATHERDIV2DI:
35716 icode = CODE_FOR_avx2_gatherdiv2di;
35717 goto gather_gen;
35718 case IX86_BUILTIN_GATHERDIV4DI:
35719 icode = CODE_FOR_avx2_gatherdiv4di;
35720 goto gather_gen;
35721 case IX86_BUILTIN_GATHERSIV4SI:
35722 icode = CODE_FOR_avx2_gathersiv4si;
35723 goto gather_gen;
35724 case IX86_BUILTIN_GATHERSIV8SI:
35725 icode = CODE_FOR_avx2_gathersiv8si;
35726 goto gather_gen;
35727 case IX86_BUILTIN_GATHERDIV4SI:
35728 icode = CODE_FOR_avx2_gatherdiv4si;
35729 goto gather_gen;
35730 case IX86_BUILTIN_GATHERDIV8SI:
35731 icode = CODE_FOR_avx2_gatherdiv8si;
35732 goto gather_gen;
35733 case IX86_BUILTIN_GATHERALTSIV4DF:
35734 icode = CODE_FOR_avx2_gathersiv4df;
35735 goto gather_gen;
35736 case IX86_BUILTIN_GATHERALTDIV8SF:
35737 icode = CODE_FOR_avx2_gatherdiv8sf;
35738 goto gather_gen;
35739 case IX86_BUILTIN_GATHERALTSIV4DI:
35740 icode = CODE_FOR_avx2_gathersiv4di;
35741 goto gather_gen;
35742 case IX86_BUILTIN_GATHERALTDIV8SI:
35743 icode = CODE_FOR_avx2_gatherdiv8si;
35744 goto gather_gen;
35745 case IX86_BUILTIN_GATHER3SIV16SF:
35746 icode = CODE_FOR_avx512f_gathersiv16sf;
35747 goto gather_gen;
35748 case IX86_BUILTIN_GATHER3SIV8DF:
35749 icode = CODE_FOR_avx512f_gathersiv8df;
35750 goto gather_gen;
35751 case IX86_BUILTIN_GATHER3DIV16SF:
35752 icode = CODE_FOR_avx512f_gatherdiv16sf;
35753 goto gather_gen;
35754 case IX86_BUILTIN_GATHER3DIV8DF:
35755 icode = CODE_FOR_avx512f_gatherdiv8df;
35756 goto gather_gen;
35757 case IX86_BUILTIN_GATHER3SIV16SI:
35758 icode = CODE_FOR_avx512f_gathersiv16si;
35759 goto gather_gen;
35760 case IX86_BUILTIN_GATHER3SIV8DI:
35761 icode = CODE_FOR_avx512f_gathersiv8di;
35762 goto gather_gen;
35763 case IX86_BUILTIN_GATHER3DIV16SI:
35764 icode = CODE_FOR_avx512f_gatherdiv16si;
35765 goto gather_gen;
35766 case IX86_BUILTIN_GATHER3DIV8DI:
35767 icode = CODE_FOR_avx512f_gatherdiv8di;
35768 goto gather_gen;
35769 case IX86_BUILTIN_GATHER3ALTSIV8DF:
35770 icode = CODE_FOR_avx512f_gathersiv8df;
35771 goto gather_gen;
35772 case IX86_BUILTIN_GATHER3ALTDIV16SF:
35773 icode = CODE_FOR_avx512f_gatherdiv16sf;
35774 goto gather_gen;
35775 case IX86_BUILTIN_GATHER3ALTSIV8DI:
35776 icode = CODE_FOR_avx512f_gathersiv8di;
35777 goto gather_gen;
35778 case IX86_BUILTIN_GATHER3ALTDIV16SI:
35779 icode = CODE_FOR_avx512f_gatherdiv16si;
35780 goto gather_gen;
35781 case IX86_BUILTIN_SCATTERSIV16SF:
35782 icode = CODE_FOR_avx512f_scattersiv16sf;
35783 goto scatter_gen;
35784 case IX86_BUILTIN_SCATTERSIV8DF:
35785 icode = CODE_FOR_avx512f_scattersiv8df;
35786 goto scatter_gen;
35787 case IX86_BUILTIN_SCATTERDIV16SF:
35788 icode = CODE_FOR_avx512f_scatterdiv16sf;
35789 goto scatter_gen;
35790 case IX86_BUILTIN_SCATTERDIV8DF:
35791 icode = CODE_FOR_avx512f_scatterdiv8df;
35792 goto scatter_gen;
35793 case IX86_BUILTIN_SCATTERSIV16SI:
35794 icode = CODE_FOR_avx512f_scattersiv16si;
35795 goto scatter_gen;
35796 case IX86_BUILTIN_SCATTERSIV8DI:
35797 icode = CODE_FOR_avx512f_scattersiv8di;
35798 goto scatter_gen;
35799 case IX86_BUILTIN_SCATTERDIV16SI:
35800 icode = CODE_FOR_avx512f_scatterdiv16si;
35801 goto scatter_gen;
35802 case IX86_BUILTIN_SCATTERDIV8DI:
35803 icode = CODE_FOR_avx512f_scatterdiv8di;
35804 goto scatter_gen;
35805
35806 case IX86_BUILTIN_GATHERPFDPD:
35807 icode = CODE_FOR_avx512pf_gatherpfv8sidf;
35808 goto vec_prefetch_gen;
35809 case IX86_BUILTIN_GATHERPFDPS:
35810 icode = CODE_FOR_avx512pf_gatherpfv16sisf;
35811 goto vec_prefetch_gen;
35812 case IX86_BUILTIN_GATHERPFQPD:
35813 icode = CODE_FOR_avx512pf_gatherpfv8didf;
35814 goto vec_prefetch_gen;
35815 case IX86_BUILTIN_GATHERPFQPS:
35816 icode = CODE_FOR_avx512pf_gatherpfv8disf;
35817 goto vec_prefetch_gen;
35818 case IX86_BUILTIN_SCATTERPFDPD:
35819 icode = CODE_FOR_avx512pf_scatterpfv8sidf;
35820 goto vec_prefetch_gen;
35821 case IX86_BUILTIN_SCATTERPFDPS:
35822 icode = CODE_FOR_avx512pf_scatterpfv16sisf;
35823 goto vec_prefetch_gen;
35824 case IX86_BUILTIN_SCATTERPFQPD:
35825 icode = CODE_FOR_avx512pf_scatterpfv8didf;
35826 goto vec_prefetch_gen;
35827 case IX86_BUILTIN_SCATTERPFQPS:
35828 icode = CODE_FOR_avx512pf_scatterpfv8disf;
35829 goto vec_prefetch_gen;
35830
35831 gather_gen:
35832 rtx half;
35833 rtx (*gen) (rtx, rtx);
35834
35835 arg0 = CALL_EXPR_ARG (exp, 0);
35836 arg1 = CALL_EXPR_ARG (exp, 1);
35837 arg2 = CALL_EXPR_ARG (exp, 2);
35838 arg3 = CALL_EXPR_ARG (exp, 3);
35839 arg4 = CALL_EXPR_ARG (exp, 4);
35840 op0 = expand_normal (arg0);
35841 op1 = expand_normal (arg1);
35842 op2 = expand_normal (arg2);
35843 op3 = expand_normal (arg3);
35844 op4 = expand_normal (arg4);
35845 /* Note the arg order is different from the operand order. */
35846 mode0 = insn_data[icode].operand[1].mode;
35847 mode2 = insn_data[icode].operand[3].mode;
35848 mode3 = insn_data[icode].operand[4].mode;
35849 mode4 = insn_data[icode].operand[5].mode;
35850
35851 if (target == NULL_RTX
35852 || GET_MODE (target) != insn_data[icode].operand[0].mode
35853 || !insn_data[icode].operand[0].predicate (target,
35854 GET_MODE (target)))
35855 subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
35856 else
35857 subtarget = target;
35858
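/* The ...ALT... gather variants reuse the ordinary gather patterns; some
of their vector arguments carry twice as many elements as the pattern
operand needs, so extract the low halves first. */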
35859 switch (fcode)
35860 {
35861 case IX86_BUILTIN_GATHER3ALTSIV8DF:
35862 case IX86_BUILTIN_GATHER3ALTSIV8DI:
35863 half = gen_reg_rtx (V8SImode);
35864 if (!nonimmediate_operand (op2, V16SImode))
35865 op2 = copy_to_mode_reg (V16SImode, op2);
35866 emit_insn (gen_vec_extract_lo_v16si (half, op2));
35867 op2 = half;
35868 break;
35869 case IX86_BUILTIN_GATHERALTSIV4DF:
35870 case IX86_BUILTIN_GATHERALTSIV4DI:
35871 half = gen_reg_rtx (V4SImode);
35872 if (!nonimmediate_operand (op2, V8SImode))
35873 op2 = copy_to_mode_reg (V8SImode, op2);
35874 emit_insn (gen_vec_extract_lo_v8si (half, op2));
35875 op2 = half;
35876 break;
35877 case IX86_BUILTIN_GATHER3ALTDIV16SF:
35878 case IX86_BUILTIN_GATHER3ALTDIV16SI:
35879 half = gen_reg_rtx (mode0);
35880 if (mode0 == V8SFmode)
35881 gen = gen_vec_extract_lo_v16sf;
35882 else
35883 gen = gen_vec_extract_lo_v16si;
35884 if (!nonimmediate_operand (op0, GET_MODE (op0)))
35885 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
35886 emit_insn (gen (half, op0));
35887 op0 = half;
35888 if (GET_MODE (op3) != VOIDmode)
35889 {
35890 if (!nonimmediate_operand (op3, GET_MODE (op3)))
35891 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
35892 emit_insn (gen (half, op3));
35893 op3 = half;
35894 }
35895 break;
35896 case IX86_BUILTIN_GATHERALTDIV8SF:
35897 case IX86_BUILTIN_GATHERALTDIV8SI:
35898 half = gen_reg_rtx (mode0);
35899 if (mode0 == V4SFmode)
35900 gen = gen_vec_extract_lo_v8sf;
35901 else
35902 gen = gen_vec_extract_lo_v8si;
35903 if (!nonimmediate_operand (op0, GET_MODE (op0)))
35904 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
35905 emit_insn (gen (half, op0));
35906 op0 = half;
35907 if (GET_MODE (op3) != VOIDmode)
35908 {
35909 if (!nonimmediate_operand (op3, GET_MODE (op3)))
35910 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
35911 emit_insn (gen (half, op3));
35912 op3 = half;
35913 }
35914 break;
35915 default:
35916 break;
35917 }
35918
35919 /* Force the memory operand to use only a base register here; we
35920 don't want to do this for the memory operands of other builtin
35921 functions. */
35922 op1 = ix86_zero_extend_to_Pmode (op1);
35923
35924 if (!insn_data[icode].operand[1].predicate (op0, mode0))
35925 op0 = copy_to_mode_reg (mode0, op0);
35926 if (!insn_data[icode].operand[2].predicate (op1, Pmode))
35927 op1 = copy_to_mode_reg (Pmode, op1);
35928 if (!insn_data[icode].operand[3].predicate (op2, mode2))
35929 op2 = copy_to_mode_reg (mode2, op2);
35930 if (GET_MODE (op3) == mode3 || GET_MODE (op3) == VOIDmode)
35931 {
35932 if (!insn_data[icode].operand[4].predicate (op3, mode3))
35933 op3 = copy_to_mode_reg (mode3, op3);
35934 }
35935 else
35936 {
35937 op3 = copy_to_reg (op3);
35938 op3 = simplify_gen_subreg (mode3, op3, GET_MODE (op3), 0);
35939 }
35940 if (!insn_data[icode].operand[5].predicate (op4, mode4))
35941 {
35942 error ("the last argument must be scale 1, 2, 4, 8");
35943 return const0_rtx;
35944 }
35945
35946 /* Optimize. If mask is known to have all high bits set,
35947 replace op0 with pc_rtx to signal that the instruction
35948 overwrites the whole destination and doesn't use its
35949 previous contents. */
35950 if (optimize)
35951 {
35952 if (TREE_CODE (arg3) == INTEGER_CST)
35953 {
35954 if (integer_all_onesp (arg3))
35955 op0 = pc_rtx;
35956 }
35957 else if (TREE_CODE (arg3) == VECTOR_CST)
35958 {
35959 unsigned int negative = 0;
35960 for (i = 0; i < VECTOR_CST_NELTS (arg3); ++i)
35961 {
35962 tree cst = VECTOR_CST_ELT (arg3, i);
35963 if (TREE_CODE (cst) == INTEGER_CST
35964 && tree_int_cst_sign_bit (cst))
35965 negative++;
35966 else if (TREE_CODE (cst) == REAL_CST
35967 && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
35968 negative++;
35969 }
35970 if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
35971 op0 = pc_rtx;
35972 }
35973 else if (TREE_CODE (arg3) == SSA_NAME
35974 && TREE_CODE (TREE_TYPE (arg3)) == VECTOR_TYPE)
35975 {
35976 /* Recognize also when mask is like:
35977 __v2df src = _mm_setzero_pd ();
35978 __v2df mask = _mm_cmpeq_pd (src, src);
35979 or
35980 __v8sf src = _mm256_setzero_ps ();
35981 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
35982 as that is a cheaper way to load all ones into
35983 a register than having to load a constant from
35984 memory. */
35985 gimple def_stmt = SSA_NAME_DEF_STMT (arg3);
35986 if (is_gimple_call (def_stmt))
35987 {
35988 tree fndecl = gimple_call_fndecl (def_stmt);
35989 if (fndecl
35990 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
35991 switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
35992 {
35993 case IX86_BUILTIN_CMPPD:
35994 case IX86_BUILTIN_CMPPS:
35995 case IX86_BUILTIN_CMPPD256:
35996 case IX86_BUILTIN_CMPPS256:
35997 if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
35998 break;
35999 /* FALLTHRU */
36000 case IX86_BUILTIN_CMPEQPD:
36001 case IX86_BUILTIN_CMPEQPS:
36002 if (initializer_zerop (gimple_call_arg (def_stmt, 0))
36003 && initializer_zerop (gimple_call_arg (def_stmt,
36004 1)))
36005 op0 = pc_rtx;
36006 break;
36007 default:
36008 break;
36009 }
36010 }
36011 }
36012 }
36013
36014 pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
36015 if (! pat)
36016 return const0_rtx;
36017 emit_insn (pat);
36018
36019 switch (fcode)
36020 {
36021 case IX86_BUILTIN_GATHER3DIV16SF:
36022 if (target == NULL_RTX)
36023 target = gen_reg_rtx (V8SFmode);
36024 emit_insn (gen_vec_extract_lo_v16sf (target, subtarget));
36025 break;
36026 case IX86_BUILTIN_GATHER3DIV16SI:
36027 if (target == NULL_RTX)
36028 target = gen_reg_rtx (V8SImode);
36029 emit_insn (gen_vec_extract_lo_v16si (target, subtarget));
36030 break;
36031 case IX86_BUILTIN_GATHERDIV8SF:
36032 if (target == NULL_RTX)
36033 target = gen_reg_rtx (V4SFmode);
36034 emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
36035 break;
36036 case IX86_BUILTIN_GATHERDIV8SI:
36037 if (target == NULL_RTX)
36038 target = gen_reg_rtx (V4SImode);
36039 emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
36040 break;
36041 default:
36042 target = subtarget;
36043 break;
36044 }
36045 return target;
36046
36047 scatter_gen:
36048 arg0 = CALL_EXPR_ARG (exp, 0);
36049 arg1 = CALL_EXPR_ARG (exp, 1);
36050 arg2 = CALL_EXPR_ARG (exp, 2);
36051 arg3 = CALL_EXPR_ARG (exp, 3);
36052 arg4 = CALL_EXPR_ARG (exp, 4);
36053 op0 = expand_normal (arg0);
36054 op1 = expand_normal (arg1);
36055 op2 = expand_normal (arg2);
36056 op3 = expand_normal (arg3);
36057 op4 = expand_normal (arg4);
36058 mode1 = insn_data[icode].operand[1].mode;
36059 mode2 = insn_data[icode].operand[2].mode;
36060 mode3 = insn_data[icode].operand[3].mode;
36061 mode4 = insn_data[icode].operand[4].mode;
36062
36063 /* Force the memory operand to use only a base register here.  We
36064 don't want to do this for the memory operands of other builtin
36065 functions. */
36066 op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1));
36067
36068 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
36069 op0 = copy_to_mode_reg (Pmode, op0);
36070
36071 if (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
36072 {
36073 if (!insn_data[icode].operand[1].predicate (op1, mode1))
36074 op1 = copy_to_mode_reg (mode1, op1);
36075 }
36076 else
36077 {
36078 op1 = copy_to_reg (op1);
36079 op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0);
36080 }
36081
36082 if (!insn_data[icode].operand[2].predicate (op2, mode2))
36083 op2 = copy_to_mode_reg (mode2, op2);
36084
36085 if (!insn_data[icode].operand[3].predicate (op3, mode3))
36086 op3 = copy_to_mode_reg (mode3, op3);
36087
36088 if (!insn_data[icode].operand[4].predicate (op4, mode4))
36089 {
36090 error ("the last argument must be scale 1, 2, 4, 8");
36091 return const0_rtx;
36092 }
36093
36094 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
36095 if (! pat)
36096 return const0_rtx;
36097
36098 emit_insn (pat);
36099 return 0;
36100
36101 vec_prefetch_gen:
36102 arg0 = CALL_EXPR_ARG (exp, 0);
36103 arg1 = CALL_EXPR_ARG (exp, 1);
36104 arg2 = CALL_EXPR_ARG (exp, 2);
36105 arg3 = CALL_EXPR_ARG (exp, 3);
36106 arg4 = CALL_EXPR_ARG (exp, 4);
36107 op0 = expand_normal (arg0);
36108 op1 = expand_normal (arg1);
36109 op2 = expand_normal (arg2);
36110 op3 = expand_normal (arg3);
36111 op4 = expand_normal (arg4);
36112 mode0 = insn_data[icode].operand[0].mode;
36113 mode1 = insn_data[icode].operand[1].mode;
36114 mode3 = insn_data[icode].operand[3].mode;
36115 mode4 = insn_data[icode].operand[4].mode;
36116
36117 if (GET_MODE (op0) == mode0
36118 || (GET_MODE (op0) == VOIDmode && op0 != constm1_rtx))
36119 {
36120 if (!insn_data[icode].operand[0].predicate (op0, mode0))
36121 op0 = copy_to_mode_reg (mode0, op0);
36122 }
36123 else if (op0 != constm1_rtx)
36124 {
36125 op0 = copy_to_reg (op0);
36126 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
36127 }
36128
36129 if (!insn_data[icode].operand[1].predicate (op1, mode1))
36130 op1 = copy_to_mode_reg (mode1, op1);
36131
36132 /* Force the memory operand to use only a base register here.  We
36133 don't want to do this for the memory operands of other builtin
36134 functions. */
36135 op2 = force_reg (Pmode, convert_to_mode (Pmode, op2, 1));
36136
36137 if (!insn_data[icode].operand[2].predicate (op2, Pmode))
36138 op2 = copy_to_mode_reg (Pmode, op2);
36139
36140 if (!insn_data[icode].operand[3].predicate (op3, mode3))
36141 {
36142 error ("the fourth argument must be scale 1, 2, 4, 8");
36143 return const0_rtx;
36144 }
36145
36146 if (!insn_data[icode].operand[4].predicate (op4, mode4))
36147 {
36148 error ("incorrect hint operand");
36149 return const0_rtx;
36150 }
36151
36152 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
36153 if (! pat)
36154 return const0_rtx;
36155
36156 emit_insn (pat);
36157
36158 return 0;
36159
36160 case IX86_BUILTIN_XABORT:
36161 icode = CODE_FOR_xabort;
36162 arg0 = CALL_EXPR_ARG (exp, 0);
36163 op0 = expand_normal (arg0);
36164 mode0 = insn_data[icode].operand[0].mode;
36165 if (!insn_data[icode].operand[0].predicate (op0, mode0))
36166 {
36167 error ("the xabort's argument must be an 8-bit immediate");
36168 return const0_rtx;
36169 }
36170 emit_insn (gen_xabort (op0));
36171 return 0;
36172
36173 default:
36174 break;
36175 }
36176
36177 for (i = 0, d = bdesc_special_args;
36178 i < ARRAY_SIZE (bdesc_special_args);
36179 i++, d++)
36180 if (d->code == fcode)
36181 return ix86_expand_special_args_builtin (d, exp, target);
36182
36183 for (i = 0, d = bdesc_args;
36184 i < ARRAY_SIZE (bdesc_args);
36185 i++, d++)
36186 if (d->code == fcode)
36187 switch (fcode)
36188 {
36189 case IX86_BUILTIN_FABSQ:
36190 case IX86_BUILTIN_COPYSIGNQ:
36191 if (!TARGET_SSE)
36192 /* Emit a normal call if SSE isn't available. */
36193 return expand_call (exp, target, ignore);
36194 default:
36195 return ix86_expand_args_builtin (d, exp, target);
36196 }
36197
36198 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
36199 if (d->code == fcode)
36200 return ix86_expand_sse_comi (d, exp, target);
36201
36202 for (i = 0, d = bdesc_round_args; i < ARRAY_SIZE (bdesc_round_args); i++, d++)
36203 if (d->code == fcode)
36204 return ix86_expand_round_builtin (d, exp, target);
36205
36206 for (i = 0, d = bdesc_pcmpestr;
36207 i < ARRAY_SIZE (bdesc_pcmpestr);
36208 i++, d++)
36209 if (d->code == fcode)
36210 return ix86_expand_sse_pcmpestr (d, exp, target);
36211
36212 for (i = 0, d = bdesc_pcmpistr;
36213 i < ARRAY_SIZE (bdesc_pcmpistr);
36214 i++, d++)
36215 if (d->code == fcode)
36216 return ix86_expand_sse_pcmpistr (d, exp, target);
36217
36218 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
36219 if (d->code == fcode)
36220 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
36221 (enum ix86_builtin_func_type)
36222 d->flag, d->comparison);
36223
36224 gcc_unreachable ();
36225 }
36226
36227 /* This returns the target-specific builtin with code CODE if
36228 current_function_decl is allowed to use this builtin, as determined
36229 by its isa flags. Returns NULL_TREE otherwise. */
36230
36231 static tree ix86_get_builtin (enum ix86_builtins code)
36232 {
36233 struct cl_target_option *opts;
36234 tree target_tree = NULL_TREE;
36235
36236 /* Determine the isa flags of current_function_decl. */
36237
36238 if (current_function_decl)
36239 target_tree = DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl);
36240
36241 if (target_tree == NULL)
36242 target_tree = target_option_default_node;
36243
36244 opts = TREE_TARGET_OPTION (target_tree);
36245
36246 if (ix86_builtins_isa[(int) code].isa & opts->x_ix86_isa_flags)
36247 return ix86_builtin_decl (code, true);
36248 else
36249 return NULL_TREE;
36250 }
36251
36252 /* Returns a function decl for a vectorized version of the builtin function
36253 FNDECL with result vector type TYPE_OUT and argument vector type TYPE_IN,
36254 or NULL_TREE if it is not available. */
36255
36256 static tree
36257 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
36258 tree type_in)
36259 {
36260 enum machine_mode in_mode, out_mode;
36261 int in_n, out_n;
36262 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
36263
36264 if (TREE_CODE (type_out) != VECTOR_TYPE
36265 || TREE_CODE (type_in) != VECTOR_TYPE
36266 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
36267 return NULL_TREE;
36268
36269 out_mode = TYPE_MODE (TREE_TYPE (type_out));
36270 out_n = TYPE_VECTOR_SUBPARTS (type_out);
36271 in_mode = TYPE_MODE (TREE_TYPE (type_in));
36272 in_n = TYPE_VECTOR_SUBPARTS (type_in);
36273
36274 switch (fn)
36275 {
36276 case BUILT_IN_SQRT:
36277 if (out_mode == DFmode && in_mode == DFmode)
36278 {
36279 if (out_n == 2 && in_n == 2)
36280 return ix86_get_builtin (IX86_BUILTIN_SQRTPD);
36281 else if (out_n == 4 && in_n == 4)
36282 return ix86_get_builtin (IX86_BUILTIN_SQRTPD256);
36283 else if (out_n == 8 && in_n == 8)
36284 return ix86_get_builtin (IX86_BUILTIN_SQRTPD512);
36285 }
36286 break;
36287
36288 case BUILT_IN_EXP2F:
36289 if (out_mode == SFmode && in_mode == SFmode)
36290 {
36291 if (out_n == 16 && in_n == 16)
36292 return ix86_get_builtin (IX86_BUILTIN_EXP2PS);
36293 }
36294 break;
36295
36296 case BUILT_IN_SQRTF:
36297 if (out_mode == SFmode && in_mode == SFmode)
36298 {
36299 if (out_n == 4 && in_n == 4)
36300 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR);
36301 else if (out_n == 8 && in_n == 8)
36302 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR256);
36303 else if (out_n == 16 && in_n == 16)
36304 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR512);
36305 }
36306 break;
36307
36308 case BUILT_IN_IFLOOR:
36309 case BUILT_IN_LFLOOR:
36310 case BUILT_IN_LLFLOOR:
36311 /* The round insn does not trap on denormals. */
36312 if (flag_trapping_math || !TARGET_ROUND)
36313 break;
36314
36315 if (out_mode == SImode && in_mode == DFmode)
36316 {
36317 if (out_n == 4 && in_n == 2)
36318 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX);
36319 else if (out_n == 8 && in_n == 4)
36320 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256);
36321 else if (out_n == 16 && in_n == 8)
36322 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512);
36323 }
36324 break;
36325
36326 case BUILT_IN_IFLOORF:
36327 case BUILT_IN_LFLOORF:
36328 case BUILT_IN_LLFLOORF:
36329 /* The round insn does not trap on denormals. */
36330 if (flag_trapping_math || !TARGET_ROUND)
36331 break;
36332
36333 if (out_mode == SImode && in_mode == SFmode)
36334 {
36335 if (out_n == 4 && in_n == 4)
36336 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX);
36337 else if (out_n == 8 && in_n == 8)
36338 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX256);
36339 }
36340 break;
36341
36342 case BUILT_IN_ICEIL:
36343 case BUILT_IN_LCEIL:
36344 case BUILT_IN_LLCEIL:
36345 /* The round insn does not trap on denormals. */
36346 if (flag_trapping_math || !TARGET_ROUND)
36347 break;
36348
36349 if (out_mode == SImode && in_mode == DFmode)
36350 {
36351 if (out_n == 4 && in_n == 2)
36352 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX);
36353 else if (out_n == 8 && in_n == 4)
36354 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256);
36355 else if (out_n == 16 && in_n == 8)
36356 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512);
36357 }
36358 break;
36359
36360 case BUILT_IN_ICEILF:
36361 case BUILT_IN_LCEILF:
36362 case BUILT_IN_LLCEILF:
36363 /* The round insn does not trap on denormals. */
36364 if (flag_trapping_math || !TARGET_ROUND)
36365 break;
36366
36367 if (out_mode == SImode && in_mode == SFmode)
36368 {
36369 if (out_n == 4 && in_n == 4)
36370 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX);
36371 else if (out_n == 8 && in_n == 8)
36372 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX256);
36373 }
36374 break;
36375
36376 case BUILT_IN_IRINT:
36377 case BUILT_IN_LRINT:
36378 case BUILT_IN_LLRINT:
36379 if (out_mode == SImode && in_mode == DFmode)
36380 {
36381 if (out_n == 4 && in_n == 2)
36382 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX);
36383 else if (out_n == 8 && in_n == 4)
36384 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX256);
36385 }
36386 break;
36387
36388 case BUILT_IN_IRINTF:
36389 case BUILT_IN_LRINTF:
36390 case BUILT_IN_LLRINTF:
36391 if (out_mode == SImode && in_mode == SFmode)
36392 {
36393 if (out_n == 4 && in_n == 4)
36394 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ);
36395 else if (out_n == 8 && in_n == 8)
36396 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ256);
36397 }
36398 break;
36399
36400 case BUILT_IN_IROUND:
36401 case BUILT_IN_LROUND:
36402 case BUILT_IN_LLROUND:
36403 /* The round insn does not trap on denormals. */
36404 if (flag_trapping_math || !TARGET_ROUND)
36405 break;
36406
36407 if (out_mode == SImode && in_mode == DFmode)
36408 {
36409 if (out_n == 4 && in_n == 2)
36410 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX);
36411 else if (out_n == 8 && in_n == 4)
36412 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256);
36413 else if (out_n == 16 && in_n == 8)
36414 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512);
36415 }
36416 break;
36417
36418 case BUILT_IN_IROUNDF:
36419 case BUILT_IN_LROUNDF:
36420 case BUILT_IN_LLROUNDF:
36421 /* The round insn does not trap on denormals. */
36422 if (flag_trapping_math || !TARGET_ROUND)
36423 break;
36424
36425 if (out_mode == SImode && in_mode == SFmode)
36426 {
36427 if (out_n == 4 && in_n == 4)
36428 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX);
36429 else if (out_n == 8 && in_n == 8)
36430 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX256);
36431 }
36432 break;
36433
36434 case BUILT_IN_COPYSIGN:
36435 if (out_mode == DFmode && in_mode == DFmode)
36436 {
36437 if (out_n == 2 && in_n == 2)
36438 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD);
36439 else if (out_n == 4 && in_n == 4)
36440 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD256);
36441 else if (out_n == 8 && in_n == 8)
36442 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD512);
36443 }
36444 break;
36445
36446 case BUILT_IN_COPYSIGNF:
36447 if (out_mode == SFmode && in_mode == SFmode)
36448 {
36449 if (out_n == 4 && in_n == 4)
36450 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS);
36451 else if (out_n == 8 && in_n == 8)
36452 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS256);
36453 else if (out_n == 16 && in_n == 16)
36454 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS512);
36455 }
36456 break;
36457
36458 case BUILT_IN_FLOOR:
36459 /* The round insn does not trap on denormals. */
36460 if (flag_trapping_math || !TARGET_ROUND)
36461 break;
36462
36463 if (out_mode == DFmode && in_mode == DFmode)
36464 {
36465 if (out_n == 2 && in_n == 2)
36466 return ix86_get_builtin (IX86_BUILTIN_FLOORPD);
36467 else if (out_n == 4 && in_n == 4)
36468 return ix86_get_builtin (IX86_BUILTIN_FLOORPD256);
36469 }
36470 break;
36471
36472 case BUILT_IN_FLOORF:
36473 /* The round insn does not trap on denormals. */
36474 if (flag_trapping_math || !TARGET_ROUND)
36475 break;
36476
36477 if (out_mode == SFmode && in_mode == SFmode)
36478 {
36479 if (out_n == 4 && in_n == 4)
36480 return ix86_get_builtin (IX86_BUILTIN_FLOORPS);
36481 else if (out_n == 8 && in_n == 8)
36482 return ix86_get_builtin (IX86_BUILTIN_FLOORPS256);
36483 }
36484 break;
36485
36486 case BUILT_IN_CEIL:
36487 /* The round insn does not trap on denormals. */
36488 if (flag_trapping_math || !TARGET_ROUND)
36489 break;
36490
36491 if (out_mode == DFmode && in_mode == DFmode)
36492 {
36493 if (out_n == 2 && in_n == 2)
36494 return ix86_get_builtin (IX86_BUILTIN_CEILPD);
36495 else if (out_n == 4 && in_n == 4)
36496 return ix86_get_builtin (IX86_BUILTIN_CEILPD256);
36497 }
36498 break;
36499
36500 case BUILT_IN_CEILF:
36501 /* The round insn does not trap on denormals. */
36502 if (flag_trapping_math || !TARGET_ROUND)
36503 break;
36504
36505 if (out_mode == SFmode && in_mode == SFmode)
36506 {
36507 if (out_n == 4 && in_n == 4)
36508 return ix86_get_builtin (IX86_BUILTIN_CEILPS);
36509 else if (out_n == 8 && in_n == 8)
36510 return ix86_get_builtin (IX86_BUILTIN_CEILPS256);
36511 }
36512 break;
36513
36514 case BUILT_IN_TRUNC:
36515 /* The round insn does not trap on denormals. */
36516 if (flag_trapping_math || !TARGET_ROUND)
36517 break;
36518
36519 if (out_mode == DFmode && in_mode == DFmode)
36520 {
36521 if (out_n == 2 && in_n == 2)
36522 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD);
36523 else if (out_n == 4 && in_n == 4)
36524 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD256);
36525 }
36526 break;
36527
36528 case BUILT_IN_TRUNCF:
36529 /* The round insn does not trap on denormals. */
36530 if (flag_trapping_math || !TARGET_ROUND)
36531 break;
36532
36533 if (out_mode == SFmode && in_mode == SFmode)
36534 {
36535 if (out_n == 4 && in_n == 4)
36536 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS);
36537 else if (out_n == 8 && in_n == 8)
36538 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS256);
36539 }
36540 break;
36541
36542 case BUILT_IN_RINT:
36543 /* The round insn does not trap on denormals. */
36544 if (flag_trapping_math || !TARGET_ROUND)
36545 break;
36546
36547 if (out_mode == DFmode && in_mode == DFmode)
36548 {
36549 if (out_n == 2 && in_n == 2)
36550 return ix86_get_builtin (IX86_BUILTIN_RINTPD);
36551 else if (out_n == 4 && in_n == 4)
36552 return ix86_get_builtin (IX86_BUILTIN_RINTPD256);
36553 }
36554 break;
36555
36556 case BUILT_IN_RINTF:
36557 /* The round insn does not trap on denormals. */
36558 if (flag_trapping_math || !TARGET_ROUND)
36559 break;
36560
36561 if (out_mode == SFmode && in_mode == SFmode)
36562 {
36563 if (out_n == 4 && in_n == 4)
36564 return ix86_get_builtin (IX86_BUILTIN_RINTPS);
36565 else if (out_n == 8 && in_n == 8)
36566 return ix86_get_builtin (IX86_BUILTIN_RINTPS256);
36567 }
36568 break;
36569
36570 case BUILT_IN_ROUND:
36571 /* The round insn does not trap on denormals. */
36572 if (flag_trapping_math || !TARGET_ROUND)
36573 break;
36574
36575 if (out_mode == DFmode && in_mode == DFmode)
36576 {
36577 if (out_n == 2 && in_n == 2)
36578 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ);
36579 else if (out_n == 4 && in_n == 4)
36580 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ256);
36581 }
36582 break;
36583
36584 case BUILT_IN_ROUNDF:
36585 /* The round insn does not trap on denormals. */
36586 if (flag_trapping_math || !TARGET_ROUND)
36587 break;
36588
36589 if (out_mode == SFmode && in_mode == SFmode)
36590 {
36591 if (out_n == 4 && in_n == 4)
36592 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ);
36593 else if (out_n == 8 && in_n == 8)
36594 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ256);
36595 }
36596 break;
36597
36598 case BUILT_IN_FMA:
36599 if (out_mode == DFmode && in_mode == DFmode)
36600 {
36601 if (out_n == 2 && in_n == 2)
36602 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD);
36603 if (out_n == 4 && in_n == 4)
36604 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD256);
36605 }
36606 break;
36607
36608 case BUILT_IN_FMAF:
36609 if (out_mode == SFmode && in_mode == SFmode)
36610 {
36611 if (out_n == 4 && in_n == 4)
36612 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS);
36613 if (out_n == 8 && in_n == 8)
36614 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS256);
36615 }
36616 break;
36617
36618 default:
36619 break;
36620 }
36621
36622 /* Dispatch to a handler for a vectorization library. */
36623 if (ix86_veclib_handler)
36624 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
36625 type_in);
36626
36627 return NULL_TREE;
36628 }
36629
36630 /* Handler for an SVML-style interface to
36631 a library with vectorized intrinsics. */
36632
36633 static tree
36634 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
36635 {
36636 char name[20];
36637 tree fntype, new_fndecl, args;
36638 unsigned arity;
36639 const char *bname;
36640 enum machine_mode el_mode, in_mode;
36641 int n, in_n;
36642
36643 /* The SVML is suitable for unsafe math only. */
36644 if (!flag_unsafe_math_optimizations)
36645 return NULL_TREE;
36646
36647 el_mode = TYPE_MODE (TREE_TYPE (type_out));
36648 n = TYPE_VECTOR_SUBPARTS (type_out);
36649 in_mode = TYPE_MODE (TREE_TYPE (type_in));
36650 in_n = TYPE_VECTOR_SUBPARTS (type_in);
36651 if (el_mode != in_mode
36652 || n != in_n)
36653 return NULL_TREE;
36654
36655 switch (fn)
36656 {
36657 case BUILT_IN_EXP:
36658 case BUILT_IN_LOG:
36659 case BUILT_IN_LOG10:
36660 case BUILT_IN_POW:
36661 case BUILT_IN_TANH:
36662 case BUILT_IN_TAN:
36663 case BUILT_IN_ATAN:
36664 case BUILT_IN_ATAN2:
36665 case BUILT_IN_ATANH:
36666 case BUILT_IN_CBRT:
36667 case BUILT_IN_SINH:
36668 case BUILT_IN_SIN:
36669 case BUILT_IN_ASINH:
36670 case BUILT_IN_ASIN:
36671 case BUILT_IN_COSH:
36672 case BUILT_IN_COS:
36673 case BUILT_IN_ACOSH:
36674 case BUILT_IN_ACOS:
36675 if (el_mode != DFmode || n != 2)
36676 return NULL_TREE;
36677 break;
36678
36679 case BUILT_IN_EXPF:
36680 case BUILT_IN_LOGF:
36681 case BUILT_IN_LOG10F:
36682 case BUILT_IN_POWF:
36683 case BUILT_IN_TANHF:
36684 case BUILT_IN_TANF:
36685 case BUILT_IN_ATANF:
36686 case BUILT_IN_ATAN2F:
36687 case BUILT_IN_ATANHF:
36688 case BUILT_IN_CBRTF:
36689 case BUILT_IN_SINHF:
36690 case BUILT_IN_SINF:
36691 case BUILT_IN_ASINHF:
36692 case BUILT_IN_ASINF:
36693 case BUILT_IN_COSHF:
36694 case BUILT_IN_COSF:
36695 case BUILT_IN_ACOSHF:
36696 case BUILT_IN_ACOSF:
36697 if (el_mode != SFmode || n != 4)
36698 return NULL_TREE;
36699 break;
36700
36701 default:
36702 return NULL_TREE;
36703 }
36704
36705 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
36706
36707 if (fn == BUILT_IN_LOGF)
36708 strcpy (name, "vmlsLn4");
36709 else if (fn == BUILT_IN_LOG)
36710 strcpy (name, "vmldLn2");
36711 else if (n == 4)
36712 {
36713 sprintf (name, "vmls%s", bname+10);
36714 name[strlen (name)-1] = '4';
36715 }
36716 else
36717 sprintf (name, "vmld%s2", bname+10);
36718
36719 /* Convert to uppercase. */
36720 name[4] &= ~0x20;
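/* For example, BUILT_IN_SINF becomes "vmlsSin4" and BUILT_IN_SIN becomes
   "vmldSin2"; BUILT_IN_LOGF and BUILT_IN_LOG are special-cased above as
   "vmlsLn4" and "vmldLn2". */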
36721
36722 arity = 0;
36723 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
36724 args;
36725 args = TREE_CHAIN (args))
36726 arity++;
36727
36728 if (arity == 1)
36729 fntype = build_function_type_list (type_out, type_in, NULL);
36730 else
36731 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
36732
36733 /* Build a function declaration for the vectorized function. */
36734 new_fndecl = build_decl (BUILTINS_LOCATION,
36735 FUNCTION_DECL, get_identifier (name), fntype);
36736 TREE_PUBLIC (new_fndecl) = 1;
36737 DECL_EXTERNAL (new_fndecl) = 1;
36738 DECL_IS_NOVOPS (new_fndecl) = 1;
36739 TREE_READONLY (new_fndecl) = 1;
36740
36741 return new_fndecl;
36742 }
36743
36744 /* Handler for an ACML-style interface to
36745 a library with vectorized intrinsics. */
36746
36747 static tree
36748 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
36749 {
36750 char name[20] = "__vr.._";
36751 tree fntype, new_fndecl, args;
36752 unsigned arity;
36753 const char *bname;
36754 enum machine_mode el_mode, in_mode;
36755 int n, in_n;
36756
36757 /* The ACML is 64-bit only and suitable for unsafe math only, as
36758 it does not correctly support parts of IEEE (such as denormals)
36759 with the required precision. */
36760 if (!TARGET_64BIT
36761 || !flag_unsafe_math_optimizations)
36762 return NULL_TREE;
36763
36764 el_mode = TYPE_MODE (TREE_TYPE (type_out));
36765 n = TYPE_VECTOR_SUBPARTS (type_out);
36766 in_mode = TYPE_MODE (TREE_TYPE (type_in));
36767 in_n = TYPE_VECTOR_SUBPARTS (type_in);
36768 if (el_mode != in_mode
36769 || n != in_n)
36770 return NULL_TREE;
36771
36772 switch (fn)
36773 {
36774 case BUILT_IN_SIN:
36775 case BUILT_IN_COS:
36776 case BUILT_IN_EXP:
36777 case BUILT_IN_LOG:
36778 case BUILT_IN_LOG2:
36779 case BUILT_IN_LOG10:
36780 name[4] = 'd';
36781 name[5] = '2';
36782 if (el_mode != DFmode
36783 || n != 2)
36784 return NULL_TREE;
36785 break;
36786
36787 case BUILT_IN_SINF:
36788 case BUILT_IN_COSF:
36789 case BUILT_IN_EXPF:
36790 case BUILT_IN_POWF:
36791 case BUILT_IN_LOGF:
36792 case BUILT_IN_LOG2F:
36793 case BUILT_IN_LOG10F:
36794 name[4] = 's';
36795 name[5] = '4';
36796 if (el_mode != SFmode
36797 || n != 4)
36798 return NULL_TREE;
36799 break;
36800
36801 default:
36802 return NULL_TREE;
36803 }
36804
36805 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
36806 sprintf (name + 7, "%s", bname+10);
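/* For example, BUILT_IN_SIN becomes "__vrd2_sin" and BUILT_IN_SINF
   becomes "__vrs4_sinf". */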
36807
36808 arity = 0;
36809 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
36810 args;
36811 args = TREE_CHAIN (args))
36812 arity++;
36813
36814 if (arity == 1)
36815 fntype = build_function_type_list (type_out, type_in, NULL);
36816 else
36817 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
36818
36819 /* Build a function declaration for the vectorized function. */
36820 new_fndecl = build_decl (BUILTINS_LOCATION,
36821 FUNCTION_DECL, get_identifier (name), fntype);
36822 TREE_PUBLIC (new_fndecl) = 1;
36823 DECL_EXTERNAL (new_fndecl) = 1;
36824 DECL_IS_NOVOPS (new_fndecl) = 1;
36825 TREE_READONLY (new_fndecl) = 1;
36826
36827 return new_fndecl;
36828 }
36829
36830 /* Returns a decl of a function that implements gather load with
36831 memory type MEM_VECTYPE, index type INDEX_TYPE and scale SCALE.
36832 Return NULL_TREE if it is not available. */
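/* For example, gathering V4DF elements with an SImode index selects
   IX86_BUILTIN_GATHERALTSIV4DF, while a DImode index selects
   IX86_BUILTIN_GATHERDIV4DF (see the switch below). */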
36833
36834 static tree
36835 ix86_vectorize_builtin_gather (const_tree mem_vectype,
36836 const_tree index_type, int scale)
36837 {
36838 bool si;
36839 enum ix86_builtins code;
36840
36841 if (! TARGET_AVX2)
36842 return NULL_TREE;
36843
36844 if ((TREE_CODE (index_type) != INTEGER_TYPE
36845 && !POINTER_TYPE_P (index_type))
36846 || (TYPE_MODE (index_type) != SImode
36847 && TYPE_MODE (index_type) != DImode))
36848 return NULL_TREE;
36849
36850 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
36851 return NULL_TREE;
36852
36853 /* v*gather* insn sign extends index to pointer mode. */
36854 if (TYPE_PRECISION (index_type) < POINTER_SIZE
36855 && TYPE_UNSIGNED (index_type))
36856 return NULL_TREE;
36857
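/* The hardware only supports scales of 1, 2, 4 and 8;
   (scale & (scale - 1)) != 0 rejects anything that is not a power of two. */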
36858 if (scale <= 0
36859 || scale > 8
36860 || (scale & (scale - 1)) != 0)
36861 return NULL_TREE;
36862
36863 si = TYPE_MODE (index_type) == SImode;
36864 switch (TYPE_MODE (mem_vectype))
36865 {
36866 case V2DFmode:
36867 code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
36868 break;
36869 case V4DFmode:
36870 code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
36871 break;
36872 case V2DImode:
36873 code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
36874 break;
36875 case V4DImode:
36876 code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
36877 break;
36878 case V4SFmode:
36879 code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
36880 break;
36881 case V8SFmode:
36882 code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
36883 break;
36884 case V4SImode:
36885 code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
36886 break;
36887 case V8SImode:
36888 code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
36889 break;
36890 case V8DFmode:
36891 if (TARGET_AVX512F)
36892 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DF : IX86_BUILTIN_GATHER3DIV8DF;
36893 else
36894 return NULL_TREE;
36895 break;
36896 case V8DImode:
36897 if (TARGET_AVX512F)
36898 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DI : IX86_BUILTIN_GATHER3DIV8DI;
36899 else
36900 return NULL_TREE;
36901 break;
36902 case V16SFmode:
36903 if (TARGET_AVX512F)
36904 code = si ? IX86_BUILTIN_GATHER3SIV16SF : IX86_BUILTIN_GATHER3ALTDIV16SF;
36905 else
36906 return NULL_TREE;
36907 break;
36908 case V16SImode:
36909 if (TARGET_AVX512F)
36910 code = si ? IX86_BUILTIN_GATHER3SIV16SI : IX86_BUILTIN_GATHER3ALTDIV16SI;
36911 else
36912 return NULL_TREE;
36913 break;
36914 default:
36915 return NULL_TREE;
36916 }
36917
36918 return ix86_get_builtin (code);
36919 }
36920
36921 /* Returns a decl of a target-specific builtin that implements the
36922 reciprocal of the function FN, or NULL_TREE if not available. */
36923
36924 static tree
36925 ix86_builtin_reciprocal (unsigned int fn, bool md_fn, bool)
36926 {
36927 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
36928 && flag_finite_math_only && !flag_trapping_math
36929 && flag_unsafe_math_optimizations))
36930 return NULL_TREE;
36931
36932 if (md_fn)
36933 /* Machine dependent builtins. */
36934 switch (fn)
36935 {
36936 /* Vectorized version of sqrt to rsqrt conversion. */
36937 case IX86_BUILTIN_SQRTPS_NR:
36938 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR);
36939
36940 case IX86_BUILTIN_SQRTPS_NR256:
36941 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR256);
36942
36943 default:
36944 return NULL_TREE;
36945 }
36946 else
36947 /* Normal builtins. */
36948 switch (fn)
36949 {
36950 /* Sqrt to rsqrt conversion. */
36951 case BUILT_IN_SQRTF:
36952 return ix86_get_builtin (IX86_BUILTIN_RSQRTF);
36953
36954 default:
36955 return NULL_TREE;
36956 }
36957 }
36958 \f
36959 /* Helper for avx_vpermilps256_operand et al. This is also used by
36960 the expansion functions to turn the parallel back into a mask.
36961 The return value is 0 for no match and the imm8+1 for a match. */
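/* For example, for V4SFmode the parallel [1 0 3 2] yields the imm8
   0xb1 (each element uses two mask bits), so the return value is 0xb2. */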
36962
36963 int
36964 avx_vpermilp_parallel (rtx par, enum machine_mode mode)
36965 {
36966 unsigned i, nelt = GET_MODE_NUNITS (mode);
36967 unsigned mask = 0;
36968 unsigned char ipar[16] = {}; /* Silence -Wuninitialized warning. */
36969
36970 if (XVECLEN (par, 0) != (int) nelt)
36971 return 0;
36972
36973 /* Validate that all of the elements are constants, and not totally
36974 out of range. Copy the data into an integral array to make the
36975 subsequent checks easier. */
36976 for (i = 0; i < nelt; ++i)
36977 {
36978 rtx er = XVECEXP (par, 0, i);
36979 unsigned HOST_WIDE_INT ei;
36980
36981 if (!CONST_INT_P (er))
36982 return 0;
36983 ei = INTVAL (er);
36984 if (ei >= nelt)
36985 return 0;
36986 ipar[i] = ei;
36987 }
36988
36989 switch (mode)
36990 {
36991 case V8DFmode:
36992 /* In the 512-bit DFmode case, we can only move elements within
36993 a 128-bit lane. First fill the second part of the mask,
36994 then fallthru. */
36995 for (i = 4; i < 6; ++i)
36996 {
36997 if (ipar[i] < 4 || ipar[i] >= 6)
36998 return 0;
36999 mask |= (ipar[i] - 4) << i;
37000 }
37001 for (i = 6; i < 8; ++i)
37002 {
37003 if (ipar[i] < 6)
37004 return 0;
37005 mask |= (ipar[i] - 6) << i;
37006 }
37007 /* FALLTHRU */
37008
37009 case V4DFmode:
37010 /* In the 256-bit DFmode case, we can only move elements within
37011 a 128-bit lane. */
37012 for (i = 0; i < 2; ++i)
37013 {
37014 if (ipar[i] >= 2)
37015 return 0;
37016 mask |= ipar[i] << i;
37017 }
37018 for (i = 2; i < 4; ++i)
37019 {
37020 if (ipar[i] < 2)
37021 return 0;
37022 mask |= (ipar[i] - 2) << i;
37023 }
37024 break;
37025
37026 case V16SFmode:
37027 /* In the 512-bit SFmode case, the permutation in the upper 256 bits
37028 must mirror the permutation in the lower 256 bits. */
37029 for (i = 0; i < 8; ++i)
37030 if (ipar[i] + 8 != ipar[i + 8])
37031 return 0;
37032 /* FALLTHRU */
37033
37034 case V8SFmode:
37035 /* In the 256-bit SFmode case, we have full freedom of
37036 movement within the low 128-bit lane, but the high 128-bit
37037 lane must mirror the exact same pattern. */
37038 for (i = 0; i < 4; ++i)
37039 if (ipar[i] + 4 != ipar[i + 4])
37040 return 0;
37041 nelt = 4;
37042 /* FALLTHRU */
37043
37044 case V2DFmode:
37045 case V4SFmode:
37046 /* In the 128-bit case, we have full freedom in the placement of
37047 the elements from the source operand. */
37048 for (i = 0; i < nelt; ++i)
37049 mask |= ipar[i] << (i * (nelt / 2));
37050 break;
37051
37052 default:
37053 gcc_unreachable ();
37054 }
37055
37056 /* Make sure success has a non-zero value by adding one. */
37057 return mask + 1;
37058 }
37059
37060 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
37061 the expansion functions to turn the parallel back into a mask.
37062 The return value is 0 for no match and the imm8+1 for a match. */
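/* For example, for V8SFmode the parallel [4 5 6 7 0 1 2 3] (swap the two
   128-bit halves) yields the imm8 0x01, so the return value is 0x02. */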
37063
37064 int
37065 avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
37066 {
37067 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
37068 unsigned mask = 0;
37069 unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */
37070
37071 if (XVECLEN (par, 0) != (int) nelt)
37072 return 0;
37073
37074 /* Validate that all of the elements are constants, and not totally
37075 out of range. Copy the data into an integral array to make the
37076 subsequent checks easier. */
37077 for (i = 0; i < nelt; ++i)
37078 {
37079 rtx er = XVECEXP (par, 0, i);
37080 unsigned HOST_WIDE_INT ei;
37081
37082 if (!CONST_INT_P (er))
37083 return 0;
37084 ei = INTVAL (er);
37085 if (ei >= 2 * nelt)
37086 return 0;
37087 ipar[i] = ei;
37088 }
37089
37090 /* Validate that each half of the permute selects consecutive elements. */
37091 for (i = 0; i < nelt2 - 1; ++i)
37092 if (ipar[i] + 1 != ipar[i + 1])
37093 return 0;
37094 for (i = nelt2; i < nelt - 1; ++i)
37095 if (ipar[i] + 1 != ipar[i + 1])
37096 return 0;
37097
37098 /* Reconstruct the mask. */
37099 for (i = 0; i < 2; ++i)
37100 {
37101 unsigned e = ipar[i * nelt2];
37102 if (e % nelt2)
37103 return 0;
37104 e /= nelt2;
37105 mask |= e << (i * 4);
37106 }
37107
37108 /* Make sure success has a non-zero value by adding one. */
37109 return mask + 1;
37110 }
37111 \f
37112 /* Return a register priority for hard reg REGNO. */
37113 static int
37114 ix86_register_priority (int hard_regno)
37115 {
37116 /* ebp and r13 as the base always want a displacement, and r12 as the
37117 base always wants an index. So discourage their usage in an
37118 address. */
37119 if (hard_regno == R12_REG || hard_regno == R13_REG)
37120 return 0;
37121 if (hard_regno == BP_REG)
37122 return 1;
37123 /* New x86-64 int registers result in bigger code size. Discourage
37124 them. */
37125 if (FIRST_REX_INT_REG <= hard_regno && hard_regno <= LAST_REX_INT_REG)
37126 return 2;
37127 /* New x86-64 SSE registers result in bigger code size. Discourage
37128 them. */
37129 if (FIRST_REX_SSE_REG <= hard_regno && hard_regno <= LAST_REX_SSE_REG)
37130 return 2;
37131 /* Usage of AX register results in smaller code. Prefer it. */
37132 if (hard_regno == 0)
37133 return 4;
37134 return 3;
37135 }
37136
37137 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
37138
37139 Put float CONST_DOUBLE in the constant pool instead of fp regs.
37140 QImode must go into class Q_REGS.
37141 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
37142 movdf to do mem-to-mem moves through integer regs. */
37143
37144 static reg_class_t
37145 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
37146 {
37147 enum machine_mode mode = GET_MODE (x);
37148
37149 /* We're only allowed to return a subclass of CLASS. Many of the
37150 following checks fail for NO_REGS, so eliminate that early. */
37151 if (regclass == NO_REGS)
37152 return NO_REGS;
37153
37154 /* All classes can load zeros. */
37155 if (x == CONST0_RTX (mode))
37156 return regclass;
37157
37158 /* Force constants into memory if we are loading a (nonzero) constant into
37159 an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
37160 instructions to load from a constant. */
37161 if (CONSTANT_P (x)
37162 && (MAYBE_MMX_CLASS_P (regclass)
37163 || MAYBE_SSE_CLASS_P (regclass)
37164 || MAYBE_MASK_CLASS_P (regclass)))
37165 return NO_REGS;
37166
37167 /* Prefer SSE regs only, if we can use them for math. */
37168 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
37169 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
37170
37171 /* Floating-point constants need more complex checks. */
37172 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
37173 {
37174 /* General regs can load everything. */
37175 if (reg_class_subset_p (regclass, GENERAL_REGS))
37176 return regclass;
37177
37178 /* Floats can load 0 and 1 plus some others. Note that we eliminated
37179 zero above. We only want to wind up preferring 80387 registers if
37180 we plan on doing computation with them. */
37181 if (TARGET_80387
37182 && standard_80387_constant_p (x) > 0)
37183 {
37184 /* Limit class to non-sse. */
37185 if (regclass == FLOAT_SSE_REGS)
37186 return FLOAT_REGS;
37187 if (regclass == FP_TOP_SSE_REGS)
37188 return FP_TOP_REG;
37189 if (regclass == FP_SECOND_SSE_REGS)
37190 return FP_SECOND_REG;
37191 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
37192 return regclass;
37193 }
37194
37195 return NO_REGS;
37196 }
37197
37198 /* Generally when we see PLUS here, it's the function invariant
37199 (plus soft-fp const_int), which can only be computed into general
37200 regs. */
37201 if (GET_CODE (x) == PLUS)
37202 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
37203
37204 /* QImode constants are easy to load, but non-constant QImode data
37205 must go into Q_REGS. */
37206 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
37207 {
37208 if (reg_class_subset_p (regclass, Q_REGS))
37209 return regclass;
37210 if (reg_class_subset_p (Q_REGS, regclass))
37211 return Q_REGS;
37212 return NO_REGS;
37213 }
37214
37215 return regclass;
37216 }
37217
37218 /* Discourage putting floating-point values in SSE registers unless
37219 SSE math is being used, and likewise for the 387 registers. */
37220 static reg_class_t
37221 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
37222 {
37223 enum machine_mode mode = GET_MODE (x);
37224
37225 /* Restrict the output reload class to the register bank that we are doing
37226 math on. If we would like not to return a subset of CLASS, reject this
37227 alternative: if reload cannot do this, it will still use its choice. */
37229 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
37230 return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
37231
37232 if (X87_FLOAT_MODE_P (mode))
37233 {
37234 if (regclass == FP_TOP_SSE_REGS)
37235 return FP_TOP_REG;
37236 else if (regclass == FP_SECOND_SSE_REGS)
37237 return FP_SECOND_REG;
37238 else
37239 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
37240 }
37241
37242 return regclass;
37243 }
37244
37245 static reg_class_t
37246 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
37247 enum machine_mode mode, secondary_reload_info *sri)
37248 {
37249 /* Double-word spills from general registers to non-offsettable memory
37250 references (zero-extended addresses) require special handling. */
37251 if (TARGET_64BIT
37252 && MEM_P (x)
37253 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
37254 && INTEGER_CLASS_P (rclass)
37255 && !offsettable_memref_p (x))
37256 {
37257 sri->icode = (in_p
37258 ? CODE_FOR_reload_noff_load
37259 : CODE_FOR_reload_noff_store);
37260 /* Add the cost of moving address to a temporary. */
37261 sri->extra_cost = 1;
37262
37263 return NO_REGS;
37264 }
37265
37266 /* QImode spills from non-QI registers require an
37267 intermediate register on 32-bit targets. */
37268 if (mode == QImode
37269 && (MAYBE_MASK_CLASS_P (rclass)
37270 || (!TARGET_64BIT && !in_p
37271 && INTEGER_CLASS_P (rclass)
37272 && MAYBE_NON_Q_CLASS_P (rclass))))
37273 {
37274 int regno;
37275
37276 if (REG_P (x))
37277 regno = REGNO (x);
37278 else
37279 regno = -1;
37280
37281 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
37282 regno = true_regnum (x);
37283
37284 /* Return Q_REGS if the operand is in memory. */
37285 if (regno == -1)
37286 return Q_REGS;
37287 }
37288
37289 /* This condition handles corner case where an expression involving
37290 pointers gets vectorized. We're trying to use the address of a
37291 stack slot as a vector initializer.
37292
37293 (set (reg:V2DI 74 [ vect_cst_.2 ])
37294 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
37295
37296 Eventually frame gets turned into sp+offset like this:
37297
37298 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37299 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
37300 (const_int 392 [0x188]))))
37301
37302 That later gets turned into:
37303
37304 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37305 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
37306 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
37307
37308 We'll have the following reload recorded:
37309
37310 Reload 0: reload_in (DI) =
37311 (plus:DI (reg/f:DI 7 sp)
37312 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
37313 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37314 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
37315 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
37316 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37317 reload_reg_rtx: (reg:V2DI 22 xmm1)
37318
37319 Which isn't going to work since SSE instructions can't handle scalar
37320 additions. Returning GENERAL_REGS forces the addition into integer
37321 register and reload can handle subsequent reloads without problems. */
37322
37323 if (in_p && GET_CODE (x) == PLUS
37324 && SSE_CLASS_P (rclass)
37325 && SCALAR_INT_MODE_P (mode))
37326 return GENERAL_REGS;
37327
37328 return NO_REGS;
37329 }
37330
37331 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
37332
37333 static bool
37334 ix86_class_likely_spilled_p (reg_class_t rclass)
37335 {
37336 switch (rclass)
37337 {
37338 case AREG:
37339 case DREG:
37340 case CREG:
37341 case BREG:
37342 case AD_REGS:
37343 case SIREG:
37344 case DIREG:
37345 case SSE_FIRST_REG:
37346 case FP_TOP_REG:
37347 case FP_SECOND_REG:
37348 return true;
37349
37350 default:
37351 break;
37352 }
37353
37354 return false;
37355 }
37356
37357 /* If we are copying between general and FP registers, we need a memory
37358 location. The same is true for SSE and MMX registers.
37359
37360 To optimize register_move_cost performance, allow inline variant.
37361
37362 The macro can't work reliably when one of the CLASSES is a class containing
37363 registers from multiple units (SSE, MMX, integer). We avoid this by never
37364 combining those units in a single alternative in the machine description.
37365 Ensure that this constraint holds to avoid unexpected surprises.
37366
37367 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
37368 enforce these sanity checks. */
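/* For example, on 32-bit targets a DImode value is wider than a word, so a
   move between SSE and general registers needs to go through memory and this
   function returns true for it. */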
37369
37370 static inline bool
37371 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
37372 enum machine_mode mode, int strict)
37373 {
37374 if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
37375 return false;
37376 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
37377 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
37378 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
37379 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
37380 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
37381 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
37382 {
37383 gcc_assert (!strict || lra_in_progress);
37384 return true;
37385 }
37386
37387 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
37388 return true;
37389
37390 /* ??? This is a lie. We do have moves between mmx/general, and between
37391 mmx/sse2. But by saying we need secondary memory we discourage the
37392 register allocator from using the mmx registers unless needed. */
37393 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
37394 return true;
37395
37396 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
37397 {
37398 /* SSE1 doesn't have any direct moves from other classes. */
37399 if (!TARGET_SSE2)
37400 return true;
37401
37402 /* If the target says that inter-unit moves are more expensive
37403 than moving through memory, then don't generate them. */
37404 if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
37405 || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
37406 return true;
37407
37408 /* Between SSE and general, we have moves no larger than word size. */
37409 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
37410 return true;
37411 }
37412
37413 return false;
37414 }
37415
37416 bool
37417 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
37418 enum machine_mode mode, int strict)
37419 {
37420 return inline_secondary_memory_needed (class1, class2, mode, strict);
37421 }
37422
37423 /* Implement the TARGET_CLASS_MAX_NREGS hook.
37424
37425 On the 80386, this is the size of MODE in words,
37426 except in the FP regs, where a single reg is always enough. */
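/* For example, XFmode needs 3 general registers on 32-bit targets and 2 on
   64-bit targets, while any scalar mode fits in a single FP, SSE or mask
   register (complex modes take 2). */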
37427
37428 static unsigned char
37429 ix86_class_max_nregs (reg_class_t rclass, enum machine_mode mode)
37430 {
37431 if (MAYBE_INTEGER_CLASS_P (rclass))
37432 {
37433 if (mode == XFmode)
37434 return (TARGET_64BIT ? 2 : 3);
37435 else if (mode == XCmode)
37436 return (TARGET_64BIT ? 4 : 6);
37437 else
37438 return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
37439 }
37440 else
37441 {
37442 if (COMPLEX_MODE_P (mode))
37443 return 2;
37444 else
37445 return 1;
37446 }
37447 }
37448
37449 /* Return true if the registers in CLASS cannot represent the change from
37450 modes FROM to TO. */
37451
37452 bool
37453 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
37454 enum reg_class regclass)
37455 {
37456 if (from == to)
37457 return false;
37458
37459 /* x87 registers can't do subreg at all, as all values are reformatted
37460 to extended precision. */
37461 if (MAYBE_FLOAT_CLASS_P (regclass))
37462 return true;
37463
37464 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
37465 {
37466 /* Vector registers do not support QI or HImode loads. If we don't
37467 disallow a change to these modes, reload will assume it's ok to
37468 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
37469 the vec_dupv4hi pattern. */
37470 if (GET_MODE_SIZE (from) < 4)
37471 return true;
37472
37473 /* Vector registers do not support subreg with nonzero offsets, which
37474 are otherwise valid for integer registers. Since we can't see
37475 whether we have a nonzero offset from here, prohibit all
37476 nonparadoxical subregs changing size. */
37477 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
37478 return true;
37479 }
37480
37481 return false;
37482 }
37483
37484 /* Return the cost of moving data of mode M between a
37485 register and memory. A value of 2 is the default; this cost is
37486 relative to those in `REGISTER_MOVE_COST'.
37487
37488 This function is used extensively by register_move_cost that is used to
37489 build tables at startup. Make it inline in this case.
37490 When IN is 2, return maximum of in and out move cost.
37491
37492 If moving between registers and memory is more expensive than
37493 between two registers, you should define this macro to express the
37494 relative cost.
37495
37496 Also model the increased cost of moving QImode registers in
37497 non-Q_REGS classes.
37498 */
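/* For example, a QImode store from a register class outside Q_REGS on a
   32-bit target is costed below as int_store[0] plus a penalty of 4. */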
37499 static inline int
37500 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
37501 int in)
37502 {
37503 int cost;
37504 if (FLOAT_CLASS_P (regclass))
37505 {
37506 int index;
37507 switch (mode)
37508 {
37509 case SFmode:
37510 index = 0;
37511 break;
37512 case DFmode:
37513 index = 1;
37514 break;
37515 case XFmode:
37516 index = 2;
37517 break;
37518 default:
37519 return 100;
37520 }
37521 if (in == 2)
37522 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
37523 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
37524 }
37525 if (SSE_CLASS_P (regclass))
37526 {
37527 int index;
37528 switch (GET_MODE_SIZE (mode))
37529 {
37530 case 4:
37531 index = 0;
37532 break;
37533 case 8:
37534 index = 1;
37535 break;
37536 case 16:
37537 index = 2;
37538 break;
37539 default:
37540 return 100;
37541 }
37542 if (in == 2)
37543 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
37544 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
37545 }
37546 if (MMX_CLASS_P (regclass))
37547 {
37548 int index;
37549 switch (GET_MODE_SIZE (mode))
37550 {
37551 case 4:
37552 index = 0;
37553 break;
37554 case 8:
37555 index = 1;
37556 break;
37557 default:
37558 return 100;
37559 }
37560 if (in == 2)
37561 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
37562 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
37563 }
37564 switch (GET_MODE_SIZE (mode))
37565 {
37566 case 1:
37567 if (Q_CLASS_P (regclass) || TARGET_64BIT)
37568 {
37569 if (!in)
37570 return ix86_cost->int_store[0];
37571 if (TARGET_PARTIAL_REG_DEPENDENCY
37572 && optimize_function_for_speed_p (cfun))
37573 cost = ix86_cost->movzbl_load;
37574 else
37575 cost = ix86_cost->int_load[0];
37576 if (in == 2)
37577 return MAX (cost, ix86_cost->int_store[0]);
37578 return cost;
37579 }
37580 else
37581 {
37582 if (in == 2)
37583 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
37584 if (in)
37585 return ix86_cost->movzbl_load;
37586 else
37587 return ix86_cost->int_store[0] + 4;
37588 }
37589 break;
37590 case 2:
37591 if (in == 2)
37592 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
37593 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
37594 default:
37595 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
37596 if (mode == TFmode)
37597 mode = XFmode;
37598 if (in == 2)
37599 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
37600 else if (in)
37601 cost = ix86_cost->int_load[2];
37602 else
37603 cost = ix86_cost->int_store[2];
37604 return (cost * (((int) GET_MODE_SIZE (mode)
37605 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
37606 }
37607 }
37608
37609 static int
37610 ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
37611 bool in)
37612 {
37613 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
37614 }
37615
37616
37617 /* Return the cost of moving data from a register in class CLASS1 to
37618 one in class CLASS2.
37619
37620 It is not required that the cost always equal 2 when FROM is the same as TO;
37621 on some machines it is expensive to move between registers if they are not
37622 general registers. */
37623
37624 static int
37625 ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
37626 reg_class_t class2_i)
37627 {
37628 enum reg_class class1 = (enum reg_class) class1_i;
37629 enum reg_class class2 = (enum reg_class) class2_i;
37630
37631 /* In case we require secondary memory, compute the cost of the store followed
37632 by the load. In order to avoid bad register allocation choices, we need
37633 this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
37634
37635 if (inline_secondary_memory_needed (class1, class2, mode, 0))
37636 {
37637 int cost = 1;
37638
37639 cost += inline_memory_move_cost (mode, class1, 2);
37640 cost += inline_memory_move_cost (mode, class2, 2);
37641
37642 /* In case of copying from a general purpose register we may emit multiple
37643 stores followed by a single load, causing a memory size mismatch stall.
37644 Count this as an arbitrarily high cost of 20. */
37645 if (targetm.class_max_nregs (class1, mode)
37646 > targetm.class_max_nregs (class2, mode))
37647 cost += 20;
37648
37649 /* In the case of FP/MMX moves, the registers actually overlap, and we
37650 have to switch modes in order to treat them differently. */
37651 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
37652 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
37653 cost += 20;
37654
37655 return cost;
37656 }
37657
37658 /* Moves between SSE/MMX and integer unit are expensive. */
37659 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
37660 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
37661
37662 /* ??? By keeping the returned value relatively high, we limit the number
37663 of moves between integer and MMX/SSE registers for all targets.
37664 Additionally, a high value prevents problems with x86_modes_tieable_p (),
37665 where integer modes in MMX/SSE registers are not tieable
37666 because of missing QImode and HImode moves to, from or between
37667 MMX/SSE registers. */
37668 return MAX (8, ix86_cost->mmxsse_to_integer);
37669
37670 if (MAYBE_FLOAT_CLASS_P (class1))
37671 return ix86_cost->fp_move;
37672 if (MAYBE_SSE_CLASS_P (class1))
37673 return ix86_cost->sse_move;
37674 if (MAYBE_MMX_CLASS_P (class1))
37675 return ix86_cost->mmx_move;
37676 return 2;
37677 }
37678
37679 /* Return TRUE if hard register REGNO can hold a value of machine-mode
37680 MODE. */
37681
37682 bool
37683 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
37684 {
37685 /* Flags and only flags can only hold CCmode values. */
37686 if (CC_REGNO_P (regno))
37687 return GET_MODE_CLASS (mode) == MODE_CC;
37688 if (GET_MODE_CLASS (mode) == MODE_CC
37689 || GET_MODE_CLASS (mode) == MODE_RANDOM
37690 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
37691 return false;
37692 if (STACK_REGNO_P (regno))
37693 return VALID_FP_MODE_P (mode);
37694 if (MASK_REGNO_P (regno))
37695 return VALID_MASK_REG_MODE (mode);
37696 if (SSE_REGNO_P (regno))
37697 {
37698 /* We implement the move patterns for all vector modes into and
37699 out of SSE registers, even when no operation instructions
37700 are available. */
37701
37702 /* For AVX-512 we allow, regardless of regno:
37703 - XI mode
37704 - any of 512-bit wide vector mode
37705 - any scalar mode. */
37706 if (TARGET_AVX512F
37707 && (mode == XImode
37708 || VALID_AVX512F_REG_MODE (mode)
37709 || VALID_AVX512F_SCALAR_MODE (mode)))
37710 return true;
37711
37712 /* xmm16-xmm31 are only available for AVX-512. */
37713 if (EXT_REX_SSE_REGNO_P (regno))
37714 return false;
37715
37716 /* OImode and AVX modes are available only when AVX is enabled. */
37717 return ((TARGET_AVX
37718 && VALID_AVX256_REG_OR_OI_MODE (mode))
37719 || VALID_SSE_REG_MODE (mode)
37720 || VALID_SSE2_REG_MODE (mode)
37721 || VALID_MMX_REG_MODE (mode)
37722 || VALID_MMX_REG_MODE_3DNOW (mode));
37723 }
37724 if (MMX_REGNO_P (regno))
37725 {
37726 /* We implement the move patterns for 3DNOW modes even in MMX mode,
37727 so if the register is available at all, then we can move data of
37728 the given mode into or out of it. */
37729 return (VALID_MMX_REG_MODE (mode)
37730 || VALID_MMX_REG_MODE_3DNOW (mode));
37731 }
37732
37733 if (mode == QImode)
37734 {
37735 /* Take care for QImode values - they can be in non-QI regs,
37736 but then they do cause partial register stalls. */
37737 if (ANY_QI_REGNO_P (regno))
37738 return true;
37739 if (!TARGET_PARTIAL_REG_STALL)
37740 return true;
37741 /* LRA checks if the hard register is OK for the given mode.
37742 QImode values can live in non-QI regs, so we allow all
37743 registers here. */
37744 if (lra_in_progress)
37745 return true;
37746 return !can_create_pseudo_p ();
37747 }
37748 /* We handle both integer and floats in the general purpose registers. */
37749 else if (VALID_INT_MODE_P (mode))
37750 return true;
37751 else if (VALID_FP_MODE_P (mode))
37752 return true;
37753 else if (VALID_DFP_MODE_P (mode))
37754 return true;
37755 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
37756 on to use that value in smaller contexts, this can easily force a
37757 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
37758 supporting DImode, allow it. */
37759 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
37760 return true;
37761
37762 return false;
37763 }
37764
37765 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
37766 tieable integer mode. */
37767
37768 static bool
37769 ix86_tieable_integer_mode_p (enum machine_mode mode)
37770 {
37771 switch (mode)
37772 {
37773 case HImode:
37774 case SImode:
37775 return true;
37776
37777 case QImode:
37778 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
37779
37780 case DImode:
37781 return TARGET_64BIT;
37782
37783 default:
37784 return false;
37785 }
37786 }
37787
37788 /* Return true if MODE1 is accessible in a register that can hold MODE2
37789 without copying. That is, all register classes that can hold MODE2
37790 can also hold MODE1. */
37791
37792 bool
37793 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
37794 {
37795 if (mode1 == mode2)
37796 return true;
37797
37798 if (ix86_tieable_integer_mode_p (mode1)
37799 && ix86_tieable_integer_mode_p (mode2))
37800 return true;
37801
37802 /* MODE2 being XFmode implies fp stack or general regs, which means we
37803 can tie any smaller floating point modes to it. Note that we do not
37804 tie this with TFmode. */
37805 if (mode2 == XFmode)
37806 return mode1 == SFmode || mode1 == DFmode;
37807
37808 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
37809 that we can tie it with SFmode. */
37810 if (mode2 == DFmode)
37811 return mode1 == SFmode;
37812
37813 /* If MODE2 is only appropriate for an SSE register, then tie with
37814 any other mode acceptable to SSE registers. */
37815 if (GET_MODE_SIZE (mode2) == 32
37816 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
37817 return (GET_MODE_SIZE (mode1) == 32
37818 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
37819 if (GET_MODE_SIZE (mode2) == 16
37820 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
37821 return (GET_MODE_SIZE (mode1) == 16
37822 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
37823
37824 /* If MODE2 is appropriate for an MMX register, then tie
37825 with any other mode acceptable to MMX registers. */
37826 if (GET_MODE_SIZE (mode2) == 8
37827 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
37828 return (GET_MODE_SIZE (mode1) == 8
37829 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
37830
37831 return false;
37832 }
37833
37834 /* Return the cost of moving between two registers of mode MODE. */
37835
37836 static int
37837 ix86_set_reg_reg_cost (enum machine_mode mode)
37838 {
37839 unsigned int units = UNITS_PER_WORD;
37840
37841 switch (GET_MODE_CLASS (mode))
37842 {
37843 default:
37844 break;
37845
37846 case MODE_CC:
37847 units = GET_MODE_SIZE (CCmode);
37848 break;
37849
37850 case MODE_FLOAT:
37851 if ((TARGET_SSE && mode == TFmode)
37852 || (TARGET_80387 && mode == XFmode)
37853 || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
37854 || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
37855 units = GET_MODE_SIZE (mode);
37856 break;
37857
37858 case MODE_COMPLEX_FLOAT:
37859 if ((TARGET_SSE && mode == TCmode)
37860 || (TARGET_80387 && mode == XCmode)
37861 || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
37862 || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
37863 units = GET_MODE_SIZE (mode);
37864 break;
37865
37866 case MODE_VECTOR_INT:
37867 case MODE_VECTOR_FLOAT:
37868 if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
37869 || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
37870 || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
37871 || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
37872 || (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
37873 units = GET_MODE_SIZE (mode);
37874 }
37875
37876 /* Return the cost of moving between two registers of mode MODE,
37877 assuming that the move will be in pieces of at most UNITS bytes. */
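/* For instance, assuming a 64-bit target (UNITS_PER_WORD == 8), a TImode
   register-to-register set in general registers moves in two word-sized
   pieces and costs COSTS_N_INSNS (2), while a 16-byte vector mode that SSE2
   can hold moves in one piece and costs COSTS_N_INSNS (1). */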
37878 return COSTS_N_INSNS ((GET_MODE_SIZE (mode) + units - 1) / units);
37879 }
37880
37881 /* Compute a (partial) cost for rtx X. Return true if the complete
37882 cost has been computed, and false if subexpressions should be
37883 scanned. In either case, *TOTAL contains the cost result. */
37884
37885 static bool
37886 ix86_rtx_costs (rtx x, int code_i, int outer_code_i, int opno, int *total,
37887 bool speed)
37888 {
37889 rtx mask;
37890 enum rtx_code code = (enum rtx_code) code_i;
37891 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
37892 enum machine_mode mode = GET_MODE (x);
37893 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
37894
37895 switch (code)
37896 {
37897 case SET:
37898 if (register_operand (SET_DEST (x), VOIDmode)
37899 && reg_or_0_operand (SET_SRC (x), VOIDmode))
37900 {
37901 *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
37902 return true;
37903 }
37904 return false;
37905
37906 case CONST_INT:
37907 case CONST:
37908 case LABEL_REF:
37909 case SYMBOL_REF:
37910 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
37911 *total = 3;
37912 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
37913 *total = 2;
37914 else if (flag_pic && SYMBOLIC_CONST (x)
37915 && !(TARGET_64BIT
37916 && (GET_CODE (x) == LABEL_REF
37917 || (GET_CODE (x) == SYMBOL_REF
37918 && SYMBOL_REF_LOCAL_P (x)))))
37919 *total = 1;
37920 else
37921 *total = 0;
37922 return true;
37923
37924 case CONST_DOUBLE:
37925 if (mode == VOIDmode)
37926 {
37927 *total = 0;
37928 return true;
37929 }
37930 switch (standard_80387_constant_p (x))
37931 {
37932 case 1: /* 0.0 */
37933 *total = 1;
37934 return true;
37935 default: /* Other constants */
37936 *total = 2;
37937 return true;
37938 case 0:
37939 case -1:
37940 break;
37941 }
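/* Note that the CONST_VECTOR case label below is nested inside the
   SSE_FLOAT_MODE_P test: scalar SSE float constants that reach this point
   share the standard_sse_constant_p costing with vector constants, while
   other CONST_DOUBLEs skip ahead to the MEM fallback. */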
37942 if (SSE_FLOAT_MODE_P (mode))
37943 {
37944 case CONST_VECTOR:
37945 switch (standard_sse_constant_p (x))
37946 {
37947 case 0:
37948 break;
37949 case 1: /* 0: xor eliminates false dependency */
37950 *total = 0;
37951 return true;
37952 default: /* -1: cmp contains false dependency */
37953 *total = 1;
37954 return true;
37955 }
37956 }
37957 /* Fall back to (MEM (SYMBOL_REF)), since that's where
37958 it'll probably end up. Add a penalty for size. */
37959 *total = (COSTS_N_INSNS (1)
37960 + (flag_pic != 0 && !TARGET_64BIT)
37961 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
37962 return true;
37963
37964 case ZERO_EXTEND:
37965 /* Zero extension is often completely free on x86_64, so make
37966 it as cheap as possible. */
37967 if (TARGET_64BIT && mode == DImode
37968 && GET_MODE (XEXP (x, 0)) == SImode)
37969 *total = 1;
37970 else if (TARGET_ZERO_EXTEND_WITH_AND)
37971 *total = cost->add;
37972 else
37973 *total = cost->movzx;
37974 return false;
37975
37976 case SIGN_EXTEND:
37977 *total = cost->movsx;
37978 return false;
37979
37980 case ASHIFT:
37981 if (SCALAR_INT_MODE_P (mode)
37982 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
37983 && CONST_INT_P (XEXP (x, 1)))
37984 {
37985 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
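/* A left shift by 1 is costed as an add of the register to itself; shifts
   by 2 or 3 can be implemented with an lea using a scale of 4 or 8, which
   is preferred when it is no more expensive than a constant shift. */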
37986 if (value == 1)
37987 {
37988 *total = cost->add;
37989 return false;
37990 }
37991 if ((value == 2 || value == 3)
37992 && cost->lea <= cost->shift_const)
37993 {
37994 *total = cost->lea;
37995 return false;
37996 }
37997 }
37998 /* FALLTHRU */
37999
38000 case ROTATE:
38001 case ASHIFTRT:
38002 case LSHIFTRT:
38003 case ROTATERT:
38004 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
38005 {
38006 /* ??? Should be SSE vector operation cost. */
38007 /* At least for published AMD latencies, this really is the same
38008 as the latency for a simple fpu operation like fabs. */
38009 /* V*QImode is emulated with 1-11 insns. */
38010 if (mode == V16QImode || mode == V32QImode)
38011 {
38012 int count = 11;
38013 if (TARGET_XOP && mode == V16QImode)
38014 {
38015 /* For XOP we use vpshab, which requires a broadcast of the
38016 value to the variable shift insn. For constants this
38017 means a V16QI constant in memory; even when we can perform the
38018 shift with one insn, set the cost so as to prefer paddb. */
38019 if (CONSTANT_P (XEXP (x, 1)))
38020 {
38021 *total = (cost->fabs
38022 + rtx_cost (XEXP (x, 0), code, 0, speed)
38023 + (speed ? 2 : COSTS_N_BYTES (16)));
38024 return true;
38025 }
38026 count = 3;
38027 }
38028 else if (TARGET_SSSE3)
38029 count = 7;
38030 *total = cost->fabs * count;
38031 }
38032 else
38033 *total = cost->fabs;
38034 }
38035 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
38036 {
38037 if (CONST_INT_P (XEXP (x, 1)))
38038 {
38039 if (INTVAL (XEXP (x, 1)) > 32)
38040 *total = cost->shift_const + COSTS_N_INSNS (2);
38041 else
38042 *total = cost->shift_const * 2;
38043 }
38044 else
38045 {
38046 if (GET_CODE (XEXP (x, 1)) == AND)
38047 *total = cost->shift_var * 2;
38048 else
38049 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
38050 }
38051 }
38052 else
38053 {
38054 if (CONST_INT_P (XEXP (x, 1)))
38055 *total = cost->shift_const;
38056 else if (GET_CODE (XEXP (x, 1)) == SUBREG
38057 && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND)
38058 {
38059 /* The AND merely truncates the shift count, which the hardware does anyway, so charge just the variable-shift cost. */
38060 *total = cost->shift_var;
38061 return true;
38062 }
38063 else
38064 *total = cost->shift_var;
38065 }
38066 return false;
38067
38068 case FMA:
38069 {
38070 rtx sub;
38071
38072 gcc_assert (FLOAT_MODE_P (mode));
38073 gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
38074
38075 /* ??? SSE scalar/vector cost should be used here. */
38076 /* ??? Bald assumption that fma has the same cost as fmul. */
38077 *total = cost->fmul;
38078 *total += rtx_cost (XEXP (x, 1), FMA, 1, speed);
38079
38080 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
38081 sub = XEXP (x, 0);
38082 if (GET_CODE (sub) == NEG)
38083 sub = XEXP (sub, 0);
38084 *total += rtx_cost (sub, FMA, 0, speed);
38085
38086 sub = XEXP (x, 2);
38087 if (GET_CODE (sub) == NEG)
38088 sub = XEXP (sub, 0);
38089 *total += rtx_cost (sub, FMA, 2, speed);
38090 return true;
38091 }
38092
38093 case MULT:
38094 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38095 {
38096 /* ??? SSE scalar cost should be used here. */
38097 *total = cost->fmul;
38098 return false;
38099 }
38100 else if (X87_FLOAT_MODE_P (mode))
38101 {
38102 *total = cost->fmul;
38103 return false;
38104 }
38105 else if (FLOAT_MODE_P (mode))
38106 {
38107 /* ??? SSE vector cost should be used here. */
38108 *total = cost->fmul;
38109 return false;
38110 }
38111 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
38112 {
38113 /* V*QImode is emulated with 7-13 insns. */
38114 if (mode == V16QImode || mode == V32QImode)
38115 {
38116 int extra = 11;
38117 if (TARGET_XOP && mode == V16QImode)
38118 extra = 5;
38119 else if (TARGET_SSSE3)
38120 extra = 6;
38121 *total = cost->fmul * 2 + cost->fabs * extra;
38122 }
38123 /* V*DImode is emulated with 5-8 insns. */
38124 else if (mode == V2DImode || mode == V4DImode)
38125 {
38126 if (TARGET_XOP && mode == V2DImode)
38127 *total = cost->fmul * 2 + cost->fabs * 3;
38128 else
38129 *total = cost->fmul * 3 + cost->fabs * 5;
38130 }
38131 /* Without sse4.1, we don't have PMULLD; it's emulated with 7
38132 insns, including two PMULUDQ. */
38133 else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX))
38134 *total = cost->fmul * 2 + cost->fabs * 5;
38135 else
38136 *total = cost->fmul;
38137 return false;
38138 }
38139 else
38140 {
38141 rtx op0 = XEXP (x, 0);
38142 rtx op1 = XEXP (x, 1);
38143 int nbits;
38144 if (CONST_INT_P (XEXP (x, 1)))
38145 {
38146 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
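/* Count the bits set in the constant multiplier; value &= value - 1
   clears the lowest set bit on each iteration, so nbits ends up as the
   population count, which scales the per-bit multiply cost below. */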
38147 for (nbits = 0; value != 0; value &= value - 1)
38148 nbits++;
38149 }
38150 else
38151 /* This is arbitrary. */
38152 nbits = 7;
38153
38154 /* Compute costs correctly for widening multiplication. */
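/* E.g. (mult:DI (sign_extend:DI (reg:SI)) (sign_extend:DI (reg:SI))), or
   the same with a CONST_INT second operand that fits the narrow mode, can
   be done with a single widening multiply, so cost it in the narrower
   inner mode. */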
38155 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
38156 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
38157 == GET_MODE_SIZE (mode))
38158 {
38159 int is_mulwiden = 0;
38160 enum machine_mode inner_mode = GET_MODE (op0);
38161
38162 if (GET_CODE (op0) == GET_CODE (op1))
38163 is_mulwiden = 1, op1 = XEXP (op1, 0);
38164 else if (CONST_INT_P (op1))
38165 {
38166 if (GET_CODE (op0) == SIGN_EXTEND)
38167 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
38168 == INTVAL (op1);
38169 else
38170 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
38171 }
38172
38173 if (is_mulwiden)
38174 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
38175 }
38176
38177 *total = (cost->mult_init[MODE_INDEX (mode)]
38178 + nbits * cost->mult_bit
38179 + rtx_cost (op0, outer_code, opno, speed)
38180 + rtx_cost (op1, outer_code, opno, speed));
38181
38182 return true;
38183 }
38184
38185 case DIV:
38186 case UDIV:
38187 case MOD:
38188 case UMOD:
38189 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38190 /* ??? SSE cost should be used here. */
38191 *total = cost->fdiv;
38192 else if (X87_FLOAT_MODE_P (mode))
38193 *total = cost->fdiv;
38194 else if (FLOAT_MODE_P (mode))
38195 /* ??? SSE vector cost should be used here. */
38196 *total = cost->fdiv;
38197 else
38198 *total = cost->divide[MODE_INDEX (mode)];
38199 return false;
38200
38201 case PLUS:
38202 if (GET_MODE_CLASS (mode) == MODE_INT
38203 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
38204 {
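/* The three shapes matched below each correspond to a single lea:
   base + index * {2,4,8} + displacement, index * {2,4,8} + displacement,
   and base + index + displacement. */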
38205 if (GET_CODE (XEXP (x, 0)) == PLUS
38206 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
38207 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
38208 && CONSTANT_P (XEXP (x, 1)))
38209 {
38210 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
38211 if (val == 2 || val == 4 || val == 8)
38212 {
38213 *total = cost->lea;
38214 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
38215 outer_code, opno, speed);
38216 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
38217 outer_code, opno, speed);
38218 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
38219 return true;
38220 }
38221 }
38222 else if (GET_CODE (XEXP (x, 0)) == MULT
38223 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
38224 {
38225 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
38226 if (val == 2 || val == 4 || val == 8)
38227 {
38228 *total = cost->lea;
38229 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
38230 outer_code, opno, speed);
38231 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
38232 return true;
38233 }
38234 }
38235 else if (GET_CODE (XEXP (x, 0)) == PLUS)
38236 {
38237 *total = cost->lea;
38238 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
38239 outer_code, opno, speed);
38240 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
38241 outer_code, opno, speed);
38242 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
38243 return true;
38244 }
38245 }
38246 /* FALLTHRU */
38247
38248 case MINUS:
38249 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38250 {
38251 /* ??? SSE cost should be used here. */
38252 *total = cost->fadd;
38253 return false;
38254 }
38255 else if (X87_FLOAT_MODE_P (mode))
38256 {
38257 *total = cost->fadd;
38258 return false;
38259 }
38260 else if (FLOAT_MODE_P (mode))
38261 {
38262 /* ??? SSE vector cost should be used here. */
38263 *total = cost->fadd;
38264 return false;
38265 }
38266 /* FALLTHRU */
38267
38268 case AND:
38269 case IOR:
38270 case XOR:
38271 if (GET_MODE_CLASS (mode) == MODE_INT
38272 && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
38273 {
38274 *total = (cost->add * 2
38275 + (rtx_cost (XEXP (x, 0), outer_code, opno, speed)
38276 << (GET_MODE (XEXP (x, 0)) != DImode))
38277 + (rtx_cost (XEXP (x, 1), outer_code, opno, speed)
38278 << (GET_MODE (XEXP (x, 1)) != DImode)));
38279 return true;
38280 }
38281 /* FALLTHRU */
38282
38283 case NEG:
38284 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38285 {
38286 /* ??? SSE cost should be used here. */
38287 *total = cost->fchs;
38288 return false;
38289 }
38290 else if (X87_FLOAT_MODE_P (mode))
38291 {
38292 *total = cost->fchs;
38293 return false;
38294 }
38295 else if (FLOAT_MODE_P (mode))
38296 {
38297 /* ??? SSE vector cost should be used here. */
38298 *total = cost->fchs;
38299 return false;
38300 }
38301 /* FALLTHRU */
38302
38303 case NOT:
38304 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
38305 {
38306 /* ??? Should be SSE vector operation cost. */
38307 /* At least for published AMD latencies, this really is the same
38308 as the latency for a simple fpu operation like fabs. */
38309 *total = cost->fabs;
38310 }
38311 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
38312 *total = cost->add * 2;
38313 else
38314 *total = cost->add;
38315 return false;
38316
38317 case COMPARE:
38318 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
38319 && XEXP (XEXP (x, 0), 1) == const1_rtx
38320 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
38321 && XEXP (x, 1) == const0_rtx)
38322 {
38323 /* This kind of construct is implemented using test[bwl].
38324 Treat it as if we had an AND. */
38325 *total = (cost->add
38326 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, opno, speed)
38327 + rtx_cost (const1_rtx, outer_code, opno, speed));
38328 return true;
38329 }
38330 return false;
38331
38332 case FLOAT_EXTEND:
38333 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
38334 *total = 0;
38335 return false;
38336
38337 case ABS:
38338 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38339 /* ??? SSE cost should be used here. */
38340 *total = cost->fabs;
38341 else if (X87_FLOAT_MODE_P (mode))
38342 *total = cost->fabs;
38343 else if (FLOAT_MODE_P (mode))
38344 /* ??? SSE vector cost should be used here. */
38345 *total = cost->fabs;
38346 return false;
38347
38348 case SQRT:
38349 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38350 /* ??? SSE cost should be used here. */
38351 *total = cost->fsqrt;
38352 else if (X87_FLOAT_MODE_P (mode))
38353 *total = cost->fsqrt;
38354 else if (FLOAT_MODE_P (mode))
38355 /* ??? SSE vector cost should be used here. */
38356 *total = cost->fsqrt;
38357 return false;
38358
38359 case UNSPEC:
38360 if (XINT (x, 1) == UNSPEC_TP)
38361 *total = 0;
38362 return false;
38363
38364 case VEC_SELECT:
38365 case VEC_CONCAT:
38366 case VEC_DUPLICATE:
38367 /* ??? Assume all of these vector manipulation patterns are
38368 recognizable, in which case they all pretty much have the
38369 same cost. */
38370 *total = cost->fabs;
38371 return true;
38372 case VEC_MERGE:
38373 mask = XEXP (x, 2);
38374 /* This is a masked instruction; assume the same cost
38375 as the non-masked variant. */
38376 if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
38377 *total = rtx_cost (XEXP (x, 0), outer_code, opno, speed);
38378 else
38379 *total = cost->fabs;
38380 return true;
38381
38382 default:
38383 return false;
38384 }
38385 }
38386
38387 #if TARGET_MACHO
38388
38389 static int current_machopic_label_num;
38390
38391 /* Given a symbol name and its associated stub, write out the
38392 definition of the stub. */
38393
38394 void
38395 machopic_output_stub (FILE *file, const char *symb, const char *stub)
38396 {
38397 unsigned int length;
38398 char *binder_name, *symbol_name, lazy_ptr_name[32];
38399 int label = ++current_machopic_label_num;
38400
38401 /* For 64-bit we shouldn't get here. */
38402 gcc_assert (!TARGET_64BIT);
38403
38404 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
38405 symb = targetm.strip_name_encoding (symb);
38406
38407 length = strlen (stub);
38408 binder_name = XALLOCAVEC (char, length + 32);
38409 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
38410
38411 length = strlen (symb);
38412 symbol_name = XALLOCAVEC (char, length + 32);
38413 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
38414
38415 sprintf (lazy_ptr_name, "L%d$lz", label);
38416
38417 if (MACHOPIC_ATT_STUB)
38418 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
38419 else if (MACHOPIC_PURE)
38420 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
38421 else
38422 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
38423
38424 fprintf (file, "%s:\n", stub);
38425 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
38426
38427 if (MACHOPIC_ATT_STUB)
38428 {
38429 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
38430 }
38431 else if (MACHOPIC_PURE)
38432 {
38433 /* PIC stub. */
38434 /* 25-byte PIC stub using "CALL get_pc_thunk". */
38435 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
38436 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
38437 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
38438 label, lazy_ptr_name, label);
38439 fprintf (file, "\tjmp\t*%%ecx\n");
38440 }
38441 else
38442 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
38443
38444 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
38445 it needs no stub-binding-helper. */
38446 if (MACHOPIC_ATT_STUB)
38447 return;
38448
38449 fprintf (file, "%s:\n", binder_name);
38450
38451 if (MACHOPIC_PURE)
38452 {
38453 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
38454 fprintf (file, "\tpushl\t%%ecx\n");
38455 }
38456 else
38457 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
38458
38459 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
38460
38461 /* N.B. Keep the correspondence of these
38462 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
38463 old-pic/new-pic/non-pic stubs; altering this will break
38464 compatibility with existing dylibs. */
38465 if (MACHOPIC_PURE)
38466 {
38467 /* 25-byte PIC stub using "CALL get_pc_thunk". */
38468 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
38469 }
38470 else
38471 /* 16-byte -mdynamic-no-pic stub. */
38472 switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
38473
38474 fprintf (file, "%s:\n", lazy_ptr_name);
38475 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
38476 fprintf (file, ASM_LONG "%s\n", binder_name);
38477 }
38478 #endif /* TARGET_MACHO */
38479
38480 /* Order the registers for register allocator. */
38481
38482 void
38483 x86_order_regs_for_local_alloc (void)
38484 {
38485 int pos = 0;
38486 int i;
38487
38488 /* First allocate the local general purpose registers. */
38489 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
38490 if (GENERAL_REGNO_P (i) && call_used_regs[i])
38491 reg_alloc_order [pos++] = i;
38492
38493 /* Global general purpose registers. */
38494 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
38495 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
38496 reg_alloc_order [pos++] = i;
38497
38498 /* x87 registers come first in case we are doing FP math
38499 using them. */
38500 if (!TARGET_SSE_MATH)
38501 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
38502 reg_alloc_order [pos++] = i;
38503
38504 /* SSE registers. */
38505 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
38506 reg_alloc_order [pos++] = i;
38507 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
38508 reg_alloc_order [pos++] = i;
38509
38510 /* Extended REX SSE registers. */
38511 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
38512 reg_alloc_order [pos++] = i;
38513
38514 /* Mask registers. */
38515 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
38516 reg_alloc_order [pos++] = i;
38517
38518 /* x87 registers. */
38519 if (TARGET_SSE_MATH)
38520 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
38521 reg_alloc_order [pos++] = i;
38522
38523 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
38524 reg_alloc_order [pos++] = i;
38525
38526 /* Initialize the rest of the array, as we do not allocate some registers
38527 at all. */
38528 while (pos < FIRST_PSEUDO_REGISTER)
38529 reg_alloc_order [pos++] = 0;
38530 }
38531
38532 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
38533 in struct attribute_spec.handler. */
38534 static tree
38535 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
38536 tree args,
38537 int,
38538 bool *no_add_attrs)
38539 {
38540 if (TREE_CODE (*node) != FUNCTION_TYPE
38541 && TREE_CODE (*node) != METHOD_TYPE
38542 && TREE_CODE (*node) != FIELD_DECL
38543 && TREE_CODE (*node) != TYPE_DECL)
38544 {
38545 warning (OPT_Wattributes, "%qE attribute only applies to functions",
38546 name);
38547 *no_add_attrs = true;
38548 return NULL_TREE;
38549 }
38550 if (TARGET_64BIT)
38551 {
38552 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
38553 name);
38554 *no_add_attrs = true;
38555 return NULL_TREE;
38556 }
38557 if (is_attribute_p ("callee_pop_aggregate_return", name))
38558 {
38559 tree cst;
38560
38561 cst = TREE_VALUE (args);
38562 if (TREE_CODE (cst) != INTEGER_CST)
38563 {
38564 warning (OPT_Wattributes,
38565 "%qE attribute requires an integer constant argument",
38566 name);
38567 *no_add_attrs = true;
38568 }
38569 else if (compare_tree_int (cst, 0) != 0
38570 && compare_tree_int (cst, 1) != 0)
38571 {
38572 warning (OPT_Wattributes,
38573 "argument to %qE attribute is neither zero, nor one",
38574 name);
38575 *no_add_attrs = true;
38576 }
38577
38578 return NULL_TREE;
38579 }
38580
38581 return NULL_TREE;
38582 }
38583
38584 /* Handle a "ms_abi" or "sysv_abi" attribute; arguments as in
38585 struct attribute_spec.handler. */
38586 static tree
38587 ix86_handle_abi_attribute (tree *node, tree name, tree, int,
38588 bool *no_add_attrs)
38589 {
38590 if (TREE_CODE (*node) != FUNCTION_TYPE
38591 && TREE_CODE (*node) != METHOD_TYPE
38592 && TREE_CODE (*node) != FIELD_DECL
38593 && TREE_CODE (*node) != TYPE_DECL)
38594 {
38595 warning (OPT_Wattributes, "%qE attribute only applies to functions",
38596 name);
38597 *no_add_attrs = true;
38598 return NULL_TREE;
38599 }
38600
38601 /* Can combine regparm with all attributes but fastcall. */
38602 if (is_attribute_p ("ms_abi", name))
38603 {
38604 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
38605 {
38606 error ("ms_abi and sysv_abi attributes are not compatible");
38607 }
38608
38609 return NULL_TREE;
38610 }
38611 else if (is_attribute_p ("sysv_abi", name))
38612 {
38613 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
38614 {
38615 error ("ms_abi and sysv_abi attributes are not compatible");
38616 }
38617
38618 return NULL_TREE;
38619 }
38620
38621 return NULL_TREE;
38622 }
38623
38624 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
38625 struct attribute_spec.handler. */
38626 static tree
38627 ix86_handle_struct_attribute (tree *node, tree name, tree, int,
38628 bool *no_add_attrs)
38629 {
38630 tree *type = NULL;
38631 if (DECL_P (*node))
38632 {
38633 if (TREE_CODE (*node) == TYPE_DECL)
38634 type = &TREE_TYPE (*node);
38635 }
38636 else
38637 type = node;
38638
38639 if (!(type && RECORD_OR_UNION_TYPE_P (*type)))
38640 {
38641 warning (OPT_Wattributes, "%qE attribute ignored",
38642 name);
38643 *no_add_attrs = true;
38644 }
38645
38646 else if ((is_attribute_p ("ms_struct", name)
38647 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
38648 || ((is_attribute_p ("gcc_struct", name)
38649 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
38650 {
38651 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
38652 name);
38653 *no_add_attrs = true;
38654 }
38655
38656 return NULL_TREE;
38657 }
38658
38659 static tree
38660 ix86_handle_fndecl_attribute (tree *node, tree name, tree, int,
38661 bool *no_add_attrs)
38662 {
38663 if (TREE_CODE (*node) != FUNCTION_DECL)
38664 {
38665 warning (OPT_Wattributes, "%qE attribute only applies to functions",
38666 name);
38667 *no_add_attrs = true;
38668 }
38669 return NULL_TREE;
38670 }
38671
38672 static bool
38673 ix86_ms_bitfield_layout_p (const_tree record_type)
38674 {
38675 return ((TARGET_MS_BITFIELD_LAYOUT
38676 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
38677 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
38678 }
38679
38680 /* Returns an expression indicating where the this parameter is
38681 located on entry to the FUNCTION. */
38682
38683 static rtx
38684 x86_this_parameter (tree function)
38685 {
38686 tree type = TREE_TYPE (function);
38687 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
38688 int nregs;
38689
38690 if (TARGET_64BIT)
38691 {
38692 const int *parm_regs;
38693
38694 if (ix86_function_type_abi (type) == MS_ABI)
38695 parm_regs = x86_64_ms_abi_int_parameter_registers;
38696 else
38697 parm_regs = x86_64_int_parameter_registers;
38698 return gen_rtx_REG (Pmode, parm_regs[aggr]);
38699 }
38700
38701 nregs = ix86_function_regparm (type, function);
38702
38703 if (nregs > 0 && !stdarg_p (type))
38704 {
38705 int regno;
38706 unsigned int ccvt = ix86_get_callcvt (type);
38707
38708 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
38709 regno = aggr ? DX_REG : CX_REG;
38710 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
38711 {
38712 regno = CX_REG;
38713 if (aggr)
38714 return gen_rtx_MEM (SImode,
38715 plus_constant (Pmode, stack_pointer_rtx, 4));
38716 }
38717 else
38718 {
38719 regno = AX_REG;
38720 if (aggr)
38721 {
38722 regno = DX_REG;
38723 if (nregs == 1)
38724 return gen_rtx_MEM (SImode,
38725 plus_constant (Pmode,
38726 stack_pointer_rtx, 4));
38727 }
38728 }
38729 return gen_rtx_REG (SImode, regno);
38730 }
38731
38732 return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
38733 aggr ? 8 : 4));
38734 }
38735
38736 /* Determine whether x86_output_mi_thunk can succeed. */
38737
38738 static bool
38739 x86_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
38740 const_tree function)
38741 {
38742 /* 64-bit can handle anything. */
38743 if (TARGET_64BIT)
38744 return true;
38745
38746 /* For 32-bit, everything's fine if we have one free register. */
38747 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
38748 return true;
38749
38750 /* Need a free register for vcall_offset. */
38751 if (vcall_offset)
38752 return false;
38753
38754 /* Need a free register for GOT references. */
38755 if (flag_pic && !targetm.binds_local_p (function))
38756 return false;
38757
38758 /* Otherwise ok. */
38759 return true;
38760 }
38761
38762 /* Output the assembler code for a thunk function. THUNK_DECL is the
38763 declaration for the thunk function itself, FUNCTION is the decl for
38764 the target function. DELTA is an immediate constant offset to be
38765 added to THIS. If VCALL_OFFSET is nonzero, the word at
38766 *(*this + vcall_offset) should be added to THIS. */
38767
38768 static void
38769 x86_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
38770 HOST_WIDE_INT vcall_offset, tree function)
38771 {
38772 rtx this_param = x86_this_parameter (function);
38773 rtx this_reg, tmp, fnaddr;
38774 unsigned int tmp_regno;
38775
38776 if (TARGET_64BIT)
38777 tmp_regno = R10_REG;
38778 else
38779 {
38780 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
38781 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
38782 tmp_regno = AX_REG;
38783 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
38784 tmp_regno = DX_REG;
38785 else
38786 tmp_regno = CX_REG;
38787 }
38788
38789 emit_note (NOTE_INSN_PROLOGUE_END);
38790
38791 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
38792 pull it in now and let DELTA benefit. */
38793 if (REG_P (this_param))
38794 this_reg = this_param;
38795 else if (vcall_offset)
38796 {
38797 /* Put the this parameter into %eax. */
38798 this_reg = gen_rtx_REG (Pmode, AX_REG);
38799 emit_move_insn (this_reg, this_param);
38800 }
38801 else
38802 this_reg = NULL_RTX;
38803
38804 /* Adjust the this parameter by a fixed constant. */
38805 if (delta)
38806 {
38807 rtx delta_rtx = GEN_INT (delta);
38808 rtx delta_dst = this_reg ? this_reg : this_param;
38809
38810 if (TARGET_64BIT)
38811 {
38812 if (!x86_64_general_operand (delta_rtx, Pmode))
38813 {
38814 tmp = gen_rtx_REG (Pmode, tmp_regno);
38815 emit_move_insn (tmp, delta_rtx);
38816 delta_rtx = tmp;
38817 }
38818 }
38819
38820 ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
38821 }
38822
38823 /* Adjust the this parameter by a value stored in the vtable. */
38824 if (vcall_offset)
38825 {
38826 rtx vcall_addr, vcall_mem, this_mem;
38827
38828 tmp = gen_rtx_REG (Pmode, tmp_regno);
38829
38830 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
38831 if (Pmode != ptr_mode)
38832 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
38833 emit_move_insn (tmp, this_mem);
38834
38835 /* Adjust the this parameter. */
38836 vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
38837 if (TARGET_64BIT
38838 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
38839 {
38840 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
38841 emit_move_insn (tmp2, GEN_INT (vcall_offset));
38842 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
38843 }
38844
38845 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
38846 if (Pmode != ptr_mode)
38847 emit_insn (gen_addsi_1_zext (this_reg,
38848 gen_rtx_REG (ptr_mode,
38849 REGNO (this_reg)),
38850 vcall_mem));
38851 else
38852 ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
38853 }
38854
38855 /* If necessary, drop THIS back to its stack slot. */
38856 if (this_reg && this_reg != this_param)
38857 emit_move_insn (this_param, this_reg);
38858
38859 fnaddr = XEXP (DECL_RTL (function), 0);
38860 if (TARGET_64BIT)
38861 {
38862 if (!flag_pic || targetm.binds_local_p (function)
38863 || TARGET_PECOFF)
38864 ;
38865 else
38866 {
38867 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
38868 tmp = gen_rtx_CONST (Pmode, tmp);
38869 fnaddr = gen_const_mem (Pmode, tmp);
38870 }
38871 }
38872 else
38873 {
38874 if (!flag_pic || targetm.binds_local_p (function))
38875 ;
38876 #if TARGET_MACHO
38877 else if (TARGET_MACHO)
38878 {
38879 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
38880 fnaddr = XEXP (fnaddr, 0);
38881 }
38882 #endif /* TARGET_MACHO */
38883 else
38884 {
38885 tmp = gen_rtx_REG (Pmode, CX_REG);
38886 output_set_got (tmp, NULL_RTX);
38887
38888 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
38889 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
38890 fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
38891 fnaddr = gen_const_mem (Pmode, fnaddr);
38892 }
38893 }
38894
38895 /* Our sibling call patterns do not allow memories, because we have no
38896 predicate that can distinguish between frame and non-frame memory.
38897 For our purposes here, we can get away with (ab)using a jump pattern,
38898 because we're going to do no optimization. */
38899 if (MEM_P (fnaddr))
38900 {
38901 if (sibcall_insn_operand (fnaddr, word_mode))
38902 {
38903 tmp = gen_rtx_CALL (VOIDmode, fnaddr, const0_rtx);
38904 tmp = emit_call_insn (tmp);
38905 SIBLING_CALL_P (tmp) = 1;
38906 }
38907 else
38908 emit_jump_insn (gen_indirect_jump (fnaddr));
38909 }
38910 else
38911 {
38912 if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
38913 fnaddr = legitimize_pic_address (fnaddr,
38914 gen_rtx_REG (Pmode, tmp_regno));
38915
38916 if (!sibcall_insn_operand (fnaddr, word_mode))
38917 {
38918 tmp = gen_rtx_REG (word_mode, tmp_regno);
38919 if (GET_MODE (fnaddr) != word_mode)
38920 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
38921 emit_move_insn (tmp, fnaddr);
38922 fnaddr = tmp;
38923 }
38924
38925 tmp = gen_rtx_MEM (QImode, fnaddr);
38926 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
38927 tmp = emit_call_insn (tmp);
38928 SIBLING_CALL_P (tmp) = 1;
38929 }
38930 emit_barrier ();
38931
38932 /* Emit just enough of rest_of_compilation to get the insns emitted.
38933 Note that use_thunk calls assemble_start_function et al. */
38934 tmp = get_insns ();
38935 shorten_branches (tmp);
38936 final_start_function (tmp, file, 1);
38937 final (tmp, file, 1);
38938 final_end_function ();
38939 }
38940
38941 static void
38942 x86_file_start (void)
38943 {
38944 default_file_start ();
38945 if (TARGET_16BIT)
38946 fputs ("\t.code16gcc\n", asm_out_file);
38947 #if TARGET_MACHO
38948 darwin_file_start ();
38949 #endif
38950 if (X86_FILE_START_VERSION_DIRECTIVE)
38951 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
38952 if (X86_FILE_START_FLTUSED)
38953 fputs ("\t.global\t__fltused\n", asm_out_file);
38954 if (ix86_asm_dialect == ASM_INTEL)
38955 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
38956 }
38957
38958 int
38959 x86_field_alignment (tree field, int computed)
38960 {
38961 enum machine_mode mode;
38962 tree type = TREE_TYPE (field);
38963
38964 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
38965 return computed;
38966 mode = TYPE_MODE (strip_array_types (type));
38967 if (mode == DFmode || mode == DCmode
38968 || GET_MODE_CLASS (mode) == MODE_INT
38969 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
38970 return MIN (32, computed);
38971 return computed;
38972 }
38973
38974 /* Output assembler code to FILE to increment profiler label # LABELNO
38975 for profiling a function entry. */
38976 void
38977 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
38978 {
38979 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
38980 : MCOUNT_NAME);
38981
38982 if (TARGET_64BIT)
38983 {
38984 #ifndef NO_PROFILE_COUNTERS
38985 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
38986 #endif
38987
38988 if (!TARGET_PECOFF && flag_pic)
38989 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
38990 else
38991 fprintf (file, "\tcall\t%s\n", mcount_name);
38992 }
38993 else if (flag_pic)
38994 {
38995 #ifndef NO_PROFILE_COUNTERS
38996 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
38997 LPREFIX, labelno);
38998 #endif
38999 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
39000 }
39001 else
39002 {
39003 #ifndef NO_PROFILE_COUNTERS
39004 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
39005 LPREFIX, labelno);
39006 #endif
39007 fprintf (file, "\tcall\t%s\n", mcount_name);
39008 }
39009 }
39010
39011 /* We don't have exact information about the insn sizes, but we may assume
39012 quite safely that we are informed about all 1 byte insns and memory
39013 address sizes. This is enough to eliminate unnecessary padding in
39014 99% of cases. */
39015
39016 static int
39017 min_insn_size (rtx insn)
39018 {
39019 int l = 0, len;
39020
39021 if (!INSN_P (insn) || !active_insn_p (insn))
39022 return 0;
39023
39024 /* Discard alignments we've emitted and jump instructions. */
39025 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
39026 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
39027 return 0;
39028
39029 /* Important case - calls are always 5 bytes.
39030 It is common to have many calls in a row. */
39031 if (CALL_P (insn)
39032 && symbolic_reference_mentioned_p (PATTERN (insn))
39033 && !SIBLING_CALL_P (insn))
39034 return 5;
39035 len = get_attr_length (insn);
39036 if (len <= 1)
39037 return 1;
39038
39039 /* For normal instructions we rely on get_attr_length being exact,
39040 with a few exceptions. */
39041 if (!JUMP_P (insn))
39042 {
39043 enum attr_type type = get_attr_type (insn);
39044
39045 switch (type)
39046 {
39047 case TYPE_MULTI:
39048 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
39049 || asm_noperands (PATTERN (insn)) >= 0)
39050 return 0;
39051 break;
39052 case TYPE_OTHER:
39053 case TYPE_FCMP:
39054 break;
39055 default:
39056 /* Otherwise trust get_attr_length. */
39057 return len;
39058 }
39059
39060 l = get_attr_length_address (insn);
39061 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
39062 l = 4;
39063 }
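/* If we know the address length, assume one opcode byte plus that many
   address bytes; otherwise fall back to a conservative two-byte estimate. */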
39064 if (l)
39065 return 1+l;
39066 else
39067 return 2;
39068 }
39069
39070 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
39071
39072 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
39073 window. */
39074
39075 static void
39076 ix86_avoid_jump_mispredicts (void)
39077 {
39078 rtx insn, start = get_insns ();
39079 int nbytes = 0, njumps = 0;
39080 int isjump = 0;
39081
39082 /* Look for all minimal intervals of instructions containing 4 jumps.
39083 The intervals are bounded by START and INSN. NBYTES is the total
39084 size of instructions in the interval including INSN and not including
39085 START. When NBYTES is smaller than 16 bytes, it is possible
39086 that the ends of START and INSN land in the same 16-byte window.
39087
39088 The smallest offset in the window at which INSN can start occurs when
39089 START ends at offset 0; the offset of INSN is then NBYTES - sizeof (INSN).
39090 We emit a p2align to a 16-byte boundary with max skip 15 - NBYTES + sizeof (INSN).
39091
39092 Don't consider an asm goto as a jump: while it can contain a jump, it
39093 doesn't have to, control transfer to its label(s) can be performed by other
39094 means, and we also estimate the minimum length of all asm stmts as 0. */
39095 for (insn = start; insn; insn = NEXT_INSN (insn))
39096 {
39097 int min_size;
39098
39099 if (LABEL_P (insn))
39100 {
39101 int align = label_to_alignment (insn);
39102 int max_skip = label_to_max_skip (insn);
39103
39104 if (max_skip > 15)
39105 max_skip = 15;
39106 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
39107 already in the current 16 byte page, because otherwise
39108 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
39109 bytes to reach 16 byte boundary. */
39110 if (align <= 0
39111 || (align <= 3 && max_skip != (1 << align) - 1))
39112 max_skip = 0;
39113 if (dump_file)
39114 fprintf (dump_file, "Label %i with max_skip %i\n",
39115 INSN_UID (insn), max_skip);
39116 if (max_skip)
39117 {
39118 while (nbytes + max_skip >= 16)
39119 {
39120 start = NEXT_INSN (start);
39121 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
39122 || CALL_P (start))
39123 njumps--, isjump = 1;
39124 else
39125 isjump = 0;
39126 nbytes -= min_insn_size (start);
39127 }
39128 }
39129 continue;
39130 }
39131
39132 min_size = min_insn_size (insn);
39133 nbytes += min_size;
39134 if (dump_file)
39135 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
39136 INSN_UID (insn), min_size);
39137 if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
39138 || CALL_P (insn))
39139 njumps++;
39140 else
39141 continue;
39142
39143 while (njumps > 3)
39144 {
39145 start = NEXT_INSN (start);
39146 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
39147 || CALL_P (start))
39148 njumps--, isjump = 1;
39149 else
39150 isjump = 0;
39151 nbytes -= min_insn_size (start);
39152 }
39153 gcc_assert (njumps >= 0);
39154 if (dump_file)
39155 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
39156 INSN_UID (start), INSN_UID (insn), nbytes);
39157
39158 if (njumps == 3 && isjump && nbytes < 16)
39159 {
39160 int padsize = 15 - nbytes + min_insn_size (insn);
39161
39162 if (dump_file)
39163 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
39164 INSN_UID (insn), padsize);
39165 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
39166 }
39167 }
39168 }
39169 #endif
39170
39171 /* AMD Athlon works faster
39172 when RET is not the destination of a conditional jump or directly preceded
39173 by another jump instruction. We avoid the penalty by inserting a NOP just
39174 before the RET instruction in such cases. */
39175 static void
39176 ix86_pad_returns (void)
39177 {
39178 edge e;
39179 edge_iterator ei;
39180
39181 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
39182 {
39183 basic_block bb = e->src;
39184 rtx ret = BB_END (bb);
39185 rtx prev;
39186 bool replace = false;
39187
39188 if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
39189 || optimize_bb_for_size_p (bb))
39190 continue;
39191 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
39192 if (active_insn_p (prev) || LABEL_P (prev))
39193 break;
39194 if (prev && LABEL_P (prev))
39195 {
39196 edge e;
39197 edge_iterator ei;
39198
39199 FOR_EACH_EDGE (e, ei, bb->preds)
39200 if (EDGE_FREQUENCY (e) && e->src->index >= 0
39201 && !(e->flags & EDGE_FALLTHRU))
39202 {
39203 replace = true;
39204 break;
39205 }
39206 }
39207 if (!replace)
39208 {
39209 prev = prev_active_insn (ret);
39210 if (prev
39211 && ((JUMP_P (prev) && any_condjump_p (prev))
39212 || CALL_P (prev)))
39213 replace = true;
39214 /* Empty functions get a branch mispredict even when
39215 the jump destination is not visible to us. */
39216 if (!prev && !optimize_function_for_size_p (cfun))
39217 replace = true;
39218 }
39219 if (replace)
39220 {
39221 emit_jump_insn_before (gen_simple_return_internal_long (), ret);
39222 delete_insn (ret);
39223 }
39224 }
39225 }
39226
39227 /* Count the minimum number of instructions in BB. Return 4 if the
39228 number of instructions >= 4. */
39229
39230 static int
39231 ix86_count_insn_bb (basic_block bb)
39232 {
39233 rtx insn;
39234 int insn_count = 0;
39235
39236 /* Count number of instructions in this block. Return 4 if the number
39237 of instructions >= 4. */
39238 FOR_BB_INSNS (bb, insn)
39239 {
39240 /* This only happens in exit blocks. */
39241 if (JUMP_P (insn)
39242 && ANY_RETURN_P (PATTERN (insn)))
39243 break;
39244
39245 if (NONDEBUG_INSN_P (insn)
39246 && GET_CODE (PATTERN (insn)) != USE
39247 && GET_CODE (PATTERN (insn)) != CLOBBER)
39248 {
39249 insn_count++;
39250 if (insn_count >= 4)
39251 return insn_count;
39252 }
39253 }
39254
39255 return insn_count;
39256 }
39257
39258
39259 /* Count the minimum number of instructions in code path in BB.
39260 Return 4 if the number of instructions >= 4. */
39261
39262 static int
39263 ix86_count_insn (basic_block bb)
39264 {
39265 edge e;
39266 edge_iterator ei;
39267 int min_prev_count;
39268
39269 /* Only bother counting instructions along paths with no
39270 more than 2 basic blocks between entry and exit. Given
39271 that BB has an edge to exit, determine if a predecessor
39272 of BB has an edge from entry. If so, compute the number
39273 of instructions in the predecessor block. If there
39274 happen to be multiple such blocks, compute the minimum. */
39275 min_prev_count = 4;
39276 FOR_EACH_EDGE (e, ei, bb->preds)
39277 {
39278 edge prev_e;
39279 edge_iterator prev_ei;
39280
39281 if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
39282 {
39283 min_prev_count = 0;
39284 break;
39285 }
39286 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
39287 {
39288 if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
39289 {
39290 int count = ix86_count_insn_bb (e->src);
39291 if (count < min_prev_count)
39292 min_prev_count = count;
39293 break;
39294 }
39295 }
39296 }
39297
39298 if (min_prev_count < 4)
39299 min_prev_count += ix86_count_insn_bb (bb);
39300
39301 return min_prev_count;
39302 }
39303
39304 /* Pad short function to 4 instructions. */
39305
39306 static void
39307 ix86_pad_short_function (void)
39308 {
39309 edge e;
39310 edge_iterator ei;
39311
39312 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
39313 {
39314 rtx ret = BB_END (e->src);
39315 if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
39316 {
39317 int insn_count = ix86_count_insn (e->src);
39318
39319 /* Pad short function. */
39320 if (insn_count < 4)
39321 {
39322 rtx insn = ret;
39323
39324 /* Find epilogue. */
39325 while (insn
39326 && (!NOTE_P (insn)
39327 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
39328 insn = PREV_INSN (insn);
39329
39330 if (!insn)
39331 insn = ret;
39332
39333 /* Two NOPs count as one instruction. */
39334 insn_count = 2 * (4 - insn_count);
39335 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
39336 }
39337 }
39338 }
39339 }
39340
39341 /* Fix up a Windows system unwinder issue. If an EH region falls through into
39342 the epilogue, the Windows system unwinder will apply epilogue logic and
39343 produce incorrect offsets. This can be avoided by adding a nop between
39344 the last insn that can throw and the first insn of the epilogue. */
39345
39346 static void
39347 ix86_seh_fixup_eh_fallthru (void)
39348 {
39349 edge e;
39350 edge_iterator ei;
39351
39352 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
39353 {
39354 rtx insn, next;
39355
39356 /* Find the beginning of the epilogue. */
39357 for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
39358 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
39359 break;
39360 if (insn == NULL)
39361 continue;
39362
39363 /* We only care about preceding insns that can throw. */
39364 insn = prev_active_insn (insn);
39365 if (insn == NULL || !can_throw_internal (insn))
39366 continue;
39367
39368 /* Do not separate calls from their debug information. */
39369 for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
39370 if (NOTE_P (next)
39371 && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
39372 || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION))
39373 insn = next;
39374 else
39375 break;
39376
39377 emit_insn_after (gen_nops (const1_rtx), insn);
39378 }
39379 }
39380
39381 /* Implement machine specific optimizations. We implement padding of returns
39382 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
39383 static void
39384 ix86_reorg (void)
39385 {
39386 /* We are freeing block_for_insn in the toplev to keep compatibility
39387 with old MDEP_REORGS that are not CFG based. Recompute it now. */
39388 compute_bb_for_insn ();
39389
39390 if (TARGET_SEH && current_function_has_exception_handlers ())
39391 ix86_seh_fixup_eh_fallthru ();
39392
39393 if (optimize && optimize_function_for_speed_p (cfun))
39394 {
39395 if (TARGET_PAD_SHORT_FUNCTION)
39396 ix86_pad_short_function ();
39397 else if (TARGET_PAD_RETURNS)
39398 ix86_pad_returns ();
39399 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
39400 if (TARGET_FOUR_JUMP_LIMIT)
39401 ix86_avoid_jump_mispredicts ();
39402 #endif
39403 }
39404 }
39405
39406 /* Return nonzero when a QImode register that must be represented via a REX
39407 prefix is used. */
39408 bool
39409 x86_extended_QIreg_mentioned_p (rtx insn)
39410 {
39411 int i;
39412 extract_insn_cached (insn);
39413 for (i = 0; i < recog_data.n_operands; i++)
39414 if (GENERAL_REG_P (recog_data.operand[i])
39415 && !QI_REGNO_P (REGNO (recog_data.operand[i])))
39416 return true;
39417 return false;
39418 }
39419
39420 /* Return nonzero when P points to a register encoded via a REX prefix.
39421 Called via for_each_rtx. */
39422 static int
39423 extended_reg_mentioned_1 (rtx *p, void *)
39424 {
39425 unsigned int regno;
39426 if (!REG_P (*p))
39427 return 0;
39428 regno = REGNO (*p);
39429 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
39430 }
39431
39432 /* Return true when INSN mentions a register that must be encoded using a
39433 REX prefix. */
39434 bool
39435 x86_extended_reg_mentioned_p (rtx insn)
39436 {
39437 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
39438 extended_reg_mentioned_1, NULL);
39439 }
39440
39441 /* If profitable, negate (without causing overflow) integer constant
39442 of mode MODE at location LOC. Return true in this case. */
39443 bool
39444 x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
39445 {
39446 HOST_WIDE_INT val;
39447
39448 if (!CONST_INT_P (*loc))
39449 return false;
39450
39451 switch (mode)
39452 {
39453 case DImode:
39454 /* DImode x86_64 constants must fit in 32 bits. */
39455 gcc_assert (x86_64_immediate_operand (*loc, mode));
39456
39457 mode = SImode;
39458 break;
39459
39460 case SImode:
39461 case HImode:
39462 case QImode:
39463 break;
39464
39465 default:
39466 gcc_unreachable ();
39467 }
39468
39469 /* Avoid overflows. */
39470 if (mode_signbit_p (mode, *loc))
39471 return false;
39472
39473 val = INTVAL (*loc);
39474
39475 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
39476 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
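/* For example, turning (plus x 128) into (minus x -128) lets the -128
   immediate be encoded in a single sign-extended byte, whereas +128 would
   need a full 32-bit immediate; -128 itself is left alone for the same
   reason. */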
39477 if ((val < 0 && val != -128)
39478 || val == 128)
39479 {
39480 *loc = GEN_INT (-val);
39481 return true;
39482 }
39483
39484 return false;
39485 }
39486
39487 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
39488 optabs would emit if we didn't have TFmode patterns. */
39489
39490 void
39491 x86_emit_floatuns (rtx operands[2])
39492 {
39493 rtx neglab, donelab, i0, i1, f0, in, out;
39494 enum machine_mode mode, inmode;
39495
39496 inmode = GET_MODE (operands[1]);
39497 gcc_assert (inmode == SImode || inmode == DImode);
39498
39499 out = operands[0];
39500 in = force_reg (inmode, operands[1]);
39501 mode = GET_MODE (out);
39502 neglab = gen_label_rtx ();
39503 donelab = gen_label_rtx ();
39504 f0 = gen_reg_rtx (mode);
39505
39506 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
39507
39508 expand_float (out, in, 0);
39509
39510 emit_jump_insn (gen_jump (donelab));
39511 emit_barrier ();
39512
39513 emit_label (neglab);
39514
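/* The input's sign bit is set, so it would convert as a negative value.
   Compute (in >> 1) | (in & 1), convert that, and double the result; the
   OR of the low bit keeps the rounding correct after the halving. */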
39515 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
39516 1, OPTAB_DIRECT);
39517 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
39518 1, OPTAB_DIRECT);
39519 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
39520
39521 expand_float (f0, i0, 0);
39522
39523 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
39524
39525 emit_label (donelab);
39526 }
39527 \f
39528 /* AVX512F does support 64-byte integer vector operations,
39529 thus the longest vector we are faced with is V64QImode. */
39530 #define MAX_VECT_LEN 64
39531
39532 struct expand_vec_perm_d
39533 {
39534 rtx target, op0, op1;
39535 unsigned char perm[MAX_VECT_LEN];
39536 enum machine_mode vmode;
39537 unsigned char nelt;
39538 bool one_operand_p;
39539 bool testing_p;
39540 };
39541
39542 static bool canonicalize_perm (struct expand_vec_perm_d *d);
39543 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
39544 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
39545
39546 /* Get a vector mode of the same size as the original but with elements
39547 twice as wide. This is only guaranteed to apply to integral vectors. */
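/* For the integral vector modes used here, e.g. V16QImode yields V8HImode
   and V8HImode yields V4SImode. */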
39548
39549 static inline enum machine_mode
39550 get_mode_wider_vector (enum machine_mode o)
39551 {
39552 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
39553 enum machine_mode n = GET_MODE_WIDER_MODE (o);
39554 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
39555 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
39556 return n;
39557 }
39558
39559 /* A subroutine of ix86_expand_vector_init_duplicate. Tries to
39560 fill target with val via vec_duplicate. */
39561
39562 static bool
39563 ix86_vector_duplicate_value (enum machine_mode mode, rtx target, rtx val)
39564 {
39565 bool ok;
39566 rtx insn, dup;
39567
39568 /* First attempt to recognize VAL as-is. */
39569 dup = gen_rtx_VEC_DUPLICATE (mode, val);
39570 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
39571 if (recog_memoized (insn) < 0)
39572 {
39573 rtx seq;
39574 /* If that fails, force VAL into a register. */
39575
39576 start_sequence ();
39577 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
39578 seq = get_insns ();
39579 end_sequence ();
39580 if (seq)
39581 emit_insn_before (seq, insn);
39582
39583 ok = recog_memoized (insn) >= 0;
39584 gcc_assert (ok);
39585 }
39586 return true;
39587 }
39588
39589 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
39590 with all elements equal to VAR. Return true if successful. */
39591
39592 static bool
39593 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
39594 rtx target, rtx val)
39595 {
39596 bool ok;
39597
39598 switch (mode)
39599 {
39600 case V2SImode:
39601 case V2SFmode:
39602 if (!mmx_ok)
39603 return false;
39604 /* FALLTHRU */
39605
39606 case V4DFmode:
39607 case V4DImode:
39608 case V8SFmode:
39609 case V8SImode:
39610 case V2DFmode:
39611 case V2DImode:
39612 case V4SFmode:
39613 case V4SImode:
39614 case V16SImode:
39615 case V8DImode:
39616 case V16SFmode:
39617 case V8DFmode:
39618 return ix86_vector_duplicate_value (mode, target, val);
39619
39620 case V4HImode:
39621 if (!mmx_ok)
39622 return false;
39623 if (TARGET_SSE || TARGET_3DNOW_A)
39624 {
39625 rtx x;
39626
39627 val = gen_lowpart (SImode, val);
39628 x = gen_rtx_TRUNCATE (HImode, val);
39629 x = gen_rtx_VEC_DUPLICATE (mode, x);
39630 emit_insn (gen_rtx_SET (VOIDmode, target, x));
39631 return true;
39632 }
39633 goto widen;
39634
39635 case V8QImode:
39636 if (!mmx_ok)
39637 return false;
39638 goto widen;
39639
39640 case V8HImode:
39641 if (TARGET_SSE2)
39642 {
39643 struct expand_vec_perm_d dperm;
39644 rtx tmp1, tmp2;
39645
39646 permute:
39647 memset (&dperm, 0, sizeof (dperm));
39648 dperm.target = target;
39649 dperm.vmode = mode;
39650 dperm.nelt = GET_MODE_NUNITS (mode);
39651 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
39652 dperm.one_operand_p = true;
39653
39654 /* Extend to SImode using a paradoxical SUBREG. */
39655 tmp1 = gen_reg_rtx (SImode);
39656 emit_move_insn (tmp1, gen_lowpart (SImode, val));
39657
39658 /* Insert the SImode value as low element of a V4SImode vector. */
39659 tmp2 = gen_reg_rtx (V4SImode);
39660 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
39661 emit_move_insn (dperm.op0, gen_lowpart (mode, tmp2));
39662
39663 ok = (expand_vec_perm_1 (&dperm)
39664 || expand_vec_perm_broadcast_1 (&dperm));
39665 gcc_assert (ok);
39666 return ok;
39667 }
39668 goto widen;
39669
39670 case V16QImode:
39671 if (TARGET_SSE2)
39672 goto permute;
39673 goto widen;
39674
39675 widen:
39676 /* Replicate the value once into the next wider mode and recurse. */
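/* E.g. to splat a QImode value B, first build the HImode value
   (B << 8) | B, splat that into the wider vector mode, and then
   reinterpret the result in the original mode via a lowpart move. */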
39677 {
39678 enum machine_mode smode, wsmode, wvmode;
39679 rtx x;
39680
39681 smode = GET_MODE_INNER (mode);
39682 wvmode = get_mode_wider_vector (mode);
39683 wsmode = GET_MODE_INNER (wvmode);
39684
39685 val = convert_modes (wsmode, smode, val, true);
39686 x = expand_simple_binop (wsmode, ASHIFT, val,
39687 GEN_INT (GET_MODE_BITSIZE (smode)),
39688 NULL_RTX, 1, OPTAB_LIB_WIDEN);
39689 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
39690
39691 x = gen_reg_rtx (wvmode);
39692 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
39693 gcc_assert (ok);
39694 emit_move_insn (target, gen_lowpart (GET_MODE (target), x));
39695 return ok;
39696 }
39697
39698 case V16HImode:
39699 case V32QImode:
39700 {
39701 enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
39702 rtx x = gen_reg_rtx (hvmode);
39703
39704 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
39705 gcc_assert (ok);
39706
39707 x = gen_rtx_VEC_CONCAT (mode, x, x);
39708 emit_insn (gen_rtx_SET (VOIDmode, target, x));
39709 }
39710 return true;
39711
39712 default:
39713 return false;
39714 }
39715 }
39716
39717 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
39718 whose ONE_VAR element is VAR, and other elements are zero. Return true
39719 if successful. */
39720
39721 static bool
39722 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
39723 rtx target, rtx var, int one_var)
39724 {
39725 enum machine_mode vsimode;
39726 rtx new_target;
39727 rtx x, tmp;
39728 bool use_vector_set = false;
39729
39730 switch (mode)
39731 {
39732 case V2DImode:
39733 /* For SSE4.1, we normally use vector set. But if the second
39734 element is zero and inter-unit moves are OK, we use movq
39735 instead. */
39736 use_vector_set = (TARGET_64BIT && TARGET_SSE4_1
39737 && !(TARGET_INTER_UNIT_MOVES_TO_VEC
39738 && one_var == 0));
39739 break;
39740 case V16QImode:
39741 case V4SImode:
39742 case V4SFmode:
39743 use_vector_set = TARGET_SSE4_1;
39744 break;
39745 case V8HImode:
39746 use_vector_set = TARGET_SSE2;
39747 break;
39748 case V4HImode:
39749 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
39750 break;
39751 case V32QImode:
39752 case V16HImode:
39753 case V8SImode:
39754 case V8SFmode:
39755 case V4DFmode:
39756 use_vector_set = TARGET_AVX;
39757 break;
39758 case V4DImode:
39759 /* Use ix86_expand_vector_set in 64bit mode only. */
39760 use_vector_set = TARGET_AVX && TARGET_64BIT;
39761 break;
39762 default:
39763 break;
39764 }
39765
39766 if (use_vector_set)
39767 {
39768 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
39769 var = force_reg (GET_MODE_INNER (mode), var);
39770 ix86_expand_vector_set (mmx_ok, target, var, one_var);
39771 return true;
39772 }
39773
39774 switch (mode)
39775 {
39776 case V2SFmode:
39777 case V2SImode:
39778 if (!mmx_ok)
39779 return false;
39780 /* FALLTHRU */
39781
39782 case V2DFmode:
39783 case V2DImode:
39784 if (one_var != 0)
39785 return false;
39786 var = force_reg (GET_MODE_INNER (mode), var);
39787 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
39788 emit_insn (gen_rtx_SET (VOIDmode, target, x));
39789 return true;
39790
39791 case V4SFmode:
39792 case V4SImode:
39793 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
39794 new_target = gen_reg_rtx (mode);
39795 else
39796 new_target = target;
39797 var = force_reg (GET_MODE_INNER (mode), var);
39798 x = gen_rtx_VEC_DUPLICATE (mode, var);
39799 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
39800 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
39801 if (one_var != 0)
39802 {
39803 /* We need to shuffle the value to the correct position, so
39804 create a new pseudo to store the intermediate result. */
39805
39806 /* With SSE2, we can use the integer shuffle insns. */
39807 if (mode != V4SFmode && TARGET_SSE2)
39808 {
39809 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
39810 const1_rtx,
39811 GEN_INT (one_var == 1 ? 0 : 1),
39812 GEN_INT (one_var == 2 ? 0 : 1),
39813 GEN_INT (one_var == 3 ? 0 : 1)));
39814 if (target != new_target)
39815 emit_move_insn (target, new_target);
39816 return true;
39817 }
39818
39819 /* Otherwise convert the intermediate result to V4SFmode and
39820 use the SSE1 shuffle instructions. */
39821 if (mode != V4SFmode)
39822 {
39823 tmp = gen_reg_rtx (V4SFmode);
39824 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
39825 }
39826 else
39827 tmp = new_target;
39828
39829 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
39830 const1_rtx,
39831 GEN_INT (one_var == 1 ? 0 : 1),
39832 GEN_INT (one_var == 2 ? 0+4 : 1+4),
39833 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
39834
39835 if (mode != V4SFmode)
39836 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
39837 else if (tmp != target)
39838 emit_move_insn (target, tmp);
39839 }
39840 else if (target != new_target)
39841 emit_move_insn (target, new_target);
39842 return true;
39843
39844 case V8HImode:
39845 case V16QImode:
39846 vsimode = V4SImode;
39847 goto widen;
39848 case V4HImode:
39849 case V8QImode:
39850 if (!mmx_ok)
39851 return false;
39852 vsimode = V2SImode;
39853 goto widen;
39854 widen:
39855 if (one_var != 0)
39856 return false;
39857
39858 /* Zero extend the variable element to SImode and recurse. */
39859 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
39860
39861 x = gen_reg_rtx (vsimode);
39862 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
39863 var, one_var))
39864 gcc_unreachable ();
39865
39866 emit_move_insn (target, gen_lowpart (mode, x));
39867 return true;
39868
39869 default:
39870 return false;
39871 }
39872 }
39873
39874 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
39875 consisting of the values in VALS. It is known that all elements
39876 except ONE_VAR are constants. Return true if successful. */
39877
39878 static bool
39879 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
39880 rtx target, rtx vals, int one_var)
39881 {
39882 rtx var = XVECEXP (vals, 0, one_var);
39883 enum machine_mode wmode;
39884 rtx const_vec, x;
39885
39886 const_vec = copy_rtx (vals);
39887 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
39888 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
39889
39890 switch (mode)
39891 {
39892 case V2DFmode:
39893 case V2DImode:
39894 case V2SFmode:
39895 case V2SImode:
39896 /* For the two element vectors, it's just as easy to use
39897 the general case. */
39898 return false;
39899
39900 case V4DImode:
39901 /* Use ix86_expand_vector_set in 64bit mode only. */
39902 if (!TARGET_64BIT)
39903 return false;
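/* FALLTHRU */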
39904 case V4DFmode:
39905 case V8SFmode:
39906 case V8SImode:
39907 case V16HImode:
39908 case V32QImode:
39909 case V4SFmode:
39910 case V4SImode:
39911 case V8HImode:
39912 case V4HImode:
39913 break;
39914
39915 case V16QImode:
39916 if (TARGET_SSE4_1)
39917 break;
39918 wmode = V8HImode;
39919 goto widen;
39920 case V8QImode:
39921 wmode = V4HImode;
39922 goto widen;
39923 widen:
39924 /* There's no way to set one QImode entry easily. Combine
39925 the variable value with its adjacent constant value, and
39926 promote to an HImode set. */
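/* For example, in a V16QImode vector whose only variable element VAR is
 at index 5, the adjacent constant is element 4, so we form the HImode
 value (VAR << 8) | (element 4 & 0xff), load the constant vector as
 V8HImode, and use ix86_expand_vector_set to store that HImode value
 at index 5 >> 1 == 2. */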
39927 x = XVECEXP (vals, 0, one_var ^ 1);
39928 if (one_var & 1)
39929 {
39930 var = convert_modes (HImode, QImode, var, true);
39931 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
39932 NULL_RTX, 1, OPTAB_LIB_WIDEN);
39933 x = GEN_INT (INTVAL (x) & 0xff);
39934 }
39935 else
39936 {
39937 var = convert_modes (HImode, QImode, var, true);
39938 x = gen_int_mode (INTVAL (x) << 8, HImode);
39939 }
39940 if (x != const0_rtx)
39941 var = expand_simple_binop (HImode, IOR, var, x, var,
39942 1, OPTAB_LIB_WIDEN);
39943
39944 x = gen_reg_rtx (wmode);
39945 emit_move_insn (x, gen_lowpart (wmode, const_vec));
39946 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
39947
39948 emit_move_insn (target, gen_lowpart (mode, x));
39949 return true;
39950
39951 default:
39952 return false;
39953 }
39954
39955 emit_move_insn (target, const_vec);
39956 ix86_expand_vector_set (mmx_ok, target, var, one_var);
39957 return true;
39958 }
39959
39960 /* A subroutine of ix86_expand_vector_init_general. Use vector
39961 concatenate to handle the most general case: all values variable,
39962 and none identical. */
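/* The vector is assembled as a balanced tree of VEC_CONCATs. For
 example, a V4SFmode vector {a, b, c, d} is built by first forming the
 V2SFmode halves {a, b} and {c, d} (processed in reverse order, see
 the PR 36222 note below) and then concatenating the two halves into
 the final V4SFmode register. */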
39963
39964 static void
39965 ix86_expand_vector_init_concat (enum machine_mode mode,
39966 rtx target, rtx *ops, int n)
39967 {
39968 enum machine_mode cmode, hmode = VOIDmode, gmode = VOIDmode;
39969 rtx first[16], second[8], third[4];
39970 rtvec v;
39971 int i, j;
39972
39973 switch (n)
39974 {
39975 case 2:
39976 switch (mode)
39977 {
39978 case V16SImode:
39979 cmode = V8SImode;
39980 break;
39981 case V16SFmode:
39982 cmode = V8SFmode;
39983 break;
39984 case V8DImode:
39985 cmode = V4DImode;
39986 break;
39987 case V8DFmode:
39988 cmode = V4DFmode;
39989 break;
39990 case V8SImode:
39991 cmode = V4SImode;
39992 break;
39993 case V8SFmode:
39994 cmode = V4SFmode;
39995 break;
39996 case V4DImode:
39997 cmode = V2DImode;
39998 break;
39999 case V4DFmode:
40000 cmode = V2DFmode;
40001 break;
40002 case V4SImode:
40003 cmode = V2SImode;
40004 break;
40005 case V4SFmode:
40006 cmode = V2SFmode;
40007 break;
40008 case V2DImode:
40009 cmode = DImode;
40010 break;
40011 case V2SImode:
40012 cmode = SImode;
40013 break;
40014 case V2DFmode:
40015 cmode = DFmode;
40016 break;
40017 case V2SFmode:
40018 cmode = SFmode;
40019 break;
40020 default:
40021 gcc_unreachable ();
40022 }
40023
40024 if (!register_operand (ops[1], cmode))
40025 ops[1] = force_reg (cmode, ops[1]);
40026 if (!register_operand (ops[0], cmode))
40027 ops[0] = force_reg (cmode, ops[0]);
40028 emit_insn (gen_rtx_SET (VOIDmode, target,
40029 gen_rtx_VEC_CONCAT (mode, ops[0],
40030 ops[1])));
40031 break;
40032
40033 case 4:
40034 switch (mode)
40035 {
40036 case V4DImode:
40037 cmode = V2DImode;
40038 break;
40039 case V4DFmode:
40040 cmode = V2DFmode;
40041 break;
40042 case V4SImode:
40043 cmode = V2SImode;
40044 break;
40045 case V4SFmode:
40046 cmode = V2SFmode;
40047 break;
40048 default:
40049 gcc_unreachable ();
40050 }
40051 goto half;
40052
40053 case 8:
40054 switch (mode)
40055 {
40056 case V8DImode:
40057 cmode = V2DImode;
40058 hmode = V4DImode;
40059 break;
40060 case V8DFmode:
40061 cmode = V2DFmode;
40062 hmode = V4DFmode;
40063 break;
40064 case V8SImode:
40065 cmode = V2SImode;
40066 hmode = V4SImode;
40067 break;
40068 case V8SFmode:
40069 cmode = V2SFmode;
40070 hmode = V4SFmode;
40071 break;
40072 default:
40073 gcc_unreachable ();
40074 }
40075 goto half;
40076
40077 case 16:
40078 switch (mode)
40079 {
40080 case V16SImode:
40081 cmode = V2SImode;
40082 hmode = V4SImode;
40083 gmode = V8SImode;
40084 break;
40085 case V16SFmode:
40086 cmode = V2SFmode;
40087 hmode = V4SFmode;
40088 gmode = V8SFmode;
40089 break;
40090 default:
40091 gcc_unreachable ();
40092 }
40093 goto half;
40094
40095 half:
40096 /* FIXME: We process inputs backward to help RA. PR 36222. */
40097 i = n - 1;
40098 j = (n >> 1) - 1;
40099 for (; i > 0; i -= 2, j--)
40100 {
40101 first[j] = gen_reg_rtx (cmode);
40102 v = gen_rtvec (2, ops[i - 1], ops[i]);
40103 ix86_expand_vector_init (false, first[j],
40104 gen_rtx_PARALLEL (cmode, v));
40105 }
40106
40107 n >>= 1;
40108 if (n > 4)
40109 {
40110 gcc_assert (hmode != VOIDmode);
40111 gcc_assert (gmode != VOIDmode);
40112 for (i = j = 0; i < n; i += 2, j++)
40113 {
40114 second[j] = gen_reg_rtx (hmode);
40115 ix86_expand_vector_init_concat (hmode, second [j],
40116 &first [i], 2);
40117 }
40118 n >>= 1;
40119 for (i = j = 0; i < n; i += 2, j++)
40120 {
40121 third[j] = gen_reg_rtx (gmode);
40122 ix86_expand_vector_init_concat (gmode, third[j],
40123 &second[i], 2);
40124 }
40125 n >>= 1;
40126 ix86_expand_vector_init_concat (mode, target, third, n);
40127 }
40128 else if (n > 2)
40129 {
40130 gcc_assert (hmode != VOIDmode);
40131 for (i = j = 0; i < n; i += 2, j++)
40132 {
40133 second[j] = gen_reg_rtx (hmode);
40134 ix86_expand_vector_init_concat (hmode, second [j],
40135 &first [i], 2);
40136 }
40137 n >>= 1;
40138 ix86_expand_vector_init_concat (mode, target, second, n);
40139 }
40140 else
40141 ix86_expand_vector_init_concat (mode, target, first, n);
40142 break;
40143
40144 default:
40145 gcc_unreachable ();
40146 }
40147 }
40148
40149 /* A subroutine of ix86_expand_vector_init_general. Use vector
40150 interleave to handle the most general case: all values variable,
40151 and none identical. */
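/* For V8HImode, for instance, each pair of adjacent input elements is
 first packed into the low 32 bits of its own vector: the first element
 of the pair via a scalar move into lane 0, the second via a vector set
 into lane 1. The packed vectors are then combined with interleave-low
 operations, first at V4SImode and finally at V2DImode granularity,
 to produce the full vector. */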
40152
40153 static void
40154 ix86_expand_vector_init_interleave (enum machine_mode mode,
40155 rtx target, rtx *ops, int n)
40156 {
40157 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
40158 int i, j;
40159 rtx op0, op1;
40160 rtx (*gen_load_even) (rtx, rtx, rtx);
40161 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
40162 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
40163
40164 switch (mode)
40165 {
40166 case V8HImode:
40167 gen_load_even = gen_vec_setv8hi;
40168 gen_interleave_first_low = gen_vec_interleave_lowv4si;
40169 gen_interleave_second_low = gen_vec_interleave_lowv2di;
40170 inner_mode = HImode;
40171 first_imode = V4SImode;
40172 second_imode = V2DImode;
40173 third_imode = VOIDmode;
40174 break;
40175 case V16QImode:
40176 gen_load_even = gen_vec_setv16qi;
40177 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
40178 gen_interleave_second_low = gen_vec_interleave_lowv4si;
40179 inner_mode = QImode;
40180 first_imode = V8HImode;
40181 second_imode = V4SImode;
40182 third_imode = V2DImode;
40183 break;
40184 default:
40185 gcc_unreachable ();
40186 }
40187
40188 for (i = 0; i < n; i++)
40189 {
40190 /* Extend the odd element to SImode using a paradoxical SUBREG. */
40191 op0 = gen_reg_rtx (SImode);
40192 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
40193
40194 /* Insert the SImode value as the low element of a V4SImode vector. */
40195 op1 = gen_reg_rtx (V4SImode);
40196 op0 = gen_rtx_VEC_MERGE (V4SImode,
40197 gen_rtx_VEC_DUPLICATE (V4SImode,
40198 op0),
40199 CONST0_RTX (V4SImode),
40200 const1_rtx);
40201 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
40202
40203 /* Cast the V4SImode vector back to a vector in the original mode. */
40204 op0 = gen_reg_rtx (mode);
40205 emit_move_insn (op0, gen_lowpart (mode, op1));
40206
40207 /* Load even elements into the second position. */
40208 emit_insn (gen_load_even (op0,
40209 force_reg (inner_mode,
40210 ops [i + i + 1]),
40211 const1_rtx));
40212
40213 /* Cast vector to FIRST_IMODE vector. */
40214 ops[i] = gen_reg_rtx (first_imode);
40215 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
40216 }
40217
40218 /* Interleave low FIRST_IMODE vectors. */
40219 for (i = j = 0; i < n; i += 2, j++)
40220 {
40221 op0 = gen_reg_rtx (first_imode);
40222 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
40223
40224 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
40225 ops[j] = gen_reg_rtx (second_imode);
40226 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
40227 }
40228
40229 /* Interleave low SECOND_IMODE vectors. */
40230 switch (second_imode)
40231 {
40232 case V4SImode:
40233 for (i = j = 0; i < n / 2; i += 2, j++)
40234 {
40235 op0 = gen_reg_rtx (second_imode);
40236 emit_insn (gen_interleave_second_low (op0, ops[i],
40237 ops[i + 1]));
40238
40239 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
40240 vector. */
40241 ops[j] = gen_reg_rtx (third_imode);
40242 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
40243 }
40244 second_imode = V2DImode;
40245 gen_interleave_second_low = gen_vec_interleave_lowv2di;
40246 /* FALLTHRU */
40247
40248 case V2DImode:
40249 op0 = gen_reg_rtx (second_imode);
40250 emit_insn (gen_interleave_second_low (op0, ops[0],
40251 ops[1]));
40252
40253 /* Cast the SECOND_IMODE vector back to a vector in the original
40254 mode. */
40255 emit_insn (gen_rtx_SET (VOIDmode, target,
40256 gen_lowpart (mode, op0)));
40257 break;
40258
40259 default:
40260 gcc_unreachable ();
40261 }
40262 }
40263
40264 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
40265 all values variable, and none identical. */
40266
40267 static void
40268 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
40269 rtx target, rtx vals)
40270 {
40271 rtx ops[64], op0, op1;
40272 enum machine_mode half_mode = VOIDmode;
40273 int n, i;
40274
40275 switch (mode)
40276 {
40277 case V2SFmode:
40278 case V2SImode:
40279 if (!mmx_ok && !TARGET_SSE)
40280 break;
40281 /* FALLTHRU */
40282
40283 case V16SImode:
40284 case V16SFmode:
40285 case V8DFmode:
40286 case V8DImode:
40287 case V8SFmode:
40288 case V8SImode:
40289 case V4DFmode:
40290 case V4DImode:
40291 case V4SFmode:
40292 case V4SImode:
40293 case V2DFmode:
40294 case V2DImode:
40295 n = GET_MODE_NUNITS (mode);
40296 for (i = 0; i < n; i++)
40297 ops[i] = XVECEXP (vals, 0, i);
40298 ix86_expand_vector_init_concat (mode, target, ops, n);
40299 return;
40300
40301 case V32QImode:
40302 half_mode = V16QImode;
40303 goto half;
40304
40305 case V16HImode:
40306 half_mode = V8HImode;
40307 goto half;
40308
40309 half:
40310 n = GET_MODE_NUNITS (mode);
40311 for (i = 0; i < n; i++)
40312 ops[i] = XVECEXP (vals, 0, i);
40313 op0 = gen_reg_rtx (half_mode);
40314 op1 = gen_reg_rtx (half_mode);
40315 ix86_expand_vector_init_interleave (half_mode, op0, ops,
40316 n >> 2);
40317 ix86_expand_vector_init_interleave (half_mode, op1,
40318 &ops [n >> 1], n >> 2);
40319 emit_insn (gen_rtx_SET (VOIDmode, target,
40320 gen_rtx_VEC_CONCAT (mode, op0, op1)));
40321 return;
40322
40323 case V16QImode:
40324 if (!TARGET_SSE4_1)
40325 break;
40326 /* FALLTHRU */
40327
40328 case V8HImode:
40329 if (!TARGET_SSE2)
40330 break;
40331
40332 /* Don't use ix86_expand_vector_init_interleave if we can't
40333 move from GPR to SSE register directly. */
40334 if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
40335 break;
40336
40337 n = GET_MODE_NUNITS (mode);
40338 for (i = 0; i < n; i++)
40339 ops[i] = XVECEXP (vals, 0, i);
40340 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
40341 return;
40342
40343 case V4HImode:
40344 case V8QImode:
40345 break;
40346
40347 default:
40348 gcc_unreachable ();
40349 }
40350
40351 {
40352 int i, j, n_elts, n_words, n_elt_per_word;
40353 enum machine_mode inner_mode;
40354 rtx words[4], shift;
40355
40356 inner_mode = GET_MODE_INNER (mode);
40357 n_elts = GET_MODE_NUNITS (mode);
40358 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
40359 n_elt_per_word = n_elts / n_words;
40360 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
40361
40362 for (i = 0; i < n_words; ++i)
40363 {
40364 rtx word = NULL_RTX;
40365
40366 for (j = 0; j < n_elt_per_word; ++j)
40367 {
40368 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
40369 elt = convert_modes (word_mode, inner_mode, elt, true);
40370
40371 if (j == 0)
40372 word = elt;
40373 else
40374 {
40375 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
40376 word, 1, OPTAB_LIB_WIDEN);
40377 word = expand_simple_binop (word_mode, IOR, word, elt,
40378 word, 1, OPTAB_LIB_WIDEN);
40379 }
40380 }
40381
40382 words[i] = word;
40383 }
40384
40385 if (n_words == 1)
40386 emit_move_insn (target, gen_lowpart (mode, words[0]));
40387 else if (n_words == 2)
40388 {
40389 rtx tmp = gen_reg_rtx (mode);
40390 emit_clobber (tmp);
40391 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
40392 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
40393 emit_move_insn (target, tmp);
40394 }
40395 else if (n_words == 4)
40396 {
40397 rtx tmp = gen_reg_rtx (V4SImode);
40398 gcc_assert (word_mode == SImode);
40399 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
40400 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
40401 emit_move_insn (target, gen_lowpart (mode, tmp));
40402 }
40403 else
40404 gcc_unreachable ();
40405 }
40406 }
40407
40408 /* Initialize vector TARGET via VALS. Suppress the use of MMX
40409 instructions unless MMX_OK is true. */
40410
40411 void
40412 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
40413 {
40414 enum machine_mode mode = GET_MODE (target);
40415 enum machine_mode inner_mode = GET_MODE_INNER (mode);
40416 int n_elts = GET_MODE_NUNITS (mode);
40417 int n_var = 0, one_var = -1;
40418 bool all_same = true, all_const_zero = true;
40419 int i;
40420 rtx x;
40421
40422 for (i = 0; i < n_elts; ++i)
40423 {
40424 x = XVECEXP (vals, 0, i);
40425 if (!(CONST_INT_P (x)
40426 || GET_CODE (x) == CONST_DOUBLE
40427 || GET_CODE (x) == CONST_FIXED))
40428 n_var++, one_var = i;
40429 else if (x != CONST0_RTX (inner_mode))
40430 all_const_zero = false;
40431 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
40432 all_same = false;
40433 }
40434
40435 /* Constants are best loaded from the constant pool. */
40436 if (n_var == 0)
40437 {
40438 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
40439 return;
40440 }
40441
40442 /* If all values are identical, broadcast the value. */
40443 if (all_same
40444 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
40445 XVECEXP (vals, 0, 0)))
40446 return;
40447
40448 /* Values where only one field is non-constant are best loaded from
40449 the pool and overwritten via move later. */
40450 if (n_var == 1)
40451 {
40452 if (all_const_zero
40453 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
40454 XVECEXP (vals, 0, one_var),
40455 one_var))
40456 return;
40457
40458 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
40459 return;
40460 }
40461
40462 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
40463 }
40464
40465 void
40466 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
40467 {
40468 enum machine_mode mode = GET_MODE (target);
40469 enum machine_mode inner_mode = GET_MODE_INNER (mode);
40470 enum machine_mode half_mode;
40471 bool use_vec_merge = false;
40472 rtx tmp;
40473 static rtx (*gen_extract[6][2]) (rtx, rtx)
40474 = {
40475 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
40476 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
40477 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
40478 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
40479 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
40480 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
40481 };
40482 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
40483 = {
40484 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
40485 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
40486 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
40487 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
40488 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
40489 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
40490 };
40491 int i, j, n;
40492
40493 switch (mode)
40494 {
40495 case V2SFmode:
40496 case V2SImode:
40497 if (mmx_ok)
40498 {
40499 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
40500 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
40501 if (elt == 0)
40502 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
40503 else
40504 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
40505 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40506 return;
40507 }
40508 break;
40509
40510 case V2DImode:
40511 use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
40512 if (use_vec_merge)
40513 break;
40514
40515 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
40516 ix86_expand_vector_extract (false, tmp, target, 1 - elt);
40517 if (elt == 0)
40518 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
40519 else
40520 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
40521 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40522 return;
40523
40524 case V2DFmode:
40525 {
40526 rtx op0, op1;
40527
40528 /* For the two element vectors, we implement a VEC_CONCAT with
40529 the extraction of the other element. */
40530
40531 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
40532 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
40533
40534 if (elt == 0)
40535 op0 = val, op1 = tmp;
40536 else
40537 op0 = tmp, op1 = val;
40538
40539 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
40540 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40541 }
40542 return;
40543
40544 case V4SFmode:
40545 use_vec_merge = TARGET_SSE4_1;
40546 if (use_vec_merge)
40547 break;
40548
40549 switch (elt)
40550 {
40551 case 0:
40552 use_vec_merge = true;
40553 break;
40554
40555 case 1:
40556 /* tmp = target = A B C D */
40557 tmp = copy_to_reg (target);
40558 /* target = A A B B */
40559 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
40560 /* target = X A B B */
40561 ix86_expand_vector_set (false, target, val, 0);
40562 /* target = A X C D */
40563 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
40564 const1_rtx, const0_rtx,
40565 GEN_INT (2+4), GEN_INT (3+4)));
40566 return;
40567
40568 case 2:
40569 /* tmp = target = A B C D */
40570 tmp = copy_to_reg (target);
40571 /* tmp = X B C D */
40572 ix86_expand_vector_set (false, tmp, val, 0);
40573 /* target = A B X D */
40574 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
40575 const0_rtx, const1_rtx,
40576 GEN_INT (0+4), GEN_INT (3+4)));
40577 return;
40578
40579 case 3:
40580 /* tmp = target = A B C D */
40581 tmp = copy_to_reg (target);
40582 /* tmp = X B C D */
40583 ix86_expand_vector_set (false, tmp, val, 0);
40584 /* target = A B C X */
40585 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
40586 const0_rtx, const1_rtx,
40587 GEN_INT (2+4), GEN_INT (0+4)));
40588 return;
40589
40590 default:
40591 gcc_unreachable ();
40592 }
40593 break;
40594
40595 case V4SImode:
40596 use_vec_merge = TARGET_SSE4_1;
40597 if (use_vec_merge)
40598 break;
40599
40600 /* Element 0 handled by vec_merge below. */
40601 if (elt == 0)
40602 {
40603 use_vec_merge = true;
40604 break;
40605 }
40606
40607 if (TARGET_SSE2)
40608 {
40609 /* With SSE2, use integer shuffles to swap element 0 and ELT,
40610 store into element 0, then shuffle them back. */
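/* For example, with ELT == 2 and TARGET == {A, B, C, D}: the first
 pshufd produces {C, B, A, D}, storing VAL into element 0 gives
 {X, B, A, D}, and repeating the same pshufd restores the order,
 leaving {A, B, X, D}. */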
40611
40612 rtx order[4];
40613
40614 order[0] = GEN_INT (elt);
40615 order[1] = const1_rtx;
40616 order[2] = const2_rtx;
40617 order[3] = GEN_INT (3);
40618 order[elt] = const0_rtx;
40619
40620 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
40621 order[1], order[2], order[3]));
40622
40623 ix86_expand_vector_set (false, target, val, 0);
40624
40625 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
40626 order[1], order[2], order[3]));
40627 }
40628 else
40629 {
40630 /* For SSE1, we have to reuse the V4SF code. */
40631 rtx t = gen_reg_rtx (V4SFmode);
40632 ix86_expand_vector_set (false, t, gen_lowpart (SFmode, val), elt);
40633 emit_move_insn (target, gen_lowpart (mode, t));
40634 }
40635 return;
40636
40637 case V8HImode:
40638 use_vec_merge = TARGET_SSE2;
40639 break;
40640 case V4HImode:
40641 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
40642 break;
40643
40644 case V16QImode:
40645 use_vec_merge = TARGET_SSE4_1;
40646 break;
40647
40648 case V8QImode:
40649 break;
40650
40651 case V32QImode:
40652 half_mode = V16QImode;
40653 j = 0;
40654 n = 16;
40655 goto half;
40656
40657 case V16HImode:
40658 half_mode = V8HImode;
40659 j = 1;
40660 n = 8;
40661 goto half;
40662
40663 case V8SImode:
40664 half_mode = V4SImode;
40665 j = 2;
40666 n = 4;
40667 goto half;
40668
40669 case V4DImode:
40670 half_mode = V2DImode;
40671 j = 3;
40672 n = 2;
40673 goto half;
40674
40675 case V8SFmode:
40676 half_mode = V4SFmode;
40677 j = 4;
40678 n = 4;
40679 goto half;
40680
40681 case V4DFmode:
40682 half_mode = V2DFmode;
40683 j = 5;
40684 n = 2;
40685 goto half;
40686
40687 half:
40688 /* Compute offset. */
40689 i = elt / n;
40690 elt %= n;
40691
40692 gcc_assert (i <= 1);
40693
40694 /* Extract the half. */
40695 tmp = gen_reg_rtx (half_mode);
40696 emit_insn (gen_extract[j][i] (tmp, target));
40697
40698 /* Put val in tmp at elt. */
40699 ix86_expand_vector_set (false, tmp, val, elt);
40700
40701 /* Put it back. */
40702 emit_insn (gen_insert[j][i] (target, target, tmp));
40703 return;
40704
40705 default:
40706 break;
40707 }
40708
40709 if (use_vec_merge)
40710 {
40711 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
40712 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
40713 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40714 }
40715 else
40716 {
40717 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
40718
40719 emit_move_insn (mem, target);
40720
40721 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
40722 emit_move_insn (tmp, val);
40723
40724 emit_move_insn (target, mem);
40725 }
40726 }
40727
40728 void
40729 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
40730 {
40731 enum machine_mode mode = GET_MODE (vec);
40732 enum machine_mode inner_mode = GET_MODE_INNER (mode);
40733 bool use_vec_extr = false;
40734 rtx tmp;
40735
40736 switch (mode)
40737 {
40738 case V2SImode:
40739 case V2SFmode:
40740 if (!mmx_ok)
40741 break;
40742 /* FALLTHRU */
40743
40744 case V2DFmode:
40745 case V2DImode:
40746 use_vec_extr = true;
40747 break;
40748
40749 case V4SFmode:
40750 use_vec_extr = TARGET_SSE4_1;
40751 if (use_vec_extr)
40752 break;
40753
40754 switch (elt)
40755 {
40756 case 0:
40757 tmp = vec;
40758 break;
40759
40760 case 1:
40761 case 3:
40762 tmp = gen_reg_rtx (mode);
40763 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
40764 GEN_INT (elt), GEN_INT (elt),
40765 GEN_INT (elt+4), GEN_INT (elt+4)));
40766 break;
40767
40768 case 2:
40769 tmp = gen_reg_rtx (mode);
40770 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
40771 break;
40772
40773 default:
40774 gcc_unreachable ();
40775 }
40776 vec = tmp;
40777 use_vec_extr = true;
40778 elt = 0;
40779 break;
40780
40781 case V4SImode:
40782 use_vec_extr = TARGET_SSE4_1;
40783 if (use_vec_extr)
40784 break;
40785
40786 if (TARGET_SSE2)
40787 {
40788 switch (elt)
40789 {
40790 case 0:
40791 tmp = vec;
40792 break;
40793
40794 case 1:
40795 case 3:
40796 tmp = gen_reg_rtx (mode);
40797 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
40798 GEN_INT (elt), GEN_INT (elt),
40799 GEN_INT (elt), GEN_INT (elt)));
40800 break;
40801
40802 case 2:
40803 tmp = gen_reg_rtx (mode);
40804 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
40805 break;
40806
40807 default:
40808 gcc_unreachable ();
40809 }
40810 vec = tmp;
40811 use_vec_extr = true;
40812 elt = 0;
40813 }
40814 else
40815 {
40816 /* For SSE1, we have to reuse the V4SF code. */
40817 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
40818 gen_lowpart (V4SFmode, vec), elt);
40819 return;
40820 }
40821 break;
40822
40823 case V8HImode:
40824 use_vec_extr = TARGET_SSE2;
40825 break;
40826 case V4HImode:
40827 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
40828 break;
40829
40830 case V16QImode:
40831 use_vec_extr = TARGET_SSE4_1;
40832 break;
40833
40834 case V8SFmode:
40835 if (TARGET_AVX)
40836 {
40837 tmp = gen_reg_rtx (V4SFmode);
40838 if (elt < 4)
40839 emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
40840 else
40841 emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
40842 ix86_expand_vector_extract (false, target, tmp, elt & 3);
40843 return;
40844 }
40845 break;
40846
40847 case V4DFmode:
40848 if (TARGET_AVX)
40849 {
40850 tmp = gen_reg_rtx (V2DFmode);
40851 if (elt < 2)
40852 emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
40853 else
40854 emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
40855 ix86_expand_vector_extract (false, target, tmp, elt & 1);
40856 return;
40857 }
40858 break;
40859
40860 case V32QImode:
40861 if (TARGET_AVX)
40862 {
40863 tmp = gen_reg_rtx (V16QImode);
40864 if (elt < 16)
40865 emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
40866 else
40867 emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
40868 ix86_expand_vector_extract (false, target, tmp, elt & 15);
40869 return;
40870 }
40871 break;
40872
40873 case V16HImode:
40874 if (TARGET_AVX)
40875 {
40876 tmp = gen_reg_rtx (V8HImode);
40877 if (elt < 8)
40878 emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
40879 else
40880 emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
40881 ix86_expand_vector_extract (false, target, tmp, elt & 7);
40882 return;
40883 }
40884 break;
40885
40886 case V8SImode:
40887 if (TARGET_AVX)
40888 {
40889 tmp = gen_reg_rtx (V4SImode);
40890 if (elt < 4)
40891 emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
40892 else
40893 emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
40894 ix86_expand_vector_extract (false, target, tmp, elt & 3);
40895 return;
40896 }
40897 break;
40898
40899 case V4DImode:
40900 if (TARGET_AVX)
40901 {
40902 tmp = gen_reg_rtx (V2DImode);
40903 if (elt < 2)
40904 emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
40905 else
40906 emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
40907 ix86_expand_vector_extract (false, target, tmp, elt & 1);
40908 return;
40909 }
40910 break;
40911
40912 case V16SFmode:
40913 tmp = gen_reg_rtx (V8SFmode);
40914 if (elt < 8)
40915 emit_insn (gen_vec_extract_lo_v16sf (tmp, vec));
40916 else
40917 emit_insn (gen_vec_extract_hi_v16sf (tmp, vec));
40918 ix86_expand_vector_extract (false, target, tmp, elt & 7);
40919 return;
40920
40921 case V8DFmode:
40922 tmp = gen_reg_rtx (V4DFmode);
40923 if (elt < 4)
40924 emit_insn (gen_vec_extract_lo_v8df (tmp, vec));
40925 else
40926 emit_insn (gen_vec_extract_hi_v8df (tmp, vec));
40927 ix86_expand_vector_extract (false, target, tmp, elt & 3);
40928 return;
40929
40930 case V16SImode:
40931 tmp = gen_reg_rtx (V8SImode);
40932 if (elt < 8)
40933 emit_insn (gen_vec_extract_lo_v16si (tmp, vec));
40934 else
40935 emit_insn (gen_vec_extract_hi_v16si (tmp, vec));
40936 ix86_expand_vector_extract (false, target, tmp, elt & 7);
40937 return;
40938
40939 case V8DImode:
40940 tmp = gen_reg_rtx (V4DImode);
40941 if (elt < 4)
40942 emit_insn (gen_vec_extract_lo_v8di (tmp, vec));
40943 else
40944 emit_insn (gen_vec_extract_hi_v8di (tmp, vec));
40945 ix86_expand_vector_extract (false, target, tmp, elt & 3);
40946 return;
40947
40948 case V8QImode:
40949 /* ??? Could extract the appropriate HImode element and shift. */
40950 default:
40951 break;
40952 }
40953
40954 if (use_vec_extr)
40955 {
40956 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
40957 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
40958
40959 /* Let the rtl optimizers know about the zero extension performed. */
40960 if (inner_mode == QImode || inner_mode == HImode)
40961 {
40962 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
40963 target = gen_lowpart (SImode, target);
40964 }
40965
40966 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40967 }
40968 else
40969 {
40970 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
40971
40972 emit_move_insn (mem, vec);
40973
40974 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
40975 emit_move_insn (target, tmp);
40976 }
40977 }
40978
40979 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
40980 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
40981 The upper bits of DEST are undefined, though they shouldn't cause
40982 exceptions (some bits from src or all zeros are ok). */
40983
40984 static void
40985 emit_reduc_half (rtx dest, rtx src, int i)
40986 {
40987 rtx tem, d = dest;
40988 switch (GET_MODE (src))
40989 {
40990 case V4SFmode:
40991 if (i == 128)
40992 tem = gen_sse_movhlps (dest, src, src);
40993 else
40994 tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx,
40995 GEN_INT (1 + 4), GEN_INT (1 + 4));
40996 break;
40997 case V2DFmode:
40998 tem = gen_vec_interleave_highv2df (dest, src, src);
40999 break;
41000 case V16QImode:
41001 case V8HImode:
41002 case V4SImode:
41003 case V2DImode:
41004 d = gen_reg_rtx (V1TImode);
41005 tem = gen_sse2_lshrv1ti3 (d, gen_lowpart (V1TImode, src),
41006 GEN_INT (i / 2));
41007 break;
41008 case V8SFmode:
41009 if (i == 256)
41010 tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx);
41011 else
41012 tem = gen_avx_shufps256 (dest, src, src,
41013 GEN_INT (i == 128 ? 2 + (3 << 2) : 1));
41014 break;
41015 case V4DFmode:
41016 if (i == 256)
41017 tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx);
41018 else
41019 tem = gen_avx_shufpd256 (dest, src, src, const1_rtx);
41020 break;
41021 case V32QImode:
41022 case V16HImode:
41023 case V8SImode:
41024 case V4DImode:
41025 if (i == 256)
41026 {
41027 if (GET_MODE (dest) != V4DImode)
41028 d = gen_reg_rtx (V4DImode);
41029 tem = gen_avx2_permv2ti (d, gen_lowpart (V4DImode, src),
41030 gen_lowpart (V4DImode, src),
41031 const1_rtx);
41032 }
41033 else
41034 {
41035 d = gen_reg_rtx (V2TImode);
41036 tem = gen_avx2_lshrv2ti3 (d, gen_lowpart (V2TImode, src),
41037 GEN_INT (i / 2));
41038 }
41039 break;
41040 case V16SImode:
41041 case V16SFmode:
41042 case V8DImode:
41043 case V8DFmode:
41044 if (i > 128)
41045 tem = gen_avx512f_shuf_i32x4_1 (gen_lowpart (V16SImode, dest),
41046 gen_lowpart (V16SImode, src),
41047 gen_lowpart (V16SImode, src),
41048 GEN_INT (0x4 + (i == 512 ? 4 : 0)),
41049 GEN_INT (0x5 + (i == 512 ? 4 : 0)),
41050 GEN_INT (0x6 + (i == 512 ? 4 : 0)),
41051 GEN_INT (0x7 + (i == 512 ? 4 : 0)),
41052 GEN_INT (0xC), GEN_INT (0xD),
41053 GEN_INT (0xE), GEN_INT (0xF),
41054 GEN_INT (0x10), GEN_INT (0x11),
41055 GEN_INT (0x12), GEN_INT (0x13),
41056 GEN_INT (0x14), GEN_INT (0x15),
41057 GEN_INT (0x16), GEN_INT (0x17));
41058 else
41059 tem = gen_avx512f_pshufd_1 (gen_lowpart (V16SImode, dest),
41060 gen_lowpart (V16SImode, src),
41061 GEN_INT (i == 128 ? 0x2 : 0x1),
41062 GEN_INT (0x3),
41063 GEN_INT (0x3),
41064 GEN_INT (0x3),
41065 GEN_INT (i == 128 ? 0x6 : 0x5),
41066 GEN_INT (0x7),
41067 GEN_INT (0x7),
41068 GEN_INT (0x7),
41069 GEN_INT (i == 128 ? 0xA : 0x9),
41070 GEN_INT (0xB),
41071 GEN_INT (0xB),
41072 GEN_INT (0xB),
41073 GEN_INT (i == 128 ? 0xE : 0xD),
41074 GEN_INT (0xF),
41075 GEN_INT (0xF),
41076 GEN_INT (0xF));
41077 break;
41078 default:
41079 gcc_unreachable ();
41080 }
41081 emit_insn (tem);
41082 if (d != dest)
41083 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
41084 }
41085
41086 /* Expand a vector reduction. FN is the binary pattern to reduce;
41087 DEST is the destination; IN is the input vector. */
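/* The reduction repeatedly folds the upper half of the vector onto the
 lower half. For V4SImode, for example, the upper 64 bits are shifted
 down and combined with FN, then the upper 32 bits of that result are
 shifted down and combined again; element 0 of the final vector holds
 the reduction of all four input elements. */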
41088
41089 void
41090 ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
41091 {
41092 rtx half, dst, vec = in;
41093 enum machine_mode mode = GET_MODE (in);
41094 int i;
41095
41096 /* SSE4 has a special instruction for V8HImode UMIN reduction. */
41097 if (TARGET_SSE4_1
41098 && mode == V8HImode
41099 && fn == gen_uminv8hi3)
41100 {
41101 emit_insn (gen_sse4_1_phminposuw (dest, in));
41102 return;
41103 }
41104
41105 for (i = GET_MODE_BITSIZE (mode);
41106 i > GET_MODE_BITSIZE (GET_MODE_INNER (mode));
41107 i >>= 1)
41108 {
41109 half = gen_reg_rtx (mode);
41110 emit_reduc_half (half, vec, i);
41111 if (i == GET_MODE_BITSIZE (GET_MODE_INNER (mode)) * 2)
41112 dst = dest;
41113 else
41114 dst = gen_reg_rtx (mode);
41115 emit_insn (fn (dst, half, vec));
41116 vec = dst;
41117 }
41118 }
41119 \f
41120 /* Target hook for scalar_mode_supported_p. */
41121 static bool
41122 ix86_scalar_mode_supported_p (enum machine_mode mode)
41123 {
41124 if (DECIMAL_FLOAT_MODE_P (mode))
41125 return default_decimal_float_supported_p ();
41126 else if (mode == TFmode)
41127 return true;
41128 else
41129 return default_scalar_mode_supported_p (mode);
41130 }
41131
41132 /* Implements target hook vector_mode_supported_p. */
41133 static bool
41134 ix86_vector_mode_supported_p (enum machine_mode mode)
41135 {
41136 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
41137 return true;
41138 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
41139 return true;
41140 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
41141 return true;
41142 if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
41143 return true;
41144 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
41145 return true;
41146 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
41147 return true;
41148 return false;
41149 }
41150
41151 /* Target hook for c_mode_for_suffix. */
41152 static enum machine_mode
41153 ix86_c_mode_for_suffix (char suffix)
41154 {
41155 if (suffix == 'q')
41156 return TFmode;
41157 if (suffix == 'w')
41158 return XFmode;
41159
41160 return VOIDmode;
41161 }
41162
41163 /* Worker function for TARGET_MD_ASM_CLOBBERS.
41164
41165 We do this in the new i386 backend to maintain source compatibility
41166 with the old cc0-based compiler. */
41167
41168 static tree
41169 ix86_md_asm_clobbers (tree, tree, tree clobbers)
41170 {
41171 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
41172 clobbers);
41173 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
41174 clobbers);
41175 return clobbers;
41176 }
41177
41178 /* Implements target vector targetm.asm.encode_section_info. */
41179
41180 static void ATTRIBUTE_UNUSED
41181 ix86_encode_section_info (tree decl, rtx rtl, int first)
41182 {
41183 default_encode_section_info (decl, rtl, first);
41184
41185 if (TREE_CODE (decl) == VAR_DECL
41186 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
41187 && ix86_in_large_data_p (decl))
41188 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
41189 }
41190
41191 /* Worker function for REVERSE_CONDITION. */
41192
41193 enum rtx_code
41194 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
41195 {
41196 return (mode != CCFPmode && mode != CCFPUmode
41197 ? reverse_condition (code)
41198 : reverse_condition_maybe_unordered (code));
41199 }
41200
41201 /* Output code to perform an x87 FP register move, from OPERANDS[1]
41202 to OPERANDS[0]. */
41203
41204 const char *
41205 output_387_reg_move (rtx insn, rtx *operands)
41206 {
41207 if (REG_P (operands[0]))
41208 {
41209 if (REG_P (operands[1])
41210 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
41211 {
41212 if (REGNO (operands[0]) == FIRST_STACK_REG)
41213 return output_387_ffreep (operands, 0);
41214 return "fstp\t%y0";
41215 }
41216 if (STACK_TOP_P (operands[0]))
41217 return "fld%Z1\t%y1";
41218 return "fst\t%y0";
41219 }
41220 else if (MEM_P (operands[0]))
41221 {
41222 gcc_assert (REG_P (operands[1]));
41223 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
41224 return "fstp%Z0\t%y0";
41225 else
41226 {
41227 /* There is no non-popping store to memory for XFmode.
41228 So if we need one, follow the store with a load. */
41229 if (GET_MODE (operands[0]) == XFmode)
41230 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
41231 else
41232 return "fst%Z0\t%y0";
41233 }
41234 }
41235 else
41236 gcc_unreachable ();
41237 }
41238
41239 /* Output code to perform a conditional jump to LABEL if the C2 flag in the
41240 FP status register is set. */
41241
41242 void
41243 ix86_emit_fp_unordered_jump (rtx label)
41244 {
41245 rtx reg = gen_reg_rtx (HImode);
41246 rtx temp;
41247
41248 emit_insn (gen_x86_fnstsw_1 (reg));
41249
41250 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
41251 {
41252 emit_insn (gen_x86_sahf_1 (reg));
41253
41254 temp = gen_rtx_REG (CCmode, FLAGS_REG);
41255 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
41256 }
41257 else
41258 {
41259 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
41260
41261 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
41262 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
41263 }
41264
41265 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
41266 gen_rtx_LABEL_REF (VOIDmode, label),
41267 pc_rtx);
41268 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
41269
41270 emit_jump_insn (temp);
41271 predict_jump (REG_BR_PROB_BASE * 10 / 100);
41272 }
41273
41274 /* Output code to perform a log1p XFmode calculation. */
41275
41276 void ix86_emit_i387_log1p (rtx op0, rtx op1)
41277 {
41278 rtx label1 = gen_label_rtx ();
41279 rtx label2 = gen_label_rtx ();
41280
41281 rtx tmp = gen_reg_rtx (XFmode);
41282 rtx tmp2 = gen_reg_rtx (XFmode);
41283 rtx test;
41284
41285 emit_insn (gen_absxf2 (tmp, op1));
41286 test = gen_rtx_GE (VOIDmode, tmp,
41287 CONST_DOUBLE_FROM_REAL_VALUE (
41288 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
41289 XFmode));
41290 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
41291
41292 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
41293 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
41294 emit_jump (label2);
41295
41296 emit_label (label1);
41297 emit_move_insn (tmp, CONST1_RTX (XFmode));
41298 emit_insn (gen_addxf3 (tmp, op1, tmp));
41299 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
41300 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
41301
41302 emit_label (label2);
41303 }
41304
41305 /* Emit code for round calculation. */
41306 void ix86_emit_i387_round (rtx op0, rtx op1)
41307 {
41308 enum machine_mode inmode = GET_MODE (op1);
41309 enum machine_mode outmode = GET_MODE (op0);
41310 rtx e1, e2, res, tmp, tmp1, half;
41311 rtx scratch = gen_reg_rtx (HImode);
41312 rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
41313 rtx jump_label = gen_label_rtx ();
41314 rtx insn;
41315 rtx (*gen_abs) (rtx, rtx);
41316 rtx (*gen_neg) (rtx, rtx);
41317
41318 switch (inmode)
41319 {
41320 case SFmode:
41321 gen_abs = gen_abssf2;
41322 break;
41323 case DFmode:
41324 gen_abs = gen_absdf2;
41325 break;
41326 case XFmode:
41327 gen_abs = gen_absxf2;
41328 break;
41329 default:
41330 gcc_unreachable ();
41331 }
41332
41333 switch (outmode)
41334 {
41335 case SFmode:
41336 gen_neg = gen_negsf2;
41337 break;
41338 case DFmode:
41339 gen_neg = gen_negdf2;
41340 break;
41341 case XFmode:
41342 gen_neg = gen_negxf2;
41343 break;
41344 case HImode:
41345 gen_neg = gen_neghi2;
41346 break;
41347 case SImode:
41348 gen_neg = gen_negsi2;
41349 break;
41350 case DImode:
41351 gen_neg = gen_negdi2;
41352 break;
41353 default:
41354 gcc_unreachable ();
41355 }
41356
41357 e1 = gen_reg_rtx (inmode);
41358 e2 = gen_reg_rtx (inmode);
41359 res = gen_reg_rtx (outmode);
41360
41361 half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, inmode);
41362
41363 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
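/* E.g. round(2.3) = floor(2.8) = 2, round(-2.3) = -floor(2.8) = -2,
 and round(2.5) = floor(3.0) = 3; halfway cases round away from zero,
 as round() requires. */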
41364
41365 /* scratch = fxam(op1) */
41366 emit_insn (gen_rtx_SET (VOIDmode, scratch,
41367 gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
41368 UNSPEC_FXAM)));
41369 /* e1 = fabs(op1) */
41370 emit_insn (gen_abs (e1, op1));
41371
41372 /* e2 = e1 + 0.5 */
41373 half = force_reg (inmode, half);
41374 emit_insn (gen_rtx_SET (VOIDmode, e2,
41375 gen_rtx_PLUS (inmode, e1, half)));
41376
41377 /* res = floor(e2) */
41378 if (inmode != XFmode)
41379 {
41380 tmp1 = gen_reg_rtx (XFmode);
41381
41382 emit_insn (gen_rtx_SET (VOIDmode, tmp1,
41383 gen_rtx_FLOAT_EXTEND (XFmode, e2)));
41384 }
41385 else
41386 tmp1 = e2;
41387
41388 switch (outmode)
41389 {
41390 case SFmode:
41391 case DFmode:
41392 {
41393 rtx tmp0 = gen_reg_rtx (XFmode);
41394
41395 emit_insn (gen_frndintxf2_floor (tmp0, tmp1));
41396
41397 emit_insn (gen_rtx_SET (VOIDmode, res,
41398 gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
41399 UNSPEC_TRUNC_NOOP)));
41400 }
41401 break;
41402 case XFmode:
41403 emit_insn (gen_frndintxf2_floor (res, tmp1));
41404 break;
41405 case HImode:
41406 emit_insn (gen_lfloorxfhi2 (res, tmp1));
41407 break;
41408 case SImode:
41409 emit_insn (gen_lfloorxfsi2 (res, tmp1));
41410 break;
41411 case DImode:
41412 emit_insn (gen_lfloorxfdi2 (res, tmp1));
41413 break;
41414 default:
41415 gcc_unreachable ();
41416 }
41417
41418 /* flags = signbit(a) */
41419 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02)));
41420
41421 /* if (flags) then res = -res */
41422 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
41423 gen_rtx_EQ (VOIDmode, flags, const0_rtx),
41424 gen_rtx_LABEL_REF (VOIDmode, jump_label),
41425 pc_rtx);
41426 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
41427 predict_jump (REG_BR_PROB_BASE * 50 / 100);
41428 JUMP_LABEL (insn) = jump_label;
41429
41430 emit_insn (gen_neg (res, res));
41431
41432 emit_label (jump_label);
41433 LABEL_NUSES (jump_label) = 1;
41434
41435 emit_move_insn (op0, res);
41436 }
41437
41438 /* Output code to perform a Newton-Raphson approximation of a single precision
41439 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
41440
41441 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
41442 {
41443 rtx x0, x1, e0, e1;
41444
41445 x0 = gen_reg_rtx (mode);
41446 e0 = gen_reg_rtx (mode);
41447 e1 = gen_reg_rtx (mode);
41448 x1 = gen_reg_rtx (mode);
41449
41450 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
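/* This is one Newton-Raphson step for f(x) = 1/x - b: starting from the
 estimate x0 = rcp(b), the refined value is
 x1 = x0 * (2 - b * x0) = (x0 + x0) - b * x0 * x0,
 which roughly doubles the number of correct bits in the estimate. */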
41451
41452 b = force_reg (mode, b);
41453
41454 /* x0 = rcp(b) estimate */
41455 if (mode == V16SFmode || mode == V8DFmode)
41456 emit_insn (gen_rtx_SET (VOIDmode, x0,
41457 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
41458 UNSPEC_RCP14)));
41459 else
41460 emit_insn (gen_rtx_SET (VOIDmode, x0,
41461 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
41462 UNSPEC_RCP)));
41463
41464 /* e0 = x0 * b */
41465 emit_insn (gen_rtx_SET (VOIDmode, e0,
41466 gen_rtx_MULT (mode, x0, b)));
41467
41468 /* e0 = x0 * e0 */
41469 emit_insn (gen_rtx_SET (VOIDmode, e0,
41470 gen_rtx_MULT (mode, x0, e0)));
41471
41472 /* e1 = x0 + x0 */
41473 emit_insn (gen_rtx_SET (VOIDmode, e1,
41474 gen_rtx_PLUS (mode, x0, x0)));
41475
41476 /* x1 = e1 - e0 */
41477 emit_insn (gen_rtx_SET (VOIDmode, x1,
41478 gen_rtx_MINUS (mode, e1, e0)));
41479
41480 /* res = a * x1 */
41481 emit_insn (gen_rtx_SET (VOIDmode, res,
41482 gen_rtx_MULT (mode, a, x1)));
41483 }
41484
41485 /* Output code to perform a Newton-Raphson approximation of a
41486 single precision floating point [reciprocal] square root. */
41487
41488 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
41489 bool recip)
41490 {
41491 rtx x0, e0, e1, e2, e3, mthree, mhalf;
41492 REAL_VALUE_TYPE r;
41493 int unspec;
41494
41495 x0 = gen_reg_rtx (mode);
41496 e0 = gen_reg_rtx (mode);
41497 e1 = gen_reg_rtx (mode);
41498 e2 = gen_reg_rtx (mode);
41499 e3 = gen_reg_rtx (mode);
41500
41501 real_from_integer (&r, VOIDmode, -3, SIGNED);
41502 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
41503
41504 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
41505 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
41506 unspec = UNSPEC_RSQRT;
41507
41508 if (VECTOR_MODE_P (mode))
41509 {
41510 mthree = ix86_build_const_vector (mode, true, mthree);
41511 mhalf = ix86_build_const_vector (mode, true, mhalf);
41512 /* There is no 512-bit rsqrt. There is however rsqrt14. */
41513 if (GET_MODE_SIZE (mode) == 64)
41514 unspec = UNSPEC_RSQRT14;
41515 }
41516
41517 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
41518 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
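/* Both expressions are one Newton-Raphson step for f(x) = 1/(x*x) - a:
 x1 = x0 * (3 - a * x0 * x0) / 2 = -0.5 * x0 * (a * x0 * x0 - 3);
 the sqrt form additionally uses sqrt(a) = a * rsqrt(a). */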
41519
41520 a = force_reg (mode, a);
41521
41522 /* x0 = rsqrt(a) estimate */
41523 emit_insn (gen_rtx_SET (VOIDmode, x0,
41524 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
41525 unspec)));
41526
41527 /* If a == 0.0, mask out the infinite rsqrt estimate to avoid producing a NaN for sqrt(0.0). */
41528 if (!recip)
41529 {
41530 rtx zero, mask;
41531
41532 zero = gen_reg_rtx (mode);
41533 mask = gen_reg_rtx (mode);
41534
41535 zero = force_reg (mode, CONST0_RTX(mode));
41536
41537 /* Handle masked compare. */
41538 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
41539 {
41540 mask = gen_reg_rtx (HImode);
41541 /* Imm value 0x4 corresponds to not-equal comparison. */
41542 emit_insn (gen_avx512f_cmpv16sf3 (mask, zero, a, GEN_INT (0x4)));
41543 emit_insn (gen_avx512f_blendmv16sf (x0, zero, x0, mask));
41544 }
41545 else
41546 {
41547 emit_insn (gen_rtx_SET (VOIDmode, mask,
41548 gen_rtx_NE (mode, zero, a)));
41549
41550 emit_insn (gen_rtx_SET (VOIDmode, x0,
41551 gen_rtx_AND (mode, x0, mask)));
41552 }
41553 }
41554
41555 /* e0 = x0 * a */
41556 emit_insn (gen_rtx_SET (VOIDmode, e0,
41557 gen_rtx_MULT (mode, x0, a)));
41558 /* e1 = e0 * x0 */
41559 emit_insn (gen_rtx_SET (VOIDmode, e1,
41560 gen_rtx_MULT (mode, e0, x0)));
41561
41562 /* e2 = e1 - 3. */
41563 mthree = force_reg (mode, mthree);
41564 emit_insn (gen_rtx_SET (VOIDmode, e2,
41565 gen_rtx_PLUS (mode, e1, mthree)));
41566
41567 mhalf = force_reg (mode, mhalf);
41568 if (recip)
41569 /* e3 = -.5 * x0 */
41570 emit_insn (gen_rtx_SET (VOIDmode, e3,
41571 gen_rtx_MULT (mode, x0, mhalf)));
41572 else
41573 /* e3 = -.5 * e0 */
41574 emit_insn (gen_rtx_SET (VOIDmode, e3,
41575 gen_rtx_MULT (mode, e0, mhalf)));
41576 /* ret = e2 * e3 */
41577 emit_insn (gen_rtx_SET (VOIDmode, res,
41578 gen_rtx_MULT (mode, e2, e3)));
41579 }
41580
41581 #ifdef TARGET_SOLARIS
41582 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
41583
41584 static void
41585 i386_solaris_elf_named_section (const char *name, unsigned int flags,
41586 tree decl)
41587 {
41588 /* With Binutils 2.15, the "@unwind" marker must be specified on
41589 every occurrence of the ".eh_frame" section, not just the first
41590 one. */
41591 if (TARGET_64BIT
41592 && strcmp (name, ".eh_frame") == 0)
41593 {
41594 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
41595 flags & SECTION_WRITE ? "aw" : "a");
41596 return;
41597 }
41598
41599 #ifndef USE_GAS
41600 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
41601 {
41602 solaris_elf_asm_comdat_section (name, flags, decl);
41603 return;
41604 }
41605 #endif
41606
41607 default_elf_asm_named_section (name, flags, decl);
41608 }
41609 #endif /* TARGET_SOLARIS */
41610
41611 /* Return the mangling of TYPE if it is an extended fundamental type. */
41612
41613 static const char *
41614 ix86_mangle_type (const_tree type)
41615 {
41616 type = TYPE_MAIN_VARIANT (type);
41617
41618 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
41619 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
41620 return NULL;
41621
41622 switch (TYPE_MODE (type))
41623 {
41624 case TFmode:
41625 /* __float128 is "g". */
41626 return "g";
41627 case XFmode:
41628 /* "long double" or __float80 is "e". */
41629 return "e";
41630 default:
41631 return NULL;
41632 }
41633 }
41634
41635 /* For 32-bit code we can save PIC register setup by using
41636 the __stack_chk_fail_local hidden function instead of calling
41637 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
41638 register, so it is better to call __stack_chk_fail directly. */
41639
41640 static tree ATTRIBUTE_UNUSED
41641 ix86_stack_protect_fail (void)
41642 {
41643 return TARGET_64BIT
41644 ? default_external_stack_protect_fail ()
41645 : default_hidden_stack_protect_fail ();
41646 }
41647
41648 /* Select a format to encode pointers in exception handling data. CODE
41649 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
41650 true if the symbol may be affected by dynamic relocations.
41651
41652 ??? All x86 object file formats are capable of representing this.
41653 After all, the relocation needed is the same as for the call insn.
41654 Whether or not a particular assembler allows us to enter such, I
41655 guess we'll have to see. */
41656 int
41657 asm_preferred_eh_data_format (int code, int global)
41658 {
41659 if (flag_pic)
41660 {
41661 int type = DW_EH_PE_sdata8;
41662 if (!TARGET_64BIT
41663 || ix86_cmodel == CM_SMALL_PIC
41664 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
41665 type = DW_EH_PE_sdata4;
41666 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
41667 }
41668 if (ix86_cmodel == CM_SMALL
41669 || (ix86_cmodel == CM_MEDIUM && code))
41670 return DW_EH_PE_udata4;
41671 return DW_EH_PE_absptr;
41672 }
41673 \f
41674 /* Copy the sign of SIGN onto the positive value ABS_VALUE, storing the
41675 result in RESULT. If MASK is non-null, it is a mask that clears
41676 the sign bit. */
41677 static void
41678 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
41679 {
41680 enum machine_mode mode = GET_MODE (sign);
41681 rtx sgn = gen_reg_rtx (mode);
41682 if (mask == NULL_RTX)
41683 {
41684 enum machine_mode vmode;
41685
41686 if (mode == SFmode)
41687 vmode = V4SFmode;
41688 else if (mode == DFmode)
41689 vmode = V2DFmode;
41690 else
41691 vmode = mode;
41692
41693 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
41694 if (!VECTOR_MODE_P (mode))
41695 {
41696 /* We need to generate a scalar mode mask in this case. */
41697 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
41698 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
41699 mask = gen_reg_rtx (mode);
41700 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
41701 }
41702 }
41703 else
41704 mask = gen_rtx_NOT (mode, mask);
41705 emit_insn (gen_rtx_SET (VOIDmode, sgn,
41706 gen_rtx_AND (mode, mask, sign)));
41707 emit_insn (gen_rtx_SET (VOIDmode, result,
41708 gen_rtx_IOR (mode, abs_value, sgn)));
41709 }
41710
41711 /* Expand fabs (OP0) and return a new rtx that holds the result. The
41712 mask for masking out the sign-bit is stored in *SMASK, if that is
41713 non-null. */
41714 static rtx
41715 ix86_expand_sse_fabs (rtx op0, rtx *smask)
41716 {
41717 enum machine_mode vmode, mode = GET_MODE (op0);
41718 rtx xa, mask;
41719
41720 xa = gen_reg_rtx (mode);
41721 if (mode == SFmode)
41722 vmode = V4SFmode;
41723 else if (mode == DFmode)
41724 vmode = V2DFmode;
41725 else
41726 vmode = mode;
41727 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
41728 if (!VECTOR_MODE_P (mode))
41729 {
41730 /* We need to generate a scalar mode mask in this case. */
41731 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
41732 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
41733 mask = gen_reg_rtx (mode);
41734 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
41735 }
41736 emit_insn (gen_rtx_SET (VOIDmode, xa,
41737 gen_rtx_AND (mode, op0, mask)));
41738
41739 if (smask)
41740 *smask = mask;
41741
41742 return xa;
41743 }
41744
41745 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
41746 swapping the operands if SWAP_OPERANDS is true. The expanded
41747 code is a forward jump to a newly created label in case the
41748 comparison is true. The generated label rtx is returned. */
41749 static rtx
41750 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
41751 bool swap_operands)
41752 {
41753 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
41754 rtx label, tmp;
41755
41756 if (swap_operands)
41757 {
41758 tmp = op0;
41759 op0 = op1;
41760 op1 = tmp;
41761 }
41762
41763 label = gen_label_rtx ();
41764 tmp = gen_rtx_REG (fpcmp_mode, FLAGS_REG);
41765 emit_insn (gen_rtx_SET (VOIDmode, tmp,
41766 gen_rtx_COMPARE (fpcmp_mode, op0, op1)));
41767 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
41768 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
41769 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
41770 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
41771 JUMP_LABEL (tmp) = label;
41772
41773 return label;
41774 }
41775
41776 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
41777 using comparison code CODE. Operands are swapped for the comparison if
41778 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
41779 static rtx
41780 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
41781 bool swap_operands)
41782 {
41783 rtx (*insn)(rtx, rtx, rtx, rtx);
41784 enum machine_mode mode = GET_MODE (op0);
41785 rtx mask = gen_reg_rtx (mode);
41786
41787 if (swap_operands)
41788 {
41789 rtx tmp = op0;
41790 op0 = op1;
41791 op1 = tmp;
41792 }
41793
41794 insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
41795
41796 emit_insn (insn (mask, op0, op1,
41797 gen_rtx_fmt_ee (code, mode, op0, op1)));
41798 return mask;
41799 }
41800
41801 /* Generate and return a rtx of mode MODE for 2**n where n is the number
41802 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
41803 static rtx
41804 ix86_gen_TWO52 (enum machine_mode mode)
41805 {
41806 REAL_VALUE_TYPE TWO52r;
41807 rtx TWO52;
41808
41809 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
41810 TWO52 = const_double_from_real_value (TWO52r, mode);
41811 TWO52 = force_reg (mode, TWO52);
41812
41813 return TWO52;
41814 }
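
/* Why this constant works (a sketch of the standard trick, for reference):
   for 0 <= x < 2**52, the sum x + 2**52 lies in [2**52, 2**53), where the
   unit in the last place of DFmode is 1.0, so

     t = x + 0x1p52;	rounds x to an integer in the current rounding mode
     r = t - 0x1p52;	recovers that integer exactly

   provided the arithmetic is done in the target precision, as the SSE insns
   used by the expanders below do.  SFmode uses 2**23, matching its 23-bit
   mantissa.  */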
41815
41816 /* Expand SSE sequence for computing lround from OP1 storing
41817 into OP0. */
41818 void
41819 ix86_expand_lround (rtx op0, rtx op1)
41820 {
41821 /* C code for the stuff we're doing below:
41822 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
41823 return (long)tmp;
41824 */
41825 enum machine_mode mode = GET_MODE (op1);
41826 const struct real_format *fmt;
41827 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
41828 rtx adj;
41829
41830 /* load nextafter (0.5, 0.0) */
41831 fmt = REAL_MODE_FORMAT (mode);
41832 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
41833 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
41834
41835 /* adj = copysign (0.5, op1) */
41836 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
41837 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
41838
41839 /* adj = op1 + adj */
41840 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
41841
41842 /* op0 = (imode)adj */
41843 expand_fix (op0, adj, 0);
41844 }
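
/* Why nextafter (0.5, 0.0) rather than 0.5 itself (a worked example added
   for clarity): with x = nextafter (0.5, 0.0) = 0.5 - 2**-54, the sum
   x + 0.5 = 1.0 - 2**-54 is not representable and rounds (to even) up to
   1.0, so lround would wrongly return 1; with the predecessor of 0.5 the
   sum is exactly 1.0 - 2**-53 and truncates to 0 as required.  */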
41845
41846 /* Expand SSE2 sequence for computing lfloor or lceil (depending on
41847    DO_FLOOR) from OP1 storing into OP0.  */
41848 void
41849 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
41850 {
41851 /* C code for the stuff we're doing below (for do_floor):
41852 xi = (long)op1;
41853 xi -= (double)xi > op1 ? 1 : 0;
41854 return xi;
41855 */
41856 enum machine_mode fmode = GET_MODE (op1);
41857 enum machine_mode imode = GET_MODE (op0);
41858 rtx ireg, freg, label, tmp;
41859
41860 /* reg = (long)op1 */
41861 ireg = gen_reg_rtx (imode);
41862 expand_fix (ireg, op1, 0);
41863
41864 /* freg = (double)reg */
41865 freg = gen_reg_rtx (fmode);
41866 expand_float (freg, ireg, 0);
41867
41868 /* ireg = (freg > op1) ? ireg - 1 : ireg */
41869 label = ix86_expand_sse_compare_and_jump (UNLE,
41870 freg, op1, !do_floor);
41871 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
41872 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
41873 emit_move_insn (ireg, tmp);
41874
41875 emit_label (label);
41876 LABEL_NUSES (label) = 1;
41877
41878 emit_move_insn (op0, ireg);
41879 }
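
/* A concrete trace of the compensation step (illustrative): for
   lfloor (-1.5), xi = (long) -1.5 = -1 by truncation, and (double) xi = -1.0
   is greater than -1.5, so the UNLE jump is not taken and xi is decremented
   to -2, the correct floor.  For lceil the comparison operands are swapped
   and the adjustment is an increment instead.  */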
41880
41881 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
41882 result in OPERAND0. */
41883 void
41884 ix86_expand_rint (rtx operand0, rtx operand1)
41885 {
41886 /* C code for the stuff we're doing below:
41887 xa = fabs (operand1);
41888 if (!isless (xa, 2**52))
41889 return operand1;
41890 xa = xa + 2**52 - 2**52;
41891 return copysign (xa, operand1);
41892 */
41893 enum machine_mode mode = GET_MODE (operand0);
41894 rtx res, xa, label, TWO52, mask;
41895
41896 res = gen_reg_rtx (mode);
41897 emit_move_insn (res, operand1);
41898
41899 /* xa = abs (operand1) */
41900 xa = ix86_expand_sse_fabs (res, &mask);
41901
41902 /* if (!isless (xa, TWO52)) goto label; */
41903 TWO52 = ix86_gen_TWO52 (mode);
41904 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
41905
41906 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
41907 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
41908
41909 ix86_sse_copysign_to_positive (res, xa, res, mask);
41910
41911 emit_label (label);
41912 LABEL_NUSES (label) = 1;
41913
41914 emit_move_insn (operand0, res);
41915 }
41916
41917 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing into
41918    OPERAND0, without relying on DImode truncation via cvttsd2siq (64-bit only).  */
41919 void
41920 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
41921 {
41922 /* C code for the stuff we expand below.
41923 double xa = fabs (x), x2;
41924 if (!isless (xa, TWO52))
41925 return x;
41926 xa = xa + TWO52 - TWO52;
41927 x2 = copysign (xa, x);
41928 Compensate. Floor:
41929 if (x2 > x)
41930 x2 -= 1;
41931 Compensate. Ceil:
41932 if (x2 < x)
41933 x2 -= -1;
41934 return x2;
41935 */
41936 enum machine_mode mode = GET_MODE (operand0);
41937 rtx xa, TWO52, tmp, label, one, res, mask;
41938
41939 TWO52 = ix86_gen_TWO52 (mode);
41940
41941 /* Temporary for holding the result, initialized to the input
41942 operand to ease control flow. */
41943 res = gen_reg_rtx (mode);
41944 emit_move_insn (res, operand1);
41945
41946 /* xa = abs (operand1) */
41947 xa = ix86_expand_sse_fabs (res, &mask);
41948
41949 /* if (!isless (xa, TWO52)) goto label; */
41950 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
41951
41952 /* xa = xa + TWO52 - TWO52; */
41953 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
41954 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
41955
41956 /* xa = copysign (xa, operand1) */
41957 ix86_sse_copysign_to_positive (xa, xa, res, mask);
41958
41959 /* generate 1.0 or -1.0 */
41960 one = force_reg (mode,
41961 const_double_from_real_value (do_floor
41962 ? dconst1 : dconstm1, mode));
41963
41964 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
41965 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
41966 emit_insn (gen_rtx_SET (VOIDmode, tmp,
41967 gen_rtx_AND (mode, one, tmp)));
41968 /* We always need to subtract here to preserve signed zero. */
41969 tmp = expand_simple_binop (mode, MINUS,
41970 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
41971 emit_move_insn (res, tmp);
41972
41973 emit_label (label);
41974 LABEL_NUSES (label) = 1;
41975
41976 emit_move_insn (operand0, res);
41977 }
41978
41979 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
41980 into OPERAND0. */
41981 void
41982 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
41983 {
41984 /* C code for the stuff we expand below.
41985 double xa = fabs (x), x2;
41986 if (!isless (xa, TWO52))
41987 return x;
41988 x2 = (double)(long)x;
41989 Compensate. Floor:
41990 if (x2 > x)
41991 x2 -= 1;
41992 Compensate. Ceil:
41993 if (x2 < x)
41994 x2 += 1;
41995 if (HONOR_SIGNED_ZEROS (mode))
41996 return copysign (x2, x);
41997 return x2;
41998 */
41999 enum machine_mode mode = GET_MODE (operand0);
42000 rtx xa, xi, TWO52, tmp, label, one, res, mask;
42001
42002 TWO52 = ix86_gen_TWO52 (mode);
42003
42004 /* Temporary for holding the result, initialized to the input
42005 operand to ease control flow. */
42006 res = gen_reg_rtx (mode);
42007 emit_move_insn (res, operand1);
42008
42009 /* xa = abs (operand1) */
42010 xa = ix86_expand_sse_fabs (res, &mask);
42011
42012 /* if (!isless (xa, TWO52)) goto label; */
42013 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42014
42015 /* xa = (double)(long)x */
42016 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
42017 expand_fix (xi, res, 0);
42018 expand_float (xa, xi, 0);
42019
42020 /* generate 1.0 */
42021 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
42022
42023 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
42024 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
42025 emit_insn (gen_rtx_SET (VOIDmode, tmp,
42026 gen_rtx_AND (mode, one, tmp)));
42027 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
42028 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
42029 emit_move_insn (res, tmp);
42030
42031 if (HONOR_SIGNED_ZEROS (mode))
42032 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
42033
42034 emit_label (label);
42035 LABEL_NUSES (label) = 1;
42036
42037 emit_move_insn (operand0, res);
42038 }
42039
42040 /* Expand SSE sequence for computing round from OPERAND1 storing
42041    into OPERAND0.  This sequence works without relying on DImode truncation
42042    via cvttsd2siq, which is only available on 64-bit targets.  */
42043 void
42044 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
42045 {
42046 /* C code for the stuff we expand below.
42047 double xa = fabs (x), xa2, x2;
42048 if (!isless (xa, TWO52))
42049 return x;
42050 Using the absolute value and copying back sign makes
42051 -0.0 -> -0.0 correct.
42052 xa2 = xa + TWO52 - TWO52;
42053 Compensate.
42054 dxa = xa2 - xa;
42055 if (dxa <= -0.5)
42056 xa2 += 1;
42057 else if (dxa > 0.5)
42058 xa2 -= 1;
42059 x2 = copysign (xa2, x);
42060 return x2;
42061 */
42062 enum machine_mode mode = GET_MODE (operand0);
42063 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
42064
42065 TWO52 = ix86_gen_TWO52 (mode);
42066
42067 /* Temporary for holding the result, initialized to the input
42068 operand to ease control flow. */
42069 res = gen_reg_rtx (mode);
42070 emit_move_insn (res, operand1);
42071
42072 /* xa = abs (operand1) */
42073 xa = ix86_expand_sse_fabs (res, &mask);
42074
42075 /* if (!isless (xa, TWO52)) goto label; */
42076 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42077
42078 /* xa2 = xa + TWO52 - TWO52; */
42079 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
42080 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
42081
42082 /* dxa = xa2 - xa; */
42083 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
42084
42085 /* generate 0.5, 1.0 and -0.5 */
42086 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
42087 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
42088 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
42089 0, OPTAB_DIRECT);
42090
42091 /* Compensate. */
42092 tmp = gen_reg_rtx (mode);
42093 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
42094 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
42095 emit_insn (gen_rtx_SET (VOIDmode, tmp,
42096 gen_rtx_AND (mode, one, tmp)));
42097 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
42098 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
42099 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
42100 emit_insn (gen_rtx_SET (VOIDmode, tmp,
42101 gen_rtx_AND (mode, one, tmp)));
42102 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
42103
42104 /* res = copysign (xa2, operand1) */
42105 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
42106
42107 emit_label (label);
42108 LABEL_NUSES (label) = 1;
42109
42110 emit_move_insn (operand0, res);
42111 }
42112
42113 /* Expand SSE sequence for computing trunc from OPERAND1 storing
42114 into OPERAND0. */
42115 void
42116 ix86_expand_trunc (rtx operand0, rtx operand1)
42117 {
42118 /* C code for SSE variant we expand below.
42119 double xa = fabs (x), x2;
42120 if (!isless (xa, TWO52))
42121 return x;
42122 x2 = (double)(long)x;
42123 if (HONOR_SIGNED_ZEROS (mode))
42124 return copysign (x2, x);
42125 return x2;
42126 */
42127 enum machine_mode mode = GET_MODE (operand0);
42128 rtx xa, xi, TWO52, label, res, mask;
42129
42130 TWO52 = ix86_gen_TWO52 (mode);
42131
42132 /* Temporary for holding the result, initialized to the input
42133 operand to ease control flow. */
42134 res = gen_reg_rtx (mode);
42135 emit_move_insn (res, operand1);
42136
42137 /* xa = abs (operand1) */
42138 xa = ix86_expand_sse_fabs (res, &mask);
42139
42140 /* if (!isless (xa, TWO52)) goto label; */
42141 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42142
42143 /* x = (double)(long)x */
42144 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
42145 expand_fix (xi, res, 0);
42146 expand_float (res, xi, 0);
42147
42148 if (HONOR_SIGNED_ZEROS (mode))
42149 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
42150
42151 emit_label (label);
42152 LABEL_NUSES (label) = 1;
42153
42154 emit_move_insn (operand0, res);
42155 }
42156
42157 /* Expand SSE sequence for computing trunc from OPERAND1 storing into
42158    OPERAND0, without relying on DImode truncation via cvttsd2siq (64-bit only).  */
42159 void
42160 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
42161 {
42162 enum machine_mode mode = GET_MODE (operand0);
42163 rtx xa, mask, TWO52, label, one, res, smask, tmp;
42164
42165 /* C code for SSE variant we expand below.
42166 	double xa = fabs (x), xa2, x2;
42167 if (!isless (xa, TWO52))
42168 return x;
42169 xa2 = xa + TWO52 - TWO52;
42170 Compensate:
42171 if (xa2 > xa)
42172 xa2 -= 1.0;
42173 x2 = copysign (xa2, x);
42174 return x2;
42175 */
42176
42177 TWO52 = ix86_gen_TWO52 (mode);
42178
42179 /* Temporary for holding the result, initialized to the input
42180 operand to ease control flow. */
42181 res = gen_reg_rtx (mode);
42182 emit_move_insn (res, operand1);
42183
42184 /* xa = abs (operand1) */
42185 xa = ix86_expand_sse_fabs (res, &smask);
42186
42187 /* if (!isless (xa, TWO52)) goto label; */
42188 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42189
42190 /* res = xa + TWO52 - TWO52; */
42191 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
42192 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
42193 emit_move_insn (res, tmp);
42194
42195 /* generate 1.0 */
42196 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
42197
42198 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
42199 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
42200 emit_insn (gen_rtx_SET (VOIDmode, mask,
42201 gen_rtx_AND (mode, mask, one)));
42202 tmp = expand_simple_binop (mode, MINUS,
42203 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
42204 emit_move_insn (res, tmp);
42205
42206 /* res = copysign (res, operand1) */
42207 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
42208
42209 emit_label (label);
42210 LABEL_NUSES (label) = 1;
42211
42212 emit_move_insn (operand0, res);
42213 }
42214
42215 /* Expand SSE sequence for computing round from OPERAND1 storing
42216 into OPERAND0. */
42217 void
42218 ix86_expand_round (rtx operand0, rtx operand1)
42219 {
42220 /* C code for the stuff we're doing below:
42221 double xa = fabs (x);
42222 if (!isless (xa, TWO52))
42223 return x;
42224 xa = (double)(long)(xa + nextafter (0.5, 0.0));
42225 return copysign (xa, x);
42226 */
42227 enum machine_mode mode = GET_MODE (operand0);
42228 rtx res, TWO52, xa, label, xi, half, mask;
42229 const struct real_format *fmt;
42230 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
42231
42232 /* Temporary for holding the result, initialized to the input
42233 operand to ease control flow. */
42234 res = gen_reg_rtx (mode);
42235 emit_move_insn (res, operand1);
42236
42237 TWO52 = ix86_gen_TWO52 (mode);
42238 xa = ix86_expand_sse_fabs (res, &mask);
42239 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42240
42241 /* load nextafter (0.5, 0.0) */
42242 fmt = REAL_MODE_FORMAT (mode);
42243 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
42244 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
42245
42246 /* xa = xa + 0.5 */
42247 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
42248 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
42249
42250 /* xa = (double)(int64_t)xa */
42251 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
42252 expand_fix (xi, xa, 0);
42253 expand_float (xa, xi, 0);
42254
42255 /* res = copysign (xa, operand1) */
42256 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
42257
42258 emit_label (label);
42259 LABEL_NUSES (label) = 1;
42260
42261 emit_move_insn (operand0, res);
42262 }
42263
42264 /* Expand SSE sequence for computing round
42265 from OP1 storing into OP0 using sse4 round insn. */
42266 void
42267 ix86_expand_round_sse4 (rtx op0, rtx op1)
42268 {
42269 enum machine_mode mode = GET_MODE (op0);
42270 rtx e1, e2, res, half;
42271 const struct real_format *fmt;
42272 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
42273 rtx (*gen_copysign) (rtx, rtx, rtx);
42274 rtx (*gen_round) (rtx, rtx, rtx);
42275
42276 switch (mode)
42277 {
42278 case SFmode:
42279 gen_copysign = gen_copysignsf3;
42280 gen_round = gen_sse4_1_roundsf2;
42281 break;
42282 case DFmode:
42283 gen_copysign = gen_copysigndf3;
42284 gen_round = gen_sse4_1_rounddf2;
42285 break;
42286 default:
42287 gcc_unreachable ();
42288 }
42289
42290 /* round (a) = trunc (a + copysign (0.5, a)) */
42291
42292 /* load nextafter (0.5, 0.0) */
42293 fmt = REAL_MODE_FORMAT (mode);
42294 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
42295 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
42296 half = const_double_from_real_value (pred_half, mode);
42297
42298 /* e1 = copysign (0.5, op1) */
42299 e1 = gen_reg_rtx (mode);
42300 emit_insn (gen_copysign (e1, half, op1));
42301
42302 /* e2 = op1 + e1 */
42303 e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);
42304
42305 /* res = trunc (e2) */
42306 res = gen_reg_rtx (mode);
42307 emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));
42308
42309 emit_move_insn (op0, res);
42310 }
42311 \f
42312
42313 /* Table of valid machine attributes. */
42314 static const struct attribute_spec ix86_attribute_table[] =
42315 {
42316 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
42317 affects_type_identity } */
42318 /* Stdcall attribute says callee is responsible for popping arguments
42319 if they are not variable. */
42320 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42321 true },
42322 /* Fastcall attribute says callee is responsible for popping arguments
42323 if they are not variable. */
42324 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42325 true },
42326 /* Thiscall attribute says callee is responsible for popping arguments
42327 if they are not variable. */
42328 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42329 true },
42330 /* Cdecl attribute says the callee is a normal C declaration */
42331 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42332 true },
42333 /* Regparm attribute specifies how many integer arguments are to be
42334 passed in registers. */
42335 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute,
42336 true },
42337 /* Sseregparm attribute says we are using x86_64 calling conventions
42338 for FP arguments. */
42339 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42340 true },
42341 /* The transactional memory builtins are implicitly regparm or fastcall
42342 depending on the ABI. Override the generic do-nothing attribute that
42343 these builtins were declared with. */
42344 { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute,
42345 true },
42346 /* force_align_arg_pointer says this function realigns the stack at entry. */
42347 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
42348 false, true, true, ix86_handle_cconv_attribute, false },
42349 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
42350 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
42351 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
42352 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute,
42353 false },
42354 #endif
42355 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
42356 false },
42357 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
42358 false },
42359 #ifdef SUBTARGET_ATTRIBUTE_TABLE
42360 SUBTARGET_ATTRIBUTE_TABLE,
42361 #endif
42362 /* ms_abi and sysv_abi calling convention function attributes. */
42363 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
42364 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
42365 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
42366 false },
42367 { "callee_pop_aggregate_return", 1, 1, false, true, true,
42368 ix86_handle_callee_pop_aggregate_return, true },
42369 /* End element. */
42370 { NULL, 0, 0, false, false, false, NULL, false }
42371 };
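
/* For reference, the user-level spelling of a few of the attributes above
   (an illustrative sketch, not part of the table itself):

     int  __attribute__ ((regparm (2))) add2 (int a, int b);
     void __attribute__ ((fastcall)) handler (void *p);
     struct __attribute__ ((ms_struct)) pkt { char tag; int len; };

   The calling-convention attributes apply to function types, while
   ms_struct/gcc_struct select the struct layout convention.  */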
42372
42373 /* Implement targetm.vectorize.builtin_vectorization_cost. */
42374 static int
42375 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
42376 tree vectype, int)
42377 {
42378 unsigned elements;
42379
42380 switch (type_of_cost)
42381 {
42382 case scalar_stmt:
42383 return ix86_cost->scalar_stmt_cost;
42384
42385 case scalar_load:
42386 return ix86_cost->scalar_load_cost;
42387
42388 case scalar_store:
42389 return ix86_cost->scalar_store_cost;
42390
42391 case vector_stmt:
42392 return ix86_cost->vec_stmt_cost;
42393
42394 case vector_load:
42395 return ix86_cost->vec_align_load_cost;
42396
42397 case vector_store:
42398 return ix86_cost->vec_store_cost;
42399
42400 case vec_to_scalar:
42401 return ix86_cost->vec_to_scalar_cost;
42402
42403 case scalar_to_vec:
42404 return ix86_cost->scalar_to_vec_cost;
42405
42406 case unaligned_load:
42407 case unaligned_store:
42408 return ix86_cost->vec_unalign_load_cost;
42409
42410 case cond_branch_taken:
42411 return ix86_cost->cond_taken_branch_cost;
42412
42413 case cond_branch_not_taken:
42414 return ix86_cost->cond_not_taken_branch_cost;
42415
42416 case vec_perm:
42417 case vec_promote_demote:
42418 return ix86_cost->vec_stmt_cost;
42419
42420 case vec_construct:
42421 elements = TYPE_VECTOR_SUBPARTS (vectype);
42422 return elements / 2 + 1;
42423
42424 default:
42425 gcc_unreachable ();
42426 }
42427 }
42428
42429 /* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
42430 insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
42431 insn every time. */
42432
42433 static GTY(()) rtx vselect_insn;
42434
42435 /* Initialize vselect_insn. */
42436
42437 static void
42438 init_vselect_insn (void)
42439 {
42440 unsigned i;
42441 rtx x;
42442
42443 x = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (MAX_VECT_LEN));
42444 for (i = 0; i < MAX_VECT_LEN; ++i)
42445 XVECEXP (x, 0, i) = const0_rtx;
42446 x = gen_rtx_VEC_SELECT (V2DFmode, gen_rtx_VEC_CONCAT (V4DFmode, const0_rtx,
42447 const0_rtx), x);
42448 x = gen_rtx_SET (VOIDmode, const0_rtx, x);
42449 start_sequence ();
42450 vselect_insn = emit_insn (x);
42451 end_sequence ();
42452 }
42453
42454 /* Construct (set target (vec_select op0 (parallel perm))) and
42455 return true if that's a valid instruction in the active ISA. */
42456
42457 static bool
42458 expand_vselect (rtx target, rtx op0, const unsigned char *perm,
42459 unsigned nelt, bool testing_p)
42460 {
42461 unsigned int i;
42462 rtx x, save_vconcat;
42463 int icode;
42464
42465 if (vselect_insn == NULL_RTX)
42466 init_vselect_insn ();
42467
42468 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 1);
42469 PUT_NUM_ELEM (XVEC (x, 0), nelt);
42470 for (i = 0; i < nelt; ++i)
42471 XVECEXP (x, 0, i) = GEN_INT (perm[i]);
42472 save_vconcat = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
42473 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = op0;
42474 PUT_MODE (SET_SRC (PATTERN (vselect_insn)), GET_MODE (target));
42475 SET_DEST (PATTERN (vselect_insn)) = target;
42476 icode = recog_memoized (vselect_insn);
42477
42478 if (icode >= 0 && !testing_p)
42479 emit_insn (copy_rtx (PATTERN (vselect_insn)));
42480
42481 SET_DEST (PATTERN (vselect_insn)) = const0_rtx;
42482 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = save_vconcat;
42483 INSN_CODE (vselect_insn) = -1;
42484
42485 return icode >= 0;
42486 }
42487
42488 /* Similar, but generate a vec_concat from op0 and op1 as well. */
42489
42490 static bool
42491 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
42492 const unsigned char *perm, unsigned nelt,
42493 bool testing_p)
42494 {
42495 enum machine_mode v2mode;
42496 rtx x;
42497 bool ok;
42498
42499 if (vselect_insn == NULL_RTX)
42500 init_vselect_insn ();
42501
42502 v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
42503 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
42504 PUT_MODE (x, v2mode);
42505 XEXP (x, 0) = op0;
42506 XEXP (x, 1) = op1;
42507 ok = expand_vselect (target, x, perm, nelt, testing_p);
42508 XEXP (x, 0) = const0_rtx;
42509 XEXP (x, 1) = const0_rtx;
42510 return ok;
42511 }
42512
42513 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
42514 in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
42515
42516 static bool
42517 expand_vec_perm_blend (struct expand_vec_perm_d *d)
42518 {
42519 enum machine_mode vmode = d->vmode;
42520 unsigned i, mask, nelt = d->nelt;
42521 rtx target, op0, op1, x;
42522 rtx rperm[32], vperm;
42523
42524 if (d->one_operand_p)
42525 return false;
42526 if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
42527 ;
42528 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
42529 ;
42530 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
42531 ;
42532 else
42533 return false;
42534
42535 /* This is a blend, not a permute. Elements must stay in their
42536 respective lanes. */
42537 for (i = 0; i < nelt; ++i)
42538 {
42539 unsigned e = d->perm[i];
42540 if (!(e == i || e == i + nelt))
42541 return false;
42542 }
42543
42544 if (d->testing_p)
42545 return true;
42546
42547 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
42548 decision should be extracted elsewhere, so that we only try that
42549 sequence once all budget==3 options have been tried. */
42550 target = d->target;
42551 op0 = d->op0;
42552 op1 = d->op1;
42553 mask = 0;
42554
42555 switch (vmode)
42556 {
42557 case V4DFmode:
42558 case V8SFmode:
42559 case V2DFmode:
42560 case V4SFmode:
42561 case V8HImode:
42562 case V8SImode:
42563 for (i = 0; i < nelt; ++i)
42564 mask |= (d->perm[i] >= nelt) << i;
42565 break;
42566
42567 case V2DImode:
42568 for (i = 0; i < 2; ++i)
42569 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
42570 vmode = V8HImode;
42571 goto do_subreg;
42572
42573 case V4SImode:
42574 for (i = 0; i < 4; ++i)
42575 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
42576 vmode = V8HImode;
42577 goto do_subreg;
42578
42579 case V16QImode:
42580 /* See if bytes move in pairs so we can use pblendw with
42581 an immediate argument, rather than pblendvb with a vector
42582 argument. */
42583 for (i = 0; i < 16; i += 2)
42584 if (d->perm[i] + 1 != d->perm[i + 1])
42585 {
42586 use_pblendvb:
42587 for (i = 0; i < nelt; ++i)
42588 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
42589
42590 finish_pblendvb:
42591 vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
42592 vperm = force_reg (vmode, vperm);
42593
42594 if (GET_MODE_SIZE (vmode) == 16)
42595 emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
42596 else
42597 emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
42598 if (target != d->target)
42599 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
42600 return true;
42601 }
42602
42603 for (i = 0; i < 8; ++i)
42604 mask |= (d->perm[i * 2] >= 16) << i;
42605 vmode = V8HImode;
42606 /* FALLTHRU */
42607
42608 do_subreg:
42609 target = gen_reg_rtx (vmode);
42610 op0 = gen_lowpart (vmode, op0);
42611 op1 = gen_lowpart (vmode, op1);
42612 break;
42613
42614 case V32QImode:
42615 /* See if bytes move in pairs. If not, vpblendvb must be used. */
42616 for (i = 0; i < 32; i += 2)
42617 if (d->perm[i] + 1 != d->perm[i + 1])
42618 goto use_pblendvb;
42619 /* See if bytes move in quadruplets. If yes, vpblendd
42620 with immediate can be used. */
42621 for (i = 0; i < 32; i += 4)
42622 if (d->perm[i] + 2 != d->perm[i + 2])
42623 break;
42624 if (i < 32)
42625 {
42626 /* See if bytes move the same in both lanes. If yes,
42627 vpblendw with immediate can be used. */
42628 for (i = 0; i < 16; i += 2)
42629 if (d->perm[i] + 16 != d->perm[i + 16])
42630 goto use_pblendvb;
42631
42632 /* Use vpblendw. */
42633 for (i = 0; i < 16; ++i)
42634 mask |= (d->perm[i * 2] >= 32) << i;
42635 vmode = V16HImode;
42636 goto do_subreg;
42637 }
42638
42639 /* Use vpblendd. */
42640 for (i = 0; i < 8; ++i)
42641 mask |= (d->perm[i * 4] >= 32) << i;
42642 vmode = V8SImode;
42643 goto do_subreg;
42644
42645 case V16HImode:
42646 /* See if words move in pairs. If yes, vpblendd can be used. */
42647 for (i = 0; i < 16; i += 2)
42648 if (d->perm[i] + 1 != d->perm[i + 1])
42649 break;
42650 if (i < 16)
42651 {
42652 /* See if words move the same in both lanes. If not,
42653 vpblendvb must be used. */
42654 for (i = 0; i < 8; i++)
42655 if (d->perm[i] + 8 != d->perm[i + 8])
42656 {
42657 /* Use vpblendvb. */
42658 for (i = 0; i < 32; ++i)
42659 rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx);
42660
42661 vmode = V32QImode;
42662 nelt = 32;
42663 target = gen_reg_rtx (vmode);
42664 op0 = gen_lowpart (vmode, op0);
42665 op1 = gen_lowpart (vmode, op1);
42666 goto finish_pblendvb;
42667 }
42668
42669 /* Use vpblendw. */
42670 for (i = 0; i < 16; ++i)
42671 mask |= (d->perm[i] >= 16) << i;
42672 break;
42673 }
42674
42675 /* Use vpblendd. */
42676 for (i = 0; i < 8; ++i)
42677 mask |= (d->perm[i * 2] >= 16) << i;
42678 vmode = V8SImode;
42679 goto do_subreg;
42680
42681 case V4DImode:
42682 /* Use vpblendd. */
42683 for (i = 0; i < 4; ++i)
42684 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
42685 vmode = V8SImode;
42686 goto do_subreg;
42687
42688 default:
42689 gcc_unreachable ();
42690 }
42691
42692 /* This matches five different patterns with the different modes. */
42693 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
42694 x = gen_rtx_SET (VOIDmode, target, x);
42695 emit_insn (x);
42696 if (target != d->target)
42697 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
42698
42699 return true;
42700 }
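
/* A small worked example of the mask computed above (illustrative only):
   for a V4SFmode blend with d->perm = {0, 5, 2, 7} and nelt = 4, elements 1
   and 3 come from op1, so mask = (1 << 1) | (1 << 3) = 0xa, and the emitted
   vec_merge selects those two elements from op1 and the rest from op0.  */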
42701
42702 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
42703 in terms of the variable form of vpermilps.
42704
42705 Note that we will have already failed the immediate input vpermilps,
42706 which requires that the high and low part shuffle be identical; the
42707 variable form doesn't require that. */
42708
42709 static bool
42710 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
42711 {
42712 rtx rperm[8], vperm;
42713 unsigned i;
42714
42715 if (!TARGET_AVX || d->vmode != V8SFmode || !d->one_operand_p)
42716 return false;
42717
42718 /* We can only permute within the 128-bit lane. */
42719 for (i = 0; i < 8; ++i)
42720 {
42721 unsigned e = d->perm[i];
42722 if (i < 4 ? e >= 4 : e < 4)
42723 return false;
42724 }
42725
42726 if (d->testing_p)
42727 return true;
42728
42729 for (i = 0; i < 8; ++i)
42730 {
42731 unsigned e = d->perm[i];
42732
42733 /* Within each 128-bit lane, the elements of op0 are numbered
42734 from 0 and the elements of op1 are numbered from 4. */
42735 if (e >= 8 + 4)
42736 e -= 8;
42737 else if (e >= 4)
42738 e -= 4;
42739
42740 rperm[i] = GEN_INT (e);
42741 }
42742
42743 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
42744 vperm = force_reg (V8SImode, vperm);
42745 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
42746
42747 return true;
42748 }
42749
42750 /* Return true if permutation D can be performed as VMODE permutation
42751 instead. */
42752
42753 static bool
42754 valid_perm_using_mode_p (enum machine_mode vmode, struct expand_vec_perm_d *d)
42755 {
42756 unsigned int i, j, chunk;
42757
42758 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
42759 || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
42760 || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))
42761 return false;
42762
42763 if (GET_MODE_NUNITS (vmode) >= d->nelt)
42764 return true;
42765
42766 chunk = d->nelt / GET_MODE_NUNITS (vmode);
42767 for (i = 0; i < d->nelt; i += chunk)
42768 if (d->perm[i] & (chunk - 1))
42769 return false;
42770 else
42771 for (j = 1; j < chunk; ++j)
42772 if (d->perm[i] + j != d->perm[i + j])
42773 return false;
42774
42775 return true;
42776 }
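
/* Example of the test above (illustrative): the V16QImode permutation
   {4 5 6 7 0 1 2 3 12 13 14 15 8 9 10 11} is valid as a V4SImode
   permutation, because every chunk of four byte indices starts on a
   multiple of four and is consecutive; it is just the V4SImode permutation
   {1 0 3 2}.  A chunk starting at an unaligned index, such as {5 6 7 8},
   would fail the first check.  */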
42777
42778 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
42779 in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128. */
42780
42781 static bool
42782 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
42783 {
42784 unsigned i, nelt, eltsz, mask;
42785 unsigned char perm[32];
42786 enum machine_mode vmode = V16QImode;
42787 rtx rperm[32], vperm, target, op0, op1;
42788
42789 nelt = d->nelt;
42790
42791 if (!d->one_operand_p)
42792 {
42793 if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
42794 {
42795 if (TARGET_AVX2
42796 && valid_perm_using_mode_p (V2TImode, d))
42797 {
42798 if (d->testing_p)
42799 return true;
42800
42801 /* Use vperm2i128 insn. The pattern uses
42802 V4DImode instead of V2TImode. */
42803 target = d->target;
42804 if (d->vmode != V4DImode)
42805 target = gen_reg_rtx (V4DImode);
42806 op0 = gen_lowpart (V4DImode, d->op0);
42807 op1 = gen_lowpart (V4DImode, d->op1);
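	      /* The vperm2i128 immediate picks a source 128-bit lane for
		 each result lane: bits 0-1 select the low half, bits 4-5
		 select the high half.  */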
42808 	      rperm[0]
42809 		= GEN_INT ((d->perm[0] / (nelt / 2))
42810 			   | ((d->perm[nelt / 2] / (nelt / 2)) * 16));
42811 emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
42812 if (target != d->target)
42813 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
42814 return true;
42815 }
42816 return false;
42817 }
42818 }
42819 else
42820 {
42821 if (GET_MODE_SIZE (d->vmode) == 16)
42822 {
42823 if (!TARGET_SSSE3)
42824 return false;
42825 }
42826 else if (GET_MODE_SIZE (d->vmode) == 32)
42827 {
42828 if (!TARGET_AVX2)
42829 return false;
42830
42831 /* V4DImode should be already handled through
42832 expand_vselect by vpermq instruction. */
42833 gcc_assert (d->vmode != V4DImode);
42834
42835 vmode = V32QImode;
42836 if (d->vmode == V8SImode
42837 || d->vmode == V16HImode
42838 || d->vmode == V32QImode)
42839 {
42840 /* First see if vpermq can be used for
42841 V8SImode/V16HImode/V32QImode. */
42842 if (valid_perm_using_mode_p (V4DImode, d))
42843 {
42844 for (i = 0; i < 4; i++)
42845 perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
42846 if (d->testing_p)
42847 return true;
42848 target = gen_reg_rtx (V4DImode);
42849 if (expand_vselect (target, gen_lowpart (V4DImode, d->op0),
42850 perm, 4, false))
42851 {
42852 emit_move_insn (d->target,
42853 gen_lowpart (d->vmode, target));
42854 return true;
42855 }
42856 return false;
42857 }
42858
42859 /* Next see if vpermd can be used. */
42860 if (valid_perm_using_mode_p (V8SImode, d))
42861 vmode = V8SImode;
42862 }
42863 /* Or if vpermps can be used. */
42864 else if (d->vmode == V8SFmode)
42865 vmode = V8SImode;
42866
42867 if (vmode == V32QImode)
42868 {
42869 		  /* vpshufb only works within a 128-bit lane; it is not
42870 		     possible to shuffle bytes across lanes.  */
42871 for (i = 0; i < nelt; ++i)
42872 if ((d->perm[i] ^ i) & (nelt / 2))
42873 return false;
42874 }
42875 }
42876 else
42877 return false;
42878 }
42879
42880 if (d->testing_p)
42881 return true;
42882
42883 if (vmode == V8SImode)
42884 for (i = 0; i < 8; ++i)
42885 rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
42886 else
42887 {
42888 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
42889 if (!d->one_operand_p)
42890 mask = 2 * nelt - 1;
42891 else if (vmode == V16QImode)
42892 mask = nelt - 1;
42893 else
42894 mask = nelt / 2 - 1;
42895
42896 for (i = 0; i < nelt; ++i)
42897 {
42898 unsigned j, e = d->perm[i] & mask;
42899 for (j = 0; j < eltsz; ++j)
42900 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
42901 }
42902 }
42903
42904 vperm = gen_rtx_CONST_VECTOR (vmode,
42905 gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
42906 vperm = force_reg (vmode, vperm);
42907
42908 target = d->target;
42909 if (d->vmode != vmode)
42910 target = gen_reg_rtx (vmode);
42911 op0 = gen_lowpart (vmode, d->op0);
42912 if (d->one_operand_p)
42913 {
42914 if (vmode == V16QImode)
42915 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
42916 else if (vmode == V32QImode)
42917 emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
42918 else if (vmode == V8SFmode)
42919 emit_insn (gen_avx2_permvarv8sf (target, op0, vperm));
42920 else
42921 emit_insn (gen_avx2_permvarv8si (target, op0, vperm));
42922 }
42923 else
42924 {
42925 op1 = gen_lowpart (vmode, d->op1);
42926 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
42927 }
42928 if (target != d->target)
42929 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
42930
42931 return true;
42932 }
42933
42934 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
42935 in a single instruction. */
42936
42937 static bool
42938 expand_vec_perm_1 (struct expand_vec_perm_d *d)
42939 {
42940 unsigned i, nelt = d->nelt;
42941 unsigned char perm2[MAX_VECT_LEN];
42942
42943 /* Check plain VEC_SELECT first, because AVX has instructions that could
42944 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
42945 input where SEL+CONCAT may not. */
42946 if (d->one_operand_p)
42947 {
42948 int mask = nelt - 1;
42949 bool identity_perm = true;
42950 bool broadcast_perm = true;
42951
42952 for (i = 0; i < nelt; i++)
42953 {
42954 perm2[i] = d->perm[i] & mask;
42955 if (perm2[i] != i)
42956 identity_perm = false;
42957 if (perm2[i])
42958 broadcast_perm = false;
42959 }
42960
42961 if (identity_perm)
42962 {
42963 if (!d->testing_p)
42964 emit_move_insn (d->target, d->op0);
42965 return true;
42966 }
42967 else if (broadcast_perm && TARGET_AVX2)
42968 {
42969 /* Use vpbroadcast{b,w,d}. */
42970 rtx (*gen) (rtx, rtx) = NULL;
42971 switch (d->vmode)
42972 {
42973 case V32QImode:
42974 gen = gen_avx2_pbroadcastv32qi_1;
42975 break;
42976 case V16HImode:
42977 gen = gen_avx2_pbroadcastv16hi_1;
42978 break;
42979 case V8SImode:
42980 gen = gen_avx2_pbroadcastv8si_1;
42981 break;
42982 case V16QImode:
42983 gen = gen_avx2_pbroadcastv16qi;
42984 break;
42985 case V8HImode:
42986 gen = gen_avx2_pbroadcastv8hi;
42987 break;
42988 case V8SFmode:
42989 gen = gen_avx2_vec_dupv8sf_1;
42990 break;
42991 /* For other modes prefer other shuffles this function creates. */
42992 default: break;
42993 }
42994 if (gen != NULL)
42995 {
42996 if (!d->testing_p)
42997 emit_insn (gen (d->target, d->op0));
42998 return true;
42999 }
43000 }
43001
43002 if (expand_vselect (d->target, d->op0, perm2, nelt, d->testing_p))
43003 return true;
43004
43005 /* There are plenty of patterns in sse.md that are written for
43006 SEL+CONCAT and are not replicated for a single op. Perhaps
43007 that should be changed, to avoid the nastiness here. */
43008
43009 /* Recognize interleave style patterns, which means incrementing
43010 every other permutation operand. */
43011 for (i = 0; i < nelt; i += 2)
43012 {
43013 perm2[i] = d->perm[i] & mask;
43014 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
43015 }
43016 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
43017 d->testing_p))
43018 return true;
43019
43020 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
43021 if (nelt >= 4)
43022 {
43023 for (i = 0; i < nelt; i += 4)
43024 {
43025 perm2[i + 0] = d->perm[i + 0] & mask;
43026 perm2[i + 1] = d->perm[i + 1] & mask;
43027 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
43028 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
43029 }
43030
43031 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
43032 d->testing_p))
43033 return true;
43034 }
43035 }
43036
43037 /* Finally, try the fully general two operand permute. */
43038 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt,
43039 d->testing_p))
43040 return true;
43041
43042 /* Recognize interleave style patterns with reversed operands. */
43043 if (!d->one_operand_p)
43044 {
43045 for (i = 0; i < nelt; ++i)
43046 {
43047 unsigned e = d->perm[i];
43048 if (e >= nelt)
43049 e -= nelt;
43050 else
43051 e += nelt;
43052 perm2[i] = e;
43053 }
43054
43055 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt,
43056 d->testing_p))
43057 return true;
43058 }
43059
43060 /* Try the SSE4.1 blend variable merge instructions. */
43061 if (expand_vec_perm_blend (d))
43062 return true;
43063
43064 /* Try one of the AVX vpermil variable permutations. */
43065 if (expand_vec_perm_vpermil (d))
43066 return true;
43067
43068 /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
43069 vpshufb, vpermd, vpermps or vpermq variable permutation. */
43070 if (expand_vec_perm_pshufb (d))
43071 return true;
43072
43073 /* Try the AVX512F vpermi2 instructions. */
43074 rtx vec[64];
43075 enum machine_mode mode = d->vmode;
43076 if (mode == V8DFmode)
43077 mode = V8DImode;
43078 else if (mode == V16SFmode)
43079 mode = V16SImode;
43080 for (i = 0; i < nelt; ++i)
43081 vec[i] = GEN_INT (d->perm[i]);
43082 rtx mask = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt, vec));
43083 if (ix86_expand_vec_perm_vpermi2 (d->target, d->op0, mask, d->op1))
43084 return true;
43085
43086 return false;
43087 }
43088
43089 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
43090 in terms of a pair of pshuflw + pshufhw instructions. */
43091
43092 static bool
43093 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
43094 {
43095 unsigned char perm2[MAX_VECT_LEN];
43096 unsigned i;
43097 bool ok;
43098
43099 if (d->vmode != V8HImode || !d->one_operand_p)
43100 return false;
43101
43102 /* The two permutations only operate in 64-bit lanes. */
43103 for (i = 0; i < 4; ++i)
43104 if (d->perm[i] >= 4)
43105 return false;
43106 for (i = 4; i < 8; ++i)
43107 if (d->perm[i] < 4)
43108 return false;
43109
43110 if (d->testing_p)
43111 return true;
43112
43113 /* Emit the pshuflw. */
43114 memcpy (perm2, d->perm, 4);
43115 for (i = 4; i < 8; ++i)
43116 perm2[i] = i;
43117 ok = expand_vselect (d->target, d->op0, perm2, 8, d->testing_p);
43118 gcc_assert (ok);
43119
43120 /* Emit the pshufhw. */
43121 memcpy (perm2 + 4, d->perm + 4, 4);
43122 for (i = 0; i < 4; ++i)
43123 perm2[i] = i;
43124 ok = expand_vselect (d->target, d->target, perm2, 8, d->testing_p);
43125 gcc_assert (ok);
43126
43127 return true;
43128 }
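
/* Worked example (illustrative): the V8HImode permutation {2 0 3 1 5 7 4 6}
   passes the lane test above and is emitted as pshuflw with indices
   {2 0 3 1 4 5 6 7} (low quadword shuffled, high copied) followed by
   pshufhw with {0 1 2 3 5 7 4 6} applied to that intermediate result.  */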
43129
43130 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
43131 the permutation using the SSSE3 palignr instruction. This succeeds
43132 when all of the elements in PERM fit within one vector and we merely
43133 need to shift them down so that a single vector permutation has a
43134 chance to succeed. */
43135
43136 static bool
43137 expand_vec_perm_palignr (struct expand_vec_perm_d *d)
43138 {
43139 unsigned i, nelt = d->nelt;
43140 unsigned min, max;
43141 bool in_order, ok;
43142 rtx shift, target;
43143 struct expand_vec_perm_d dcopy;
43144
43145 /* Even with AVX, palignr only operates on 128-bit vectors. */
43146 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
43147 return false;
43148
43149 min = nelt, max = 0;
43150 for (i = 0; i < nelt; ++i)
43151 {
43152 unsigned e = d->perm[i];
43153 if (e < min)
43154 min = e;
43155 if (e > max)
43156 max = e;
43157 }
43158 if (min == 0 || max - min >= nelt)
43159 return false;
43160
43161 /* Given that we have SSSE3, we know we'll be able to implement the
43162 single operand permutation after the palignr with pshufb. */
43163 if (d->testing_p)
43164 return true;
43165
43166 dcopy = *d;
43167 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
43168 target = gen_reg_rtx (TImode);
43169 emit_insn (gen_ssse3_palignrti (target, gen_lowpart (TImode, d->op1),
43170 gen_lowpart (TImode, d->op0), shift));
43171
43172 dcopy.op0 = dcopy.op1 = gen_lowpart (d->vmode, target);
43173 dcopy.one_operand_p = true;
43174
43175 in_order = true;
43176 for (i = 0; i < nelt; ++i)
43177 {
43178 unsigned e = dcopy.perm[i] - min;
43179 if (e != i)
43180 in_order = false;
43181 dcopy.perm[i] = e;
43182 }
43183
43184 /* Test for the degenerate case where the alignment by itself
43185 produces the desired permutation. */
43186 if (in_order)
43187 {
43188 emit_move_insn (d->target, dcopy.op0);
43189 return true;
43190 }
43191
43192 ok = expand_vec_perm_1 (&dcopy);
43193 gcc_assert (ok);
43194
43195 return ok;
43196 }
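
/* Worked example (illustrative): a V16QImode permutation whose indices all
   lie in [3, 19), e.g. the rotation {3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
   18}, has min = 3, so palignr shifts the op1:op0 pair down by three bytes;
   for the rotation the shifted result is already in order, and otherwise
   the leftover single-operand permutation (indices reduced by 3) is done
   with pshufb.  */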
43197
43198 /* A subroutine of ix86_expand_vec_perm_const_1. Try to simplify
43199    the permutation using the SSE4_1 pblendv instruction.  Potentially reduces
43200    the permutation from 2 pshufb insns and an or to 1 pshufb and a pblendv.  */
43201
43202 static bool
43203 expand_vec_perm_pblendv (struct expand_vec_perm_d *d)
43204 {
43205 unsigned i, which, nelt = d->nelt;
43206 struct expand_vec_perm_d dcopy, dcopy1;
43207 enum machine_mode vmode = d->vmode;
43208 bool ok;
43209
43210 /* Use the same checks as in expand_vec_perm_blend, but skipping
43211 AVX and AVX2 as they require more than 2 instructions. */
43212 if (d->one_operand_p)
43213 return false;
43214 if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
43215 ;
43216 else
43217 return false;
43218
43219   /* Figure out which permutation elements do not stay in their
43220      respective lanes.  */
43221 for (i = 0, which = 0; i < nelt; ++i)
43222 {
43223 unsigned e = d->perm[i];
43224 if (e != i)
43225 which |= (e < nelt ? 1 : 2);
43226 }
43227   /* We can pblend the part where elements do not stay in their
43228      respective lanes only when these elements all come from one
43229      half of the permutation.
43230      {0 1 8 3 4 5 9 7} is ok: 8 and 9 are not in their respective
43231      lanes, but both are >= 8.
43232      {0 1 8 3 4 5 2 7} is not ok: 2 and 8 are not in their
43233      respective lanes, and while 8 >= 8, 2 is not.  */
43234 if (which != 1 && which != 2)
43235 return false;
43236 if (d->testing_p)
43237 return true;
43238
43239   /* First we apply a one-operand permutation to the part whose
43240      elements do not stay in their respective lanes.  */
43241 dcopy = *d;
43242 if (which == 2)
43243 dcopy.op0 = dcopy.op1 = d->op1;
43244 else
43245 dcopy.op0 = dcopy.op1 = d->op0;
43246 dcopy.one_operand_p = true;
43247
43248 for (i = 0; i < nelt; ++i)
43249 dcopy.perm[i] = d->perm[i] & (nelt - 1);
43250
43251 ok = expand_vec_perm_1 (&dcopy);
43252 gcc_assert (ok);
43253
43254 /* Next we put permuted elements into their positions. */
43255 dcopy1 = *d;
43256 if (which == 2)
43257 dcopy1.op1 = dcopy.target;
43258 else
43259 dcopy1.op0 = dcopy.target;
43260
43261 for (i = 0; i < nelt; ++i)
43262 dcopy1.perm[i] = ((d->perm[i] >= nelt) ? (nelt + i) : i);
43263
43264 ok = expand_vec_perm_blend (&dcopy1);
43265 gcc_assert (ok);
43266
43267 return true;
43268 }
43269
43270 static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);
43271
43272 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
43273 a two vector permutation into a single vector permutation by using
43274 an interleave operation to merge the vectors. */
43275
43276 static bool
43277 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
43278 {
43279 struct expand_vec_perm_d dremap, dfinal;
43280 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
43281 unsigned HOST_WIDE_INT contents;
43282 unsigned char remap[2 * MAX_VECT_LEN];
43283 rtx seq;
43284 bool ok, same_halves = false;
43285
43286 if (GET_MODE_SIZE (d->vmode) == 16)
43287 {
43288 if (d->one_operand_p)
43289 return false;
43290 }
43291 else if (GET_MODE_SIZE (d->vmode) == 32)
43292 {
43293 if (!TARGET_AVX)
43294 return false;
43295 /* For 32-byte modes allow even d->one_operand_p.
43296 The lack of cross-lane shuffling in some instructions
43297 might prevent a single insn shuffle. */
43298 dfinal = *d;
43299 dfinal.testing_p = true;
43300       /* If expand_vec_perm_interleave3 can expand this into
43301 	 a 3-insn sequence, give up and let it be expanded that
43302 	 way instead.  While that is one insn longer, it doesn't
43303 	 need a memory operand, and in the common case where the
43304 	 interleave-low and interleave-high permutations with the
43305 	 same operands are adjacent, it needs only 4 insns for
43306 	 both after CSE.  */
43307 if (expand_vec_perm_interleave3 (&dfinal))
43308 return false;
43309 }
43310 else
43311 return false;
43312
43313 /* Examine from whence the elements come. */
43314 contents = 0;
43315 for (i = 0; i < nelt; ++i)
43316 contents |= ((unsigned HOST_WIDE_INT) 1) << d->perm[i];
43317
43318 memset (remap, 0xff, sizeof (remap));
43319 dremap = *d;
43320
43321 if (GET_MODE_SIZE (d->vmode) == 16)
43322 {
43323 unsigned HOST_WIDE_INT h1, h2, h3, h4;
43324
43325 /* Split the two input vectors into 4 halves. */
43326 h1 = (((unsigned HOST_WIDE_INT) 1) << nelt2) - 1;
43327 h2 = h1 << nelt2;
43328 h3 = h2 << nelt2;
43329 h4 = h3 << nelt2;
43330
43331     /* If the elements come from the low halves, use interleave low; similarly
43332        for interleave high.  If the elements are from mis-matched halves, we
43333        can use shufps for V4SF/V4SI or do a DImode shuffle.  */
43334 if ((contents & (h1 | h3)) == contents)
43335 {
43336 /* punpckl* */
43337 for (i = 0; i < nelt2; ++i)
43338 {
43339 remap[i] = i * 2;
43340 remap[i + nelt] = i * 2 + 1;
43341 dremap.perm[i * 2] = i;
43342 dremap.perm[i * 2 + 1] = i + nelt;
43343 }
43344 if (!TARGET_SSE2 && d->vmode == V4SImode)
43345 dremap.vmode = V4SFmode;
43346 }
43347 else if ((contents & (h2 | h4)) == contents)
43348 {
43349 /* punpckh* */
43350 for (i = 0; i < nelt2; ++i)
43351 {
43352 remap[i + nelt2] = i * 2;
43353 remap[i + nelt + nelt2] = i * 2 + 1;
43354 dremap.perm[i * 2] = i + nelt2;
43355 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
43356 }
43357 if (!TARGET_SSE2 && d->vmode == V4SImode)
43358 dremap.vmode = V4SFmode;
43359 }
43360 else if ((contents & (h1 | h4)) == contents)
43361 {
43362 /* shufps */
43363 for (i = 0; i < nelt2; ++i)
43364 {
43365 remap[i] = i;
43366 remap[i + nelt + nelt2] = i + nelt2;
43367 dremap.perm[i] = i;
43368 dremap.perm[i + nelt2] = i + nelt + nelt2;
43369 }
43370 if (nelt != 4)
43371 {
43372 /* shufpd */
43373 dremap.vmode = V2DImode;
43374 dremap.nelt = 2;
43375 dremap.perm[0] = 0;
43376 dremap.perm[1] = 3;
43377 }
43378 }
43379 else if ((contents & (h2 | h3)) == contents)
43380 {
43381 /* shufps */
43382 for (i = 0; i < nelt2; ++i)
43383 {
43384 remap[i + nelt2] = i;
43385 remap[i + nelt] = i + nelt2;
43386 dremap.perm[i] = i + nelt2;
43387 dremap.perm[i + nelt2] = i + nelt;
43388 }
43389 if (nelt != 4)
43390 {
43391 /* shufpd */
43392 dremap.vmode = V2DImode;
43393 dremap.nelt = 2;
43394 dremap.perm[0] = 1;
43395 dremap.perm[1] = 2;
43396 }
43397 }
43398 else
43399 return false;
43400 }
43401 else
43402 {
43403 unsigned int nelt4 = nelt / 4, nzcnt = 0;
43404 unsigned HOST_WIDE_INT q[8];
43405 unsigned int nonzero_halves[4];
43406
43407 /* Split the two input vectors into 8 quarters. */
43408 q[0] = (((unsigned HOST_WIDE_INT) 1) << nelt4) - 1;
43409 for (i = 1; i < 8; ++i)
43410 q[i] = q[0] << (nelt4 * i);
43411 for (i = 0; i < 4; ++i)
43412 if (((q[2 * i] | q[2 * i + 1]) & contents) != 0)
43413 {
43414 nonzero_halves[nzcnt] = i;
43415 ++nzcnt;
43416 }
43417
43418 if (nzcnt == 1)
43419 {
43420 gcc_assert (d->one_operand_p);
43421 nonzero_halves[1] = nonzero_halves[0];
43422 same_halves = true;
43423 }
43424 else if (d->one_operand_p)
43425 {
43426 gcc_assert (nonzero_halves[0] == 0);
43427 gcc_assert (nonzero_halves[1] == 1);
43428 }
43429
43430 if (nzcnt <= 2)
43431 {
43432 if (d->perm[0] / nelt2 == nonzero_halves[1])
43433 {
43434 /* Attempt to increase the likelihood that dfinal
43435 shuffle will be intra-lane. */
43436 char tmph = nonzero_halves[0];
43437 nonzero_halves[0] = nonzero_halves[1];
43438 nonzero_halves[1] = tmph;
43439 }
43440
43441 /* vperm2f128 or vperm2i128. */
43442 for (i = 0; i < nelt2; ++i)
43443 {
43444 remap[i + nonzero_halves[1] * nelt2] = i + nelt2;
43445 remap[i + nonzero_halves[0] * nelt2] = i;
43446 dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2;
43447 dremap.perm[i] = i + nonzero_halves[0] * nelt2;
43448 }
43449
43450 if (d->vmode != V8SFmode
43451 && d->vmode != V4DFmode
43452 && d->vmode != V8SImode)
43453 {
43454 dremap.vmode = V8SImode;
43455 dremap.nelt = 8;
43456 for (i = 0; i < 4; ++i)
43457 {
43458 dremap.perm[i] = i + nonzero_halves[0] * 4;
43459 dremap.perm[i + 4] = i + nonzero_halves[1] * 4;
43460 }
43461 }
43462 }
43463 else if (d->one_operand_p)
43464 return false;
43465 else if (TARGET_AVX2
43466 && (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
43467 {
43468 /* vpunpckl* */
43469 for (i = 0; i < nelt4; ++i)
43470 {
43471 remap[i] = i * 2;
43472 remap[i + nelt] = i * 2 + 1;
43473 remap[i + nelt2] = i * 2 + nelt2;
43474 remap[i + nelt + nelt2] = i * 2 + nelt2 + 1;
43475 dremap.perm[i * 2] = i;
43476 dremap.perm[i * 2 + 1] = i + nelt;
43477 dremap.perm[i * 2 + nelt2] = i + nelt2;
43478 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2;
43479 }
43480 }
43481 else if (TARGET_AVX2
43482 && (contents & (q[1] | q[3] | q[5] | q[7])) == contents)
43483 {
43484 /* vpunpckh* */
43485 for (i = 0; i < nelt4; ++i)
43486 {
43487 remap[i + nelt4] = i * 2;
43488 remap[i + nelt + nelt4] = i * 2 + 1;
43489 remap[i + nelt2 + nelt4] = i * 2 + nelt2;
43490 remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1;
43491 dremap.perm[i * 2] = i + nelt4;
43492 dremap.perm[i * 2 + 1] = i + nelt + nelt4;
43493 dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4;
43494 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4;
43495 }
43496 }
43497 else
43498 return false;
43499 }
43500
43501 /* Use the remapping array set up above to move the elements from their
43502 swizzled locations into their final destinations. */
43503 dfinal = *d;
43504 for (i = 0; i < nelt; ++i)
43505 {
43506 unsigned e = remap[d->perm[i]];
43507 gcc_assert (e < nelt);
43508 /* If same_halves is true, both halves of the remapped vector are the
43509 same. Avoid cross-lane accesses if possible. */
43510 if (same_halves && i >= nelt2)
43511 {
43512 gcc_assert (e < nelt2);
43513 dfinal.perm[i] = e + nelt2;
43514 }
43515 else
43516 dfinal.perm[i] = e;
43517 }
43518 if (!d->testing_p)
43519 {
43520 dremap.target = gen_reg_rtx (dremap.vmode);
43521 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
43522 }
43523 dfinal.op1 = dfinal.op0;
43524 dfinal.one_operand_p = true;
43525
43526 /* Test if the final remap can be done with a single insn. For V4SFmode or
43527 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
43528 start_sequence ();
43529 ok = expand_vec_perm_1 (&dfinal);
43530 seq = get_insns ();
43531 end_sequence ();
43532
43533 if (!ok)
43534 return false;
43535
43536 if (d->testing_p)
43537 return true;
43538
43539 if (dremap.vmode != dfinal.vmode)
43540 {
43541 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
43542 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
43543 }
43544
43545 ok = expand_vec_perm_1 (&dremap);
43546 gcc_assert (ok);
43547
43548 emit_insn (seq);
43549 return true;
43550 }
43551
43552 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
43553 a single vector cross-lane permutation into vpermq followed
43554 by any of the single insn permutations. */
43555
43556 static bool
43557 expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
43558 {
43559 struct expand_vec_perm_d dremap, dfinal;
43560 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4;
43561 unsigned contents[2];
43562 bool ok;
43563
43564 if (!(TARGET_AVX2
43565 && (d->vmode == V32QImode || d->vmode == V16HImode)
43566 && d->one_operand_p))
43567 return false;
43568
43569 contents[0] = 0;
43570 contents[1] = 0;
43571 for (i = 0; i < nelt2; ++i)
43572 {
43573 contents[0] |= 1u << (d->perm[i] / nelt4);
43574 contents[1] |= 1u << (d->perm[i + nelt2] / nelt4);
43575 }
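/* At this point contents[k] has bit j set iff some element of half k
   of the result comes from 64-bit quarter j of the input.  The check
   below then rejects the permutation if either half would need more
   than two source quarters, since the vpermq step can only place two
   quarters into each 128-bit half.  */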
43576
43577 for (i = 0; i < 2; ++i)
43578 {
43579 unsigned int cnt = 0;
43580 for (j = 0; j < 4; ++j)
43581 if ((contents[i] & (1u << j)) != 0 && ++cnt > 2)
43582 return false;
43583 }
43584
43585 if (d->testing_p)
43586 return true;
43587
43588 dremap = *d;
43589 dremap.vmode = V4DImode;
43590 dremap.nelt = 4;
43591 dremap.target = gen_reg_rtx (V4DImode);
43592 dremap.op0 = gen_lowpart (V4DImode, d->op0);
43593 dremap.op1 = dremap.op0;
43594 dremap.one_operand_p = true;
43595 for (i = 0; i < 2; ++i)
43596 {
43597 unsigned int cnt = 0;
43598 for (j = 0; j < 4; ++j)
43599 if ((contents[i] & (1u << j)) != 0)
43600 dremap.perm[2 * i + cnt++] = j;
43601 for (; cnt < 2; ++cnt)
43602 dremap.perm[2 * i + cnt] = 0;
43603 }
43604
43605 dfinal = *d;
43606 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
43607 dfinal.op1 = dfinal.op0;
43608 dfinal.one_operand_p = true;
43609 for (i = 0, j = 0; i < nelt; ++i)
43610 {
43611 if (i == nelt2)
43612 j = 2;
43613 dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0);
43614 if ((d->perm[i] / nelt4) == dremap.perm[j])
43615 ;
43616 else if ((d->perm[i] / nelt4) == dremap.perm[j + 1])
43617 dfinal.perm[i] |= nelt4;
43618 else
43619 gcc_unreachable ();
43620 }
43621
43622 ok = expand_vec_perm_1 (&dremap);
43623 gcc_assert (ok);
43624
43625 ok = expand_vec_perm_1 (&dfinal);
43626 gcc_assert (ok);
43627
43628 return true;
43629 }
43630
43631 /* A subroutine of ix86_expand_vec_perm_const_1.  Try to expand
43632 a vector permutation using two instructions, vperm2f128 or
43633 vperm2i128, followed by any single in-lane permutation. */
43634
43635 static bool
43636 expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d)
43637 {
43638 struct expand_vec_perm_d dfirst, dsecond;
43639 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, perm;
43640 bool ok;
43641
43642 if (!TARGET_AVX
43643 || GET_MODE_SIZE (d->vmode) != 32
43644 || (d->vmode != V8SFmode && d->vmode != V4DFmode && !TARGET_AVX2))
43645 return false;
43646
43647 dsecond = *d;
43648 dsecond.one_operand_p = false;
43649 dsecond.testing_p = true;
43650
43651 /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
43652 immediate. For perm < 16 the second permutation uses
43653 d->op0 as its first operand; for perm >= 16 it uses d->op1
43654 as its first operand. The second operand is the result of
43655 vperm2[fi]128. */
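/* For example, perm == 6 (0b0110) asks for lane 2 in the low half and
   lane 1 in the high half; ((6 << 2) | 6) & 0x33 == 0x12, which is the
   corresponding vperm2[fi]128 immediate (bits 0-1 select the source of
   the low half, bits 4-5 the source of the high half).  */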
43656 for (perm = 0; perm < 32; perm++)
43657 {
43658 /* Ignore permutations which do not move anything cross-lane. */
43659 if (perm < 16)
43660 {
43661 /* The second shuffle for e.g. V4DFmode has
43662 0123 and ABCD operands.
43663 Ignore AB23, as 23 is already in the second lane
43664 of the first operand. */
43665 if ((perm & 0xc) == (1 << 2)) continue;
43666 /* And 01CD, as 01 is in the first lane of the first
43667 operand. */
43668 if ((perm & 3) == 0) continue;
43669 /* And 4567, as then the vperm2[fi]128 doesn't change
43670 anything on the original 4567 second operand. */
43671 if ((perm & 0xf) == ((3 << 2) | 2)) continue;
43672 }
43673 else
43674 {
43675 /* The second shuffle for e.g. V4DFmode has
43676 4567 and ABCD operands.
43677 Ignore AB67, as 67 is already in the second lane
43678 of the first operand. */
43679 if ((perm & 0xc) == (3 << 2)) continue;
43680 /* And 45CD, as 45 is in the first lane of the first
43681 operand. */
43682 if ((perm & 3) == 2) continue;
43683 /* And 0123, as then the vperm2[fi]128 doesn't change
43684 anything on the original 0123 first operand. */
43685 if ((perm & 0xf) == (1 << 2)) continue;
43686 }
43687
43688 for (i = 0; i < nelt; i++)
43689 {
43690 j = d->perm[i] / nelt2;
43691 if (j == ((perm >> (2 * (i >= nelt2))) & 3))
43692 dsecond.perm[i] = nelt + (i & nelt2) + (d->perm[i] & (nelt2 - 1));
43693 else if (j == (unsigned) (i >= nelt2) + 2 * (perm >= 16))
43694 dsecond.perm[i] = d->perm[i] & (nelt - 1);
43695 else
43696 break;
43697 }
43698
43699 if (i == nelt)
43700 {
43701 start_sequence ();
43702 ok = expand_vec_perm_1 (&dsecond);
43703 end_sequence ();
43704 }
43705 else
43706 ok = false;
43707
43708 if (ok)
43709 {
43710 if (d->testing_p)
43711 return true;
43712
43713 /* Found a usable second shuffle. dfirst will be
43714 vperm2f128 on d->op0 and d->op1. */
43715 dsecond.testing_p = false;
43716 dfirst = *d;
43717 dfirst.target = gen_reg_rtx (d->vmode);
43718 for (i = 0; i < nelt; i++)
43719 dfirst.perm[i] = (i & (nelt2 - 1))
43720 + ((perm >> (2 * (i >= nelt2))) & 3) * nelt2;
43721
43722 ok = expand_vec_perm_1 (&dfirst);
43723 gcc_assert (ok);
43724
43725 /* And dsecond is some single insn shuffle, taking
43726 d->op0 and result of vperm2f128 (if perm < 16) or
43727 d->op1 and result of vperm2f128 (otherwise). */
43728 dsecond.op1 = dfirst.target;
43729 if (perm >= 16)
43730 dsecond.op0 = dfirst.op1;
43731
43732 ok = expand_vec_perm_1 (&dsecond);
43733 gcc_assert (ok);
43734
43735 return true;
43736 }
43737
43738 /* For one operand, the only useful vperm2f128 permutation is 0x10. */
43739 if (d->one_operand_p)
43740 return false;
43741 }
43742
43743 return false;
43744 }
43745
43746 /* A subroutine of ix86_expand_vec_perm_const_1.  Try to simplify
43747 a two vector permutation using 2 intra-lane interleave insns
43748 and cross-lane shuffle for 32-byte vectors. */
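/* For example, for V4DImode the selectors accepted below are
   { 0, 4, 1, 5 } (interleave of the low halves) and { 2, 6, 3, 7 }
   (interleave of the high halves).  */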
43749
43750 static bool
43751 expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
43752 {
43753 unsigned i, nelt;
43754 rtx (*gen) (rtx, rtx, rtx);
43755
43756 if (d->one_operand_p)
43757 return false;
43758 if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
43759 ;
43760 else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
43761 ;
43762 else
43763 return false;
43764
43765 nelt = d->nelt;
43766 if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
43767 return false;
43768 for (i = 0; i < nelt; i += 2)
43769 if (d->perm[i] != d->perm[0] + i / 2
43770 || d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
43771 return false;
43772
43773 if (d->testing_p)
43774 return true;
43775
43776 switch (d->vmode)
43777 {
43778 case V32QImode:
43779 if (d->perm[0])
43780 gen = gen_vec_interleave_highv32qi;
43781 else
43782 gen = gen_vec_interleave_lowv32qi;
43783 break;
43784 case V16HImode:
43785 if (d->perm[0])
43786 gen = gen_vec_interleave_highv16hi;
43787 else
43788 gen = gen_vec_interleave_lowv16hi;
43789 break;
43790 case V8SImode:
43791 if (d->perm[0])
43792 gen = gen_vec_interleave_highv8si;
43793 else
43794 gen = gen_vec_interleave_lowv8si;
43795 break;
43796 case V4DImode:
43797 if (d->perm[0])
43798 gen = gen_vec_interleave_highv4di;
43799 else
43800 gen = gen_vec_interleave_lowv4di;
43801 break;
43802 case V8SFmode:
43803 if (d->perm[0])
43804 gen = gen_vec_interleave_highv8sf;
43805 else
43806 gen = gen_vec_interleave_lowv8sf;
43807 break;
43808 case V4DFmode:
43809 if (d->perm[0])
43810 gen = gen_vec_interleave_highv4df;
43811 else
43812 gen = gen_vec_interleave_lowv4df;
43813 break;
43814 default:
43815 gcc_unreachable ();
43816 }
43817
43818 emit_insn (gen (d->target, d->op0, d->op1));
43819 return true;
43820 }
43821
43822 /* A subroutine of ix86_expand_vec_perm_const_1.  Try to implement
43823 a single vector permutation using a single intra-lane vector
43824 permutation, vperm2f128 swapping the lanes and vblend* insn blending
43825 the non-swapped and swapped vectors together. */
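/* For instance, a V4DFmode selector such as { 2, 1, 0, 3 } can be done
   as an in-lane shuffle (here the identity), a vperm2f128 that swaps
   the two 128-bit lanes, and a vblendpd with mask 0b0101 choosing the
   swapped copy for elements 0 and 2.  */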
43826
43827 static bool
43828 expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)
43829 {
43830 struct expand_vec_perm_d dfirst, dsecond;
43831 unsigned i, j, msk, nelt = d->nelt, nelt2 = nelt / 2;
43832 rtx seq;
43833 bool ok;
43834 rtx (*blend) (rtx, rtx, rtx, rtx) = NULL;
43835
43836 if (!TARGET_AVX
43837 || TARGET_AVX2
43838 || (d->vmode != V8SFmode && d->vmode != V4DFmode)
43839 || !d->one_operand_p)
43840 return false;
43841
43842 dfirst = *d;
43843 for (i = 0; i < nelt; i++)
43844 dfirst.perm[i] = 0xff;
43845 for (i = 0, msk = 0; i < nelt; i++)
43846 {
43847 j = (d->perm[i] & nelt2) ? i | nelt2 : i & ~nelt2;
43848 if (dfirst.perm[j] != 0xff && dfirst.perm[j] != d->perm[i])
43849 return false;
43850 dfirst.perm[j] = d->perm[i];
43851 if (j != i)
43852 msk |= (1 << i);
43853 }
43854 for (i = 0; i < nelt; i++)
43855 if (dfirst.perm[i] == 0xff)
43856 dfirst.perm[i] = i;
43857
43858 if (!d->testing_p)
43859 dfirst.target = gen_reg_rtx (dfirst.vmode);
43860
43861 start_sequence ();
43862 ok = expand_vec_perm_1 (&dfirst);
43863 seq = get_insns ();
43864 end_sequence ();
43865
43866 if (!ok)
43867 return false;
43868
43869 if (d->testing_p)
43870 return true;
43871
43872 emit_insn (seq);
43873
43874 dsecond = *d;
43875 dsecond.op0 = dfirst.target;
43876 dsecond.op1 = dfirst.target;
43877 dsecond.one_operand_p = true;
43878 dsecond.target = gen_reg_rtx (dsecond.vmode);
43879 for (i = 0; i < nelt; i++)
43880 dsecond.perm[i] = i ^ nelt2;
43881
43882 ok = expand_vec_perm_1 (&dsecond);
43883 gcc_assert (ok);
43884
43885 blend = d->vmode == V8SFmode ? gen_avx_blendps256 : gen_avx_blendpd256;
43886 emit_insn (blend (d->target, dfirst.target, dsecond.target, GEN_INT (msk)));
43887 return true;
43888 }
43889
43890 /* A subroutine of ix86_expand_vec_perm_const_1.  Implement a V4DF
43891 permutation using two vperm2f128, followed by a vshufpd insn blending
43892 the two vectors together. */
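/* As an illustration, for the selector { 3, 4, 1, 6 } the first
   vperm2f128 produces a vector holding elements { 2, 3, 0, 1 } and the
   second one holding { 4, 5, 6, 7 }; the final vshufpd then picks one
   element per 64-bit position from those two temporaries.  */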
43893
43894 static bool
43895 expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d *d)
43896 {
43897 struct expand_vec_perm_d dfirst, dsecond, dthird;
43898 bool ok;
43899
43900 if (!TARGET_AVX || (d->vmode != V4DFmode))
43901 return false;
43902
43903 if (d->testing_p)
43904 return true;
43905
43906 dfirst = *d;
43907 dsecond = *d;
43908 dthird = *d;
43909
43910 dfirst.perm[0] = (d->perm[0] & ~1);
43911 dfirst.perm[1] = (d->perm[0] & ~1) + 1;
43912 dfirst.perm[2] = (d->perm[2] & ~1);
43913 dfirst.perm[3] = (d->perm[2] & ~1) + 1;
43914 dsecond.perm[0] = (d->perm[1] & ~1);
43915 dsecond.perm[1] = (d->perm[1] & ~1) + 1;
43916 dsecond.perm[2] = (d->perm[3] & ~1);
43917 dsecond.perm[3] = (d->perm[3] & ~1) + 1;
43918 dthird.perm[0] = (d->perm[0] % 2);
43919 dthird.perm[1] = (d->perm[1] % 2) + 4;
43920 dthird.perm[2] = (d->perm[2] % 2) + 2;
43921 dthird.perm[3] = (d->perm[3] % 2) + 6;
43922
43923 dfirst.target = gen_reg_rtx (dfirst.vmode);
43924 dsecond.target = gen_reg_rtx (dsecond.vmode);
43925 dthird.op0 = dfirst.target;
43926 dthird.op1 = dsecond.target;
43927 dthird.one_operand_p = false;
43928
43929 canonicalize_perm (&dfirst);
43930 canonicalize_perm (&dsecond);
43931
43932 ok = expand_vec_perm_1 (&dfirst)
43933 && expand_vec_perm_1 (&dsecond)
43934 && expand_vec_perm_1 (&dthird);
43935
43936 gcc_assert (ok);
43937
43938 return true;
43939 }
43940
43941 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
43942 permutation with two pshufb insns and an ior. We should have already
43943 failed all two instruction sequences. */
43944
43945 static bool
43946 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
43947 {
43948 rtx rperm[2][16], vperm, l, h, op, m128;
43949 unsigned int i, nelt, eltsz;
43950
43951 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
43952 return false;
43953 gcc_assert (!d->one_operand_p);
43954
43955 if (d->testing_p)
43956 return true;
43957
43958 nelt = d->nelt;
43959 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
43960
43961 /* Generate two permutation masks. If the required element is within
43962 the given vector it is shuffled into the proper lane. If the required
43963 element is in the other vector, force a zero into the lane by setting
43964 bit 7 in the permutation mask. */
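/* For instance, with V16QImode, if d->perm[0] is 18 the byte comes
   from element 2 of the second operand: the first mask then gets -128
   in byte 0 (zeroing it) and the second mask gets 2, so the final ior
   picks up that byte from the second pshufb result.  */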
43965 m128 = GEN_INT (-128);
43966 for (i = 0; i < nelt; ++i)
43967 {
43968 unsigned j, e = d->perm[i];
43969 unsigned which = (e >= nelt);
43970 if (e >= nelt)
43971 e -= nelt;
43972
43973 for (j = 0; j < eltsz; ++j)
43974 {
43975 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
43976 rperm[1-which][i*eltsz + j] = m128;
43977 }
43978 }
43979
43980 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
43981 vperm = force_reg (V16QImode, vperm);
43982
43983 l = gen_reg_rtx (V16QImode);
43984 op = gen_lowpart (V16QImode, d->op0);
43985 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
43986
43987 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
43988 vperm = force_reg (V16QImode, vperm);
43989
43990 h = gen_reg_rtx (V16QImode);
43991 op = gen_lowpart (V16QImode, d->op1);
43992 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
43993
43994 op = d->target;
43995 if (d->vmode != V16QImode)
43996 op = gen_reg_rtx (V16QImode);
43997 emit_insn (gen_iorv16qi3 (op, l, h));
43998 if (op != d->target)
43999 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
44000
44001 return true;
44002 }
44003
44004 /* Implement arbitrary permutation of one V32QImode or V16HImode operand
44005 with two vpshufb insns, vpermq and vpor. We should have already failed
44006 all two or three instruction sequences. */
44007
44008 static bool
44009 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
44010 {
44011 rtx rperm[2][32], vperm, l, h, hp, op, m128;
44012 unsigned int i, nelt, eltsz;
44013
44014 if (!TARGET_AVX2
44015 || !d->one_operand_p
44016 || (d->vmode != V32QImode && d->vmode != V16HImode))
44017 return false;
44018
44019 if (d->testing_p)
44020 return true;
44021
44022 nelt = d->nelt;
44023 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
44024
44025 /* Generate two permutation masks. If the required element is within
44026 the same lane, it is shuffled in. If the required element is from the
44027 other lane, force a zero by setting bit 7 in the permutation mask.
44028 The other mask has a non-negative element wherever an element is
44029 requested from the other lane; that element is also moved to the other
44030 lane, so that the result of vpshufb can have the two V2TImode halves
44031 swapped. */
44032 m128 = GEN_INT (-128);
44033 for (i = 0; i < nelt; ++i)
44034 {
44035 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
44036 unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
44037
44038 for (j = 0; j < eltsz; ++j)
44039 {
44040 rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j);
44041 rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128;
44042 }
44043 }
44044
44045 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
44046 vperm = force_reg (V32QImode, vperm);
44047
44048 h = gen_reg_rtx (V32QImode);
44049 op = gen_lowpart (V32QImode, d->op0);
44050 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
44051
44052 /* Swap the 128-bit lanes of h into hp. */
44053 hp = gen_reg_rtx (V4DImode);
44054 op = gen_lowpart (V4DImode, h);
44055 emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx,
44056 const1_rtx));
44057
44058 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
44059 vperm = force_reg (V32QImode, vperm);
44060
44061 l = gen_reg_rtx (V32QImode);
44062 op = gen_lowpart (V32QImode, d->op0);
44063 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
44064
44065 op = d->target;
44066 if (d->vmode != V32QImode)
44067 op = gen_reg_rtx (V32QImode);
44068 emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));
44069 if (op != d->target)
44070 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
44071
44072 return true;
44073 }
44074
44075 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
44076 and extract-odd permutations of two V32QImode or V16HImode operands
44077 with two vpshufb insns, vpor and vpermq. We should have already
44078 failed all two or three instruction sequences. */
44079
44080 static bool
44081 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
44082 {
44083 rtx rperm[2][32], vperm, l, h, ior, op, m128;
44084 unsigned int i, nelt, eltsz;
44085
44086 if (!TARGET_AVX2
44087 || d->one_operand_p
44088 || (d->vmode != V32QImode && d->vmode != V16HImode))
44089 return false;
44090
44091 for (i = 0; i < d->nelt; ++i)
44092 if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2))
44093 return false;
44094
44095 if (d->testing_p)
44096 return true;
44097
44098 nelt = d->nelt;
44099 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
44100
44101 /* Generate two permutation masks. In the first permutation mask
44102 the first quarter will contain indexes for the first half
44103 of op0, the second quarter will contain bit 7 set, the third quarter
44104 will contain indexes for the second half of op0, and the
44105 last quarter will contain bit 7 set. In the second permutation mask
44106 the first quarter will contain bit 7 set, the second quarter
44107 indexes for the first half of op1, the third quarter bit 7 set,
44108 and the last quarter indexes for the second half of op1.
44109 I.e. the first mask e.g. for V32QImode extract even will be:
44110 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
44111 (all values masked with 0xf except for -128) and second mask
44112 for extract even will be
44113 -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */
44114 m128 = GEN_INT (-128);
44115 for (i = 0; i < nelt; ++i)
44116 {
44117 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
44118 unsigned which = d->perm[i] >= nelt;
44119 unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0;
44120
44121 for (j = 0; j < eltsz; ++j)
44122 {
44123 rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j);
44124 rperm[1 - which][(i * eltsz + j) ^ xorv] = m128;
44125 }
44126 }
44127
44128 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
44129 vperm = force_reg (V32QImode, vperm);
44130
44131 l = gen_reg_rtx (V32QImode);
44132 op = gen_lowpart (V32QImode, d->op0);
44133 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
44134
44135 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
44136 vperm = force_reg (V32QImode, vperm);
44137
44138 h = gen_reg_rtx (V32QImode);
44139 op = gen_lowpart (V32QImode, d->op1);
44140 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
44141
44142 ior = gen_reg_rtx (V32QImode);
44143 emit_insn (gen_iorv32qi3 (ior, l, h));
44144
44145 /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation. */
44146 op = gen_reg_rtx (V4DImode);
44147 ior = gen_lowpart (V4DImode, ior);
44148 emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
44149 const1_rtx, GEN_INT (3)));
44150 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
44151
44152 return true;
44153 }
44154
44155 /* A subroutine of expand_vec_perm_even_odd.  Implement extract-even
44156 and extract-odd permutations. */
44157
44158 static bool
44159 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
44160 {
44161 rtx t1, t2, t3, t4, t5;
44162
44163 switch (d->vmode)
44164 {
44165 case V4DFmode:
44166 if (d->testing_p)
44167 break;
44168 t1 = gen_reg_rtx (V4DFmode);
44169 t2 = gen_reg_rtx (V4DFmode);
44170
44171 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
44172 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
44173 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
44174
44175 /* Now an unpck[lh]pd will produce the result required. */
44176 if (odd)
44177 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
44178 else
44179 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
44180 emit_insn (t3);
44181 break;
44182
44183 case V8SFmode:
44184 {
44185 int mask = odd ? 0xdd : 0x88;
44186
44187 if (d->testing_p)
44188 break;
44189 t1 = gen_reg_rtx (V8SFmode);
44190 t2 = gen_reg_rtx (V8SFmode);
44191 t3 = gen_reg_rtx (V8SFmode);
44192
44193 /* Shuffle within the 128-bit lanes to produce:
44194 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
44195 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
44196 GEN_INT (mask)));
44197
44198 /* Shuffle the lanes around to produce:
44199 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
44200 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
44201 GEN_INT (0x3)));
44202
44203 /* Shuffle within the 128-bit lanes to produce:
44204 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
44205 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
44206
44207 /* Shuffle within the 128-bit lanes to produce:
44208 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
44209 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
44210
44211 /* Shuffle the lanes around to produce:
44212 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
44213 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
44214 GEN_INT (0x20)));
44215 }
44216 break;
44217
44218 case V2DFmode:
44219 case V4SFmode:
44220 case V2DImode:
44221 case V4SImode:
44222 /* These are always directly implementable by expand_vec_perm_1. */
44223 gcc_unreachable ();
44224
44225 case V8HImode:
44226 if (TARGET_SSSE3 && !TARGET_SLOW_PSHUFB)
44227 return expand_vec_perm_pshufb2 (d);
44228 else
44229 {
44230 if (d->testing_p)
44231 break;
44232 /* We need 2*log2(N)-1 operations to achieve odd/even
44233 with interleave. */
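/* For V8HImode that is 2*3 - 1 = 5 interleave insns, emitted below.  */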
44234 t1 = gen_reg_rtx (V8HImode);
44235 t2 = gen_reg_rtx (V8HImode);
44236 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
44237 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
44238 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
44239 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
44240 if (odd)
44241 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
44242 else
44243 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
44244 emit_insn (t3);
44245 }
44246 break;
44247
44248 case V16QImode:
44249 if (TARGET_SSSE3 && !TARGET_SLOW_PSHUFB)
44250 return expand_vec_perm_pshufb2 (d);
44251 else
44252 {
44253 if (d->testing_p)
44254 break;
44255 t1 = gen_reg_rtx (V16QImode);
44256 t2 = gen_reg_rtx (V16QImode);
44257 t3 = gen_reg_rtx (V16QImode);
44258 emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
44259 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
44260 emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
44261 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
44262 emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
44263 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
44264 if (odd)
44265 t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
44266 else
44267 t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
44268 emit_insn (t3);
44269 }
44270 break;
44271
44272 case V16HImode:
44273 case V32QImode:
44274 return expand_vec_perm_vpshufb2_vpermq_even_odd (d);
44275
44276 case V4DImode:
44277 if (!TARGET_AVX2)
44278 {
44279 struct expand_vec_perm_d d_copy = *d;
44280 d_copy.vmode = V4DFmode;
44281 if (d->testing_p)
44282 d_copy.target = gen_lowpart (V4DFmode, d->target);
44283 else
44284 d_copy.target = gen_reg_rtx (V4DFmode);
44285 d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
44286 d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
44287 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
44288 {
44289 if (!d->testing_p)
44290 emit_move_insn (d->target,
44291 gen_lowpart (V4DImode, d_copy.target));
44292 return true;
44293 }
44294 return false;
44295 }
44296
44297 if (d->testing_p)
44298 break;
44299
44300 t1 = gen_reg_rtx (V4DImode);
44301 t2 = gen_reg_rtx (V4DImode);
44302
44303 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
44304 emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20)));
44305 emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31)));
44306
44307 /* Now an vpunpck[lh]qdq will produce the result required. */
44308 if (odd)
44309 t3 = gen_avx2_interleave_highv4di (d->target, t1, t2);
44310 else
44311 t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2);
44312 emit_insn (t3);
44313 break;
44314
44315 case V8SImode:
44316 if (!TARGET_AVX2)
44317 {
44318 struct expand_vec_perm_d d_copy = *d;
44319 d_copy.vmode = V8SFmode;
44320 if (d->testing_p)
44321 d_copy.target = gen_lowpart (V8SFmode, d->target);
44322 else
44323 d_copy.target = gen_reg_rtx (V8SFmode);
44324 d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
44325 d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
44326 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
44327 {
44328 if (!d->testing_p)
44329 emit_move_insn (d->target,
44330 gen_lowpart (V8SImode, d_copy.target));
44331 return true;
44332 }
44333 return false;
44334 }
44335
44336 if (d->testing_p)
44337 break;
44338
44339 t1 = gen_reg_rtx (V8SImode);
44340 t2 = gen_reg_rtx (V8SImode);
44341 t3 = gen_reg_rtx (V4DImode);
44342 t4 = gen_reg_rtx (V4DImode);
44343 t5 = gen_reg_rtx (V4DImode);
44344
44345 /* Shuffle the lanes around into
44346 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. */
44347 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, d->op0),
44348 gen_lowpart (V4DImode, d->op1),
44349 GEN_INT (0x20)));
44350 emit_insn (gen_avx2_permv2ti (t4, gen_lowpart (V4DImode, d->op0),
44351 gen_lowpart (V4DImode, d->op1),
44352 GEN_INT (0x31)));
44353
44354 /* Swap the 2nd and 3rd position in each lane into
44355 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
44356 emit_insn (gen_avx2_pshufdv3 (t1, gen_lowpart (V8SImode, t3),
44357 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
44358 emit_insn (gen_avx2_pshufdv3 (t2, gen_lowpart (V8SImode, t4),
44359 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
44360
44361 /* Now an vpunpck[lh]qdq will produce
44362 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }. */
44363 if (odd)
44364 t3 = gen_avx2_interleave_highv4di (t5, gen_lowpart (V4DImode, t1),
44365 gen_lowpart (V4DImode, t2));
44366 else
44367 t3 = gen_avx2_interleave_lowv4di (t5, gen_lowpart (V4DImode, t1),
44368 gen_lowpart (V4DImode, t2));
44369 emit_insn (t3);
44370 emit_move_insn (d->target, gen_lowpart (V8SImode, t5));
44371 break;
44372
44373 default:
44374 gcc_unreachable ();
44375 }
44376
44377 return true;
44378 }
44379
44380 /* A subroutine of ix86_expand_vec_perm_const_1.  Pattern match
44381 extract-even and extract-odd permutations. */
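/* For example, with two V4DFmode operands the extract-even selector is
   { 0, 2, 4, 6 } and the extract-odd selector is { 1, 3, 5, 7 }.  */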
44382
44383 static bool
44384 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
44385 {
44386 unsigned i, odd, nelt = d->nelt;
44387
44388 odd = d->perm[0];
44389 if (odd != 0 && odd != 1)
44390 return false;
44391
44392 for (i = 1; i < nelt; ++i)
44393 if (d->perm[i] != 2 * i + odd)
44394 return false;
44395
44396 return expand_vec_perm_even_odd_1 (d, odd);
44397 }
44398
44399 /* A subroutine of expand_vec_perm_broadcast.  Implement broadcast
44400 permutations. We assume that expand_vec_perm_1 has already failed. */
44401
44402 static bool
44403 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
44404 {
44405 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
44406 enum machine_mode vmode = d->vmode;
44407 unsigned char perm2[4];
44408 rtx op0 = d->op0, dest;
44409 bool ok;
44410
44411 switch (vmode)
44412 {
44413 case V4DFmode:
44414 case V8SFmode:
44415 /* These are special-cased in sse.md so that we can optionally
44416 use the vbroadcast instruction. They expand to two insns
44417 if the input happens to be in a register. */
44418 gcc_unreachable ();
44419
44420 case V2DFmode:
44421 case V2DImode:
44422 case V4SFmode:
44423 case V4SImode:
44424 /* These are always implementable using standard shuffle patterns. */
44425 gcc_unreachable ();
44426
44427 case V8HImode:
44428 case V16QImode:
44429 /* These can be implemented via interleave. We save one insn by
44430 stopping once we have promoted to V4SImode and then using pshufd. */
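/* For instance, broadcasting element 5 of a V16QImode vector first
   interleaves the low halves (5 < 8), which leaves the byte in V8HImode
   element 5; it then interleaves the high halves (5 >= 4), leaving it
   in V4SImode element 1; a pshufd of { 1, 1, 1, 1 } finishes the job.  */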
44431 if (d->testing_p)
44432 return true;
44433 do
44434 {
44435 rtx dest;
44436 rtx (*gen) (rtx, rtx, rtx)
44437 = vmode == V16QImode ? gen_vec_interleave_lowv16qi
44438 : gen_vec_interleave_lowv8hi;
44439
44440 if (elt >= nelt2)
44441 {
44442 gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
44443 : gen_vec_interleave_highv8hi;
44444 elt -= nelt2;
44445 }
44446 nelt2 /= 2;
44447
44448 dest = gen_reg_rtx (vmode);
44449 emit_insn (gen (dest, op0, op0));
44450 vmode = get_mode_wider_vector (vmode);
44451 op0 = gen_lowpart (vmode, dest);
44452 }
44453 while (vmode != V4SImode);
44454
44455 memset (perm2, elt, 4);
44456 dest = gen_reg_rtx (V4SImode);
44457 ok = expand_vselect (dest, op0, perm2, 4, d->testing_p);
44458 gcc_assert (ok);
44459 if (!d->testing_p)
44460 emit_move_insn (d->target, gen_lowpart (d->vmode, dest));
44461 return true;
44462
44463 case V32QImode:
44464 case V16HImode:
44465 case V8SImode:
44466 case V4DImode:
44467 /* For AVX2 broadcasts of the first element vpbroadcast* or
44468 vpermq should be used by expand_vec_perm_1. */
44469 gcc_assert (!TARGET_AVX2 || d->perm[0]);
44470 return false;
44471
44472 default:
44473 gcc_unreachable ();
44474 }
44475 }
44476
44477 /* A subroutine of ix86_expand_vec_perm_const_1.  Pattern match
44478 broadcast permutations. */
44479
44480 static bool
44481 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
44482 {
44483 unsigned i, elt, nelt = d->nelt;
44484
44485 if (!d->one_operand_p)
44486 return false;
44487
44488 elt = d->perm[0];
44489 for (i = 1; i < nelt; ++i)
44490 if (d->perm[i] != elt)
44491 return false;
44492
44493 return expand_vec_perm_broadcast_1 (d);
44494 }
44495
44496 /* Implement arbitrary permutation of two V32QImode or V16HImode operands
44497 with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed
44498 all the shorter instruction sequences. */
44499
44500 static bool
44501 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
44502 {
44503 rtx rperm[4][32], vperm, l[2], h[2], op, m128;
44504 unsigned int i, nelt, eltsz;
44505 bool used[4];
44506
44507 if (!TARGET_AVX2
44508 || d->one_operand_p
44509 || (d->vmode != V32QImode && d->vmode != V16HImode))
44510 return false;
44511
44512 if (d->testing_p)
44513 return true;
44514
44515 nelt = d->nelt;
44516 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
44517
44518 /* Generate 4 permutation masks. If the required element is within
44519 the same lane, it is shuffled in. If the required element is from the
44520 other lane, force a zero by setting bit 7 in the permutation mask.
44521 The other mask has a non-negative element wherever an element is
44522 requested from the other lane; that element is also moved to the other
44523 lane, so that the result of vpshufb can have the two V2TImode halves
44524 swapped. */
44525 m128 = GEN_INT (-128);
44526 for (i = 0; i < 32; ++i)
44527 {
44528 rperm[0][i] = m128;
44529 rperm[1][i] = m128;
44530 rperm[2][i] = m128;
44531 rperm[3][i] = m128;
44532 }
44533 used[0] = false;
44534 used[1] = false;
44535 used[2] = false;
44536 used[3] = false;
44537 for (i = 0; i < nelt; ++i)
44538 {
44539 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
44540 unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
44541 unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0);
44542
44543 for (j = 0; j < eltsz; ++j)
44544 rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j);
44545 used[which] = true;
44546 }
44547
44548 for (i = 0; i < 2; ++i)
44549 {
44550 if (!used[2 * i + 1])
44551 {
44552 h[i] = NULL_RTX;
44553 continue;
44554 }
44555 vperm = gen_rtx_CONST_VECTOR (V32QImode,
44556 gen_rtvec_v (32, rperm[2 * i + 1]));
44557 vperm = force_reg (V32QImode, vperm);
44558 h[i] = gen_reg_rtx (V32QImode);
44559 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
44560 emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm));
44561 }
44562
44563 /* Swap the 128-bit lanes of h[X]. */
44564 for (i = 0; i < 2; ++i)
44565 {
44566 if (h[i] == NULL_RTX)
44567 continue;
44568 op = gen_reg_rtx (V4DImode);
44569 emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]),
44570 const2_rtx, GEN_INT (3), const0_rtx,
44571 const1_rtx));
44572 h[i] = gen_lowpart (V32QImode, op);
44573 }
44574
44575 for (i = 0; i < 2; ++i)
44576 {
44577 if (!used[2 * i])
44578 {
44579 l[i] = NULL_RTX;
44580 continue;
44581 }
44582 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i]));
44583 vperm = force_reg (V32QImode, vperm);
44584 l[i] = gen_reg_rtx (V32QImode);
44585 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
44586 emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm));
44587 }
44588
44589 for (i = 0; i < 2; ++i)
44590 {
44591 if (h[i] && l[i])
44592 {
44593 op = gen_reg_rtx (V32QImode);
44594 emit_insn (gen_iorv32qi3 (op, l[i], h[i]));
44595 l[i] = op;
44596 }
44597 else if (h[i])
44598 l[i] = h[i];
44599 }
44600
44601 gcc_assert (l[0] && l[1]);
44602 op = d->target;
44603 if (d->vmode != V32QImode)
44604 op = gen_reg_rtx (V32QImode);
44605 emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
44606 if (op != d->target)
44607 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
44608 return true;
44609 }
44610
44611 /* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
44612 With all of the interface bits taken care of, perform the expansion
44613 in D and return true on success. */
44614
44615 static bool
44616 ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
44617 {
44618 /* Try a single instruction expansion. */
44619 if (expand_vec_perm_1 (d))
44620 return true;
44621
44622 /* Try sequences of two instructions. */
44623
44624 if (expand_vec_perm_pshuflw_pshufhw (d))
44625 return true;
44626
44627 if (expand_vec_perm_palignr (d))
44628 return true;
44629
44630 if (expand_vec_perm_interleave2 (d))
44631 return true;
44632
44633 if (expand_vec_perm_broadcast (d))
44634 return true;
44635
44636 if (expand_vec_perm_vpermq_perm_1 (d))
44637 return true;
44638
44639 if (expand_vec_perm_vperm2f128 (d))
44640 return true;
44641
44642 if (expand_vec_perm_pblendv (d))
44643 return true;
44644
44645 /* Try sequences of three instructions. */
44646
44647 if (expand_vec_perm_2vperm2f128_vshuf (d))
44648 return true;
44649
44650 if (expand_vec_perm_pshufb2 (d))
44651 return true;
44652
44653 if (expand_vec_perm_interleave3 (d))
44654 return true;
44655
44656 if (expand_vec_perm_vperm2f128_vblend (d))
44657 return true;
44658
44659 /* Try sequences of four instructions. */
44660
44661 if (expand_vec_perm_vpshufb2_vpermq (d))
44662 return true;
44663
44664 if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
44665 return true;
44666
44667 /* ??? Look for narrow permutations whose element orderings would
44668 allow the promotion to a wider mode. */
44669
44670 /* ??? Look for sequences of interleave or a wider permute that place
44671 the data into the correct lanes for a half-vector shuffle like
44672 pshuf[lh]w or vpermilps. */
44673
44674 /* ??? Look for sequences of interleave that produce the desired results.
44675 The combinatorics of punpck[lh] get pretty ugly... */
44676
44677 if (expand_vec_perm_even_odd (d))
44678 return true;
44679
44680 /* Even longer sequences. */
44681 if (expand_vec_perm_vpshufb4_vpermq2 (d))
44682 return true;
44683
44684 return false;
44685 }
44686
44687 /* If a permutation only uses one operand, make it clear. Returns true
44688 if the permutation references both operands. */
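/* For example, a V4SImode selector { 5, 7, 4, 6 } only references the
   second operand; it is rewritten below to { 1, 3, 0, 2 } with op0
   replaced by op1, so that the single-operand matchers can handle it.  */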
44689
44690 static bool
44691 canonicalize_perm (struct expand_vec_perm_d *d)
44692 {
44693 int i, which, nelt = d->nelt;
44694
44695 for (i = which = 0; i < nelt; ++i)
44696 which |= (d->perm[i] < nelt ? 1 : 2);
44697
44698 d->one_operand_p = true;
44699 switch (which)
44700 {
44701 default:
44702 gcc_unreachable();
44703
44704 case 3:
44705 if (!rtx_equal_p (d->op0, d->op1))
44706 {
44707 d->one_operand_p = false;
44708 break;
44709 }
44710 /* The elements of PERM do not suggest that only the first operand
44711 is used, but both operands are identical. Allow easier matching
44712 of the permutation by folding the permutation into the single
44713 input vector. */
44714 /* FALLTHRU */
44715
44716 case 2:
44717 for (i = 0; i < nelt; ++i)
44718 d->perm[i] &= nelt - 1;
44719 d->op0 = d->op1;
44720 break;
44721
44722 case 1:
44723 d->op1 = d->op0;
44724 break;
44725 }
44726
44727 return (which == 3);
44728 }
44729
44730 bool
44731 ix86_expand_vec_perm_const (rtx operands[4])
44732 {
44733 struct expand_vec_perm_d d;
44734 unsigned char perm[MAX_VECT_LEN];
44735 int i, nelt;
44736 bool two_args;
44737 rtx sel;
44738
44739 d.target = operands[0];
44740 d.op0 = operands[1];
44741 d.op1 = operands[2];
44742 sel = operands[3];
44743
44744 d.vmode = GET_MODE (d.target);
44745 gcc_assert (VECTOR_MODE_P (d.vmode));
44746 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
44747 d.testing_p = false;
44748
44749 gcc_assert (GET_CODE (sel) == CONST_VECTOR);
44750 gcc_assert (XVECLEN (sel, 0) == nelt);
44751 gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
44752
44753 for (i = 0; i < nelt; ++i)
44754 {
44755 rtx e = XVECEXP (sel, 0, i);
44756 int ei = INTVAL (e) & (2 * nelt - 1);
44757 d.perm[i] = ei;
44758 perm[i] = ei;
44759 }
44760
44761 two_args = canonicalize_perm (&d);
44762
44763 if (ix86_expand_vec_perm_const_1 (&d))
44764 return true;
44765
44766 /* If the selector says both arguments are needed, but the operands are the
44767 same, the above tried to expand with one_operand_p and flattened selector.
44768 If that didn't work, retry without one_operand_p; we succeeded with that
44769 during testing. */
44770 if (two_args && d.one_operand_p)
44771 {
44772 d.one_operand_p = false;
44773 memcpy (d.perm, perm, sizeof (perm));
44774 return ix86_expand_vec_perm_const_1 (&d);
44775 }
44776
44777 return false;
44778 }
44779
44780 /* Implement targetm.vectorize.vec_perm_const_ok. */
44781
44782 static bool
44783 ix86_vectorize_vec_perm_const_ok (enum machine_mode vmode,
44784 const unsigned char *sel)
44785 {
44786 struct expand_vec_perm_d d;
44787 unsigned int i, nelt, which;
44788 bool ret;
44789
44790 d.vmode = vmode;
44791 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
44792 d.testing_p = true;
44793
44794 /* Given sufficient ISA support we can just return true here
44795 for selected vector modes. */
44796 if (d.vmode == V16SImode || d.vmode == V16SFmode
44797 || d.vmode == V8DFmode || d.vmode == V8DImode)
44798 /* All implementable with a single vpermi2 insn. */
44799 return true;
44800 if (GET_MODE_SIZE (d.vmode) == 16)
44801 {
44802 /* All implementable with a single vpperm insn. */
44803 if (TARGET_XOP)
44804 return true;
44805 /* All implementable with 2 pshufb + 1 ior. */
44806 if (TARGET_SSSE3)
44807 return true;
44808 /* All implementable with shufpd or unpck[lh]pd. */
44809 if (d.nelt == 2)
44810 return true;
44811 }
44812
44813 /* Extract the values from the vector CST into the permutation
44814 array in D. */
44815 memcpy (d.perm, sel, nelt);
44816 for (i = which = 0; i < nelt; ++i)
44817 {
44818 unsigned char e = d.perm[i];
44819 gcc_assert (e < 2 * nelt);
44820 which |= (e < nelt ? 1 : 2);
44821 }
44822
44823 /* For all elements from second vector, fold the elements to first. */
44824 if (which == 2)
44825 for (i = 0; i < nelt; ++i)
44826 d.perm[i] -= nelt;
44827
44828 /* Check whether the mask can be applied to the vector type. */
44829 d.one_operand_p = (which != 3);
44830
44831 /* Implementable with shufps or pshufd. */
44832 if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode))
44833 return true;
44834
44835 /* Otherwise we have to go through the motions and see if we can
44836 figure out how to generate the requested permutation. */
44837 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
44838 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
44839 if (!d.one_operand_p)
44840 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
44841
44842 start_sequence ();
44843 ret = ix86_expand_vec_perm_const_1 (&d);
44844 end_sequence ();
44845
44846 return ret;
44847 }
44848
44849 void
44850 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
44851 {
44852 struct expand_vec_perm_d d;
44853 unsigned i, nelt;
44854
44855 d.target = targ;
44856 d.op0 = op0;
44857 d.op1 = op1;
44858 d.vmode = GET_MODE (targ);
44859 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
44860 d.one_operand_p = false;
44861 d.testing_p = false;
44862
44863 for (i = 0; i < nelt; ++i)
44864 d.perm[i] = i * 2 + odd;
44865
44866 /* We'll either be able to implement the permutation directly... */
44867 if (expand_vec_perm_1 (&d))
44868 return;
44869
44870 /* ... or we use the special-case patterns. */
44871 expand_vec_perm_even_odd_1 (&d, odd);
44872 }
44873
44874 static void
44875 ix86_expand_vec_interleave (rtx targ, rtx op0, rtx op1, bool high_p)
44876 {
44877 struct expand_vec_perm_d d;
44878 unsigned i, nelt, base;
44879 bool ok;
44880
44881 d.target = targ;
44882 d.op0 = op0;
44883 d.op1 = op1;
44884 d.vmode = GET_MODE (targ);
44885 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
44886 d.one_operand_p = false;
44887 d.testing_p = false;
44888
44889 base = high_p ? nelt / 2 : 0;
44890 for (i = 0; i < nelt / 2; ++i)
44891 {
44892 d.perm[i * 2] = i + base;
44893 d.perm[i * 2 + 1] = i + base + nelt;
44894 }
44895
44896 /* Note that for AVX this isn't one instruction. */
44897 ok = ix86_expand_vec_perm_const_1 (&d);
44898 gcc_assert (ok);
44899 }
44900
44901
44902 /* Expand a vector operation CODE for a V*QImode in terms of the
44903 same operation on V*HImode. */
44904
44905 void
44906 ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2)
44907 {
44908 enum machine_mode qimode = GET_MODE (dest);
44909 enum machine_mode himode;
44910 rtx (*gen_il) (rtx, rtx, rtx);
44911 rtx (*gen_ih) (rtx, rtx, rtx);
44912 rtx op1_l, op1_h, op2_l, op2_h, res_l, res_h;
44913 struct expand_vec_perm_d d;
44914 bool ok, full_interleave;
44915 bool uns_p = false;
44916 int i;
44917
44918 switch (qimode)
44919 {
44920 case V16QImode:
44921 himode = V8HImode;
44922 gen_il = gen_vec_interleave_lowv16qi;
44923 gen_ih = gen_vec_interleave_highv16qi;
44924 break;
44925 case V32QImode:
44926 himode = V16HImode;
44927 gen_il = gen_avx2_interleave_lowv32qi;
44928 gen_ih = gen_avx2_interleave_highv32qi;
44929 break;
44930 default:
44931 gcc_unreachable ();
44932 }
44933
44934 op2_l = op2_h = op2;
44935 switch (code)
44936 {
44937 case MULT:
44938 /* Unpack data such that we've got a source byte in each low byte of
44939 each word. We don't care what goes into the high byte of each word.
44940 Rather than trying to get zero in there, it is most convenient to let
44941 it be a copy of the low byte. */
44942 op2_l = gen_reg_rtx (qimode);
44943 op2_h = gen_reg_rtx (qimode);
44944 emit_insn (gen_il (op2_l, op2, op2));
44945 emit_insn (gen_ih (op2_h, op2, op2));
44946 /* FALLTHRU */
44947
44948 op1_l = gen_reg_rtx (qimode);
44949 op1_h = gen_reg_rtx (qimode);
44950 emit_insn (gen_il (op1_l, op1, op1));
44951 emit_insn (gen_ih (op1_h, op1, op1));
44952 full_interleave = qimode == V16QImode;
44953 break;
44954
44955 case ASHIFT:
44956 case LSHIFTRT:
44957 uns_p = true;
44958 /* FALLTHRU */
44959 case ASHIFTRT:
44960 op1_l = gen_reg_rtx (himode);
44961 op1_h = gen_reg_rtx (himode);
44962 ix86_expand_sse_unpack (op1_l, op1, uns_p, false);
44963 ix86_expand_sse_unpack (op1_h, op1, uns_p, true);
44964 full_interleave = true;
44965 break;
44966 default:
44967 gcc_unreachable ();
44968 }
44969
44970 /* Perform the operation. */
44971 res_l = expand_simple_binop (himode, code, op1_l, op2_l, NULL_RTX,
44972 1, OPTAB_DIRECT);
44973 res_h = expand_simple_binop (himode, code, op1_h, op2_h, NULL_RTX,
44974 1, OPTAB_DIRECT);
44975 gcc_assert (res_l && res_h);
44976
44977 /* Merge the data back into the right place. */
44978 d.target = dest;
44979 d.op0 = gen_lowpart (qimode, res_l);
44980 d.op1 = gen_lowpart (qimode, res_h);
44981 d.vmode = qimode;
44982 d.nelt = GET_MODE_NUNITS (qimode);
44983 d.one_operand_p = false;
44984 d.testing_p = false;
44985
44986 if (full_interleave)
44987 {
44988 /* For SSE2, we used a full interleave, so the desired
44989 results are in the even elements. */
44990 for (i = 0; i < 32; ++i)
44991 d.perm[i] = i * 2;
44992 }
44993 else
44994 {
44995 /* For AVX, the interleave used above was not cross-lane. So the
44996 extraction is of the even elements, but with the second and third
44997 quarters swapped. Happily, that is even one insn shorter than the plain even extraction. */
44998 for (i = 0; i < 32; ++i)
44999 d.perm[i] = i * 2 + ((i & 24) == 8 ? 16 : (i & 24) == 16 ? -16 : 0);
45000 }
45001
45002 ok = ix86_expand_vec_perm_const_1 (&d);
45003 gcc_assert (ok);
45004
45005 set_unique_reg_note (get_last_insn (), REG_EQUAL,
45006 gen_rtx_fmt_ee (code, qimode, op1, op2));
45007 }
45008
45009 /* Helper function of ix86_expand_mul_widen_evenodd. Return true
45010 if op is CONST_VECTOR with all odd elements equal to their
45011 preceding element. */
45012
45013 static bool
45014 const_vector_equal_evenodd_p (rtx op)
45015 {
45016 enum machine_mode mode = GET_MODE (op);
45017 int i, nunits = GET_MODE_NUNITS (mode);
45018 if (GET_CODE (op) != CONST_VECTOR
45019 || nunits != CONST_VECTOR_NUNITS (op))
45020 return false;
45021 for (i = 0; i < nunits; i += 2)
45022 if (CONST_VECTOR_ELT (op, i) != CONST_VECTOR_ELT (op, i + 1))
45023 return false;
45024 return true;
45025 }
45026
45027 void
45028 ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
45029 bool uns_p, bool odd_p)
45030 {
45031 enum machine_mode mode = GET_MODE (op1);
45032 enum machine_mode wmode = GET_MODE (dest);
45033 rtx x;
45034 rtx orig_op1 = op1, orig_op2 = op2;
45035
45036 if (!nonimmediate_operand (op1, mode))
45037 op1 = force_reg (mode, op1);
45038 if (!nonimmediate_operand (op2, mode))
45039 op2 = force_reg (mode, op2);
45040
45041 /* We only play even/odd games with vectors of SImode. */
45042 gcc_assert (mode == V4SImode || mode == V8SImode || mode == V16SImode);
45043
45044 /* If we're looking for the odd results, shift those members down to
45045 the even slots. For some cpus this is faster than a PSHUFD. */
45046 if (odd_p)
45047 {
45048 /* For XOP use vpmacsdqh, but only for smult, as it is only
45049 signed. */
45050 if (TARGET_XOP && mode == V4SImode && !uns_p)
45051 {
45052 x = force_reg (wmode, CONST0_RTX (wmode));
45053 emit_insn (gen_xop_pmacsdqh (dest, op1, op2, x));
45054 return;
45055 }
45056
45057 x = GEN_INT (GET_MODE_UNIT_BITSIZE (mode));
45058 if (!const_vector_equal_evenodd_p (orig_op1))
45059 op1 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op1),
45060 x, NULL, 1, OPTAB_DIRECT);
45061 if (!const_vector_equal_evenodd_p (orig_op2))
45062 op2 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op2),
45063 x, NULL, 1, OPTAB_DIRECT);
45064 op1 = gen_lowpart (mode, op1);
45065 op2 = gen_lowpart (mode, op2);
45066 }
45067
45068 if (mode == V16SImode)
45069 {
45070 if (uns_p)
45071 x = gen_vec_widen_umult_even_v16si (dest, op1, op2);
45072 else
45073 x = gen_vec_widen_smult_even_v16si (dest, op1, op2);
45074 }
45075 else if (mode == V8SImode)
45076 {
45077 if (uns_p)
45078 x = gen_vec_widen_umult_even_v8si (dest, op1, op2);
45079 else
45080 x = gen_vec_widen_smult_even_v8si (dest, op1, op2);
45081 }
45082 else if (uns_p)
45083 x = gen_vec_widen_umult_even_v4si (dest, op1, op2);
45084 else if (TARGET_SSE4_1)
45085 x = gen_sse4_1_mulv2siv2di3 (dest, op1, op2);
45086 else
45087 {
45088 rtx s1, s2, t0, t1, t2;
45089
45090 /* The easiest way to implement this without PMULDQ is to go through
45091 the motions as if we are performing a full 64-bit multiply, except
45092 that we need to do less shuffling of the elements. */
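/* The underlying identity (sketched): if A and B are the signed values
   and uA, uB their unsigned interpretations, then modulo 2^64
   A * B = uA * uB - 2^32 * ((A < 0 ? uB : 0) + (B < 0 ? uA : 0)),
   so the all-ones comparison masks computed below, multiplied by the
   other operand and shifted left by 32, supply exactly that correction.  */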
45093
45094 /* Compute the sign-extension, aka highparts, of the two operands. */
45095 s1 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
45096 op1, pc_rtx, pc_rtx);
45097 s2 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
45098 op2, pc_rtx, pc_rtx);
45099
45100 /* Multiply LO(A) * HI(B), and vice-versa. */
45101 t1 = gen_reg_rtx (wmode);
45102 t2 = gen_reg_rtx (wmode);
45103 emit_insn (gen_vec_widen_umult_even_v4si (t1, s1, op2));
45104 emit_insn (gen_vec_widen_umult_even_v4si (t2, s2, op1));
45105
45106 /* Multiply LO(A) * LO(B). */
45107 t0 = gen_reg_rtx (wmode);
45108 emit_insn (gen_vec_widen_umult_even_v4si (t0, op1, op2));
45109
45110 /* Combine and shift the highparts into place. */
45111 t1 = expand_binop (wmode, add_optab, t1, t2, t1, 1, OPTAB_DIRECT);
45112 t1 = expand_binop (wmode, ashl_optab, t1, GEN_INT (32), t1,
45113 1, OPTAB_DIRECT);
45114
45115 /* Combine high and low parts. */
45116 force_expand_binop (wmode, add_optab, t0, t1, dest, 1, OPTAB_DIRECT);
45117 return;
45118 }
45119 emit_insn (x);
45120 }
45121
45122 void
45123 ix86_expand_mul_widen_hilo (rtx dest, rtx op1, rtx op2,
45124 bool uns_p, bool high_p)
45125 {
45126 enum machine_mode wmode = GET_MODE (dest);
45127 enum machine_mode mode = GET_MODE (op1);
45128 rtx t1, t2, t3, t4, mask;
45129
45130 switch (mode)
45131 {
45132 case V4SImode:
45133 t1 = gen_reg_rtx (mode);
45134 t2 = gen_reg_rtx (mode);
45135 if (TARGET_XOP && !uns_p)
45136 {
45137 /* With XOP, we have pmacsdqh, aka mul_widen_odd. In this case,
45138 shuffle the elements once so that all elements are in the right
45139 place for immediate use: { A C B D }. */
45140 emit_insn (gen_sse2_pshufd_1 (t1, op1, const0_rtx, const2_rtx,
45141 const1_rtx, GEN_INT (3)));
45142 emit_insn (gen_sse2_pshufd_1 (t2, op2, const0_rtx, const2_rtx,
45143 const1_rtx, GEN_INT (3)));
45144 }
45145 else
45146 {
45147 /* Put the elements into place for the multiply. */
45148 ix86_expand_vec_interleave (t1, op1, op1, high_p);
45149 ix86_expand_vec_interleave (t2, op2, op2, high_p);
45150 high_p = false;
45151 }
45152 ix86_expand_mul_widen_evenodd (dest, t1, t2, uns_p, high_p);
45153 break;
45154
45155 case V8SImode:
45156 /* Shuffle the elements between the lanes. After this we
45157 have { A B E F | C D G H } for each operand. */
45158 t1 = gen_reg_rtx (V4DImode);
45159 t2 = gen_reg_rtx (V4DImode);
45160 emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, op1),
45161 const0_rtx, const2_rtx,
45162 const1_rtx, GEN_INT (3)));
45163 emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, op2),
45164 const0_rtx, const2_rtx,
45165 const1_rtx, GEN_INT (3)));
45166
45167 /* Shuffle the elements within the lanes. After this we
45168 have { A A B B | C C D D } or { E E F F | G G H H }. */
45169 t3 = gen_reg_rtx (V8SImode);
45170 t4 = gen_reg_rtx (V8SImode);
45171 mask = GEN_INT (high_p
45172 ? 2 + (2 << 2) + (3 << 4) + (3 << 6)
45173 : 0 + (0 << 2) + (1 << 4) + (1 << 6));
45174 emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1), mask));
45175 emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2), mask));
45176
45177 ix86_expand_mul_widen_evenodd (dest, t3, t4, uns_p, false);
45178 break;
45179
45180 case V8HImode:
45181 case V16HImode:
45182 t1 = expand_binop (mode, smul_optab, op1, op2, NULL_RTX,
45183 uns_p, OPTAB_DIRECT);
45184 t2 = expand_binop (mode,
45185 uns_p ? umul_highpart_optab : smul_highpart_optab,
45186 op1, op2, NULL_RTX, uns_p, OPTAB_DIRECT);
45187 gcc_assert (t1 && t2);
45188
45189 t3 = gen_reg_rtx (mode);
45190 ix86_expand_vec_interleave (t3, t1, t2, high_p);
45191 emit_move_insn (dest, gen_lowpart (wmode, t3));
45192 break;
45193
45194 case V16QImode:
45195 case V32QImode:
45196 t1 = gen_reg_rtx (wmode);
45197 t2 = gen_reg_rtx (wmode);
45198 ix86_expand_sse_unpack (t1, op1, uns_p, high_p);
45199 ix86_expand_sse_unpack (t2, op2, uns_p, high_p);
45200
45201 emit_insn (gen_rtx_SET (VOIDmode, dest, gen_rtx_MULT (wmode, t1, t2)));
45202 break;
45203
45204 default:
45205 gcc_unreachable ();
45206 }
45207 }
45208
45209 void
45210 ix86_expand_sse2_mulv4si3 (rtx op0, rtx op1, rtx op2)
45211 {
45212 rtx res_1, res_2, res_3, res_4;
45213
45214 res_1 = gen_reg_rtx (V4SImode);
45215 res_2 = gen_reg_rtx (V4SImode);
45216 res_3 = gen_reg_rtx (V2DImode);
45217 res_4 = gen_reg_rtx (V2DImode);
45218 ix86_expand_mul_widen_evenodd (res_3, op1, op2, true, false);
45219 ix86_expand_mul_widen_evenodd (res_4, op1, op2, true, true);
45220
45221 /* Move the results in element 2 down to element 1; we don't care
45222 what goes in elements 2 and 3. Then we can merge the parts
45223 back together with an interleave.
45224
45225 Note that two other sequences were tried:
45226 (1) Use interleaves at the start instead of psrldq, which allows
45227 us to use a single shufps to merge things back at the end.
45228 (2) Use shufps here to combine the two vectors, then pshufd to
45229 put the elements in the correct order.
45230 In both cases the cost of the reformatting stall was too high
45231 and the overall sequence slower. */
45232
45233 emit_insn (gen_sse2_pshufd_1 (res_1, gen_lowpart (V4SImode, res_3),
45234 const0_rtx, const2_rtx,
45235 const0_rtx, const0_rtx));
45236 emit_insn (gen_sse2_pshufd_1 (res_2, gen_lowpart (V4SImode, res_4),
45237 const0_rtx, const2_rtx,
45238 const0_rtx, const0_rtx));
45239 res_1 = emit_insn (gen_vec_interleave_lowv4si (op0, res_1, res_2));
45240
45241 set_unique_reg_note (res_1, REG_EQUAL, gen_rtx_MULT (V4SImode, op1, op2));
45242 }
45243
45244 void
45245 ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
45246 {
45247 enum machine_mode mode = GET_MODE (op0);
45248 rtx t1, t2, t3, t4, t5, t6;
45249
45250 if (TARGET_XOP && mode == V2DImode)
45251 {
45252 /* op1: A,B,C,D, op2: E,F,G,H */
45253 op1 = gen_lowpart (V4SImode, op1);
45254 op2 = gen_lowpart (V4SImode, op2);
45255
45256 t1 = gen_reg_rtx (V4SImode);
45257 t2 = gen_reg_rtx (V4SImode);
45258 t3 = gen_reg_rtx (V2DImode);
45259 t4 = gen_reg_rtx (V2DImode);
45260
45261 /* t1: B,A,D,C */
45262 emit_insn (gen_sse2_pshufd_1 (t1, op1,
45263 GEN_INT (1),
45264 GEN_INT (0),
45265 GEN_INT (3),
45266 GEN_INT (2)));
45267
45268 /* t2: (B*E),(A*F),(D*G),(C*H) */
45269 emit_insn (gen_mulv4si3 (t2, t1, op2));
45270
45271 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
45272 emit_insn (gen_xop_phadddq (t3, t2));
45273
45274 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
45275 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
45276
45277 /* Multiply lower parts and add all */
45278 t5 = gen_reg_rtx (V2DImode);
45279 emit_insn (gen_vec_widen_umult_even_v4si (t5,
45280 gen_lowpart (V4SImode, op1),
45281 gen_lowpart (V4SImode, op2)));
45282 op0 = expand_binop (mode, add_optab, t5, t4, op0, 1, OPTAB_DIRECT);
45283
45284 }
45285 else
45286 {
45287 enum machine_mode nmode;
45288 rtx (*umul) (rtx, rtx, rtx);
45289
45290 if (mode == V2DImode)
45291 {
45292 umul = gen_vec_widen_umult_even_v4si;
45293 nmode = V4SImode;
45294 }
45295 else if (mode == V4DImode)
45296 {
45297 umul = gen_vec_widen_umult_even_v8si;
45298 nmode = V8SImode;
45299 }
45300 else if (mode == V8DImode)
45301 {
45302 umul = gen_vec_widen_umult_even_v16si;
45303 nmode = V16SImode;
45304 }
45305 else
45306 gcc_unreachable ();
45307
45308
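/* The sequence below uses the schoolbook identity, with each operand
   split into 32-bit halves (A = 2^32*Ah + Al, B = 2^32*Bh + Bl):
   A * B mod 2^64 = Al*Bl + 2^32*(Ah*Bl + Al*Bh).  */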
45309 /* Multiply low parts. */
45310 t1 = gen_reg_rtx (mode);
45311 emit_insn (umul (t1, gen_lowpart (nmode, op1), gen_lowpart (nmode, op2)));
45312
45313 /* Shift input vectors right 32 bits so we can multiply high parts. */
45314 t6 = GEN_INT (32);
45315 t2 = expand_binop (mode, lshr_optab, op1, t6, NULL, 1, OPTAB_DIRECT);
45316 t3 = expand_binop (mode, lshr_optab, op2, t6, NULL, 1, OPTAB_DIRECT);
45317
45318 /* Multiply high parts by low parts. */
45319 t4 = gen_reg_rtx (mode);
45320 t5 = gen_reg_rtx (mode);
45321 emit_insn (umul (t4, gen_lowpart (nmode, t2), gen_lowpart (nmode, op2)));
45322 emit_insn (umul (t5, gen_lowpart (nmode, t3), gen_lowpart (nmode, op1)));
45323
45324 /* Combine and shift the highparts back. */
45325 t4 = expand_binop (mode, add_optab, t4, t5, t4, 1, OPTAB_DIRECT);
45326 t4 = expand_binop (mode, ashl_optab, t4, t6, t4, 1, OPTAB_DIRECT);
45327
45328 /* Combine high and low parts. */
45329 force_expand_binop (mode, add_optab, t1, t4, op0, 1, OPTAB_DIRECT);
45330 }
45331
45332 set_unique_reg_note (get_last_insn (), REG_EQUAL,
45333 gen_rtx_MULT (mode, op1, op2));
45334 }
45335
45336 /* Calculate integer abs() using only SSE2 instructions. */
45337
45338 void
45339 ix86_expand_sse2_abs (rtx target, rtx input)
45340 {
45341 enum machine_mode mode = GET_MODE (target);
45342 rtx tmp0, tmp1, x;
45343
45344 switch (mode)
45345 {
45346 /* For 32-bit signed integer X, the best way to calculate the absolute
45347 value of X is (((signed) X >> (W-1)) ^ X) - ((signed) X >> (W-1)). */
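/* E.g. for X = -5: X >> 31 is -1 (all ones), (-1 ^ -5) is 4, and
   4 - (-1) is 5; for non-negative X the shift gives 0 and X is unchanged.  */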
45348 case V4SImode:
45349 tmp0 = expand_simple_binop (mode, ASHIFTRT, input,
45350 GEN_INT (GET_MODE_BITSIZE
45351 (GET_MODE_INNER (mode)) - 1),
45352 NULL, 0, OPTAB_DIRECT);
45353 tmp1 = expand_simple_binop (mode, XOR, tmp0, input,
45354 NULL, 0, OPTAB_DIRECT);
45355 x = expand_simple_binop (mode, MINUS, tmp1, tmp0,
45356 target, 0, OPTAB_DIRECT);
45357 break;
45358
45359 /* For 16-bit signed integer X, the best way to calculate the absolute
45360 value of X is max (X, -X), as SSE2 provides the PMAXSW insn. */
45361 case V8HImode:
45362 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
45363
45364 x = expand_simple_binop (mode, SMAX, tmp0, input,
45365 target, 0, OPTAB_DIRECT);
45366 break;
45367
45368 /* For 8-bit signed integer X, the best way to calculate the absolute
45369 value of X is min ((unsigned char) X, (unsigned char) (-X)),
45370 as SSE2 provides the PMINUB insn. */
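/* E.g. for X = -5: (unsigned char) X is 251 and (unsigned char) -X is 5,
   so the unsigned minimum yields 5.  */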
45371 case V16QImode:
45372 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
45373
45374 x = expand_simple_binop (V16QImode, UMIN, tmp0, input,
45375 target, 0, OPTAB_DIRECT);
45376 break;
45377
45378 default:
45379 gcc_unreachable ();
45380 }
45381
45382 if (x != target)
45383 emit_move_insn (target, x);
45384 }
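
/* Illustrative sketch, not used by the backend: the scalar form of the
   shift/xor/subtract identity that the V4SImode case above expands,
   assuming a 32-bit int.  The arithmetic right shift yields 0 for
   non-negative X and -1 otherwise, so the XOR/SUB pair conditionally
   negates X without a branch.  The function name is hypothetical.  */
static inline int
ix86_abs_identity_example (int x)
{
  int sign = x >> 31;		/* 0 if x >= 0, -1 if x < 0.  */
  return (x ^ sign) - sign;	/* |x|.  */
}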
45385
45386 /* Expand an insert into a vector register through pinsr insn.
45387 Return true if successful. */
45388
45389 bool
45390 ix86_expand_pinsr (rtx *operands)
45391 {
45392 rtx dst = operands[0];
45393 rtx src = operands[3];
45394
45395 unsigned int size = INTVAL (operands[1]);
45396 unsigned int pos = INTVAL (operands[2]);
45397
45398 if (GET_CODE (dst) == SUBREG)
45399 {
45400 pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
45401 dst = SUBREG_REG (dst);
45402 }
45403
45404 if (GET_CODE (src) == SUBREG)
45405 src = SUBREG_REG (src);
45406
45407 switch (GET_MODE (dst))
45408 {
45409 case V16QImode:
45410 case V8HImode:
45411 case V4SImode:
45412 case V2DImode:
45413 {
45414 enum machine_mode srcmode, dstmode;
45415 rtx (*pinsr)(rtx, rtx, rtx, rtx);
45416
45417 srcmode = mode_for_size (size, MODE_INT, 0);
45418
45419 switch (srcmode)
45420 {
45421 case QImode:
45422 if (!TARGET_SSE4_1)
45423 return false;
45424 dstmode = V16QImode;
45425 pinsr = gen_sse4_1_pinsrb;
45426 break;
45427
45428 case HImode:
45429 if (!TARGET_SSE2)
45430 return false;
45431 dstmode = V8HImode;
45432 pinsr = gen_sse2_pinsrw;
45433 break;
45434
45435 case SImode:
45436 if (!TARGET_SSE4_1)
45437 return false;
45438 dstmode = V4SImode;
45439 pinsr = gen_sse4_1_pinsrd;
45440 break;
45441
45442 case DImode:
45443 gcc_assert (TARGET_64BIT);
45444 if (!TARGET_SSE4_1)
45445 return false;
45446 dstmode = V2DImode;
45447 pinsr = gen_sse4_1_pinsrq;
45448 break;
45449
45450 default:
45451 return false;
45452 }
45453
45454 rtx d = dst;
45455 if (GET_MODE (dst) != dstmode)
45456 d = gen_reg_rtx (dstmode);
45457 src = gen_lowpart (srcmode, src);
45458
45459 pos /= size;
45460
45461 emit_insn (pinsr (d, gen_lowpart (dstmode, dst), src,
45462 GEN_INT (1 << pos)));
45463 if (d != dst)
45464 emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
45465 return true;
45466 }
45467
45468 default:
45469 return false;
45470 }
45471 }
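
/* Worked example (illustrative only): inserting a 16-bit value at bit
   offset 48 of a V8HImode destination gives srcmode = HImode,
   pos = 48 / 16 = 3, and the sse2_pinsrw pattern is emitted with the
   selector GEN_INT (1 << 3).  */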
45472 \f
45473 /* This function returns the calling abi specific va_list type node.
45474 It returns the FNDECL specific va_list type. */
45475
45476 static tree
45477 ix86_fn_abi_va_list (tree fndecl)
45478 {
45479 if (!TARGET_64BIT)
45480 return va_list_type_node;
45481 gcc_assert (fndecl != NULL_TREE);
45482
45483 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
45484 return ms_va_list_type_node;
45485 else
45486 return sysv_va_list_type_node;
45487 }
45488
45489 /* Returns the canonical va_list type specified by TYPE. If there
45490 is no valid TYPE provided, it returns NULL_TREE. */
45491
45492 static tree
45493 ix86_canonical_va_list_type (tree type)
45494 {
45495 tree wtype, htype;
45496
45497 /* Resolve references and pointers to va_list type. */
45498 if (TREE_CODE (type) == MEM_REF)
45499 type = TREE_TYPE (type);
45500 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE (type)))
45501 type = TREE_TYPE (type);
45502 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
45503 type = TREE_TYPE (type);
45504
45505 if (TARGET_64BIT && va_list_type_node != NULL_TREE)
45506 {
45507 wtype = va_list_type_node;
45508 gcc_assert (wtype != NULL_TREE);
45509 htype = type;
45510 if (TREE_CODE (wtype) == ARRAY_TYPE)
45511 {
45512 /* If va_list is an array type, the argument may have decayed
45513 to a pointer type, e.g. by being passed to another function.
45514 In that case, unwrap both types so that we can compare the
45515 underlying records. */
45516 if (TREE_CODE (htype) == ARRAY_TYPE
45517 || POINTER_TYPE_P (htype))
45518 {
45519 wtype = TREE_TYPE (wtype);
45520 htype = TREE_TYPE (htype);
45521 }
45522 }
45523 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
45524 return va_list_type_node;
45525 wtype = sysv_va_list_type_node;
45526 gcc_assert (wtype != NULL_TREE);
45527 htype = type;
45528 if (TREE_CODE (wtype) == ARRAY_TYPE)
45529 {
45530 /* If va_list is an array type, the argument may have decayed
45531 to a pointer type, e.g. by being passed to another function.
45532 In that case, unwrap both types so that we can compare the
45533 underlying records. */
45534 if (TREE_CODE (htype) == ARRAY_TYPE
45535 || POINTER_TYPE_P (htype))
45536 {
45537 wtype = TREE_TYPE (wtype);
45538 htype = TREE_TYPE (htype);
45539 }
45540 }
45541 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
45542 return sysv_va_list_type_node;
45543 wtype = ms_va_list_type_node;
45544 gcc_assert (wtype != NULL_TREE);
45545 htype = type;
45546 if (TREE_CODE (wtype) == ARRAY_TYPE)
45547 {
45548 /* If va_list is an array type, the argument may have decayed
45549 to a pointer type, e.g. by being passed to another function.
45550 In that case, unwrap both types so that we can compare the
45551 underlying records. */
45552 if (TREE_CODE (htype) == ARRAY_TYPE
45553 || POINTER_TYPE_P (htype))
45554 {
45555 wtype = TREE_TYPE (wtype);
45556 htype = TREE_TYPE (htype);
45557 }
45558 }
45559 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
45560 return ms_va_list_type_node;
45561 return NULL_TREE;
45562 }
45563 return std_canonical_va_list_type (type);
45564 }
45565
45566 /* Iterate through the target-specific builtin types for va_list.
45567 IDX denotes the iterator, *PTREE is set to the result type of
45568 the va_list builtin, and *PNAME to its internal type.
45569 Returns zero if there is no element for this index, otherwise
45570 IDX should be increased upon the next call.
45571 Note, do not iterate a base builtin's name like __builtin_va_list.
45572 Used from c_common_nodes_and_builtins. */
45573
45574 static int
45575 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
45576 {
45577 if (TARGET_64BIT)
45578 {
45579 switch (idx)
45580 {
45581 default:
45582 break;
45583
45584 case 0:
45585 *ptree = ms_va_list_type_node;
45586 *pname = "__builtin_ms_va_list";
45587 return 1;
45588
45589 case 1:
45590 *ptree = sysv_va_list_type_node;
45591 *pname = "__builtin_sysv_va_list";
45592 return 1;
45593 }
45594 }
45595
45596 return 0;
45597 }
45598
45599 #undef TARGET_SCHED_DISPATCH
45600 #define TARGET_SCHED_DISPATCH has_dispatch
45601 #undef TARGET_SCHED_DISPATCH_DO
45602 #define TARGET_SCHED_DISPATCH_DO do_dispatch
45603 #undef TARGET_SCHED_REASSOCIATION_WIDTH
45604 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
45605 #undef TARGET_SCHED_REORDER
45606 #define TARGET_SCHED_REORDER ix86_sched_reorder
45607 #undef TARGET_SCHED_ADJUST_PRIORITY
45608 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
45609 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
45610 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
45611 ix86_dependencies_evaluation_hook
45612
45613 /* The size of the dispatch window is the total number of bytes of
45614 object code allowed in a window. */
45615 #define DISPATCH_WINDOW_SIZE 16
45616
45617 /* Number of dispatch windows considered for scheduling. */
45618 #define MAX_DISPATCH_WINDOWS 3
45619
45620 /* Maximum number of instructions in a window. */
45621 #define MAX_INSN 4
45622
45623 /* Maximum number of immediate operands in a window. */
45624 #define MAX_IMM 4
45625
45626 /* Maximum number of immediate bits allowed in a window. */
45627 #define MAX_IMM_SIZE 128
45628
45629 /* Maximum number of 32 bit immediates allowed in a window. */
45630 #define MAX_IMM_32 4
45631
45632 /* Maximum number of 64 bit immediates allowed in a window. */
45633 #define MAX_IMM_64 2
45634
45635 /* Maximum total of loads or prefetches allowed in a window. */
45636 #define MAX_LOAD 2
45637
45638 /* Maximum total of stores allowed in a window. */
45639 #define MAX_STORE 1
45640
45641 #undef BIG
45642 #define BIG 100
45643
45644
45645 /* Dispatch groups. Instructions that affect the mix in a dispatch window. */
45646 enum dispatch_group {
45647 disp_no_group = 0,
45648 disp_load,
45649 disp_store,
45650 disp_load_store,
45651 disp_prefetch,
45652 disp_imm,
45653 disp_imm_32,
45654 disp_imm_64,
45655 disp_branch,
45656 disp_cmp,
45657 disp_jcc,
45658 disp_last
45659 };
45660
45661 /* Number of allowable groups in a dispatch window. It is an array
45662 indexed by dispatch_group enum. 100 is used as a big number,
45663 because the number of these kinds of operations has no
45664 effect on the dispatch window, but we need them for other reasons in
45665 the table. */
45666 static unsigned int num_allowable_groups[disp_last] = {
45667 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
45668 };
45669
45670 char group_name[disp_last + 1][16] = {
45671 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
45672 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
45673 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
45674 };
45675
45676 /* Instruction path. */
45677 enum insn_path {
45678 no_path = 0,
45679 path_single, /* Single micro op. */
45680 path_double, /* Double micro op. */
45681 path_multi, /* Instructions with more than 2 micro ops. */
45682 last_path
45683 };
45684
45685 /* sched_insn_info defines a window to the instructions scheduled in
45686 the basic block. It contains a pointer to the insn_info table and
45687 the instruction scheduled.
45688
45689 Windows are allocated for each basic block and are linked
45690 together. */
45691 typedef struct sched_insn_info_s {
45692 rtx insn;
45693 enum dispatch_group group;
45694 enum insn_path path;
45695 int byte_len;
45696 int imm_bytes;
45697 } sched_insn_info;
45698
45699 /* Linked list of dispatch windows. This is a two-way list of
45700 dispatch windows of a basic block. It contains information about
45701 the number of uops in the window and the total number of
45702 instructions and of bytes in the object code for this dispatch
45703 window. */
45704 typedef struct dispatch_windows_s {
45705 int num_insn; /* Number of insn in the window. */
45706 int num_uops; /* Number of uops in the window. */
45707 int window_size; /* Number of bytes in the window. */
45708 int window_num; /* Window number, either 0 or 1. */
45709 int num_imm; /* Number of immediates in an insn. */
45710 int num_imm_32; /* Number of 32 bit immediates in an insn. */
45711 int num_imm_64; /* Number of 64 bit immediates in an insn. */
45712 int imm_size; /* Total immediates in the window. */
45713 int num_loads; /* Total memory loads in the window. */
45714 int num_stores; /* Total memory stores in the window. */
45715 int violation; /* Violation exists in window. */
45716 sched_insn_info *window; /* Pointer to the window. */
45717 struct dispatch_windows_s *next;
45718 struct dispatch_windows_s *prev;
45719 } dispatch_windows;
45720
45721 /* Immediate values used in an insn. */
45722 typedef struct imm_info_s
45723 {
45724 int imm;
45725 int imm32;
45726 int imm64;
45727 } imm_info;
45728
45729 static dispatch_windows *dispatch_window_list;
45730 static dispatch_windows *dispatch_window_list1;
45731
45732 /* Get dispatch group of insn. */
45733
45734 static enum dispatch_group
45735 get_mem_group (rtx insn)
45736 {
45737 enum attr_memory memory;
45738
45739 if (INSN_CODE (insn) < 0)
45740 return disp_no_group;
45741 memory = get_attr_memory (insn);
45742 if (memory == MEMORY_STORE)
45743 return disp_store;
45744
45745 if (memory == MEMORY_LOAD)
45746 return disp_load;
45747
45748 if (memory == MEMORY_BOTH)
45749 return disp_load_store;
45750
45751 return disp_no_group;
45752 }
45753
45754 /* Return true if insn is a compare instruction. */
45755
45756 static bool
45757 is_cmp (rtx insn)
45758 {
45759 enum attr_type type;
45760
45761 type = get_attr_type (insn);
45762 return (type == TYPE_TEST
45763 || type == TYPE_ICMP
45764 || type == TYPE_FCMP
45765 || GET_CODE (PATTERN (insn)) == COMPARE);
45766 }
45767
45768 /* Return true if a dispatch violation was encountered. */
45769
45770 static bool
45771 dispatch_violation (void)
45772 {
45773 if (dispatch_window_list->next)
45774 return dispatch_window_list->next->violation;
45775 return dispatch_window_list->violation;
45776 }
45777
45778 /* Return true if insn is a branch instruction. */
45779
45780 static bool
45781 is_branch (rtx insn)
45782 {
45783 return (CALL_P (insn) || JUMP_P (insn));
45784 }
45785
45786 /* Return true if insn is a prefetch instruction. */
45787
45788 static bool
45789 is_prefetch (rtx insn)
45790 {
45791 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
45792 }
45793
45794 /* This function initializes a dispatch window and the list container holding a
45795 pointer to the window. */
45796
45797 static void
45798 init_window (int window_num)
45799 {
45800 int i;
45801 dispatch_windows *new_list;
45802
45803 if (window_num == 0)
45804 new_list = dispatch_window_list;
45805 else
45806 new_list = dispatch_window_list1;
45807
45808 new_list->num_insn = 0;
45809 new_list->num_uops = 0;
45810 new_list->window_size = 0;
45811 new_list->next = NULL;
45812 new_list->prev = NULL;
45813 new_list->window_num = window_num;
45814 new_list->num_imm = 0;
45815 new_list->num_imm_32 = 0;
45816 new_list->num_imm_64 = 0;
45817 new_list->imm_size = 0;
45818 new_list->num_loads = 0;
45819 new_list->num_stores = 0;
45820 new_list->violation = false;
45821
45822 for (i = 0; i < MAX_INSN; i++)
45823 {
45824 new_list->window[i].insn = NULL;
45825 new_list->window[i].group = disp_no_group;
45826 new_list->window[i].path = no_path;
45827 new_list->window[i].byte_len = 0;
45828 new_list->window[i].imm_bytes = 0;
45829 }
45830 return;
45831 }
45832
45833 /* This function allocates and initializes a dispatch window and the
45834 list container holding a pointer to the window. */
45835
45836 static dispatch_windows *
45837 allocate_window (void)
45838 {
45839 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
45840 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
45841
45842 return new_list;
45843 }
45844
45845 /* This routine initializes the dispatch scheduling information. It
45846 initiates building dispatch scheduler tables and constructs the
45847 first dispatch window. */
45848
45849 static void
45850 init_dispatch_sched (void)
45851 {
45852 /* Allocate a dispatch list and a window. */
45853 dispatch_window_list = allocate_window ();
45854 dispatch_window_list1 = allocate_window ();
45855 init_window (0);
45856 init_window (1);
45857 }
45858
45859 /* This function returns true if a branch is detected. End of a basic block
45860 does not have to be a branch, but here we assume only branches end a
45861 window. */
45862
45863 static bool
45864 is_end_basic_block (enum dispatch_group group)
45865 {
45866 return group == disp_branch;
45867 }
45868
45869 /* This function is called when the end of a window processing is reached. */
45870
45871 static void
45872 process_end_window (void)
45873 {
45874 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
45875 if (dispatch_window_list->next)
45876 {
45877 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
45878 gcc_assert (dispatch_window_list->window_size
45879 + dispatch_window_list1->window_size <= 48);
45880 init_window (1);
45881 }
45882 init_window (0);
45883 }
45884
45885 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
45886 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
45887 for 48 bytes of instructions. Note that these windows are not dispatch
45888 windows whose sizes are DISPATCH_WINDOW_SIZE. */
45889
45890 static dispatch_windows *
45891 allocate_next_window (int window_num)
45892 {
45893 if (window_num == 0)
45894 {
45895 if (dispatch_window_list->next)
45896 init_window (1);
45897 init_window (0);
45898 return dispatch_window_list;
45899 }
45900
45901 dispatch_window_list->next = dispatch_window_list1;
45902 dispatch_window_list1->prev = dispatch_window_list;
45903
45904 return dispatch_window_list1;
45905 }
45906
45907 /* Increment the number of immediate operands of an instruction. */
45908
45909 static int
45910 find_constant_1 (rtx *in_rtx, imm_info *imm_values)
45911 {
45912 if (*in_rtx == 0)
45913 return 0;
45914
45915 switch (GET_CODE (*in_rtx))
45916 {
45917 case CONST:
45918 case SYMBOL_REF:
45919 case CONST_INT:
45920 (imm_values->imm)++;
45921 if (x86_64_immediate_operand (*in_rtx, SImode))
45922 (imm_values->imm32)++;
45923 else
45924 (imm_values->imm64)++;
45925 break;
45926
45927 case CONST_DOUBLE:
45928 (imm_values->imm)++;
45929 (imm_values->imm64)++;
45930 break;
45931
45932 case CODE_LABEL:
45933 if (LABEL_KIND (*in_rtx) == LABEL_NORMAL)
45934 {
45935 (imm_values->imm)++;
45936 (imm_values->imm32)++;
45937 }
45938 break;
45939
45940 default:
45941 break;
45942 }
45943
45944 return 0;
45945 }
45946
45947 /* Compute number of immediate operands of an instruction. */
45948
45949 static void
45950 find_constant (rtx in_rtx, imm_info *imm_values)
45951 {
45952 for_each_rtx (INSN_P (in_rtx) ? &PATTERN (in_rtx) : &in_rtx,
45953 (rtx_function) find_constant_1, (void *) imm_values);
45954 }
45955
45956 /* Return total size of immediate operands of an instruction along with number
45957 of corresponding immediate-operands. It initializes its parameters to zero
45958 before calling FIND_CONSTANT.
45959 INSN is the input instruction. IMM is the total of immediates.
45960 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
45961 bit immediates. */
45962
45963 static int
45964 get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
45965 {
45966 imm_info imm_values = {0, 0, 0};
45967
45968 find_constant (insn, &imm_values);
45969 *imm = imm_values.imm;
45970 *imm32 = imm_values.imm32;
45971 *imm64 = imm_values.imm64;
45972 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
45973 }
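
/* Worked example (illustrative only): an insn carrying two 32-bit
   immediates and one 64-bit immediate yields *IMM = 3, *IMM32 = 2,
   *IMM64 = 1, and a return value of 2*4 + 1*8 = 16 bytes.  */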
45974
45975 /* This function indicates whether an instruction has an immediate
45976 operand. */
45977
45978 static bool
45979 has_immediate (rtx insn)
45980 {
45981 int num_imm_operand;
45982 int num_imm32_operand;
45983 int num_imm64_operand;
45984
45985 if (insn)
45986 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
45987 &num_imm64_operand);
45988 return false;
45989 }
45990
45991 /* Return single or double path for instructions. */
45992
45993 static enum insn_path
45994 get_insn_path (rtx insn)
45995 {
45996 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
45997
45998 if ((int)path == 0)
45999 return path_single;
46000
46001 if ((int)path == 1)
46002 return path_double;
46003
46004 return path_multi;
46005 }
46006
46007 /* Return insn dispatch group. */
46008
46009 static enum dispatch_group
46010 get_insn_group (rtx insn)
46011 {
46012 enum dispatch_group group = get_mem_group (insn);
46013 if (group)
46014 return group;
46015
46016 if (is_branch (insn))
46017 return disp_branch;
46018
46019 if (is_cmp (insn))
46020 return disp_cmp;
46021
46022 if (has_immediate (insn))
46023 return disp_imm;
46024
46025 if (is_prefetch (insn))
46026 return disp_prefetch;
46027
46028 return disp_no_group;
46029 }
46030
46031 /* Count number of GROUP restricted instructions in a dispatch
46032 window WINDOW_LIST. */
46033
46034 static int
46035 count_num_restricted (rtx insn, dispatch_windows *window_list)
46036 {
46037 enum dispatch_group group = get_insn_group (insn);
46038 int imm_size;
46039 int num_imm_operand;
46040 int num_imm32_operand;
46041 int num_imm64_operand;
46042
46043 if (group == disp_no_group)
46044 return 0;
46045
46046 if (group == disp_imm)
46047 {
46048 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
46049 &num_imm64_operand);
46050 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
46051 || num_imm_operand + window_list->num_imm > MAX_IMM
46052 || (num_imm32_operand > 0
46053 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
46054 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
46055 || (num_imm64_operand > 0
46056 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
46057 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
46058 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
46059 && num_imm64_operand > 0
46060 && ((window_list->num_imm_64 > 0
46061 && window_list->num_insn >= 2)
46062 || window_list->num_insn >= 3)))
46063 return BIG;
46064
46065 return 1;
46066 }
46067
46068 if ((group == disp_load_store
46069 && (window_list->num_loads >= MAX_LOAD
46070 || window_list->num_stores >= MAX_STORE))
46071 || ((group == disp_load
46072 || group == disp_prefetch)
46073 && window_list->num_loads >= MAX_LOAD)
46074 || (group == disp_store
46075 && window_list->num_stores >= MAX_STORE))
46076 return BIG;
46077
46078 return 1;
46079 }
46080
46081 /* This function returns true if insn satisfies dispatch rules on the
46082 last window scheduled. */
46083
46084 static bool
46085 fits_dispatch_window (rtx insn)
46086 {
46087 dispatch_windows *window_list = dispatch_window_list;
46088 dispatch_windows *window_list_next = dispatch_window_list->next;
46089 unsigned int num_restrict;
46090 enum dispatch_group group = get_insn_group (insn);
46091 enum insn_path path = get_insn_path (insn);
46092 int sum;
46093
46094 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
46095 instructions should be given the lowest priority in the
46096 scheduling process in the Haifa scheduler to make sure they will be
46097 scheduled in the same dispatch window as the reference to them. */
46098 if (group == disp_jcc || group == disp_cmp)
46099 return false;
46100
46101 /* Check nonrestricted. */
46102 if (group == disp_no_group || group == disp_branch)
46103 return true;
46104
46105 /* Get last dispatch window. */
46106 if (window_list_next)
46107 window_list = window_list_next;
46108
46109 if (window_list->window_num == 1)
46110 {
46111 sum = window_list->prev->window_size + window_list->window_size;
46112
46113 if (sum == 32
46114 || (min_insn_size (insn) + sum) >= 48)
46115 /* Window 1 is full. Go for next window. */
46116 return true;
46117 }
46118
46119 num_restrict = count_num_restricted (insn, window_list);
46120
46121 if (num_restrict > num_allowable_groups[group])
46122 return false;
46123
46124 /* See if it fits in the first window. */
46125 if (window_list->window_num == 0)
46126 {
46127 /* The first window should have only single- and double-path
46128 uops. */
46129 if (path == path_double
46130 && (window_list->num_uops + 2) > MAX_INSN)
46131 return false;
46132 else if (path != path_single)
46133 return false;
46134 }
46135 return true;
46136 }
46137
46138 /* Add an instruction INSN with NUM_UOPS micro-operations to the
46139 dispatch window WINDOW_LIST. */
46140
46141 static void
46142 add_insn_window (rtx insn, dispatch_windows *window_list, int num_uops)
46143 {
46144 int byte_len = min_insn_size (insn);
46145 int num_insn = window_list->num_insn;
46146 int imm_size;
46147 sched_insn_info *window = window_list->window;
46148 enum dispatch_group group = get_insn_group (insn);
46149 enum insn_path path = get_insn_path (insn);
46150 int num_imm_operand;
46151 int num_imm32_operand;
46152 int num_imm64_operand;
46153
46154 if (!window_list->violation && group != disp_cmp
46155 && !fits_dispatch_window (insn))
46156 window_list->violation = true;
46157
46158 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
46159 &num_imm64_operand);
46160
46161 /* Initialize window with new instruction. */
46162 window[num_insn].insn = insn;
46163 window[num_insn].byte_len = byte_len;
46164 window[num_insn].group = group;
46165 window[num_insn].path = path;
46166 window[num_insn].imm_bytes = imm_size;
46167
46168 window_list->window_size += byte_len;
46169 window_list->num_insn = num_insn + 1;
46170 window_list->num_uops = window_list->num_uops + num_uops;
46171 window_list->imm_size += imm_size;
46172 window_list->num_imm += num_imm_operand;
46173 window_list->num_imm_32 += num_imm32_operand;
46174 window_list->num_imm_64 += num_imm64_operand;
46175
46176 if (group == disp_store)
46177 window_list->num_stores += 1;
46178 else if (group == disp_load
46179 || group == disp_prefetch)
46180 window_list->num_loads += 1;
46181 else if (group == disp_load_store)
46182 {
46183 window_list->num_stores += 1;
46184 window_list->num_loads += 1;
46185 }
46186 }
46187
46188 /* Adds a scheduled instruction, INSN, to the current dispatch window.
46189 If the total bytes of instructions or the number of instructions in
46190 the window exceed the allowable limits, it allocates a new window. */
46191
46192 static void
46193 add_to_dispatch_window (rtx insn)
46194 {
46195 int byte_len;
46196 dispatch_windows *window_list;
46197 dispatch_windows *next_list;
46198 dispatch_windows *window0_list;
46199 enum insn_path path;
46200 enum dispatch_group insn_group;
46201 bool insn_fits;
46202 int num_insn;
46203 int num_uops;
46204 int window_num;
46205 int insn_num_uops;
46206 int sum;
46207
46208 if (INSN_CODE (insn) < 0)
46209 return;
46210
46211 byte_len = min_insn_size (insn);
46212 window_list = dispatch_window_list;
46213 next_list = window_list->next;
46214 path = get_insn_path (insn);
46215 insn_group = get_insn_group (insn);
46216
46217 /* Get the last dispatch window. */
46218 if (next_list)
46219 window_list = dispatch_window_list->next;
46220
46221 if (path == path_single)
46222 insn_num_uops = 1;
46223 else if (path == path_double)
46224 insn_num_uops = 2;
46225 else
46226 insn_num_uops = (int) path;
46227
46228 /* If the current window is full, get a new window.
46229 Window number zero is full if MAX_INSN uops are scheduled in it.
46230 Window number one is full if window zero's bytes plus window
46231 one's bytes reach 32, or if adding the bytes of the new instruction
46232 makes the total greater than 48, or if it already has MAX_INSN
46233 instructions in it. */
46234 num_insn = window_list->num_insn;
46235 num_uops = window_list->num_uops;
46236 window_num = window_list->window_num;
46237 insn_fits = fits_dispatch_window (insn);
46238
46239 if (num_insn >= MAX_INSN
46240 || num_uops + insn_num_uops > MAX_INSN
46241 || !(insn_fits))
46242 {
46243 window_num = ~window_num & 1;
46244 window_list = allocate_next_window (window_num);
46245 }
46246
46247 if (window_num == 0)
46248 {
46249 add_insn_window (insn, window_list, insn_num_uops);
46250 if (window_list->num_insn >= MAX_INSN
46251 && insn_group == disp_branch)
46252 {
46253 process_end_window ();
46254 return;
46255 }
46256 }
46257 else if (window_num == 1)
46258 {
46259 window0_list = window_list->prev;
46260 sum = window0_list->window_size + window_list->window_size;
46261 if (sum == 32
46262 || (byte_len + sum) >= 48)
46263 {
46264 process_end_window ();
46265 window_list = dispatch_window_list;
46266 }
46267
46268 add_insn_window (insn, window_list, insn_num_uops);
46269 }
46270 else
46271 gcc_unreachable ();
46272
46273 if (is_end_basic_block (insn_group))
46274 {
46275 /* End of basic block is reached; do the end-basic-block processing. */
46276 process_end_window ();
46277 return;
46278 }
46279 }
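
/* Minimal sketch (not part of the backend, hypothetical helper name) of
   the window-pair-full test used above: window one is treated as full
   when the two windows together already hold 32 bytes, or when adding
   the new insn would push the pair to 48 bytes or more.  */
static inline bool
ix86_window_pair_full_example (int window0_bytes, int window1_bytes,
			       int new_insn_bytes)
{
  int sum = window0_bytes + window1_bytes;
  return sum == 32 || new_insn_bytes + sum >= 48;
}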
46280
46281 /* Print the dispatch window, WINDOW_NUM, to FILE. */
46282
46283 DEBUG_FUNCTION static void
46284 debug_dispatch_window_file (FILE *file, int window_num)
46285 {
46286 dispatch_windows *list;
46287 int i;
46288
46289 if (window_num == 0)
46290 list = dispatch_window_list;
46291 else
46292 list = dispatch_window_list1;
46293
46294 fprintf (file, "Window #%d:\n", list->window_num);
46295 fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
46296 list->num_insn, list->num_uops, list->window_size);
46297 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
46298 list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
46299
46300 fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
46301 list->num_stores);
46302 fprintf (file, " insn info:\n");
46303
46304 for (i = 0; i < MAX_INSN; i++)
46305 {
46306 if (!list->window[i].insn)
46307 break;
46308 fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
46309 i, group_name[list->window[i].group],
46310 i, (void *)list->window[i].insn,
46311 i, list->window[i].path,
46312 i, list->window[i].byte_len,
46313 i, list->window[i].imm_bytes);
46314 }
46315 }
46316
46317 /* Print to stdout a dispatch window. */
46318
46319 DEBUG_FUNCTION void
46320 debug_dispatch_window (int window_num)
46321 {
46322 debug_dispatch_window_file (stdout, window_num);
46323 }
46324
46325 /* Print INSN dispatch information to FILE. */
46326
46327 DEBUG_FUNCTION static void
46328 debug_insn_dispatch_info_file (FILE *file, rtx insn)
46329 {
46330 int byte_len;
46331 enum insn_path path;
46332 enum dispatch_group group;
46333 int imm_size;
46334 int num_imm_operand;
46335 int num_imm32_operand;
46336 int num_imm64_operand;
46337
46338 if (INSN_CODE (insn) < 0)
46339 return;
46340
46341 byte_len = min_insn_size (insn);
46342 path = get_insn_path (insn);
46343 group = get_insn_group (insn);
46344 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
46345 &num_imm64_operand);
46346
46347 fprintf (file, " insn info:\n");
46348 fprintf (file, " group = %s, path = %d, byte_len = %d\n",
46349 group_name[group], path, byte_len);
46350 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
46351 num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
46352 }
46353
46354 /* Print to STDOUT the status of the ready list with respect to
46355 dispatch windows. */
46356
46357 DEBUG_FUNCTION void
46358 debug_ready_dispatch (void)
46359 {
46360 int i;
46361 int no_ready = number_in_ready ();
46362
46363 fprintf (stdout, "Number of ready: %d\n", no_ready);
46364
46365 for (i = 0; i < no_ready; i++)
46366 debug_insn_dispatch_info_file (stdout, get_ready_element (i));
46367 }
46368
46369 /* This routine is the driver of the dispatch scheduler. */
46370
46371 static void
46372 do_dispatch (rtx insn, int mode)
46373 {
46374 if (mode == DISPATCH_INIT)
46375 init_dispatch_sched ();
46376 else if (mode == ADD_TO_DISPATCH_WINDOW)
46377 add_to_dispatch_window (insn);
46378 }
46379
46380 /* Return TRUE if Dispatch Scheduling is supported. */
46381
46382 static bool
46383 has_dispatch (rtx insn, int action)
46384 {
46385 if ((TARGET_BDVER1 || TARGET_BDVER2 || TARGET_BDVER3 || TARGET_BDVER4)
46386 && flag_dispatch_scheduler)
46387 switch (action)
46388 {
46389 default:
46390 return false;
46391
46392 case IS_DISPATCH_ON:
46393 return true;
46394 break;
46395
46396 case IS_CMP:
46397 return is_cmp (insn);
46398
46399 case DISPATCH_VIOLATION:
46400 return dispatch_violation ();
46401
46402 case FITS_DISPATCH_WINDOW:
46403 return fits_dispatch_window (insn);
46404 }
46405
46406 return false;
46407 }
46408
46409 /* Implementation of reassociation_width target hook used by
46410 reassoc phase to identify parallelism level in reassociated
46411 tree. Statements tree_code is passed in OPC. Arguments type
46412 is passed in MODE.
46413
46414 Currently parallel reassociation is enabled for Atom
46415 processors only and we set reassociation width to be 2
46416 because Atom may issue up to 2 instructions per cycle.
46417
46418 Return value should be fixed if parallel reassociation is
46419 enabled for other processors. */
46420
46421 static int
46422 ix86_reassociation_width (unsigned int, enum machine_mode mode)
46423 {
46424 int res = 1;
46425
46426 /* Vector part. */
46427 if (VECTOR_MODE_P (mode))
46428 {
46429 if (TARGET_VECTOR_PARALLEL_EXECUTION)
46430 return 2;
46431 else
46432 return 1;
46433 }
46434
46435 /* Scalar part. */
46436 if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL)
46437 res = 2;
46438 else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL)
46439 res = 2;
46440
46441 return res;
46442 }
46443
46444 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
46445 place emms and femms instructions. */
46446
46447 static enum machine_mode
46448 ix86_preferred_simd_mode (enum machine_mode mode)
46449 {
46450 if (!TARGET_SSE)
46451 return word_mode;
46452
46453 switch (mode)
46454 {
46455 case QImode:
46456 return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V32QImode : V16QImode;
46457 case HImode:
46458 return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V16HImode : V8HImode;
46459 case SImode:
46460 return TARGET_AVX512F ? V16SImode :
46461 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode;
46462 case DImode:
46463 return TARGET_AVX512F ? V8DImode :
46464 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V4DImode : V2DImode;
46465
46466 case SFmode:
46467 if (TARGET_AVX512F)
46468 return V16SFmode;
46469 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
46470 return V8SFmode;
46471 else
46472 return V4SFmode;
46473
46474 case DFmode:
46475 if (!TARGET_VECTORIZE_DOUBLE)
46476 return word_mode;
46477 else if (TARGET_AVX512F)
46478 return V8DFmode;
46479 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
46480 return V4DFmode;
46481 else if (TARGET_SSE2)
46482 return V2DFmode;
46483 /* FALLTHRU */
46484
46485 default:
46486 return word_mode;
46487 }
46488 }
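
/* Illustrative only: with -mavx512f an SImode element maps to V16SImode
   above, with plain -mavx (and no -mprefer-avx128) to V8SImode, and on
   SSE-only targets to V4SImode.  */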
46489
46490 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
46491 vectors. If AVX512F is enabled then try vectorizing with 512bit,
46492 256bit and 128bit vectors. */
46493
46494 static unsigned int
46495 ix86_autovectorize_vector_sizes (void)
46496 {
46497 return TARGET_AVX512F ? 64 | 32 | 16 :
46498 (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
46499 }
46500
46501 \f
46502
46503 /* Return class of registers which could be used for pseudo of MODE
46504 and of class RCLASS for spilling instead of memory. Return NO_REGS
46505 if it is not possible or not profitable. */
46506 static reg_class_t
46507 ix86_spill_class (reg_class_t rclass, enum machine_mode mode)
46508 {
46509 if (TARGET_SSE && TARGET_GENERAL_REGS_SSE_SPILL && ! TARGET_MMX
46510 && (mode == SImode || (TARGET_64BIT && mode == DImode))
46511 && rclass != NO_REGS && INTEGER_CLASS_P (rclass))
46512 return ALL_SSE_REGS;
46513 return NO_REGS;
46514 }
46515
46516 /* Implement targetm.vectorize.init_cost. */
46517
46518 static void *
46519 ix86_init_cost (struct loop *)
46520 {
46521 unsigned *cost = XNEWVEC (unsigned, 3);
46522 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
46523 return cost;
46524 }
46525
46526 /* Implement targetm.vectorize.add_stmt_cost. */
46527
46528 static unsigned
46529 ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
46530 struct _stmt_vec_info *stmt_info, int misalign,
46531 enum vect_cost_model_location where)
46532 {
46533 unsigned *cost = (unsigned *) data;
46534 unsigned retval = 0;
46535
46536 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
46537 int stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
46538
46539 /* Statements in an inner loop relative to the loop being
46540 vectorized are weighted more heavily. The value here is
46541 arbitrary and could potentially be improved with analysis. */
46542 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
46543 count *= 50; /* FIXME. */
46544
46545 retval = (unsigned) (count * stmt_cost);
46546
46547 /* We need to multiply all vector stmt costs by 1.7 (estimated cost)
46548 for Silvermont, as it has an out-of-order integer pipeline and can execute
46549 2 scalar instructions per tick, but has an in-order SIMD pipeline. */
46550 if (TARGET_SILVERMONT || TARGET_INTEL)
46551 if (stmt_info && stmt_info->stmt)
46552 {
46553 tree lhs_op = gimple_get_lhs (stmt_info->stmt);
46554 if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE)
46555 retval = (retval * 17) / 10;
46556 }
46557
46558 cost[where] += retval;
46559
46560 return retval;
46561 }
46562
46563 /* Implement targetm.vectorize.finish_cost. */
46564
46565 static void
46566 ix86_finish_cost (void *data, unsigned *prologue_cost,
46567 unsigned *body_cost, unsigned *epilogue_cost)
46568 {
46569 unsigned *cost = (unsigned *) data;
46570 *prologue_cost = cost[vect_prologue];
46571 *body_cost = cost[vect_body];
46572 *epilogue_cost = cost[vect_epilogue];
46573 }
46574
46575 /* Implement targetm.vectorize.destroy_cost_data. */
46576
46577 static void
46578 ix86_destroy_cost_data (void *data)
46579 {
46580 free (data);
46581 }
46582
46583 /* Validate target specific memory model bits in VAL. */
46584
46585 static unsigned HOST_WIDE_INT
46586 ix86_memmodel_check (unsigned HOST_WIDE_INT val)
46587 {
46588 unsigned HOST_WIDE_INT model = val & MEMMODEL_MASK;
46589 bool strong;
46590
46591 if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
46592 |MEMMODEL_MASK)
46593 || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
46594 {
46595 warning (OPT_Winvalid_memory_model,
46596 "Unknown architecture specific memory model");
46597 return MEMMODEL_SEQ_CST;
46598 }
46599 strong = (model == MEMMODEL_ACQ_REL || model == MEMMODEL_SEQ_CST);
46600 if (val & IX86_HLE_ACQUIRE && !(model == MEMMODEL_ACQUIRE || strong))
46601 {
46602 warning (OPT_Winvalid_memory_model,
46603 "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
46604 return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
46605 }
46606 if (val & IX86_HLE_RELEASE && !(model == MEMMODEL_RELEASE || strong))
46607 {
46608 warning (OPT_Winvalid_memory_model,
46609 "HLE_RELEASE not used with RELEASE or stronger memory model");
46610 return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
46611 }
46612 return val;
46613 }
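
/* Illustrative user-level usage of the HLE bits validated above (a
   sketch, not part of this file):

     __atomic_store_n (&lock, 0, __ATOMIC_RELEASE | __ATOMIC_HLE_RELEASE);

   A mismatched pair such as __ATOMIC_RELAXED | __ATOMIC_HLE_ACQUIRE is
   diagnosed with -Winvalid-memory-model and demoted to SEQ_CST while the
   HLE bit is kept, as in the code above.  */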
46614
46615 /* Set CLONEI->vecsize_mangle, CLONEI->vecsize_int,
46616 CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
46617 CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted,
46618 or the number of vecsize_mangle variants that should be emitted. */
46619
46620 static int
46621 ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
46622 struct cgraph_simd_clone *clonei,
46623 tree base_type, int num)
46624 {
46625 int ret = 1;
46626
46627 if (clonei->simdlen
46628 && (clonei->simdlen < 2
46629 || clonei->simdlen > 16
46630 || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
46631 {
46632 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
46633 "unsupported simdlen %d", clonei->simdlen);
46634 return 0;
46635 }
46636
46637 tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
46638 if (TREE_CODE (ret_type) != VOID_TYPE)
46639 switch (TYPE_MODE (ret_type))
46640 {
46641 case QImode:
46642 case HImode:
46643 case SImode:
46644 case DImode:
46645 case SFmode:
46646 case DFmode:
46647 /* case SCmode: */
46648 /* case DCmode: */
46649 break;
46650 default:
46651 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
46652 "unsupported return type %qT for simd\n", ret_type);
46653 return 0;
46654 }
46655
46656 tree t;
46657 int i;
46658
46659 for (t = DECL_ARGUMENTS (node->decl), i = 0; t; t = DECL_CHAIN (t), i++)
46660 /* FIXME: Shouldn't we allow such arguments if they are uniform? */
46661 switch (TYPE_MODE (TREE_TYPE (t)))
46662 {
46663 case QImode:
46664 case HImode:
46665 case SImode:
46666 case DImode:
46667 case SFmode:
46668 case DFmode:
46669 /* case SCmode: */
46670 /* case DCmode: */
46671 break;
46672 default:
46673 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
46674 "unsupported argument type %qT for simd\n", TREE_TYPE (t));
46675 return 0;
46676 }
46677
46678 if (clonei->cilk_elemental)
46679 {
46680 /* Parse the processor clause here. If not present, default to 'b'. */
46681 clonei->vecsize_mangle = 'b';
46682 }
46683 else if (!TREE_PUBLIC (node->decl))
46684 {
46685 /* If the function isn't exported, we can pick up just one ISA
46686 for the clones. */
46687 if (TARGET_AVX2)
46688 clonei->vecsize_mangle = 'd';
46689 else if (TARGET_AVX)
46690 clonei->vecsize_mangle = 'c';
46691 else
46692 clonei->vecsize_mangle = 'b';
46693 ret = 1;
46694 }
46695 else
46696 {
46697 clonei->vecsize_mangle = "bcd"[num];
46698 ret = 3;
46699 }
46700 switch (clonei->vecsize_mangle)
46701 {
46702 case 'b':
46703 clonei->vecsize_int = 128;
46704 clonei->vecsize_float = 128;
46705 break;
46706 case 'c':
46707 clonei->vecsize_int = 128;
46708 clonei->vecsize_float = 256;
46709 break;
46710 case 'd':
46711 clonei->vecsize_int = 256;
46712 clonei->vecsize_float = 256;
46713 break;
46714 }
46715 if (clonei->simdlen == 0)
46716 {
46717 if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
46718 clonei->simdlen = clonei->vecsize_int;
46719 else
46720 clonei->simdlen = clonei->vecsize_float;
46721 clonei->simdlen /= GET_MODE_BITSIZE (TYPE_MODE (base_type));
46722 if (clonei->simdlen > 16)
46723 clonei->simdlen = 16;
46724 }
46725 return ret;
46726 }
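
/* Worked example (illustrative only): an exported (TREE_PUBLIC) non-Cilk
   clone with a float base_type and no explicit simdlen gets all three
   mangle letters; its 'c' (AVX) variant has vecsize_float = 256, so its
   simdlen becomes 256 / 32 = 8.  */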
46727
46728 /* Add target attribute to SIMD clone NODE if needed. */
46729
46730 static void
46731 ix86_simd_clone_adjust (struct cgraph_node *node)
46732 {
46733 const char *str = NULL;
46734 gcc_assert (node->decl == cfun->decl);
46735 switch (node->simdclone->vecsize_mangle)
46736 {
46737 case 'b':
46738 if (!TARGET_SSE2)
46739 str = "sse2";
46740 break;
46741 case 'c':
46742 if (!TARGET_AVX)
46743 str = "avx";
46744 break;
46745 case 'd':
46746 if (!TARGET_AVX2)
46747 str = "avx2";
46748 break;
46749 default:
46750 gcc_unreachable ();
46751 }
46752 if (str == NULL)
46753 return;
46754 push_cfun (NULL);
46755 tree args = build_tree_list (NULL_TREE, build_string (strlen (str), str));
46756 bool ok = ix86_valid_target_attribute_p (node->decl, NULL, args, 0);
46757 gcc_assert (ok);
46758 pop_cfun ();
46759 ix86_previous_fndecl = NULL_TREE;
46760 ix86_set_current_function (node->decl);
46761 }
46762
46763 /* If SIMD clone NODE can't be used in a vectorized loop
46764 in the current function, return -1, otherwise return a badness of using it
46765 (0 if it is most desirable from the vecsize_mangle point of view, 1
46766 slightly less desirable, etc.). */
46767
46768 static int
46769 ix86_simd_clone_usable (struct cgraph_node *node)
46770 {
46771 switch (node->simdclone->vecsize_mangle)
46772 {
46773 case 'b':
46774 if (!TARGET_SSE2)
46775 return -1;
46776 if (!TARGET_AVX)
46777 return 0;
46778 return TARGET_AVX2 ? 2 : 1;
46779 case 'c':
46780 if (!TARGET_AVX)
46781 return -1;
46782 return TARGET_AVX2 ? 1 : 0;
46783 break;
46784 case 'd':
46785 if (!TARGET_AVX2)
46786 return -1;
46787 return 0;
46788 default:
46789 gcc_unreachable ();
46790 }
46791 }
46792
46793 /* This function counts the number of memory references.
46794 This count determines the unrolling factor for
46795 bdver3 and bdver4 architectures. */
46796
46797 static int
46798 ix86_loop_memcount (rtx *x, unsigned *mem_count)
46799 {
46800 if (*x != NULL_RTX && MEM_P (*x))
46801 {
46802 enum machine_mode mode;
46803 unsigned int n_words;
46804
46805 mode = GET_MODE (*x);
46806 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
46807
46808 if (n_words > 4)
46809 (*mem_count) += 2;
46810 else
46811 (*mem_count) += 1;
46812 }
46813 return 0;
46814 }
46815
46816 /* This function adjusts the unroll factor based on
46817 the hardware capabilities. For example, bdver3 has
46818 a loop buffer which makes unrolling of smaller
46819 loops less important. This function decides the
46820 unroll factor using the number of memory references
46821 (the value 32 is used) as a heuristic. */
46822
46823 static unsigned
46824 ix86_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
46825 {
46826 basic_block *bbs;
46827 rtx insn;
46828 unsigned i;
46829 unsigned mem_count = 0;
46830
46831 if (!TARGET_ADJUST_UNROLL)
46832 return nunroll;
46833
46834 /* Count the number of memory references within the loop body. */
46835 bbs = get_loop_body (loop);
46836 for (i = 0; i < loop->num_nodes; i++)
46837 {
46838 for (insn = BB_HEAD (bbs[i]); insn != BB_END (bbs[i]); insn = NEXT_INSN (insn))
46839 if (NONDEBUG_INSN_P (insn))
46840 for_each_rtx (&insn, (rtx_function) ix86_loop_memcount, &mem_count);
46841 }
46842 free (bbs);
46843
46844 if (mem_count && mem_count <= 32)
46845 return 32 / mem_count;
46846
46847 return nunroll;
46848 }
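
/* Worked example (illustrative only): a loop body with 8 counted memory
   references (each at most 4 words wide) gives an adjusted unroll factor
   of 32 / 8 = 4, independently of the NUNROLL suggested by the caller.  */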
46849
46850
46851 /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
46852
46853 static bool
46854 ix86_float_exceptions_rounding_supported_p (void)
46855 {
46856 /* For x87 floating point with standard excess precision handling,
46857 there is no adddf3 pattern (since x87 floating point only has
46858 XFmode operations) so the default hook implementation gets this
46859 wrong. */
46860 return TARGET_80387 || TARGET_SSE_MATH;
46861 }
46862
46863 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
46864
46865 static void
46866 ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
46867 {
46868 if (!TARGET_80387 && !TARGET_SSE_MATH)
46869 return;
46870 tree exceptions_var = create_tmp_var (integer_type_node, NULL);
46871 if (TARGET_80387)
46872 {
46873 tree fenv_index_type = build_index_type (size_int (6));
46874 tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
46875 tree fenv_var = create_tmp_var (fenv_type, NULL);
46876 mark_addressable (fenv_var);
46877 tree fenv_ptr = build_pointer_type (fenv_type);
46878 tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
46879 fenv_addr = fold_convert (ptr_type_node, fenv_addr);
46880 tree fnstenv = ix86_builtins[IX86_BUILTIN_FNSTENV];
46881 tree fldenv = ix86_builtins[IX86_BUILTIN_FLDENV];
46882 tree fnstsw = ix86_builtins[IX86_BUILTIN_FNSTSW];
46883 tree fnclex = ix86_builtins[IX86_BUILTIN_FNCLEX];
46884 tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
46885 tree hold_fnclex = build_call_expr (fnclex, 0);
46886 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_fnstenv,
46887 hold_fnclex);
46888 *clear = build_call_expr (fnclex, 0);
46889 tree sw_var = create_tmp_var (short_unsigned_type_node, NULL);
46890 tree fnstsw_call = build_call_expr (fnstsw, 0);
46891 tree sw_mod = build2 (MODIFY_EXPR, short_unsigned_type_node,
46892 sw_var, fnstsw_call);
46893 tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
46894 tree update_mod = build2 (MODIFY_EXPR, integer_type_node,
46895 exceptions_var, exceptions_x87);
46896 *update = build2 (COMPOUND_EXPR, integer_type_node,
46897 sw_mod, update_mod);
46898 tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
46899 *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
46900 }
46901 if (TARGET_SSE_MATH)
46902 {
46903 tree mxcsr_orig_var = create_tmp_var (unsigned_type_node, NULL);
46904 tree mxcsr_mod_var = create_tmp_var (unsigned_type_node, NULL);
46905 tree stmxcsr = ix86_builtins[IX86_BUILTIN_STMXCSR];
46906 tree ldmxcsr = ix86_builtins[IX86_BUILTIN_LDMXCSR];
46907 tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
46908 tree hold_assign_orig = build2 (MODIFY_EXPR, unsigned_type_node,
46909 mxcsr_orig_var, stmxcsr_hold_call);
46910 tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
46911 mxcsr_orig_var,
46912 build_int_cst (unsigned_type_node, 0x1f80));
46913 hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
46914 build_int_cst (unsigned_type_node, 0xffffffc0));
46915 tree hold_assign_mod = build2 (MODIFY_EXPR, unsigned_type_node,
46916 mxcsr_mod_var, hold_mod_val);
46917 tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
46918 tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
46919 hold_assign_orig, hold_assign_mod);
46920 hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
46921 ldmxcsr_hold_call);
46922 if (*hold)
46923 *hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
46924 else
46925 *hold = hold_all;
46926 tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
46927 if (*clear)
46928 *clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
46929 ldmxcsr_clear_call);
46930 else
46931 *clear = ldmxcsr_clear_call;
46932 tree stxmcsr_update_call = build_call_expr (stmxcsr, 0);
46933 tree exceptions_sse = fold_convert (integer_type_node,
46934 stxmcsr_update_call);
46935 if (*update)
46936 {
46937 tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
46938 exceptions_var, exceptions_sse);
46939 tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
46940 exceptions_var, exceptions_mod);
46941 *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
46942 exceptions_assign);
46943 }
46944 else
46945 *update = build2 (MODIFY_EXPR, integer_type_node,
46946 exceptions_var, exceptions_sse);
46947 tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
46948 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
46949 ldmxcsr_update_call);
46950 }
46951 tree atomic_feraiseexcept
46952 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
46953 tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
46954 1, exceptions_var);
46955 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
46956 atomic_feraiseexcept_call);
46957 }
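
/* Rough C-level equivalent of the SSE part built above (a sketch for the
   case where only SSE math is in use; the trees constructed here are not
   emitted literally as this source):

     hold:    mxcsr_orig = __builtin_ia32_stmxcsr ();
              mxcsr_mod = (mxcsr_orig | 0x1f80) & 0xffffffc0;
              __builtin_ia32_ldmxcsr (mxcsr_mod);
     clear:   __builtin_ia32_ldmxcsr (mxcsr_mod);
     update:  exceptions = __builtin_ia32_stmxcsr ();
              __builtin_ia32_ldmxcsr (mxcsr_orig);
              __atomic_feraiseexcept (exceptions);

   0x1f80 sets the MXCSR exception mask bits and 0xffffffc0 clears the
   exception flag bits.  */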
46958
46959 /* Initialize the GCC target structure. */
46960 #undef TARGET_RETURN_IN_MEMORY
46961 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
46962
46963 #undef TARGET_LEGITIMIZE_ADDRESS
46964 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
46965
46966 #undef TARGET_ATTRIBUTE_TABLE
46967 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
46968 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
46969 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
46970 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
46971 # undef TARGET_MERGE_DECL_ATTRIBUTES
46972 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
46973 #endif
46974
46975 #undef TARGET_COMP_TYPE_ATTRIBUTES
46976 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
46977
46978 #undef TARGET_INIT_BUILTINS
46979 #define TARGET_INIT_BUILTINS ix86_init_builtins
46980 #undef TARGET_BUILTIN_DECL
46981 #define TARGET_BUILTIN_DECL ix86_builtin_decl
46982 #undef TARGET_EXPAND_BUILTIN
46983 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
46984
46985 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
46986 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
46987 ix86_builtin_vectorized_function
46988
46989 #undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
46990 #define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load
46991
46992 #undef TARGET_VECTORIZE_BUILTIN_TM_STORE
46993 #define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store
46994
46995 #undef TARGET_VECTORIZE_BUILTIN_GATHER
46996 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
46997
46998 #undef TARGET_BUILTIN_RECIPROCAL
46999 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
47000
47001 #undef TARGET_ASM_FUNCTION_EPILOGUE
47002 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
47003
47004 #undef TARGET_ENCODE_SECTION_INFO
47005 #ifndef SUBTARGET_ENCODE_SECTION_INFO
47006 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
47007 #else
47008 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
47009 #endif
47010
47011 #undef TARGET_ASM_OPEN_PAREN
47012 #define TARGET_ASM_OPEN_PAREN ""
47013 #undef TARGET_ASM_CLOSE_PAREN
47014 #define TARGET_ASM_CLOSE_PAREN ""
47015
47016 #undef TARGET_ASM_BYTE_OP
47017 #define TARGET_ASM_BYTE_OP ASM_BYTE
47018
47019 #undef TARGET_ASM_ALIGNED_HI_OP
47020 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
47021 #undef TARGET_ASM_ALIGNED_SI_OP
47022 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
47023 #ifdef ASM_QUAD
47024 #undef TARGET_ASM_ALIGNED_DI_OP
47025 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
47026 #endif
47027
47028 #undef TARGET_PROFILE_BEFORE_PROLOGUE
47029 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
47030
47031 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
47032 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
47033
47034 #undef TARGET_ASM_UNALIGNED_HI_OP
47035 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
47036 #undef TARGET_ASM_UNALIGNED_SI_OP
47037 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
47038 #undef TARGET_ASM_UNALIGNED_DI_OP
47039 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
47040
47041 #undef TARGET_PRINT_OPERAND
47042 #define TARGET_PRINT_OPERAND ix86_print_operand
47043 #undef TARGET_PRINT_OPERAND_ADDRESS
47044 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
47045 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
47046 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
47047 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
47048 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
47049
47050 #undef TARGET_SCHED_INIT_GLOBAL
47051 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
47052 #undef TARGET_SCHED_ADJUST_COST
47053 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
47054 #undef TARGET_SCHED_ISSUE_RATE
47055 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
47056 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
47057 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
47058 ia32_multipass_dfa_lookahead
47059 #undef TARGET_SCHED_MACRO_FUSION_P
47060 #define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
47061 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
47062 #define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
47063
47064 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
47065 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
47066
47067 #undef TARGET_MEMMODEL_CHECK
47068 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
47069
47070 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
47071 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv
47072
47073 #ifdef HAVE_AS_TLS
47074 #undef TARGET_HAVE_TLS
47075 #define TARGET_HAVE_TLS true
47076 #endif
47077 #undef TARGET_CANNOT_FORCE_CONST_MEM
47078 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
47079 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
47080 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
47081
47082 #undef TARGET_DELEGITIMIZE_ADDRESS
47083 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
47084
47085 #undef TARGET_MS_BITFIELD_LAYOUT_P
47086 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
47087
47088 #if TARGET_MACHO
47089 #undef TARGET_BINDS_LOCAL_P
47090 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
47091 #endif
47092 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
47093 #undef TARGET_BINDS_LOCAL_P
47094 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
47095 #endif
47096
47097 #undef TARGET_ASM_OUTPUT_MI_THUNK
47098 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
47099 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
47100 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
47101
47102 #undef TARGET_ASM_FILE_START
47103 #define TARGET_ASM_FILE_START x86_file_start
47104
47105 #undef TARGET_OPTION_OVERRIDE
47106 #define TARGET_OPTION_OVERRIDE ix86_option_override
47107
47108 #undef TARGET_REGISTER_MOVE_COST
47109 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
47110 #undef TARGET_MEMORY_MOVE_COST
47111 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
47112 #undef TARGET_RTX_COSTS
47113 #define TARGET_RTX_COSTS ix86_rtx_costs
47114 #undef TARGET_ADDRESS_COST
47115 #define TARGET_ADDRESS_COST ix86_address_cost
47116
47117 #undef TARGET_FIXED_CONDITION_CODE_REGS
47118 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
47119 #undef TARGET_CC_MODES_COMPATIBLE
47120 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
47121
47122 #undef TARGET_MACHINE_DEPENDENT_REORG
47123 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
47124
47125 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
47126 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
47127
47128 #undef TARGET_BUILD_BUILTIN_VA_LIST
47129 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
47130
47131 #undef TARGET_FOLD_BUILTIN
47132 #define TARGET_FOLD_BUILTIN ix86_fold_builtin
47133
47134 #undef TARGET_COMPARE_VERSION_PRIORITY
47135 #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
47136
47137 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
47138 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
47139 ix86_generate_version_dispatcher_body
47140
47141 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
47142 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
47143 ix86_get_function_versions_dispatcher
47144
47145 #undef TARGET_ENUM_VA_LIST_P
47146 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
47147
47148 #undef TARGET_FN_ABI_VA_LIST
47149 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
47150
47151 #undef TARGET_CANONICAL_VA_LIST_TYPE
47152 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
47153
47154 #undef TARGET_EXPAND_BUILTIN_VA_START
47155 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
47156
47157 #undef TARGET_MD_ASM_CLOBBERS
47158 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
47159
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG ix86_function_arg
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
#undef TARGET_UPDATE_STACK_BOUNDARY
#define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
#undef TARGET_GET_DRAP_RTX
#define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_STATIC_CHAIN
#define TARGET_STATIC_CHAIN ix86_static_chain
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
#undef TARGET_RETURN_POPS_ARGS
#define TARGET_RETURN_POPS_ARGS ix86_return_pops_args

#undef TARGET_LEGITIMATE_COMBINED_INSN
#define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn

#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
#endif

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ix86_mangle_type

#if !TARGET_MACHO
#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
#endif

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ix86_function_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode

#undef TARGET_MEMBER_TYPE_FORCES_BLK
#define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk

#undef TARGET_INSTANTIATE_DECLS
#define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls

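/* Register class and reload hooks.  */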
#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD ix86_secondary_reload

#undef TARGET_CLASS_MAX_NREGS
#define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
#undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
#define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p

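/* Vectorizer cost model and SIMD mode selection hooks.  */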
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  ix86_builtin_vectorization_cost
#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
  ix86_vectorize_vec_perm_const_ok
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  ix86_preferred_simd_mode
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  ix86_autovectorize_vector_sizes
#undef TARGET_VECTORIZE_INIT_COST
#define TARGET_VECTORIZE_INIT_COST ix86_init_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
#undef TARGET_VECTORIZE_FINISH_COST
#define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
#undef TARGET_VECTORIZE_DESTROY_COST_DATA
#define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE ix86_function_specific_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE ix86_function_specific_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT ix86_function_specific_print

#undef TARGET_OPTION_FUNCTION_VERSIONS
#define TARGET_OPTION_FUNCTION_VERSIONS ix86_function_versions

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P ix86_can_inline_p

#undef TARGET_EXPAND_TO_RTL_HOOK
#define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p

#undef TARGET_LRA_P
#define TARGET_LRA_P hook_bool_void_true

#undef TARGET_REGISTER_PRIORITY
#define TARGET_REGISTER_PRIORITY ix86_register_priority

#undef TARGET_REGISTER_USAGE_LEVELING_P
#define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE ix86_can_eliminate

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry

#undef TARGET_ASM_CODE_END
#define TARGET_ASM_CODE_END ix86_code_end

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage

#if TARGET_MACHO
#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS darwin_rename_builtins
#endif

#undef TARGET_LOOP_UNROLL_ADJUST
#define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust

#undef TARGET_SPILL_CLASS
#define TARGET_SPILL_CLASS ix86_spill_class

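/* SIMD clone ("#pragma omp declare simd") support.  */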
#undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
#define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
  ix86_simd_clone_compute_vecsize_and_simdlen

#undef TARGET_SIMD_CLONE_ADJUST
#define TARGET_SIMD_CLONE_ADJUST \
  ix86_simd_clone_adjust

#undef TARGET_SIMD_CLONE_USABLE
#define TARGET_SIMD_CLONE_USABLE \
  ix86_simd_clone_usable

#undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
#define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
  ix86_float_exceptions_rounding_supported_p

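/* Hooks used by the mode-switching pass.  */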
#undef TARGET_MODE_EMIT
#define TARGET_MODE_EMIT ix86_emit_mode_set

#undef TARGET_MODE_NEEDED
#define TARGET_MODE_NEEDED ix86_mode_needed

#undef TARGET_MODE_AFTER
#define TARGET_MODE_AFTER ix86_mode_after

#undef TARGET_MODE_ENTRY
#define TARGET_MODE_ENTRY ix86_mode_entry

#undef TARGET_MODE_EXIT
#define TARGET_MODE_EXIT ix86_mode_exit

#undef TARGET_MODE_PRIORITY
#define TARGET_MODE_PRIORITY ix86_mode_priority

#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true

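/* Initialize the target structure.  Hooks not overridden above keep
   the defaults supplied by TARGET_INITIALIZER (see target-def.h).  */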
struct gcc_target targetm = TARGET_INITIALIZER;
\f
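/* Garbage-collection root tables for this file, generated by gengtype.  */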
#include "gt-i386.h"