gcc/config/i386/i386.c
1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988-2014 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
10
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "tm.h"
24 #include "rtl.h"
25 #include "tree.h"
26 #include "stringpool.h"
27 #include "attribs.h"
28 #include "calls.h"
29 #include "stor-layout.h"
30 #include "varasm.h"
31 #include "tm_p.h"
32 #include "regs.h"
33 #include "hard-reg-set.h"
34 #include "insn-config.h"
35 #include "conditions.h"
36 #include "output.h"
37 #include "insn-codes.h"
38 #include "insn-attr.h"
39 #include "flags.h"
40 #include "except.h"
41 #include "function.h"
42 #include "recog.h"
43 #include "expr.h"
44 #include "optabs.h"
45 #include "diagnostic-core.h"
46 #include "toplev.h"
47 #include "basic-block.h"
48 #include "ggc.h"
49 #include "target.h"
50 #include "target-def.h"
51 #include "common/common-target.h"
52 #include "langhooks.h"
53 #include "reload.h"
54 #include "cgraph.h"
55 #include "pointer-set.h"
56 #include "hash-table.h"
57 #include "vec.h"
58 #include "basic-block.h"
59 #include "tree-ssa-alias.h"
60 #include "internal-fn.h"
61 #include "gimple-fold.h"
62 #include "tree-eh.h"
63 #include "gimple-expr.h"
64 #include "is-a.h"
65 #include "gimple.h"
66 #include "gimplify.h"
67 #include "cfgloop.h"
68 #include "dwarf2.h"
69 #include "df.h"
70 #include "tm-constrs.h"
71 #include "params.h"
72 #include "cselib.h"
73 #include "debug.h"
74 #include "sched-int.h"
75 #include "sbitmap.h"
76 #include "fibheap.h"
77 #include "opts.h"
78 #include "diagnostic.h"
79 #include "dumpfile.h"
80 #include "tree-pass.h"
81 #include "context.h"
82 #include "pass_manager.h"
83 #include "target-globals.h"
84
85 static rtx legitimize_dllimport_symbol (rtx, bool);
86 static rtx legitimize_pe_coff_extern_decl (rtx, bool);
87 static rtx legitimize_pe_coff_symbol (rtx, bool);
88
89 #ifndef CHECK_STACK_LIMIT
90 #define CHECK_STACK_LIMIT (-1)
91 #endif
92
93 /* Return index of given mode in mult and division cost tables. */
94 #define MODE_INDEX(mode) \
95 ((mode) == QImode ? 0 \
96 : (mode) == HImode ? 1 \
97 : (mode) == SImode ? 2 \
98 : (mode) == DImode ? 3 \
99 : 4)
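/* Editorial sketch (not from the original file): MODE_INDEX is intended to
   select the per-mode entry of the five-element cost arrays below, whose
   order matches QImode, HImode, SImode, DImode and "other".  Assuming the
   struct processor_costs fields are named mult_init[] and divide[] and that
   ix86_cost points at the active table, a use would look roughly like:

     int mul_start_cost = ix86_cost->mult_init[MODE_INDEX (SImode)];
     int div_cost       = ix86_cost->divide[MODE_INDEX (DImode)];

   The field and pointer names here are assumptions for illustration only.  */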
100
101 /* Processor costs (relative to an add) */
102 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
103 #define COSTS_N_BYTES(N) ((N) * 2)
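/* Worked example (editorial): if COSTS_N_INSNS (N) expands to (N) * 4 as the
   comment above assumes, then COSTS_N_BYTES (2) == 4 == COSTS_N_INSNS (1);
   a two-byte instruction such as an add is charged the same as one "insn"
   of cost, so the size-tuning tables below stay on the same numeric scale
   as the speed-tuning tables.  */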
104
105 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall, false}}}
106
107 static stringop_algs ix86_size_memcpy[2] = {
108 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
109 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
110 static stringop_algs ix86_size_memset[2] = {
111 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
112 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
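/* Editorial note on reading the stringop tables: each stringop_algs value
   pairs a fallback algorithm (used when the block size is not known at
   compile time) with a list of {max_size, algorithm, noalign} triples that
   are tried in order, a max_size of -1 meaning "all larger sizes".  The
   two-element arrays are assumed to be indexed by 32-bit vs. 64-bit code,
   along the lines of:

     const struct stringop_algs *algs
       = &ix86_cost->memcpy[TARGET_64BIT != 0];

   This indexing convention is an assumption for illustration, not taken
   from this file.  */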
113
114 const
115 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
116 COSTS_N_BYTES (2), /* cost of an add instruction */
117 COSTS_N_BYTES (3), /* cost of a lea instruction */
118 COSTS_N_BYTES (2), /* variable shift costs */
119 COSTS_N_BYTES (3), /* constant shift costs */
120 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
121 COSTS_N_BYTES (3), /* HI */
122 COSTS_N_BYTES (3), /* SI */
123 COSTS_N_BYTES (3), /* DI */
124 COSTS_N_BYTES (5)}, /* other */
125 0, /* cost of multiply per each bit set */
126 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
127 COSTS_N_BYTES (3), /* HI */
128 COSTS_N_BYTES (3), /* SI */
129 COSTS_N_BYTES (3), /* DI */
130 COSTS_N_BYTES (5)}, /* other */
131 COSTS_N_BYTES (3), /* cost of movsx */
132 COSTS_N_BYTES (3), /* cost of movzx */
133 0, /* "large" insn */
134 2, /* MOVE_RATIO */
135 2, /* cost for loading QImode using movzbl */
136 {2, 2, 2}, /* cost of loading integer registers
137 in QImode, HImode and SImode.
138 Relative to reg-reg move (2). */
139 {2, 2, 2}, /* cost of storing integer registers */
140 2, /* cost of reg,reg fld/fst */
141 {2, 2, 2}, /* cost of loading fp registers
142 in SFmode, DFmode and XFmode */
143 {2, 2, 2}, /* cost of storing fp registers
144 in SFmode, DFmode and XFmode */
145 3, /* cost of moving MMX register */
146 {3, 3}, /* cost of loading MMX registers
147 in SImode and DImode */
148 {3, 3}, /* cost of storing MMX registers
149 in SImode and DImode */
150 3, /* cost of moving SSE register */
151 {3, 3, 3}, /* cost of loading SSE registers
152 in SImode, DImode and TImode */
153 {3, 3, 3}, /* cost of storing SSE registers
154 in SImode, DImode and TImode */
155 3, /* MMX or SSE register to integer */
156 0, /* size of l1 cache */
157 0, /* size of l2 cache */
158 0, /* size of prefetch block */
159 0, /* number of parallel prefetches */
160 2, /* Branch cost */
161 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
162 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
163 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
164 COSTS_N_BYTES (2), /* cost of FABS instruction. */
165 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
166 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
167 ix86_size_memcpy,
168 ix86_size_memset,
169 1, /* scalar_stmt_cost. */
170 1, /* scalar load_cost. */
171 1, /* scalar_store_cost. */
172 1, /* vec_stmt_cost. */
173 1, /* vec_to_scalar_cost. */
174 1, /* scalar_to_vec_cost. */
175 1, /* vec_align_load_cost. */
176 1, /* vec_unalign_load_cost. */
177 1, /* vec_store_cost. */
178 1, /* cond_taken_branch_cost. */
179 1, /* cond_not_taken_branch_cost. */
180 };
181
182 /* Processor costs (relative to an add) */
183 static stringop_algs i386_memcpy[2] = {
184 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
185 DUMMY_STRINGOP_ALGS};
186 static stringop_algs i386_memset[2] = {
187 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
188 DUMMY_STRINGOP_ALGS};
189
190 static const
191 struct processor_costs i386_cost = { /* 386 specific costs */
192 COSTS_N_INSNS (1), /* cost of an add instruction */
193 COSTS_N_INSNS (1), /* cost of a lea instruction */
194 COSTS_N_INSNS (3), /* variable shift costs */
195 COSTS_N_INSNS (2), /* constant shift costs */
196 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
197 COSTS_N_INSNS (6), /* HI */
198 COSTS_N_INSNS (6), /* SI */
199 COSTS_N_INSNS (6), /* DI */
200 COSTS_N_INSNS (6)}, /* other */
201 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
202 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
203 COSTS_N_INSNS (23), /* HI */
204 COSTS_N_INSNS (23), /* SI */
205 COSTS_N_INSNS (23), /* DI */
206 COSTS_N_INSNS (23)}, /* other */
207 COSTS_N_INSNS (3), /* cost of movsx */
208 COSTS_N_INSNS (2), /* cost of movzx */
209 15, /* "large" insn */
210 3, /* MOVE_RATIO */
211 4, /* cost for loading QImode using movzbl */
212 {2, 4, 2}, /* cost of loading integer registers
213 in QImode, HImode and SImode.
214 Relative to reg-reg move (2). */
215 {2, 4, 2}, /* cost of storing integer registers */
216 2, /* cost of reg,reg fld/fst */
217 {8, 8, 8}, /* cost of loading fp registers
218 in SFmode, DFmode and XFmode */
219 {8, 8, 8}, /* cost of storing fp registers
220 in SFmode, DFmode and XFmode */
221 2, /* cost of moving MMX register */
222 {4, 8}, /* cost of loading MMX registers
223 in SImode and DImode */
224 {4, 8}, /* cost of storing MMX registers
225 in SImode and DImode */
226 2, /* cost of moving SSE register */
227 {4, 8, 16}, /* cost of loading SSE registers
228 in SImode, DImode and TImode */
229 {4, 8, 16}, /* cost of storing SSE registers
230 in SImode, DImode and TImode */
231 3, /* MMX or SSE register to integer */
232 0, /* size of l1 cache */
233 0, /* size of l2 cache */
234 0, /* size of prefetch block */
235 0, /* number of parallel prefetches */
236 1, /* Branch cost */
237 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
238 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
239 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
240 COSTS_N_INSNS (22), /* cost of FABS instruction. */
241 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
242 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
243 i386_memcpy,
244 i386_memset,
245 1, /* scalar_stmt_cost. */
246 1, /* scalar load_cost. */
247 1, /* scalar_store_cost. */
248 1, /* vec_stmt_cost. */
249 1, /* vec_to_scalar_cost. */
250 1, /* scalar_to_vec_cost. */
251 1, /* vec_align_load_cost. */
252 2, /* vec_unalign_load_cost. */
253 1, /* vec_store_cost. */
254 3, /* cond_taken_branch_cost. */
255 1, /* cond_not_taken_branch_cost. */
256 };
257
258 static stringop_algs i486_memcpy[2] = {
259 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
260 DUMMY_STRINGOP_ALGS};
261 static stringop_algs i486_memset[2] = {
262 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
263 DUMMY_STRINGOP_ALGS};
264
265 static const
266 struct processor_costs i486_cost = { /* 486 specific costs */
267 COSTS_N_INSNS (1), /* cost of an add instruction */
268 COSTS_N_INSNS (1), /* cost of a lea instruction */
269 COSTS_N_INSNS (3), /* variable shift costs */
270 COSTS_N_INSNS (2), /* constant shift costs */
271 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
272 COSTS_N_INSNS (12), /* HI */
273 COSTS_N_INSNS (12), /* SI */
274 COSTS_N_INSNS (12), /* DI */
275 COSTS_N_INSNS (12)}, /* other */
276 1, /* cost of multiply per each bit set */
277 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
278 COSTS_N_INSNS (40), /* HI */
279 COSTS_N_INSNS (40), /* SI */
280 COSTS_N_INSNS (40), /* DI */
281 COSTS_N_INSNS (40)}, /* other */
282 COSTS_N_INSNS (3), /* cost of movsx */
283 COSTS_N_INSNS (2), /* cost of movzx */
284 15, /* "large" insn */
285 3, /* MOVE_RATIO */
286 4, /* cost for loading QImode using movzbl */
287 {2, 4, 2}, /* cost of loading integer registers
288 in QImode, HImode and SImode.
289 Relative to reg-reg move (2). */
290 {2, 4, 2}, /* cost of storing integer registers */
291 2, /* cost of reg,reg fld/fst */
292 {8, 8, 8}, /* cost of loading fp registers
293 in SFmode, DFmode and XFmode */
294 {8, 8, 8}, /* cost of storing fp registers
295 in SFmode, DFmode and XFmode */
296 2, /* cost of moving MMX register */
297 {4, 8}, /* cost of loading MMX registers
298 in SImode and DImode */
299 {4, 8}, /* cost of storing MMX registers
300 in SImode and DImode */
301 2, /* cost of moving SSE register */
302 {4, 8, 16}, /* cost of loading SSE registers
303 in SImode, DImode and TImode */
304 {4, 8, 16}, /* cost of storing SSE registers
305 in SImode, DImode and TImode */
306 3, /* MMX or SSE register to integer */
307 4, /* size of l1 cache. 486 has 8kB cache
308 shared for code and data, so 4kB is
309 not really precise. */
310 4, /* size of l2 cache */
311 0, /* size of prefetch block */
312 0, /* number of parallel prefetches */
313 1, /* Branch cost */
314 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
315 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
316 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
317 COSTS_N_INSNS (3), /* cost of FABS instruction. */
318 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
319 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
320 i486_memcpy,
321 i486_memset,
322 1, /* scalar_stmt_cost. */
323 1, /* scalar load_cost. */
324 1, /* scalar_store_cost. */
325 1, /* vec_stmt_cost. */
326 1, /* vec_to_scalar_cost. */
327 1, /* scalar_to_vec_cost. */
328 1, /* vec_align_load_cost. */
329 2, /* vec_unalign_load_cost. */
330 1, /* vec_store_cost. */
331 3, /* cond_taken_branch_cost. */
332 1, /* cond_not_taken_branch_cost. */
333 };
334
335 static stringop_algs pentium_memcpy[2] = {
336 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
337 DUMMY_STRINGOP_ALGS};
338 static stringop_algs pentium_memset[2] = {
339 {libcall, {{-1, rep_prefix_4_byte, false}}},
340 DUMMY_STRINGOP_ALGS};
341
342 static const
343 struct processor_costs pentium_cost = {
344 COSTS_N_INSNS (1), /* cost of an add instruction */
345 COSTS_N_INSNS (1), /* cost of a lea instruction */
346 COSTS_N_INSNS (4), /* variable shift costs */
347 COSTS_N_INSNS (1), /* constant shift costs */
348 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
349 COSTS_N_INSNS (11), /* HI */
350 COSTS_N_INSNS (11), /* SI */
351 COSTS_N_INSNS (11), /* DI */
352 COSTS_N_INSNS (11)}, /* other */
353 0, /* cost of multiply per each bit set */
354 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
355 COSTS_N_INSNS (25), /* HI */
356 COSTS_N_INSNS (25), /* SI */
357 COSTS_N_INSNS (25), /* DI */
358 COSTS_N_INSNS (25)}, /* other */
359 COSTS_N_INSNS (3), /* cost of movsx */
360 COSTS_N_INSNS (2), /* cost of movzx */
361 8, /* "large" insn */
362 6, /* MOVE_RATIO */
363 6, /* cost for loading QImode using movzbl */
364 {2, 4, 2}, /* cost of loading integer registers
365 in QImode, HImode and SImode.
366 Relative to reg-reg move (2). */
367 {2, 4, 2}, /* cost of storing integer registers */
368 2, /* cost of reg,reg fld/fst */
369 {2, 2, 6}, /* cost of loading fp registers
370 in SFmode, DFmode and XFmode */
371 {4, 4, 6}, /* cost of storing fp registers
372 in SFmode, DFmode and XFmode */
373 8, /* cost of moving MMX register */
374 {8, 8}, /* cost of loading MMX registers
375 in SImode and DImode */
376 {8, 8}, /* cost of storing MMX registers
377 in SImode and DImode */
378 2, /* cost of moving SSE register */
379 {4, 8, 16}, /* cost of loading SSE registers
380 in SImode, DImode and TImode */
381 {4, 8, 16}, /* cost of storing SSE registers
382 in SImode, DImode and TImode */
383 3, /* MMX or SSE register to integer */
384 8, /* size of l1 cache. */
385 8, /* size of l2 cache */
386 0, /* size of prefetch block */
387 0, /* number of parallel prefetches */
388 2, /* Branch cost */
389 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
390 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
391 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
392 COSTS_N_INSNS (1), /* cost of FABS instruction. */
393 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
394 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
395 pentium_memcpy,
396 pentium_memset,
397 1, /* scalar_stmt_cost. */
398 1, /* scalar load_cost. */
399 1, /* scalar_store_cost. */
400 1, /* vec_stmt_cost. */
401 1, /* vec_to_scalar_cost. */
402 1, /* scalar_to_vec_cost. */
403 1, /* vec_align_load_cost. */
404 2, /* vec_unalign_load_cost. */
405 1, /* vec_store_cost. */
406 3, /* cond_taken_branch_cost. */
407 1, /* cond_not_taken_branch_cost. */
408 };
409
410 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
411 (we ensure the alignment).  For small blocks an inline loop is still a
412 noticeable win; for bigger blocks either rep movsl or rep movsb is the
413 way to go.  Rep movsb apparently has a more expensive startup time in the
414 CPU, but after 4K the difference is down in the noise.  */
415 static stringop_algs pentiumpro_memcpy[2] = {
416 {rep_prefix_4_byte, {{128, loop, false}, {1024, unrolled_loop, false},
417 {8192, rep_prefix_4_byte, false},
418 {-1, rep_prefix_1_byte, false}}},
419 DUMMY_STRINGOP_ALGS};
420 static stringop_algs pentiumpro_memset[2] = {
421 {rep_prefix_4_byte, {{1024, unrolled_loop, false},
422 {8192, rep_prefix_4_byte, false},
423 {-1, libcall, false}}},
424 DUMMY_STRINGOP_ALGS};
425 static const
426 struct processor_costs pentiumpro_cost = {
427 COSTS_N_INSNS (1), /* cost of an add instruction */
428 COSTS_N_INSNS (1), /* cost of a lea instruction */
429 COSTS_N_INSNS (1), /* variable shift costs */
430 COSTS_N_INSNS (1), /* constant shift costs */
431 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
432 COSTS_N_INSNS (4), /* HI */
433 COSTS_N_INSNS (4), /* SI */
434 COSTS_N_INSNS (4), /* DI */
435 COSTS_N_INSNS (4)}, /* other */
436 0, /* cost of multiply per each bit set */
437 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
438 COSTS_N_INSNS (17), /* HI */
439 COSTS_N_INSNS (17), /* SI */
440 COSTS_N_INSNS (17), /* DI */
441 COSTS_N_INSNS (17)}, /* other */
442 COSTS_N_INSNS (1), /* cost of movsx */
443 COSTS_N_INSNS (1), /* cost of movzx */
444 8, /* "large" insn */
445 6, /* MOVE_RATIO */
446 2, /* cost for loading QImode using movzbl */
447 {4, 4, 4}, /* cost of loading integer registers
448 in QImode, HImode and SImode.
449 Relative to reg-reg move (2). */
450 {2, 2, 2}, /* cost of storing integer registers */
451 2, /* cost of reg,reg fld/fst */
452 {2, 2, 6}, /* cost of loading fp registers
453 in SFmode, DFmode and XFmode */
454 {4, 4, 6}, /* cost of storing fp registers
455 in SFmode, DFmode and XFmode */
456 2, /* cost of moving MMX register */
457 {2, 2}, /* cost of loading MMX registers
458 in SImode and DImode */
459 {2, 2}, /* cost of storing MMX registers
460 in SImode and DImode */
461 2, /* cost of moving SSE register */
462 {2, 2, 8}, /* cost of loading SSE registers
463 in SImode, DImode and TImode */
464 {2, 2, 8}, /* cost of storing SSE registers
465 in SImode, DImode and TImode */
466 3, /* MMX or SSE register to integer */
467 8, /* size of l1 cache. */
468 256, /* size of l2 cache */
469 32, /* size of prefetch block */
470 6, /* number of parallel prefetches */
471 2, /* Branch cost */
472 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
473 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
474 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
475 COSTS_N_INSNS (2), /* cost of FABS instruction. */
476 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
477 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
478 pentiumpro_memcpy,
479 pentiumpro_memset,
480 1, /* scalar_stmt_cost. */
481 1, /* scalar load_cost. */
482 1, /* scalar_store_cost. */
483 1, /* vec_stmt_cost. */
484 1, /* vec_to_scalar_cost. */
485 1, /* scalar_to_vec_cost. */
486 1, /* vec_align_load_cost. */
487 2, /* vec_unalign_load_cost. */
488 1, /* vec_store_cost. */
489 3, /* cond_taken_branch_cost. */
490 1, /* cond_not_taken_branch_cost. */
491 };
492
493 static stringop_algs geode_memcpy[2] = {
494 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
495 DUMMY_STRINGOP_ALGS};
496 static stringop_algs geode_memset[2] = {
497 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
498 DUMMY_STRINGOP_ALGS};
499 static const
500 struct processor_costs geode_cost = {
501 COSTS_N_INSNS (1), /* cost of an add instruction */
502 COSTS_N_INSNS (1), /* cost of a lea instruction */
503 COSTS_N_INSNS (2), /* variable shift costs */
504 COSTS_N_INSNS (1), /* constant shift costs */
505 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
506 COSTS_N_INSNS (4), /* HI */
507 COSTS_N_INSNS (7), /* SI */
508 COSTS_N_INSNS (7), /* DI */
509 COSTS_N_INSNS (7)}, /* other */
510 0, /* cost of multiply per each bit set */
511 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
512 COSTS_N_INSNS (23), /* HI */
513 COSTS_N_INSNS (39), /* SI */
514 COSTS_N_INSNS (39), /* DI */
515 COSTS_N_INSNS (39)}, /* other */
516 COSTS_N_INSNS (1), /* cost of movsx */
517 COSTS_N_INSNS (1), /* cost of movzx */
518 8, /* "large" insn */
519 4, /* MOVE_RATIO */
520 1, /* cost for loading QImode using movzbl */
521 {1, 1, 1}, /* cost of loading integer registers
522 in QImode, HImode and SImode.
523 Relative to reg-reg move (2). */
524 {1, 1, 1}, /* cost of storing integer registers */
525 1, /* cost of reg,reg fld/fst */
526 {1, 1, 1}, /* cost of loading fp registers
527 in SFmode, DFmode and XFmode */
528 {4, 6, 6}, /* cost of storing fp registers
529 in SFmode, DFmode and XFmode */
530
531 1, /* cost of moving MMX register */
532 {1, 1}, /* cost of loading MMX registers
533 in SImode and DImode */
534 {1, 1}, /* cost of storing MMX registers
535 in SImode and DImode */
536 1, /* cost of moving SSE register */
537 {1, 1, 1}, /* cost of loading SSE registers
538 in SImode, DImode and TImode */
539 {1, 1, 1}, /* cost of storing SSE registers
540 in SImode, DImode and TImode */
541 1, /* MMX or SSE register to integer */
542 64, /* size of l1 cache. */
543 128, /* size of l2 cache. */
544 32, /* size of prefetch block */
545 1, /* number of parallel prefetches */
546 1, /* Branch cost */
547 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
548 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
549 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
550 COSTS_N_INSNS (1), /* cost of FABS instruction. */
551 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
552 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
553 geode_memcpy,
554 geode_memset,
555 1, /* scalar_stmt_cost. */
556 1, /* scalar load_cost. */
557 1, /* scalar_store_cost. */
558 1, /* vec_stmt_cost. */
559 1, /* vec_to_scalar_cost. */
560 1, /* scalar_to_vec_cost. */
561 1, /* vec_align_load_cost. */
562 2, /* vec_unalign_load_cost. */
563 1, /* vec_store_cost. */
564 3, /* cond_taken_branch_cost. */
565 1, /* cond_not_taken_branch_cost. */
566 };
567
568 static stringop_algs k6_memcpy[2] = {
569 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
570 DUMMY_STRINGOP_ALGS};
571 static stringop_algs k6_memset[2] = {
572 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
573 DUMMY_STRINGOP_ALGS};
574 static const
575 struct processor_costs k6_cost = {
576 COSTS_N_INSNS (1), /* cost of an add instruction */
577 COSTS_N_INSNS (2), /* cost of a lea instruction */
578 COSTS_N_INSNS (1), /* variable shift costs */
579 COSTS_N_INSNS (1), /* constant shift costs */
580 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
581 COSTS_N_INSNS (3), /* HI */
582 COSTS_N_INSNS (3), /* SI */
583 COSTS_N_INSNS (3), /* DI */
584 COSTS_N_INSNS (3)}, /* other */
585 0, /* cost of multiply per each bit set */
586 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
587 COSTS_N_INSNS (18), /* HI */
588 COSTS_N_INSNS (18), /* SI */
589 COSTS_N_INSNS (18), /* DI */
590 COSTS_N_INSNS (18)}, /* other */
591 COSTS_N_INSNS (2), /* cost of movsx */
592 COSTS_N_INSNS (2), /* cost of movzx */
593 8, /* "large" insn */
594 4, /* MOVE_RATIO */
595 3, /* cost for loading QImode using movzbl */
596 {4, 5, 4}, /* cost of loading integer registers
597 in QImode, HImode and SImode.
598 Relative to reg-reg move (2). */
599 {2, 3, 2}, /* cost of storing integer registers */
600 4, /* cost of reg,reg fld/fst */
601 {6, 6, 6}, /* cost of loading fp registers
602 in SFmode, DFmode and XFmode */
603 {4, 4, 4}, /* cost of storing fp registers
604 in SFmode, DFmode and XFmode */
605 2, /* cost of moving MMX register */
606 {2, 2}, /* cost of loading MMX registers
607 in SImode and DImode */
608 {2, 2}, /* cost of storing MMX registers
609 in SImode and DImode */
610 2, /* cost of moving SSE register */
611 {2, 2, 8}, /* cost of loading SSE registers
612 in SImode, DImode and TImode */
613 {2, 2, 8}, /* cost of storing SSE registers
614 in SImode, DImode and TImode */
615 6, /* MMX or SSE register to integer */
616 32, /* size of l1 cache. */
617 32, /* size of l2 cache. Some models
618 have integrated l2 cache, but
619 optimizing for k6 is not important
620 enough to worry about that. */
621 32, /* size of prefetch block */
622 1, /* number of parallel prefetches */
623 1, /* Branch cost */
624 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
625 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
626 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
627 COSTS_N_INSNS (2), /* cost of FABS instruction. */
628 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
629 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
630 k6_memcpy,
631 k6_memset,
632 1, /* scalar_stmt_cost. */
633 1, /* scalar load_cost. */
634 1, /* scalar_store_cost. */
635 1, /* vec_stmt_cost. */
636 1, /* vec_to_scalar_cost. */
637 1, /* scalar_to_vec_cost. */
638 1, /* vec_align_load_cost. */
639 2, /* vec_unalign_load_cost. */
640 1, /* vec_store_cost. */
641 3, /* cond_taken_branch_cost. */
642 1, /* cond_not_taken_branch_cost. */
643 };
644
645 /* For some reason, Athlon handles the REP prefix better (relative to loops)
646 than K8 does.  Alignment becomes important after 8 bytes for memcpy and
647 128 bytes for memset.  */
648 static stringop_algs athlon_memcpy[2] = {
649 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
650 DUMMY_STRINGOP_ALGS};
651 static stringop_algs athlon_memset[2] = {
652 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
653 DUMMY_STRINGOP_ALGS};
654 static const
655 struct processor_costs athlon_cost = {
656 COSTS_N_INSNS (1), /* cost of an add instruction */
657 COSTS_N_INSNS (2), /* cost of a lea instruction */
658 COSTS_N_INSNS (1), /* variable shift costs */
659 COSTS_N_INSNS (1), /* constant shift costs */
660 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
661 COSTS_N_INSNS (5), /* HI */
662 COSTS_N_INSNS (5), /* SI */
663 COSTS_N_INSNS (5), /* DI */
664 COSTS_N_INSNS (5)}, /* other */
665 0, /* cost of multiply per each bit set */
666 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
667 COSTS_N_INSNS (26), /* HI */
668 COSTS_N_INSNS (42), /* SI */
669 COSTS_N_INSNS (74), /* DI */
670 COSTS_N_INSNS (74)}, /* other */
671 COSTS_N_INSNS (1), /* cost of movsx */
672 COSTS_N_INSNS (1), /* cost of movzx */
673 8, /* "large" insn */
674 9, /* MOVE_RATIO */
675 4, /* cost for loading QImode using movzbl */
676 {3, 4, 3}, /* cost of loading integer registers
677 in QImode, HImode and SImode.
678 Relative to reg-reg move (2). */
679 {3, 4, 3}, /* cost of storing integer registers */
680 4, /* cost of reg,reg fld/fst */
681 {4, 4, 12}, /* cost of loading fp registers
682 in SFmode, DFmode and XFmode */
683 {6, 6, 8}, /* cost of storing fp registers
684 in SFmode, DFmode and XFmode */
685 2, /* cost of moving MMX register */
686 {4, 4}, /* cost of loading MMX registers
687 in SImode and DImode */
688 {4, 4}, /* cost of storing MMX registers
689 in SImode and DImode */
690 2, /* cost of moving SSE register */
691 {4, 4, 6}, /* cost of loading SSE registers
692 in SImode, DImode and TImode */
693 {4, 4, 5}, /* cost of storing SSE registers
694 in SImode, DImode and TImode */
695 5, /* MMX or SSE register to integer */
696 64, /* size of l1 cache. */
697 256, /* size of l2 cache. */
698 64, /* size of prefetch block */
699 6, /* number of parallel prefetches */
700 5, /* Branch cost */
701 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
702 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
703 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
704 COSTS_N_INSNS (2), /* cost of FABS instruction. */
705 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
706 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
707 athlon_memcpy,
708 athlon_memset,
709 1, /* scalar_stmt_cost. */
710 1, /* scalar load_cost. */
711 1, /* scalar_store_cost. */
712 1, /* vec_stmt_cost. */
713 1, /* vec_to_scalar_cost. */
714 1, /* scalar_to_vec_cost. */
715 1, /* vec_align_load_cost. */
716 2, /* vec_unalign_load_cost. */
717 1, /* vec_store_cost. */
718 3, /* cond_taken_branch_cost. */
719 1, /* cond_not_taken_branch_cost. */
720 };
721
722 /* K8 has an optimized REP instruction for medium-sized blocks, but for very
723 small blocks it is better to use a loop.  For large blocks, a libcall can
724 do non-temporal accesses and beat inline code considerably.  */
725 static stringop_algs k8_memcpy[2] = {
726 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
727 {-1, rep_prefix_4_byte, false}}},
728 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
729 {-1, libcall, false}}}};
730 static stringop_algs k8_memset[2] = {
731 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
732 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
733 {libcall, {{48, unrolled_loop, false},
734 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
735 static const
736 struct processor_costs k8_cost = {
737 COSTS_N_INSNS (1), /* cost of an add instruction */
738 COSTS_N_INSNS (2), /* cost of a lea instruction */
739 COSTS_N_INSNS (1), /* variable shift costs */
740 COSTS_N_INSNS (1), /* constant shift costs */
741 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
742 COSTS_N_INSNS (4), /* HI */
743 COSTS_N_INSNS (3), /* SI */
744 COSTS_N_INSNS (4), /* DI */
745 COSTS_N_INSNS (5)}, /* other */
746 0, /* cost of multiply per each bit set */
747 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
748 COSTS_N_INSNS (26), /* HI */
749 COSTS_N_INSNS (42), /* SI */
750 COSTS_N_INSNS (74), /* DI */
751 COSTS_N_INSNS (74)}, /* other */
752 COSTS_N_INSNS (1), /* cost of movsx */
753 COSTS_N_INSNS (1), /* cost of movzx */
754 8, /* "large" insn */
755 9, /* MOVE_RATIO */
756 4, /* cost for loading QImode using movzbl */
757 {3, 4, 3}, /* cost of loading integer registers
758 in QImode, HImode and SImode.
759 Relative to reg-reg move (2). */
760 {3, 4, 3}, /* cost of storing integer registers */
761 4, /* cost of reg,reg fld/fst */
762 {4, 4, 12}, /* cost of loading fp registers
763 in SFmode, DFmode and XFmode */
764 {6, 6, 8}, /* cost of storing fp registers
765 in SFmode, DFmode and XFmode */
766 2, /* cost of moving MMX register */
767 {3, 3}, /* cost of loading MMX registers
768 in SImode and DImode */
769 {4, 4}, /* cost of storing MMX registers
770 in SImode and DImode */
771 2, /* cost of moving SSE register */
772 {4, 3, 6}, /* cost of loading SSE registers
773 in SImode, DImode and TImode */
774 {4, 4, 5}, /* cost of storing SSE registers
775 in SImode, DImode and TImode */
776 5, /* MMX or SSE register to integer */
777 64, /* size of l1 cache. */
778 512, /* size of l2 cache. */
779 64, /* size of prefetch block */
780 /* New AMD processors never drop prefetches; if they cannot be performed
781 immediately, they are queued.  We set the number of simultaneous prefetches
782 to a large constant to reflect this (it is probably not a good idea to
783 leave the number of prefetches unlimited, as their execution also takes
784 some time).  */
785 100, /* number of parallel prefetches */
786 3, /* Branch cost */
787 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
788 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
789 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
790 COSTS_N_INSNS (2), /* cost of FABS instruction. */
791 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
792 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
793
794 k8_memcpy,
795 k8_memset,
796 4, /* scalar_stmt_cost. */
797 2, /* scalar load_cost. */
798 2, /* scalar_store_cost. */
799 5, /* vec_stmt_cost. */
800 0, /* vec_to_scalar_cost. */
801 2, /* scalar_to_vec_cost. */
802 2, /* vec_align_load_cost. */
803 3, /* vec_unalign_load_cost. */
804 3, /* vec_store_cost. */
805 3, /* cond_taken_branch_cost. */
806 2, /* cond_not_taken_branch_cost. */
807 };
808
809 /* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but for
810 very small blocks it is better to use a loop.  For large blocks, a libcall can
811 do non-temporal accesses and beat inline code considerably.  */
812 static stringop_algs amdfam10_memcpy[2] = {
813 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
814 {-1, rep_prefix_4_byte, false}}},
815 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
816 {-1, libcall, false}}}};
817 static stringop_algs amdfam10_memset[2] = {
818 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
819 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
820 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
821 {-1, libcall, false}}}};
822 struct processor_costs amdfam10_cost = {
823 COSTS_N_INSNS (1), /* cost of an add instruction */
824 COSTS_N_INSNS (2), /* cost of a lea instruction */
825 COSTS_N_INSNS (1), /* variable shift costs */
826 COSTS_N_INSNS (1), /* constant shift costs */
827 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
828 COSTS_N_INSNS (4), /* HI */
829 COSTS_N_INSNS (3), /* SI */
830 COSTS_N_INSNS (4), /* DI */
831 COSTS_N_INSNS (5)}, /* other */
832 0, /* cost of multiply per each bit set */
833 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
834 COSTS_N_INSNS (35), /* HI */
835 COSTS_N_INSNS (51), /* SI */
836 COSTS_N_INSNS (83), /* DI */
837 COSTS_N_INSNS (83)}, /* other */
838 COSTS_N_INSNS (1), /* cost of movsx */
839 COSTS_N_INSNS (1), /* cost of movzx */
840 8, /* "large" insn */
841 9, /* MOVE_RATIO */
842 4, /* cost for loading QImode using movzbl */
843 {3, 4, 3}, /* cost of loading integer registers
844 in QImode, HImode and SImode.
845 Relative to reg-reg move (2). */
846 {3, 4, 3}, /* cost of storing integer registers */
847 4, /* cost of reg,reg fld/fst */
848 {4, 4, 12}, /* cost of loading fp registers
849 in SFmode, DFmode and XFmode */
850 {6, 6, 8}, /* cost of storing fp registers
851 in SFmode, DFmode and XFmode */
852 2, /* cost of moving MMX register */
853 {3, 3}, /* cost of loading MMX registers
854 in SImode and DImode */
855 {4, 4}, /* cost of storing MMX registers
856 in SImode and DImode */
857 2, /* cost of moving SSE register */
858 {4, 4, 3}, /* cost of loading SSE registers
859 in SImode, DImode and TImode */
860 {4, 4, 5}, /* cost of storing SSE registers
861 in SImode, DImode and TImode */
862 3, /* MMX or SSE register to integer */
863 /* On K8:
864 MOVD reg64, xmmreg Double FSTORE 4
865 MOVD reg32, xmmreg Double FSTORE 4
866 On AMDFAM10:
867 MOVD reg64, xmmreg Double FADD 3
868 1/1 1/1
869 MOVD reg32, xmmreg Double FADD 3
870 1/1 1/1 */
871 64, /* size of l1 cache. */
872 512, /* size of l2 cache. */
873 64, /* size of prefetch block */
874 /* New AMD processors never drop prefetches; if they cannot be performed
875 immediately, they are queued.  We set the number of simultaneous prefetches
876 to a large constant to reflect this (it is probably not a good idea to
877 leave the number of prefetches unlimited, as their execution also takes
878 some time).  */
879 100, /* number of parallel prefetches */
880 2, /* Branch cost */
881 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
882 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
883 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
884 COSTS_N_INSNS (2), /* cost of FABS instruction. */
885 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
886 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
887
888 amdfam10_memcpy,
889 amdfam10_memset,
890 4, /* scalar_stmt_cost. */
891 2, /* scalar load_cost. */
892 2, /* scalar_store_cost. */
893 6, /* vec_stmt_cost. */
894 0, /* vec_to_scalar_cost. */
895 2, /* scalar_to_vec_cost. */
896 2, /* vec_align_load_cost. */
897 2, /* vec_unalign_load_cost. */
898 2, /* vec_store_cost. */
899 2, /* cond_taken_branch_cost. */
900 1, /* cond_not_taken_branch_cost. */
901 };
902
903 /* BDVER1 has an optimized REP instruction for medium-sized blocks, but for
904 very small blocks it is better to use a loop.  For large blocks, a libcall
905 can do non-temporal accesses and beat inline code considerably.  */
906 static stringop_algs bdver1_memcpy[2] = {
907 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
908 {-1, rep_prefix_4_byte, false}}},
909 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
910 {-1, libcall, false}}}};
911 static stringop_algs bdver1_memset[2] = {
912 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
913 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
914 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
915 {-1, libcall, false}}}};
916
917 const struct processor_costs bdver1_cost = {
918 COSTS_N_INSNS (1), /* cost of an add instruction */
919 COSTS_N_INSNS (1), /* cost of a lea instruction */
920 COSTS_N_INSNS (1), /* variable shift costs */
921 COSTS_N_INSNS (1), /* constant shift costs */
922 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
923 COSTS_N_INSNS (4), /* HI */
924 COSTS_N_INSNS (4), /* SI */
925 COSTS_N_INSNS (6), /* DI */
926 COSTS_N_INSNS (6)}, /* other */
927 0, /* cost of multiply per each bit set */
928 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
929 COSTS_N_INSNS (35), /* HI */
930 COSTS_N_INSNS (51), /* SI */
931 COSTS_N_INSNS (83), /* DI */
932 COSTS_N_INSNS (83)}, /* other */
933 COSTS_N_INSNS (1), /* cost of movsx */
934 COSTS_N_INSNS (1), /* cost of movzx */
935 8, /* "large" insn */
936 9, /* MOVE_RATIO */
937 4, /* cost for loading QImode using movzbl */
938 {5, 5, 4}, /* cost of loading integer registers
939 in QImode, HImode and SImode.
940 Relative to reg-reg move (2). */
941 {4, 4, 4}, /* cost of storing integer registers */
942 2, /* cost of reg,reg fld/fst */
943 {5, 5, 12}, /* cost of loading fp registers
944 in SFmode, DFmode and XFmode */
945 {4, 4, 8}, /* cost of storing fp registers
946 in SFmode, DFmode and XFmode */
947 2, /* cost of moving MMX register */
948 {4, 4}, /* cost of loading MMX registers
949 in SImode and DImode */
950 {4, 4}, /* cost of storing MMX registers
951 in SImode and DImode */
952 2, /* cost of moving SSE register */
953 {4, 4, 4}, /* cost of loading SSE registers
954 in SImode, DImode and TImode */
955 {4, 4, 4}, /* cost of storing SSE registers
956 in SImode, DImode and TImode */
957 2, /* MMX or SSE register to integer */
958 /* On K8:
959 MOVD reg64, xmmreg Double FSTORE 4
960 MOVD reg32, xmmreg Double FSTORE 4
961 On AMDFAM10:
962 MOVD reg64, xmmreg Double FADD 3
963 1/1 1/1
964 MOVD reg32, xmmreg Double FADD 3
965 1/1 1/1 */
966 16, /* size of l1 cache. */
967 2048, /* size of l2 cache. */
968 64, /* size of prefetch block */
969 /* New AMD processors never drop prefetches; if they cannot be performed
970 immediately, they are queued.  We set the number of simultaneous prefetches
971 to a large constant to reflect this (it is probably not a good idea to
972 leave the number of prefetches unlimited, as their execution also takes
973 some time).  */
974 100, /* number of parallel prefetches */
975 2, /* Branch cost */
976 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
977 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
978 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
979 COSTS_N_INSNS (2), /* cost of FABS instruction. */
980 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
981 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
982
983 bdver1_memcpy,
984 bdver1_memset,
985 6, /* scalar_stmt_cost. */
986 4, /* scalar load_cost. */
987 4, /* scalar_store_cost. */
988 6, /* vec_stmt_cost. */
989 0, /* vec_to_scalar_cost. */
990 2, /* scalar_to_vec_cost. */
991 4, /* vec_align_load_cost. */
992 4, /* vec_unalign_load_cost. */
993 4, /* vec_store_cost. */
994 2, /* cond_taken_branch_cost. */
995 1, /* cond_not_taken_branch_cost. */
996 };
997
998 /* BDVER2 has an optimized REP instruction for medium-sized blocks, but for
999 very small blocks it is better to use a loop.  For large blocks, a libcall
1000 can do non-temporal accesses and beat inline code considerably.  */
1001
1002 static stringop_algs bdver2_memcpy[2] = {
1003 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1004 {-1, rep_prefix_4_byte, false}}},
1005 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1006 {-1, libcall, false}}}};
1007 static stringop_algs bdver2_memset[2] = {
1008 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1009 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1010 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1011 {-1, libcall, false}}}};
1012
1013 const struct processor_costs bdver2_cost = {
1014 COSTS_N_INSNS (1), /* cost of an add instruction */
1015 COSTS_N_INSNS (1), /* cost of a lea instruction */
1016 COSTS_N_INSNS (1), /* variable shift costs */
1017 COSTS_N_INSNS (1), /* constant shift costs */
1018 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1019 COSTS_N_INSNS (4), /* HI */
1020 COSTS_N_INSNS (4), /* SI */
1021 COSTS_N_INSNS (6), /* DI */
1022 COSTS_N_INSNS (6)}, /* other */
1023 0, /* cost of multiply per each bit set */
1024 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1025 COSTS_N_INSNS (35), /* HI */
1026 COSTS_N_INSNS (51), /* SI */
1027 COSTS_N_INSNS (83), /* DI */
1028 COSTS_N_INSNS (83)}, /* other */
1029 COSTS_N_INSNS (1), /* cost of movsx */
1030 COSTS_N_INSNS (1), /* cost of movzx */
1031 8, /* "large" insn */
1032 9, /* MOVE_RATIO */
1033 4, /* cost for loading QImode using movzbl */
1034 {5, 5, 4}, /* cost of loading integer registers
1035 in QImode, HImode and SImode.
1036 Relative to reg-reg move (2). */
1037 {4, 4, 4}, /* cost of storing integer registers */
1038 2, /* cost of reg,reg fld/fst */
1039 {5, 5, 12}, /* cost of loading fp registers
1040 in SFmode, DFmode and XFmode */
1041 {4, 4, 8}, /* cost of storing fp registers
1042 in SFmode, DFmode and XFmode */
1043 2, /* cost of moving MMX register */
1044 {4, 4}, /* cost of loading MMX registers
1045 in SImode and DImode */
1046 {4, 4}, /* cost of storing MMX registers
1047 in SImode and DImode */
1048 2, /* cost of moving SSE register */
1049 {4, 4, 4}, /* cost of loading SSE registers
1050 in SImode, DImode and TImode */
1051 {4, 4, 4}, /* cost of storing SSE registers
1052 in SImode, DImode and TImode */
1053 2, /* MMX or SSE register to integer */
1054 /* On K8:
1055 MOVD reg64, xmmreg Double FSTORE 4
1056 MOVD reg32, xmmreg Double FSTORE 4
1057 On AMDFAM10:
1058 MOVD reg64, xmmreg Double FADD 3
1059 1/1 1/1
1060 MOVD reg32, xmmreg Double FADD 3
1061 1/1 1/1 */
1062 16, /* size of l1 cache. */
1063 2048, /* size of l2 cache. */
1064 64, /* size of prefetch block */
1065 /* New AMD processors never drop prefetches; if they cannot be performed
1066 immediately, they are queued.  We set the number of simultaneous prefetches
1067 to a large constant to reflect this (it is probably not a good idea to
1068 leave the number of prefetches unlimited, as their execution also takes
1069 some time).  */
1070 100, /* number of parallel prefetches */
1071 2, /* Branch cost */
1072 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1073 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1074 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1075 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1076 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1077 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1078
1079 bdver2_memcpy,
1080 bdver2_memset,
1081 6, /* scalar_stmt_cost. */
1082 4, /* scalar load_cost. */
1083 4, /* scalar_store_cost. */
1084 6, /* vec_stmt_cost. */
1085 0, /* vec_to_scalar_cost. */
1086 2, /* scalar_to_vec_cost. */
1087 4, /* vec_align_load_cost. */
1088 4, /* vec_unalign_load_cost. */
1089 4, /* vec_store_cost. */
1090 2, /* cond_taken_branch_cost. */
1091 1, /* cond_not_taken_branch_cost. */
1092 };
1093
1094
1095 /* BDVER3 has an optimized REP instruction for medium-sized blocks, but for
1096 very small blocks it is better to use a loop.  For large blocks, a libcall
1097 can do non-temporal accesses and beat inline code considerably.  */
1098 static stringop_algs bdver3_memcpy[2] = {
1099 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1100 {-1, rep_prefix_4_byte, false}}},
1101 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1102 {-1, libcall, false}}}};
1103 static stringop_algs bdver3_memset[2] = {
1104 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1105 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1106 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1107 {-1, libcall, false}}}};
1108 struct processor_costs bdver3_cost = {
1109 COSTS_N_INSNS (1), /* cost of an add instruction */
1110 COSTS_N_INSNS (1), /* cost of a lea instruction */
1111 COSTS_N_INSNS (1), /* variable shift costs */
1112 COSTS_N_INSNS (1), /* constant shift costs */
1113 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1114 COSTS_N_INSNS (4), /* HI */
1115 COSTS_N_INSNS (4), /* SI */
1116 COSTS_N_INSNS (6), /* DI */
1117 COSTS_N_INSNS (6)}, /* other */
1118 0, /* cost of multiply per each bit set */
1119 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1120 COSTS_N_INSNS (35), /* HI */
1121 COSTS_N_INSNS (51), /* SI */
1122 COSTS_N_INSNS (83), /* DI */
1123 COSTS_N_INSNS (83)}, /* other */
1124 COSTS_N_INSNS (1), /* cost of movsx */
1125 COSTS_N_INSNS (1), /* cost of movzx */
1126 8, /* "large" insn */
1127 9, /* MOVE_RATIO */
1128 4, /* cost for loading QImode using movzbl */
1129 {5, 5, 4}, /* cost of loading integer registers
1130 in QImode, HImode and SImode.
1131 Relative to reg-reg move (2). */
1132 {4, 4, 4}, /* cost of storing integer registers */
1133 2, /* cost of reg,reg fld/fst */
1134 {5, 5, 12}, /* cost of loading fp registers
1135 in SFmode, DFmode and XFmode */
1136 {4, 4, 8}, /* cost of storing fp registers
1137 in SFmode, DFmode and XFmode */
1138 2, /* cost of moving MMX register */
1139 {4, 4}, /* cost of loading MMX registers
1140 in SImode and DImode */
1141 {4, 4}, /* cost of storing MMX registers
1142 in SImode and DImode */
1143 2, /* cost of moving SSE register */
1144 {4, 4, 4}, /* cost of loading SSE registers
1145 in SImode, DImode and TImode */
1146 {4, 4, 4}, /* cost of storing SSE registers
1147 in SImode, DImode and TImode */
1148 2, /* MMX or SSE register to integer */
1149 16, /* size of l1 cache. */
1150 2048, /* size of l2 cache. */
1151 64, /* size of prefetch block */
1152 /* New AMD processors never drop prefetches; if they cannot be performed
1153 immediately, they are queued.  We set the number of simultaneous prefetches
1154 to a large constant to reflect this (it is probably not a good idea to
1155 leave the number of prefetches unlimited, as their execution also takes
1156 some time).  */
1157 100, /* number of parallel prefetches */
1158 2, /* Branch cost */
1159 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1160 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1161 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1162 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1163 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1164 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1165
1166 bdver3_memcpy,
1167 bdver3_memset,
1168 6, /* scalar_stmt_cost. */
1169 4, /* scalar load_cost. */
1170 4, /* scalar_store_cost. */
1171 6, /* vec_stmt_cost. */
1172 0, /* vec_to_scalar_cost. */
1173 2, /* scalar_to_vec_cost. */
1174 4, /* vec_align_load_cost. */
1175 4, /* vec_unalign_load_cost. */
1176 4, /* vec_store_cost. */
1177 2, /* cond_taken_branch_cost. */
1178 1, /* cond_not_taken_branch_cost. */
1179 };
1180
1181 /* BDVER4 has an optimized REP instruction for medium-sized blocks, but for
1182 very small blocks it is better to use a loop.  For large blocks, a libcall
1183 can do non-temporal accesses and beat inline code considerably.  */
1184 static stringop_algs bdver4_memcpy[2] = {
1185 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1186 {-1, rep_prefix_4_byte, false}}},
1187 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1188 {-1, libcall, false}}}};
1189 static stringop_algs bdver4_memset[2] = {
1190 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1191 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1192 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1193 {-1, libcall, false}}}};
1194 struct processor_costs bdver4_cost = {
1195 COSTS_N_INSNS (1), /* cost of an add instruction */
1196 COSTS_N_INSNS (1), /* cost of a lea instruction */
1197 COSTS_N_INSNS (1), /* variable shift costs */
1198 COSTS_N_INSNS (1), /* constant shift costs */
1199 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1200 COSTS_N_INSNS (4), /* HI */
1201 COSTS_N_INSNS (4), /* SI */
1202 COSTS_N_INSNS (6), /* DI */
1203 COSTS_N_INSNS (6)}, /* other */
1204 0, /* cost of multiply per each bit set */
1205 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1206 COSTS_N_INSNS (35), /* HI */
1207 COSTS_N_INSNS (51), /* SI */
1208 COSTS_N_INSNS (83), /* DI */
1209 COSTS_N_INSNS (83)}, /* other */
1210 COSTS_N_INSNS (1), /* cost of movsx */
1211 COSTS_N_INSNS (1), /* cost of movzx */
1212 8, /* "large" insn */
1213 9, /* MOVE_RATIO */
1214 4, /* cost for loading QImode using movzbl */
1215 {5, 5, 4}, /* cost of loading integer registers
1216 in QImode, HImode and SImode.
1217 Relative to reg-reg move (2). */
1218 {4, 4, 4}, /* cost of storing integer registers */
1219 2, /* cost of reg,reg fld/fst */
1220 {5, 5, 12}, /* cost of loading fp registers
1221 in SFmode, DFmode and XFmode */
1222 {4, 4, 8}, /* cost of storing fp registers
1223 in SFmode, DFmode and XFmode */
1224 2, /* cost of moving MMX register */
1225 {4, 4}, /* cost of loading MMX registers
1226 in SImode and DImode */
1227 {4, 4}, /* cost of storing MMX registers
1228 in SImode and DImode */
1229 2, /* cost of moving SSE register */
1230 {4, 4, 4}, /* cost of loading SSE registers
1231 in SImode, DImode and TImode */
1232 {4, 4, 4}, /* cost of storing SSE registers
1233 in SImode, DImode and TImode */
1234 2, /* MMX or SSE register to integer */
1235 16, /* size of l1 cache. */
1236 2048, /* size of l2 cache. */
1237 64, /* size of prefetch block */
1238 /* New AMD processors never drop prefetches; if they cannot be performed
1239 immediately, they are queued.  We set the number of simultaneous prefetches
1240 to a large constant to reflect this (it is probably not a good idea to
1241 leave the number of prefetches unlimited, as their execution also takes
1242 some time).  */
1243 100, /* number of parallel prefetches */
1244 2, /* Branch cost */
1245 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1246 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1247 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1248 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1249 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1250 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1251
1252 bdver4_memcpy,
1253 bdver4_memset,
1254 6, /* scalar_stmt_cost. */
1255 4, /* scalar load_cost. */
1256 4, /* scalar_store_cost. */
1257 6, /* vec_stmt_cost. */
1258 0, /* vec_to_scalar_cost. */
1259 2, /* scalar_to_vec_cost. */
1260 4, /* vec_align_load_cost. */
1261 4, /* vec_unalign_load_cost. */
1262 4, /* vec_store_cost. */
1263 2, /* cond_taken_branch_cost. */
1264 1, /* cond_not_taken_branch_cost. */
1265 };
1266
1267 /* BTVER1 has an optimized REP instruction for medium-sized blocks, but for
1268 very small blocks it is better to use a loop.  For large blocks, a libcall can
1269 do non-temporal accesses and beat inline code considerably.  */
1270 static stringop_algs btver1_memcpy[2] = {
1271 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1272 {-1, rep_prefix_4_byte, false}}},
1273 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1274 {-1, libcall, false}}}};
1275 static stringop_algs btver1_memset[2] = {
1276 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1277 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1278 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1279 {-1, libcall, false}}}};
1280 const struct processor_costs btver1_cost = {
1281 COSTS_N_INSNS (1), /* cost of an add instruction */
1282 COSTS_N_INSNS (2), /* cost of a lea instruction */
1283 COSTS_N_INSNS (1), /* variable shift costs */
1284 COSTS_N_INSNS (1), /* constant shift costs */
1285 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1286 COSTS_N_INSNS (4), /* HI */
1287 COSTS_N_INSNS (3), /* SI */
1288 COSTS_N_INSNS (4), /* DI */
1289 COSTS_N_INSNS (5)}, /* other */
1290 0, /* cost of multiply per each bit set */
1291 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1292 COSTS_N_INSNS (35), /* HI */
1293 COSTS_N_INSNS (51), /* SI */
1294 COSTS_N_INSNS (83), /* DI */
1295 COSTS_N_INSNS (83)}, /* other */
1296 COSTS_N_INSNS (1), /* cost of movsx */
1297 COSTS_N_INSNS (1), /* cost of movzx */
1298 8, /* "large" insn */
1299 9, /* MOVE_RATIO */
1300 4, /* cost for loading QImode using movzbl */
1301 {3, 4, 3}, /* cost of loading integer registers
1302 in QImode, HImode and SImode.
1303 Relative to reg-reg move (2). */
1304 {3, 4, 3}, /* cost of storing integer registers */
1305 4, /* cost of reg,reg fld/fst */
1306 {4, 4, 12}, /* cost of loading fp registers
1307 in SFmode, DFmode and XFmode */
1308 {6, 6, 8}, /* cost of storing fp registers
1309 in SFmode, DFmode and XFmode */
1310 2, /* cost of moving MMX register */
1311 {3, 3}, /* cost of loading MMX registers
1312 in SImode and DImode */
1313 {4, 4}, /* cost of storing MMX registers
1314 in SImode and DImode */
1315 2, /* cost of moving SSE register */
1316 {4, 4, 3}, /* cost of loading SSE registers
1317 in SImode, DImode and TImode */
1318 {4, 4, 5}, /* cost of storing SSE registers
1319 in SImode, DImode and TImode */
1320 3, /* MMX or SSE register to integer */
1321 /* On K8:
1322 MOVD reg64, xmmreg Double FSTORE 4
1323 MOVD reg32, xmmreg Double FSTORE 4
1324 On AMDFAM10:
1325 MOVD reg64, xmmreg Double FADD 3
1326 1/1 1/1
1327 MOVD reg32, xmmreg Double FADD 3
1328 1/1 1/1 */
1329 32, /* size of l1 cache. */
1330 512, /* size of l2 cache. */
1331 64, /* size of prefetch block */
1332 100, /* number of parallel prefetches */
1333 2, /* Branch cost */
1334 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1335 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1336 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1337 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1338 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1339 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1340
1341 btver1_memcpy,
1342 btver1_memset,
1343 4, /* scalar_stmt_cost. */
1344 2, /* scalar load_cost. */
1345 2, /* scalar_store_cost. */
1346 6, /* vec_stmt_cost. */
1347 0, /* vec_to_scalar_cost. */
1348 2, /* scalar_to_vec_cost. */
1349 2, /* vec_align_load_cost. */
1350 2, /* vec_unalign_load_cost. */
1351 2, /* vec_store_cost. */
1352 2, /* cond_taken_branch_cost. */
1353 1, /* cond_not_taken_branch_cost. */
1354 };
1355
1356 static stringop_algs btver2_memcpy[2] = {
1357 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1358 {-1, rep_prefix_4_byte, false}}},
1359 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1360 {-1, libcall, false}}}};
1361 static stringop_algs btver2_memset[2] = {
1362 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1363 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1364 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1365 {-1, libcall, false}}}};
1366 const struct processor_costs btver2_cost = {
1367 COSTS_N_INSNS (1), /* cost of an add instruction */
1368 COSTS_N_INSNS (2), /* cost of a lea instruction */
1369 COSTS_N_INSNS (1), /* variable shift costs */
1370 COSTS_N_INSNS (1), /* constant shift costs */
1371 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1372 COSTS_N_INSNS (4), /* HI */
1373 COSTS_N_INSNS (3), /* SI */
1374 COSTS_N_INSNS (4), /* DI */
1375 COSTS_N_INSNS (5)}, /* other */
1376 0, /* cost of multiply per each bit set */
1377 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1378 COSTS_N_INSNS (35), /* HI */
1379 COSTS_N_INSNS (51), /* SI */
1380 COSTS_N_INSNS (83), /* DI */
1381 COSTS_N_INSNS (83)}, /* other */
1382 COSTS_N_INSNS (1), /* cost of movsx */
1383 COSTS_N_INSNS (1), /* cost of movzx */
1384 8, /* "large" insn */
1385 9, /* MOVE_RATIO */
1386 4, /* cost for loading QImode using movzbl */
1387 {3, 4, 3}, /* cost of loading integer registers
1388 in QImode, HImode and SImode.
1389 Relative to reg-reg move (2). */
1390 {3, 4, 3}, /* cost of storing integer registers */
1391 4, /* cost of reg,reg fld/fst */
1392 {4, 4, 12}, /* cost of loading fp registers
1393 in SFmode, DFmode and XFmode */
1394 {6, 6, 8}, /* cost of storing fp registers
1395 in SFmode, DFmode and XFmode */
1396 2, /* cost of moving MMX register */
1397 {3, 3}, /* cost of loading MMX registers
1398 in SImode and DImode */
1399 {4, 4}, /* cost of storing MMX registers
1400 in SImode and DImode */
1401 2, /* cost of moving SSE register */
1402 {4, 4, 3}, /* cost of loading SSE registers
1403 in SImode, DImode and TImode */
1404 {4, 4, 5}, /* cost of storing SSE registers
1405 in SImode, DImode and TImode */
1406 3, /* MMX or SSE register to integer */
1407 /* On K8:
1408 MOVD reg64, xmmreg Double FSTORE 4
1409 MOVD reg32, xmmreg Double FSTORE 4
1410 On AMDFAM10:
1411 MOVD reg64, xmmreg Double FADD 3
1412 1/1 1/1
1413 MOVD reg32, xmmreg Double FADD 3
1414 1/1 1/1 */
1415 32, /* size of l1 cache. */
1416 2048, /* size of l2 cache. */
1417 64, /* size of prefetch block */
1418 100, /* number of parallel prefetches */
1419 2, /* Branch cost */
1420 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1421 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1422 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1423 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1424 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1425 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1426 btver2_memcpy,
1427 btver2_memset,
1428 4, /* scalar_stmt_cost. */
1429 2, /* scalar load_cost. */
1430 2, /* scalar_store_cost. */
1431 6, /* vec_stmt_cost. */
1432 0, /* vec_to_scalar_cost. */
1433 2, /* scalar_to_vec_cost. */
1434 2, /* vec_align_load_cost. */
1435 2, /* vec_unalign_load_cost. */
1436 2, /* vec_store_cost. */
1437 2, /* cond_taken_branch_cost. */
1438 1, /* cond_not_taken_branch_cost. */
1439 };
1440
1441 static stringop_algs pentium4_memcpy[2] = {
1442 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1443 DUMMY_STRINGOP_ALGS};
1444 static stringop_algs pentium4_memset[2] = {
1445 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1446 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1447 DUMMY_STRINGOP_ALGS};
1448
1449 static const
1450 struct processor_costs pentium4_cost = {
1451 COSTS_N_INSNS (1), /* cost of an add instruction */
1452 COSTS_N_INSNS (3), /* cost of a lea instruction */
1453 COSTS_N_INSNS (4), /* variable shift costs */
1454 COSTS_N_INSNS (4), /* constant shift costs */
1455 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
1456 COSTS_N_INSNS (15), /* HI */
1457 COSTS_N_INSNS (15), /* SI */
1458 COSTS_N_INSNS (15), /* DI */
1459 COSTS_N_INSNS (15)}, /* other */
1460 0, /* cost of multiply per each bit set */
1461 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
1462 COSTS_N_INSNS (56), /* HI */
1463 COSTS_N_INSNS (56), /* SI */
1464 COSTS_N_INSNS (56), /* DI */
1465 COSTS_N_INSNS (56)}, /* other */
1466 COSTS_N_INSNS (1), /* cost of movsx */
1467 COSTS_N_INSNS (1), /* cost of movzx */
1468 16, /* "large" insn */
1469 6, /* MOVE_RATIO */
1470 2, /* cost for loading QImode using movzbl */
1471 {4, 5, 4}, /* cost of loading integer registers
1472 in QImode, HImode and SImode.
1473 Relative to reg-reg move (2). */
1474 {2, 3, 2}, /* cost of storing integer registers */
1475 2, /* cost of reg,reg fld/fst */
1476 {2, 2, 6}, /* cost of loading fp registers
1477 in SFmode, DFmode and XFmode */
1478 {4, 4, 6}, /* cost of storing fp registers
1479 in SFmode, DFmode and XFmode */
1480 2, /* cost of moving MMX register */
1481 {2, 2}, /* cost of loading MMX registers
1482 in SImode and DImode */
1483 {2, 2}, /* cost of storing MMX registers
1484 in SImode and DImode */
1485 12, /* cost of moving SSE register */
1486 {12, 12, 12}, /* cost of loading SSE registers
1487 in SImode, DImode and TImode */
1488 {2, 2, 8}, /* cost of storing SSE registers
1489 in SImode, DImode and TImode */
1490 10, /* MMX or SSE register to integer */
1491 8, /* size of l1 cache. */
1492 256, /* size of l2 cache. */
1493 64, /* size of prefetch block */
1494 6, /* number of parallel prefetches */
1495 2, /* Branch cost */
1496 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1497 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
1498 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
1499 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1500 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1501 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
1502 pentium4_memcpy,
1503 pentium4_memset,
1504 1, /* scalar_stmt_cost. */
1505 1, /* scalar load_cost. */
1506 1, /* scalar_store_cost. */
1507 1, /* vec_stmt_cost. */
1508 1, /* vec_to_scalar_cost. */
1509 1, /* scalar_to_vec_cost. */
1510 1, /* vec_align_load_cost. */
1511 2, /* vec_unalign_load_cost. */
1512 1, /* vec_store_cost. */
1513 3, /* cond_taken_branch_cost. */
1514 1, /* cond_not_taken_branch_cost. */
1515 };
1516
1517 static stringop_algs nocona_memcpy[2] = {
1518 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1519 {libcall, {{32, loop, false}, {20000, rep_prefix_8_byte, false},
1520 {100000, unrolled_loop, false}, {-1, libcall, false}}}};
1521
1522 static stringop_algs nocona_memset[2] = {
1523 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1524 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1525 {libcall, {{24, loop, false}, {64, unrolled_loop, false},
1526 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1527
1528 static const
1529 struct processor_costs nocona_cost = {
1530 COSTS_N_INSNS (1), /* cost of an add instruction */
1531 COSTS_N_INSNS (1), /* cost of a lea instruction */
1532 COSTS_N_INSNS (1), /* variable shift costs */
1533 COSTS_N_INSNS (1), /* constant shift costs */
1534 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
1535 COSTS_N_INSNS (10), /* HI */
1536 COSTS_N_INSNS (10), /* SI */
1537 COSTS_N_INSNS (10), /* DI */
1538 COSTS_N_INSNS (10)}, /* other */
1539 0, /* cost of multiply per each bit set */
1540 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
1541 COSTS_N_INSNS (66), /* HI */
1542 COSTS_N_INSNS (66), /* SI */
1543 COSTS_N_INSNS (66), /* DI */
1544 COSTS_N_INSNS (66)}, /* other */
1545 COSTS_N_INSNS (1), /* cost of movsx */
1546 COSTS_N_INSNS (1), /* cost of movzx */
1547 16, /* "large" insn */
1548 17, /* MOVE_RATIO */
1549 4, /* cost for loading QImode using movzbl */
1550 {4, 4, 4}, /* cost of loading integer registers
1551 in QImode, HImode and SImode.
1552 Relative to reg-reg move (2). */
1553 {4, 4, 4}, /* cost of storing integer registers */
1554 3, /* cost of reg,reg fld/fst */
1555 {12, 12, 12}, /* cost of loading fp registers
1556 in SFmode, DFmode and XFmode */
1557 {4, 4, 4}, /* cost of storing fp registers
1558 in SFmode, DFmode and XFmode */
1559 6, /* cost of moving MMX register */
1560 {12, 12}, /* cost of loading MMX registers
1561 in SImode and DImode */
1562 {12, 12}, /* cost of storing MMX registers
1563 in SImode and DImode */
1564 6, /* cost of moving SSE register */
1565 {12, 12, 12}, /* cost of loading SSE registers
1566 in SImode, DImode and TImode */
1567 {12, 12, 12}, /* cost of storing SSE registers
1568 in SImode, DImode and TImode */
1569 8, /* MMX or SSE register to integer */
1570 8, /* size of l1 cache. */
1571 1024, /* size of l2 cache. */
1572 64, /* size of prefetch block */
1573 8, /* number of parallel prefetches */
1574 1, /* Branch cost */
1575 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1576 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1577 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1578 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1579 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1580 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
1581 nocona_memcpy,
1582 nocona_memset,
1583 1, /* scalar_stmt_cost. */
1584 1, /* scalar load_cost. */
1585 1, /* scalar_store_cost. */
1586 1, /* vec_stmt_cost. */
1587 1, /* vec_to_scalar_cost. */
1588 1, /* scalar_to_vec_cost. */
1589 1, /* vec_align_load_cost. */
1590 2, /* vec_unalign_load_cost. */
1591 1, /* vec_store_cost. */
1592 3, /* cond_taken_branch_cost. */
1593 1, /* cond_not_taken_branch_cost. */
1594 };
1595
1596 static stringop_algs atom_memcpy[2] = {
1597 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1598 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1599 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1600 static stringop_algs atom_memset[2] = {
1601 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1602 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1603 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1604 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1605 static const
1606 struct processor_costs atom_cost = {
1607 COSTS_N_INSNS (1), /* cost of an add instruction */
1608 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1609 COSTS_N_INSNS (1), /* variable shift costs */
1610 COSTS_N_INSNS (1), /* constant shift costs */
1611 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1612 COSTS_N_INSNS (4), /* HI */
1613 COSTS_N_INSNS (3), /* SI */
1614 COSTS_N_INSNS (4), /* DI */
1615 COSTS_N_INSNS (2)}, /* other */
1616 0, /* cost of multiply per each bit set */
1617 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1618 COSTS_N_INSNS (26), /* HI */
1619 COSTS_N_INSNS (42), /* SI */
1620 COSTS_N_INSNS (74), /* DI */
1621 COSTS_N_INSNS (74)}, /* other */
1622 COSTS_N_INSNS (1), /* cost of movsx */
1623 COSTS_N_INSNS (1), /* cost of movzx */
1624 8, /* "large" insn */
1625 17, /* MOVE_RATIO */
1626 4, /* cost for loading QImode using movzbl */
1627 {4, 4, 4}, /* cost of loading integer registers
1628 in QImode, HImode and SImode.
1629 Relative to reg-reg move (2). */
1630 {4, 4, 4}, /* cost of storing integer registers */
1631 4, /* cost of reg,reg fld/fst */
1632 {12, 12, 12}, /* cost of loading fp registers
1633 in SFmode, DFmode and XFmode */
1634 {6, 6, 8}, /* cost of storing fp registers
1635 in SFmode, DFmode and XFmode */
1636 2, /* cost of moving MMX register */
1637 {8, 8}, /* cost of loading MMX registers
1638 in SImode and DImode */
1639 {8, 8}, /* cost of storing MMX registers
1640 in SImode and DImode */
1641 2, /* cost of moving SSE register */
1642 {8, 8, 8}, /* cost of loading SSE registers
1643 in SImode, DImode and TImode */
1644 {8, 8, 8}, /* cost of storing SSE registers
1645 in SImode, DImode and TImode */
1646 5, /* MMX or SSE register to integer */
1647 32, /* size of l1 cache. */
1648 256, /* size of l2 cache. */
1649 64, /* size of prefetch block */
1650 6, /* number of parallel prefetches */
1651 3, /* Branch cost */
1652 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1653 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1654 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1655 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1656 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1657 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1658 atom_memcpy,
1659 atom_memset,
1660 1, /* scalar_stmt_cost. */
1661 1, /* scalar load_cost. */
1662 1, /* scalar_store_cost. */
1663 1, /* vec_stmt_cost. */
1664 1, /* vec_to_scalar_cost. */
1665 1, /* scalar_to_vec_cost. */
1666 1, /* vec_align_load_cost. */
1667 2, /* vec_unalign_load_cost. */
1668 1, /* vec_store_cost. */
1669 3, /* cond_taken_branch_cost. */
1670 1, /* cond_not_taken_branch_cost. */
1671 };
1672
1673 static stringop_algs slm_memcpy[2] = {
1674 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1675 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1676 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1677 static stringop_algs slm_memset[2] = {
1678 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1679 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1680 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1681 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1682 static const
1683 struct processor_costs slm_cost = {
1684 COSTS_N_INSNS (1), /* cost of an add instruction */
1685 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1686 COSTS_N_INSNS (1), /* variable shift costs */
1687 COSTS_N_INSNS (1), /* constant shift costs */
1688 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1689 COSTS_N_INSNS (3), /* HI */
1690 COSTS_N_INSNS (3), /* SI */
1691 COSTS_N_INSNS (4), /* DI */
1692 COSTS_N_INSNS (2)}, /* other */
1693 0, /* cost of multiply per each bit set */
1694 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1695 COSTS_N_INSNS (26), /* HI */
1696 COSTS_N_INSNS (42), /* SI */
1697 COSTS_N_INSNS (74), /* DI */
1698 COSTS_N_INSNS (74)}, /* other */
1699 COSTS_N_INSNS (1), /* cost of movsx */
1700 COSTS_N_INSNS (1), /* cost of movzx */
1701 8, /* "large" insn */
1702 17, /* MOVE_RATIO */
1703 4, /* cost for loading QImode using movzbl */
1704 {4, 4, 4}, /* cost of loading integer registers
1705 in QImode, HImode and SImode.
1706 Relative to reg-reg move (2). */
1707 {4, 4, 4}, /* cost of storing integer registers */
1708 4, /* cost of reg,reg fld/fst */
1709 {12, 12, 12}, /* cost of loading fp registers
1710 in SFmode, DFmode and XFmode */
1711 {6, 6, 8}, /* cost of storing fp registers
1712 in SFmode, DFmode and XFmode */
1713 2, /* cost of moving MMX register */
1714 {8, 8}, /* cost of loading MMX registers
1715 in SImode and DImode */
1716 {8, 8}, /* cost of storing MMX registers
1717 in SImode and DImode */
1718 2, /* cost of moving SSE register */
1719 {8, 8, 8}, /* cost of loading SSE registers
1720 in SImode, DImode and TImode */
1721 {8, 8, 8}, /* cost of storing SSE registers
1722 in SImode, DImode and TImode */
1723 5, /* MMX or SSE register to integer */
1724 32, /* size of l1 cache. */
1725 256, /* size of l2 cache. */
1726 64, /* size of prefetch block */
1727 6, /* number of parallel prefetches */
1728 3, /* Branch cost */
1729 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1730 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1731 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1732 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1733 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1734 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1735 slm_memcpy,
1736 slm_memset,
1737 1, /* scalar_stmt_cost. */
1738 1, /* scalar load_cost. */
1739 1, /* scalar_store_cost. */
1740 1, /* vec_stmt_cost. */
1741 1, /* vec_to_scalar_cost. */
1742 1, /* scalar_to_vec_cost. */
1743 1, /* vec_align_load_cost. */
1744 2, /* vec_unalign_load_cost. */
1745 1, /* vec_store_cost. */
1746 3, /* cond_taken_branch_cost. */
1747 1, /* cond_not_taken_branch_cost. */
1748 };
1749
1750 static stringop_algs intel_memcpy[2] = {
1751 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1752 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1753 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1754 static stringop_algs intel_memset[2] = {
1755 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1756 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1757 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1758 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1759 static const
1760 struct processor_costs intel_cost = {
1761 COSTS_N_INSNS (1), /* cost of an add instruction */
1762 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1763 COSTS_N_INSNS (1), /* variable shift costs */
1764 COSTS_N_INSNS (1), /* constant shift costs */
1765 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1766 COSTS_N_INSNS (3), /* HI */
1767 COSTS_N_INSNS (3), /* SI */
1768 COSTS_N_INSNS (4), /* DI */
1769 COSTS_N_INSNS (2)}, /* other */
1770 0, /* cost of multiply per each bit set */
1771 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1772 COSTS_N_INSNS (26), /* HI */
1773 COSTS_N_INSNS (42), /* SI */
1774 COSTS_N_INSNS (74), /* DI */
1775 COSTS_N_INSNS (74)}, /* other */
1776 COSTS_N_INSNS (1), /* cost of movsx */
1777 COSTS_N_INSNS (1), /* cost of movzx */
1778 8, /* "large" insn */
1779 17, /* MOVE_RATIO */
1780 4, /* cost for loading QImode using movzbl */
1781 {4, 4, 4}, /* cost of loading integer registers
1782 in QImode, HImode and SImode.
1783 Relative to reg-reg move (2). */
1784 {4, 4, 4}, /* cost of storing integer registers */
1785 4, /* cost of reg,reg fld/fst */
1786 {12, 12, 12}, /* cost of loading fp registers
1787 in SFmode, DFmode and XFmode */
1788 {6, 6, 8}, /* cost of storing fp registers
1789 in SFmode, DFmode and XFmode */
1790 2, /* cost of moving MMX register */
1791 {8, 8}, /* cost of loading MMX registers
1792 in SImode and DImode */
1793 {8, 8}, /* cost of storing MMX registers
1794 in SImode and DImode */
1795 2, /* cost of moving SSE register */
1796 {8, 8, 8}, /* cost of loading SSE registers
1797 in SImode, DImode and TImode */
1798 {8, 8, 8}, /* cost of storing SSE registers
1799 in SImode, DImode and TImode */
1800 5, /* MMX or SSE register to integer */
1801 32, /* size of l1 cache. */
1802 256, /* size of l2 cache. */
1803 64, /* size of prefetch block */
1804 6, /* number of parallel prefetches */
1805 3, /* Branch cost */
1806 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1807 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1808 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1809 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1810 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1811 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1812 intel_memcpy,
1813 intel_memset,
1814 1, /* scalar_stmt_cost. */
1815 1, /* scalar load_cost. */
1816 1, /* scalar_store_cost. */
1817 1, /* vec_stmt_cost. */
1818 1, /* vec_to_scalar_cost. */
1819 1, /* scalar_to_vec_cost. */
1820 1, /* vec_align_load_cost. */
1821 2, /* vec_unalign_load_cost. */
1822 1, /* vec_store_cost. */
1823 3, /* cond_taken_branch_cost. */
1824 1, /* cond_not_taken_branch_cost. */
1825 };
1826
1827 /* Generic should produce code tuned for Core-i7 (and newer chips)
1828 and btver1 (and newer chips). */
1829
1830 static stringop_algs generic_memcpy[2] = {
1831 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1832 {-1, libcall, false}}},
1833 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1834 {-1, libcall, false}}}};
1835 static stringop_algs generic_memset[2] = {
1836 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1837 {-1, libcall, false}}},
1838 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1839 {-1, libcall, false}}}};
1840 static const
1841 struct processor_costs generic_cost = {
1842 COSTS_N_INSNS (1), /* cost of an add instruction */
1843 /* On all chips taken into consideration, lea takes 2 cycles or more. With
1844 this cost, however, our current implementation of synth_mult ends up
1845 using unnecessary temporary registers, causing regressions on several
1846 SPECfp benchmarks. */
1847 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1848 COSTS_N_INSNS (1), /* variable shift costs */
1849 COSTS_N_INSNS (1), /* constant shift costs */
1850 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1851 COSTS_N_INSNS (4), /* HI */
1852 COSTS_N_INSNS (3), /* SI */
1853 COSTS_N_INSNS (4), /* DI */
1854 COSTS_N_INSNS (2)}, /* other */
1855 0, /* cost of multiply per each bit set */
1856 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1857 COSTS_N_INSNS (26), /* HI */
1858 COSTS_N_INSNS (42), /* SI */
1859 COSTS_N_INSNS (74), /* DI */
1860 COSTS_N_INSNS (74)}, /* other */
1861 COSTS_N_INSNS (1), /* cost of movsx */
1862 COSTS_N_INSNS (1), /* cost of movzx */
1863 8, /* "large" insn */
1864 17, /* MOVE_RATIO */
1865 4, /* cost for loading QImode using movzbl */
1866 {4, 4, 4}, /* cost of loading integer registers
1867 in QImode, HImode and SImode.
1868 Relative to reg-reg move (2). */
1869 {4, 4, 4}, /* cost of storing integer registers */
1870 4, /* cost of reg,reg fld/fst */
1871 {12, 12, 12}, /* cost of loading fp registers
1872 in SFmode, DFmode and XFmode */
1873 {6, 6, 8}, /* cost of storing fp registers
1874 in SFmode, DFmode and XFmode */
1875 2, /* cost of moving MMX register */
1876 {8, 8}, /* cost of loading MMX registers
1877 in SImode and DImode */
1878 {8, 8}, /* cost of storing MMX registers
1879 in SImode and DImode */
1880 2, /* cost of moving SSE register */
1881 {8, 8, 8}, /* cost of loading SSE registers
1882 in SImode, DImode and TImode */
1883 {8, 8, 8}, /* cost of storing SSE registers
1884 in SImode, DImode and TImode */
1885 5, /* MMX or SSE register to integer */
1886 32, /* size of l1 cache. */
1887 512, /* size of l2 cache. */
1888 64, /* size of prefetch block */
1889 6, /* number of parallel prefetches */
1890 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
1891 value is increased to the perhaps more appropriate value of 5. */
1892 3, /* Branch cost */
1893 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1894 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1895 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1896 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1897 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1898 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1899 generic_memcpy,
1900 generic_memset,
1901 1, /* scalar_stmt_cost. */
1902 1, /* scalar load_cost. */
1903 1, /* scalar_store_cost. */
1904 1, /* vec_stmt_cost. */
1905 1, /* vec_to_scalar_cost. */
1906 1, /* scalar_to_vec_cost. */
1907 1, /* vec_align_load_cost. */
1908 2, /* vec_unalign_load_cost. */
1909 1, /* vec_store_cost. */
1910 3, /* cond_taken_branch_cost. */
1911 1, /* cond_not_taken_branch_cost. */
1912 };
1913
1914 /* core_cost should produce code tuned for the Core family of CPUs. */
1915 static stringop_algs core_memcpy[2] = {
1916 {libcall, {{1024, rep_prefix_4_byte, true}, {-1, libcall, false}}},
1917 {libcall, {{24, loop, true}, {128, rep_prefix_8_byte, true},
1918 {-1, libcall, false}}}};
1919 static stringop_algs core_memset[2] = {
1920 {libcall, {{6, loop_1_byte, true},
1921 {24, loop, true},
1922 {8192, rep_prefix_4_byte, true},
1923 {-1, libcall, false}}},
1924 {libcall, {{24, loop, true}, {512, rep_prefix_8_byte, true},
1925 {-1, libcall, false}}}};
1926
1927 static const
1928 struct processor_costs core_cost = {
1929 COSTS_N_INSNS (1), /* cost of an add instruction */
1930 /* On all chips taken into consideration, lea takes 2 cycles or more. With
1931 this cost, however, our current implementation of synth_mult ends up
1932 using unnecessary temporary registers, causing regressions on several
1933 SPECfp benchmarks. */
1934 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1935 COSTS_N_INSNS (1), /* variable shift costs */
1936 COSTS_N_INSNS (1), /* constant shift costs */
1937 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1938 COSTS_N_INSNS (4), /* HI */
1939 COSTS_N_INSNS (3), /* SI */
1940 COSTS_N_INSNS (4), /* DI */
1941 COSTS_N_INSNS (2)}, /* other */
1942 0, /* cost of multiply per each bit set */
1943 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1944 COSTS_N_INSNS (26), /* HI */
1945 COSTS_N_INSNS (42), /* SI */
1946 COSTS_N_INSNS (74), /* DI */
1947 COSTS_N_INSNS (74)}, /* other */
1948 COSTS_N_INSNS (1), /* cost of movsx */
1949 COSTS_N_INSNS (1), /* cost of movzx */
1950 8, /* "large" insn */
1951 17, /* MOVE_RATIO */
1952 4, /* cost for loading QImode using movzbl */
1953 {4, 4, 4}, /* cost of loading integer registers
1954 in QImode, HImode and SImode.
1955 Relative to reg-reg move (2). */
1956 {4, 4, 4}, /* cost of storing integer registers */
1957 4, /* cost of reg,reg fld/fst */
1958 {12, 12, 12}, /* cost of loading fp registers
1959 in SFmode, DFmode and XFmode */
1960 {6, 6, 8}, /* cost of storing fp registers
1961 in SFmode, DFmode and XFmode */
1962 2, /* cost of moving MMX register */
1963 {8, 8}, /* cost of loading MMX registers
1964 in SImode and DImode */
1965 {8, 8}, /* cost of storing MMX registers
1966 in SImode and DImode */
1967 2, /* cost of moving SSE register */
1968 {8, 8, 8}, /* cost of loading SSE registers
1969 in SImode, DImode and TImode */
1970 {8, 8, 8}, /* cost of storing SSE registers
1971 in SImode, DImode and TImode */
1972 5, /* MMX or SSE register to integer */
1973 64, /* size of l1 cache. */
1974 512, /* size of l2 cache. */
1975 64, /* size of prefetch block */
1976 6, /* number of parallel prefetches */
1977 /* FIXME perhaps more appropriate value is 5. */
1978 3, /* Branch cost */
1979 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1980 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1981 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1982 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1983 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1984 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1985 core_memcpy,
1986 core_memset,
1987 1, /* scalar_stmt_cost. */
1988 1, /* scalar load_cost. */
1989 1, /* scalar_store_cost. */
1990 1, /* vec_stmt_cost. */
1991 1, /* vec_to_scalar_cost. */
1992 1, /* scalar_to_vec_cost. */
1993 1, /* vec_align_load_cost. */
1994 2, /* vec_unalign_load_cost. */
1995 1, /* vec_store_cost. */
1996 3, /* cond_taken_branch_cost. */
1997 1, /* cond_not_taken_branch_cost. */
1998 };
1999
2000
2001 /* Set by -mtune. */
2002 const struct processor_costs *ix86_tune_cost = &pentium_cost;
2003
2004 /* Set by -mtune or -Os. */
2005 const struct processor_costs *ix86_cost = &pentium_cost;
2006
2007 /* Processor feature/optimization bitmasks. */
2008 #define m_386 (1<<PROCESSOR_I386)
2009 #define m_486 (1<<PROCESSOR_I486)
2010 #define m_PENT (1<<PROCESSOR_PENTIUM)
2011 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
2012 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
2013 #define m_NOCONA (1<<PROCESSOR_NOCONA)
2014 #define m_P4_NOCONA (m_PENT4 | m_NOCONA)
2015 #define m_CORE2 (1<<PROCESSOR_CORE2)
2016 #define m_NEHALEM (1<<PROCESSOR_NEHALEM)
2017 #define m_SANDYBRIDGE (1<<PROCESSOR_SANDYBRIDGE)
2018 #define m_HASWELL (1<<PROCESSOR_HASWELL)
2019 #define m_CORE_ALL (m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_HASWELL)
2020 #define m_BONNELL (1<<PROCESSOR_BONNELL)
2021 #define m_SILVERMONT (1<<PROCESSOR_SILVERMONT)
2022 #define m_INTEL (1<<PROCESSOR_INTEL)
2023
2024 #define m_GEODE (1<<PROCESSOR_GEODE)
2025 #define m_K6 (1<<PROCESSOR_K6)
2026 #define m_K6_GEODE (m_K6 | m_GEODE)
2027 #define m_K8 (1<<PROCESSOR_K8)
2028 #define m_ATHLON (1<<PROCESSOR_ATHLON)
2029 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
2030 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
2031 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
2032 #define m_BDVER2 (1<<PROCESSOR_BDVER2)
2033 #define m_BDVER3 (1<<PROCESSOR_BDVER3)
2034 #define m_BDVER4 (1<<PROCESSOR_BDVER4)
2035 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
2036 #define m_BTVER2 (1<<PROCESSOR_BTVER2)
2037 #define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3 | m_BDVER4)
2038 #define m_BTVER (m_BTVER1 | m_BTVER2)
2039 #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER)
2040
2041 #define m_GENERIC (1<<PROCESSOR_GENERIC)
2042
2043 const char* ix86_tune_feature_names[X86_TUNE_LAST] = {
2044 #undef DEF_TUNE
2045 #define DEF_TUNE(tune, name, selector) name,
2046 #include "x86-tune.def"
2047 #undef DEF_TUNE
2048 };
2049
2050 /* Feature tests against the various tunings. */
2051 unsigned char ix86_tune_features[X86_TUNE_LAST];
2052
2053 /* Feature tests against the various tunings used to create ix86_tune_features
2054 based on the processor mask. */
2055 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
2056 #undef DEF_TUNE
2057 #define DEF_TUNE(tune, name, selector) selector,
2058 #include "x86-tune.def"
2059 #undef DEF_TUNE
2060 };
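/* For illustration: every DEF_TUNE (tune, name, selector) record in
   x86-tune.def expands to its NAME string in ix86_tune_feature_names above
   and to its SELECTOR mask here, keeping both arrays index-aligned with the
   X86_TUNE_* enum.  A hypothetical entry such as
     DEF_TUNE (X86_TUNE_FOO, "foo", m_CORE_ALL | m_GENERIC)
   would make "foo" togglable via -mtune-ctrl= and enabled by default for
   the Core and generic tunings (see set_ix86_tune_features below).  */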
2061
2062 /* Feature tests against the various architecture variations. */
2063 unsigned char ix86_arch_features[X86_ARCH_LAST];
2064
2065 /* Feature tests against the various architecture variations, used to create
2066 ix86_arch_features based on the processor mask. */
2067 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
2068 /* X86_ARCH_CMOV: Conditional move was added for pentiumpro. */
2069 ~(m_386 | m_486 | m_PENT | m_K6),
2070
2071 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2072 ~m_386,
2073
2074 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2075 ~(m_386 | m_486),
2076
2077 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2078 ~m_386,
2079
2080 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
2081 ~m_386,
2082 };
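/* Note that each entry above is the complement of the processors that lack
   the feature, so e.g. ~m_386 for CMPXCHG means "every processor except the
   80386".  ANDing an entry with the mask of the selected -march processor
   therefore yields nonzero exactly when the feature is available; the fold
   into ix86_arch_features presumably mirrors what set_ix86_tune_features
   below does for the tuning masks.  */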
2083
2084 /* In case the average insn count for a single function invocation is
2085 lower than this constant, emit fast (but longer) prologue and
2086 epilogue code. */
2087 #define FAST_PROLOGUE_INSN_COUNT 20
2088
2089 /* Names for 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
2090 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
2091 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
2092 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
2093
2094 /* Array of the smallest class containing reg number REGNO, indexed by
2095 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2096
2097 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
2098 {
2099 /* ax, dx, cx, bx */
2100 AREG, DREG, CREG, BREG,
2101 /* si, di, bp, sp */
2102 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
2103 /* FP registers */
2104 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
2105 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
2106 /* arg pointer */
2107 NON_Q_REGS,
2108 /* flags, fpsr, fpcr, frame */
2109 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
2110 /* SSE registers */
2111 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2112 SSE_REGS, SSE_REGS,
2113 /* MMX registers */
2114 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
2115 MMX_REGS, MMX_REGS,
2116 /* REX registers */
2117 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2118 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2119 /* SSE REX registers */
2120 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2121 SSE_REGS, SSE_REGS,
2122 /* AVX-512 SSE registers */
2123 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2124 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2125 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2126 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2127 /* Mask registers. */
2128 MASK_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2129 MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2130 };
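/* For example, with the register numbering fixed in i386.h, this table makes
   REGNO_REG_CLASS (AX_REG) evaluate to AREG and maps the first SSE register
   to SSE_FIRST_REG, matching the order of the comments above.  */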
2131
2132 /* The "default" register map used in 32-bit mode. */
2133
2134 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2135 {
2136 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2137 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2138 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2139 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2140 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2141 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2142 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2143 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2144 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2145 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2146 };
2147
2148 /* The "default" register map used in 64-bit mode. */
2149
2150 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2151 {
2152 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2153 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2154 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2155 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2156 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2157 8,9,10,11,12,13,14,15, /* extended integer registers */
2158 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2159 67, 68, 69, 70, 71, 72, 73, 74, /* AVX-512 registers 16-23 */
2160 75, 76, 77, 78, 79, 80, 81, 82, /* AVX-512 registers 24-31 */
2161 118, 119, 120, 121, 122, 123, 124, 125, /* Mask registers */
2162 };
2163
2164 /* Define the register numbers to be used in Dwarf debugging information.
2165 The SVR4 reference port C compiler uses the following register numbers
2166 in its Dwarf output code:
2167 0 for %eax (gcc regno = 0)
2168 1 for %ecx (gcc regno = 2)
2169 2 for %edx (gcc regno = 1)
2170 3 for %ebx (gcc regno = 3)
2171 4 for %esp (gcc regno = 7)
2172 5 for %ebp (gcc regno = 6)
2173 6 for %esi (gcc regno = 4)
2174 7 for %edi (gcc regno = 5)
2175 The following three DWARF register numbers are never generated by
2176 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2177 believes these numbers have these meanings.
2178 8 for %eip (no gcc equivalent)
2179 9 for %eflags (gcc regno = 17)
2180 10 for %trapno (no gcc equivalent)
2181 It is not at all clear how we should number the FP stack registers
2182 for the x86 architecture. If the version of SDB on x86/svr4 were
2183 a bit less brain dead with respect to floating-point then we would
2184 have a precedent to follow with respect to DWARF register numbers
2185 for x86 FP registers, but the SDB on x86/svr4 is so completely
2186 broken with respect to FP registers that it is hardly worth thinking
2187 of it as something to strive for compatibility with.
2188 The version of x86/svr4 SDB I have at the moment does (partially)
2189 seem to believe that DWARF register number 11 is associated with
2190 the x86 register %st(0), but that's about all. Higher DWARF
2191 register numbers don't seem to be associated with anything in
2192 particular, and even for DWARF regno 11, SDB only seems to under-
2193 stand that it should say that a variable lives in %st(0) (when
2194 asked via an `=' command) if we said it was in DWARF regno 11,
2195 but SDB still prints garbage when asked for the value of the
2196 variable in question (via a `/' command).
2197 (Also note that the labels SDB prints for various FP stack regs
2198 when doing an `x' command are all wrong.)
2199 Note that these problems generally don't affect the native SVR4
2200 C compiler because it doesn't allow the use of -O with -g and
2201 because when it is *not* optimizing, it allocates a memory
2202 location for each floating-point variable, and the memory
2203 location is what gets described in the DWARF AT_location
2204 attribute for the variable in question.
2205 Regardless of the severe mental illness of the x86/svr4 SDB, we
2206 do something sensible here and we use the following DWARF
2207 register numbers. Note that these are all stack-top-relative
2208 numbers.
2209 11 for %st(0) (gcc regno = 8)
2210 12 for %st(1) (gcc regno = 9)
2211 13 for %st(2) (gcc regno = 10)
2212 14 for %st(3) (gcc regno = 11)
2213 15 for %st(4) (gcc regno = 12)
2214 16 for %st(5) (gcc regno = 13)
2215 17 for %st(6) (gcc regno = 14)
2216 18 for %st(7) (gcc regno = 15)
2217 */
2218 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2219 {
2220 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2221 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2222 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2223 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2224 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2225 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2226 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2227 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2228 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2229 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2230 };
2231
2232 /* Define parameter passing and return registers. */
2233
2234 static int const x86_64_int_parameter_registers[6] =
2235 {
2236 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2237 };
2238
2239 static int const x86_64_ms_abi_int_parameter_registers[4] =
2240 {
2241 CX_REG, DX_REG, R8_REG, R9_REG
2242 };
2243
2244 static int const x86_64_int_return_registers[4] =
2245 {
2246 AX_REG, DX_REG, DI_REG, SI_REG
2247 };
2248
2249 /* Additional registers that are clobbered by SYSV calls. */
2250
2251 int const x86_64_ms_sysv_extra_clobbered_registers[12] =
2252 {
2253 SI_REG, DI_REG,
2254 XMM6_REG, XMM7_REG,
2255 XMM8_REG, XMM9_REG, XMM10_REG, XMM11_REG,
2256 XMM12_REG, XMM13_REG, XMM14_REG, XMM15_REG
2257 };
2258
2259 /* Define the structure for the machine field in struct function. */
2260
2261 struct GTY(()) stack_local_entry {
2262 unsigned short mode;
2263 unsigned short n;
2264 rtx rtl;
2265 struct stack_local_entry *next;
2266 };
2267
2268 /* Structure describing stack frame layout.
2269 Stack grows downward:
2270
2271 [arguments]
2272 <- ARG_POINTER
2273 saved pc
2274
2275 saved static chain if ix86_static_chain_on_stack
2276
2277 saved frame pointer if frame_pointer_needed
2278 <- HARD_FRAME_POINTER
2279 [saved regs]
2280 <- regs_save_offset
2281 [padding0]
2282
2283 [saved SSE regs]
2284 <- sse_regs_save_offset
2285 [padding1] |
2286 | <- FRAME_POINTER
2287 [va_arg registers] |
2288 |
2289 [frame] |
2290 |
2291 [padding2] | = to_allocate
2292 <- STACK_POINTER
2293 */
2294 struct ix86_frame
2295 {
2296 int nsseregs;
2297 int nregs;
2298 int va_arg_size;
2299 int red_zone_size;
2300 int outgoing_arguments_size;
2301
2302 /* The offsets relative to ARG_POINTER. */
2303 HOST_WIDE_INT frame_pointer_offset;
2304 HOST_WIDE_INT hard_frame_pointer_offset;
2305 HOST_WIDE_INT stack_pointer_offset;
2306 HOST_WIDE_INT hfp_save_offset;
2307 HOST_WIDE_INT reg_save_offset;
2308 HOST_WIDE_INT sse_reg_save_offset;
2309
2310 /* When save_regs_using_mov is set, emit prologue using
2311 move instead of push instructions. */
2312 bool save_regs_using_mov;
2313 };
2314
2315 /* Which cpu are we scheduling for. */
2316 enum attr_cpu ix86_schedule;
2317
2318 /* Which cpu are we optimizing for. */
2319 enum processor_type ix86_tune;
2320
2321 /* Which instruction set architecture to use. */
2322 enum processor_type ix86_arch;
2323
2324 /* True if processor has SSE prefetch instruction. */
2325 unsigned char x86_prefetch_sse;
2326
2327 /* -mstackrealign option */
2328 static const char ix86_force_align_arg_pointer_string[]
2329 = "force_align_arg_pointer";
2330
2331 static rtx (*ix86_gen_leave) (void);
2332 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2333 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2334 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2335 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2336 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2337 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2338 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2339 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2340 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
2341 static rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx);
2342 static rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx);
2343
2344 /* Preferred alignment for stack boundary in bits. */
2345 unsigned int ix86_preferred_stack_boundary;
2346
2347 /* Alignment for incoming stack boundary in bits specified at
2348 command line. */
2349 static unsigned int ix86_user_incoming_stack_boundary;
2350
2351 /* Default alignment for incoming stack boundary in bits. */
2352 static unsigned int ix86_default_incoming_stack_boundary;
2353
2354 /* Alignment for incoming stack boundary in bits. */
2355 unsigned int ix86_incoming_stack_boundary;
2356
2357 /* Calling-ABI-specific va_list type nodes. */
2358 static GTY(()) tree sysv_va_list_type_node;
2359 static GTY(()) tree ms_va_list_type_node;
2360
2361 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2362 char internal_label_prefix[16];
2363 int internal_label_prefix_len;
2364
2365 /* Fence to use after loop using movnt. */
2366 tree x86_mfence;
2367
2368 /* Register class used for passing a given 64-bit part of an argument.
2369 These represent the classes documented by the psABI, except that the
2370 SSESF and SSEDF classes are basically the SSE class; GCC just uses SFmode
2371 or DFmode moves instead of DImode moves to avoid reformatting penalties.
2372
2373 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
2374 whenever possible (i.e. when the upper half is only padding). */
2375 enum x86_64_reg_class
2376 {
2377 X86_64_NO_CLASS,
2378 X86_64_INTEGER_CLASS,
2379 X86_64_INTEGERSI_CLASS,
2380 X86_64_SSE_CLASS,
2381 X86_64_SSESF_CLASS,
2382 X86_64_SSEDF_CLASS,
2383 X86_64_SSEUP_CLASS,
2384 X86_64_X87_CLASS,
2385 X86_64_X87UP_CLASS,
2386 X86_64_COMPLEX_X87_CLASS,
2387 X86_64_MEMORY_CLASS
2388 };
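/* For instance, under this classification a plain "double" argument would
   get X86_64_SSEDF_CLASS (and so be passed in an SSE register) while a
   "long long" would get X86_64_INTEGER_CLASS; the actual classification is
   done by classify_argument later in this file.  */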
2389
2390 #define MAX_CLASSES 8
2391
2392 /* Table of constants used by fldpi, fldln2, etc.... */
2393 static REAL_VALUE_TYPE ext_80387_constants_table [5];
2394 static bool ext_80387_constants_init = 0;
2395
2396 \f
2397 static struct machine_function * ix86_init_machine_status (void);
2398 static rtx ix86_function_value (const_tree, const_tree, bool);
2399 static bool ix86_function_value_regno_p (const unsigned int);
2400 static unsigned int ix86_function_arg_boundary (enum machine_mode,
2401 const_tree);
2402 static rtx ix86_static_chain (const_tree, bool);
2403 static int ix86_function_regparm (const_tree, const_tree);
2404 static void ix86_compute_frame_layout (struct ix86_frame *);
2405 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
2406 rtx, rtx, int);
2407 static void ix86_add_new_builtins (HOST_WIDE_INT);
2408 static tree ix86_canonical_va_list_type (tree);
2409 static void predict_jump (int);
2410 static unsigned int split_stack_prologue_scratch_regno (void);
2411 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2412
2413 enum ix86_function_specific_strings
2414 {
2415 IX86_FUNCTION_SPECIFIC_ARCH,
2416 IX86_FUNCTION_SPECIFIC_TUNE,
2417 IX86_FUNCTION_SPECIFIC_MAX
2418 };
2419
2420 static char *ix86_target_string (HOST_WIDE_INT, int, const char *,
2421 const char *, enum fpmath_unit, bool);
2422 static void ix86_function_specific_save (struct cl_target_option *,
2423 struct gcc_options *opts);
2424 static void ix86_function_specific_restore (struct gcc_options *opts,
2425 struct cl_target_option *);
2426 static void ix86_function_specific_print (FILE *, int,
2427 struct cl_target_option *);
2428 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2429 static bool ix86_valid_target_attribute_inner_p (tree, char *[],
2430 struct gcc_options *,
2431 struct gcc_options *,
2432 struct gcc_options *);
2433 static bool ix86_can_inline_p (tree, tree);
2434 static void ix86_set_current_function (tree);
2435 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2436
2437 static enum calling_abi ix86_function_abi (const_tree);
2438
2439 \f
2440 #ifndef SUBTARGET32_DEFAULT_CPU
2441 #define SUBTARGET32_DEFAULT_CPU "i386"
2442 #endif
2443
2444 /* Whether -mtune= or -march= was specified. */
2445 static int ix86_tune_defaulted;
2446 static int ix86_arch_specified;
2447
2448 /* Vectorization library interface and handlers. */
2449 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2450
2451 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2452 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2453
2454 /* Processor target table, indexed by processor number */
2455 struct ptt
2456 {
2457 const char *const name; /* processor name */
2458 const struct processor_costs *cost; /* Processor costs */
2459 const int align_loop; /* Default alignments. */
2460 const int align_loop_max_skip;
2461 const int align_jump;
2462 const int align_jump_max_skip;
2463 const int align_func;
2464 };
2465
2466 /* This table must be in sync with enum processor_type in i386.h. */
2467 static const struct ptt processor_target_table[PROCESSOR_max] =
2468 {
2469 {"generic", &generic_cost, 16, 10, 16, 10, 16},
2470 {"i386", &i386_cost, 4, 3, 4, 3, 4},
2471 {"i486", &i486_cost, 16, 15, 16, 15, 16},
2472 {"pentium", &pentium_cost, 16, 7, 16, 7, 16},
2473 {"pentiumpro", &pentiumpro_cost, 16, 15, 16, 10, 16},
2474 {"pentium4", &pentium4_cost, 0, 0, 0, 0, 0},
2475 {"nocona", &nocona_cost, 0, 0, 0, 0, 0},
2476 {"core2", &core_cost, 16, 10, 16, 10, 16},
2477 {"nehalem", &core_cost, 16, 10, 16, 10, 16},
2478 {"sandybridge", &core_cost, 16, 10, 16, 10, 16},
2479 {"haswell", &core_cost, 16, 10, 16, 10, 16},
2480 {"bonnell", &atom_cost, 16, 15, 16, 7, 16},
2481 {"silvermont", &slm_cost, 16, 15, 16, 7, 16},
2482 {"intel", &intel_cost, 16, 15, 16, 7, 16},
2483 {"geode", &geode_cost, 0, 0, 0, 0, 0},
2484 {"k6", &k6_cost, 32, 7, 32, 7, 32},
2485 {"athlon", &athlon_cost, 16, 7, 16, 7, 16},
2486 {"k8", &k8_cost, 16, 7, 16, 7, 16},
2487 {"amdfam10", &amdfam10_cost, 32, 24, 32, 7, 32},
2488 {"bdver1", &bdver1_cost, 16, 10, 16, 7, 11},
2489 {"bdver2", &bdver2_cost, 16, 10, 16, 7, 11},
2490 {"bdver3", &bdver3_cost, 16, 10, 16, 7, 11},
2491 {"bdver4", &bdver4_cost, 16, 10, 16, 7, 11},
2492 {"btver1", &btver1_cost, 16, 10, 16, 7, 11},
2493 {"btver2", &btver2_cost, 16, 10, 16, 7, 11}
2494 };
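/* The alignment columns above presumably provide the defaults for
   -falign-loops, -falign-jumps and -falign-functions (with the *_max_skip
   columns as the corresponding skip limits) whenever the user does not set
   those options explicitly; see ix86_option_override_internal below.  */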
2495 \f
2496 static bool
2497 gate_insert_vzeroupper (void)
2498 {
2499 return TARGET_AVX && !TARGET_AVX512F && TARGET_VZEROUPPER;
2500 }
2501
2502 static unsigned int
2503 rest_of_handle_insert_vzeroupper (void)
2504 {
2505 int i;
2506
2507 /* vzeroupper instructions are inserted immediately after reload to
2508 account for possible spills from 256-bit registers. The pass
2509 reuses the mode switching infrastructure by re-running the mode
2510 insertion pass, so disable entities that have already been processed. */
2511 for (i = 0; i < MAX_386_ENTITIES; i++)
2512 ix86_optimize_mode_switching[i] = 0;
2513
2514 ix86_optimize_mode_switching[AVX_U128] = 1;
2515
2516 /* Call optimize_mode_switching. */
2517 g->get_passes ()->execute_pass_mode_switching ();
2518 return 0;
2519 }
2520
2521 namespace {
2522
2523 const pass_data pass_data_insert_vzeroupper =
2524 {
2525 RTL_PASS, /* type */
2526 "vzeroupper", /* name */
2527 OPTGROUP_NONE, /* optinfo_flags */
2528 true, /* has_gate */
2529 true, /* has_execute */
2530 TV_NONE, /* tv_id */
2531 0, /* properties_required */
2532 0, /* properties_provided */
2533 0, /* properties_destroyed */
2534 0, /* todo_flags_start */
2535 ( TODO_df_finish | TODO_verify_rtl_sharing | 0 ), /* todo_flags_finish */
2536 };
2537
2538 class pass_insert_vzeroupper : public rtl_opt_pass
2539 {
2540 public:
2541 pass_insert_vzeroupper(gcc::context *ctxt)
2542 : rtl_opt_pass(pass_data_insert_vzeroupper, ctxt)
2543 {}
2544
2545 /* opt_pass methods: */
2546 bool gate () { return gate_insert_vzeroupper (); }
2547 unsigned int execute () { return rest_of_handle_insert_vzeroupper (); }
2548
2549 }; // class pass_insert_vzeroupper
2550
2551 } // anon namespace
2552
2553 rtl_opt_pass *
2554 make_pass_insert_vzeroupper (gcc::context *ctxt)
2555 {
2556 return new pass_insert_vzeroupper (ctxt);
2557 }
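/* This factory is presumably invoked from ix86_option_override, which
   registers the pass to run right after reload, matching the comment in
   rest_of_handle_insert_vzeroupper about spills from 256-bit registers.  */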
2558
2559 /* Return true if a red-zone is in use. */
2560
2561 static inline bool
2562 ix86_using_red_zone (void)
2563 {
2564 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
2565 }
2566 \f
2567 /* Return a string that documents the current -m options. The caller is
2568 responsible for freeing the string. */
2569
2570 static char *
2571 ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
2572 const char *tune, enum fpmath_unit fpmath,
2573 bool add_nl_p)
2574 {
2575 struct ix86_target_opts
2576 {
2577 const char *option; /* option string */
2578 HOST_WIDE_INT mask; /* isa mask options */
2579 };
2580
2581 /* This table is ordered so that options like -msse4.2 that imply
2582 preceding options are matched first. */
2583 static struct ix86_target_opts isa_opts[] =
2584 {
2585 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2586 { "-mfma", OPTION_MASK_ISA_FMA },
2587 { "-mxop", OPTION_MASK_ISA_XOP },
2588 { "-mlwp", OPTION_MASK_ISA_LWP },
2589 { "-mavx512f", OPTION_MASK_ISA_AVX512F },
2590 { "-mavx512er", OPTION_MASK_ISA_AVX512ER },
2591 { "-mavx512cd", OPTION_MASK_ISA_AVX512CD },
2592 { "-mavx512pf", OPTION_MASK_ISA_AVX512PF },
2593 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2594 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2595 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2596 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2597 { "-msse3", OPTION_MASK_ISA_SSE3 },
2598 { "-msse2", OPTION_MASK_ISA_SSE2 },
2599 { "-msse", OPTION_MASK_ISA_SSE },
2600 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2601 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2602 { "-mmmx", OPTION_MASK_ISA_MMX },
2603 { "-mabm", OPTION_MASK_ISA_ABM },
2604 { "-mbmi", OPTION_MASK_ISA_BMI },
2605 { "-mbmi2", OPTION_MASK_ISA_BMI2 },
2606 { "-mlzcnt", OPTION_MASK_ISA_LZCNT },
2607 { "-mhle", OPTION_MASK_ISA_HLE },
2608 { "-mfxsr", OPTION_MASK_ISA_FXSR },
2609 { "-mrdseed", OPTION_MASK_ISA_RDSEED },
2610 { "-mprfchw", OPTION_MASK_ISA_PRFCHW },
2611 { "-madx", OPTION_MASK_ISA_ADX },
2612 { "-mtbm", OPTION_MASK_ISA_TBM },
2613 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2614 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2615 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2616 { "-maes", OPTION_MASK_ISA_AES },
2617 { "-msha", OPTION_MASK_ISA_SHA },
2618 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2619 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2620 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2621 { "-mf16c", OPTION_MASK_ISA_F16C },
2622 { "-mrtm", OPTION_MASK_ISA_RTM },
2623 { "-mxsave", OPTION_MASK_ISA_XSAVE },
2624 { "-mxsaveopt", OPTION_MASK_ISA_XSAVEOPT },
2625 };
2626
2627 /* Flag options. */
2628 static struct ix86_target_opts flag_opts[] =
2629 {
2630 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2631 { "-mlong-double-128", MASK_LONG_DOUBLE_128 },
2632 { "-mlong-double-64", MASK_LONG_DOUBLE_64 },
2633 { "-m80387", MASK_80387 },
2634 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2635 { "-malign-double", MASK_ALIGN_DOUBLE },
2636 { "-mcld", MASK_CLD },
2637 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2638 { "-mieee-fp", MASK_IEEE_FP },
2639 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2640 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2641 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2642 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2643 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2644 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2645 { "-mno-red-zone", MASK_NO_RED_ZONE },
2646 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2647 { "-mrecip", MASK_RECIP },
2648 { "-mrtd", MASK_RTD },
2649 { "-msseregparm", MASK_SSEREGPARM },
2650 { "-mstack-arg-probe", MASK_STACK_PROBE },
2651 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2652 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
2653 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2654 { "-mvzeroupper", MASK_VZEROUPPER },
2655 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
2656 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE},
2657 { "-mprefer-avx128", MASK_PREFER_AVX128},
2658 };
2659
2660 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2661
2662 char isa_other[40];
2663 char target_other[40];
2664 unsigned num = 0;
2665 unsigned i, j;
2666 char *ret;
2667 char *ptr;
2668 size_t len;
2669 size_t line_len;
2670 size_t sep_len;
2671 const char *abi;
2672
2673 memset (opts, '\0', sizeof (opts));
2674
2675 /* Add -march= option. */
2676 if (arch)
2677 {
2678 opts[num][0] = "-march=";
2679 opts[num++][1] = arch;
2680 }
2681
2682 /* Add -mtune= option. */
2683 if (tune)
2684 {
2685 opts[num][0] = "-mtune=";
2686 opts[num++][1] = tune;
2687 }
2688
2689 /* Add -m32/-m64/-mx32. */
2690 if ((isa & OPTION_MASK_ISA_64BIT) != 0)
2691 {
2692 if ((isa & OPTION_MASK_ABI_64) != 0)
2693 abi = "-m64";
2694 else
2695 abi = "-mx32";
2696 isa &= ~ (OPTION_MASK_ISA_64BIT
2697 | OPTION_MASK_ABI_64
2698 | OPTION_MASK_ABI_X32);
2699 }
2700 else
2701 abi = "-m32";
2702 opts[num++][0] = abi;
2703
2704 /* Pick out the options in isa options. */
2705 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2706 {
2707 if ((isa & isa_opts[i].mask) != 0)
2708 {
2709 opts[num++][0] = isa_opts[i].option;
2710 isa &= ~ isa_opts[i].mask;
2711 }
2712 }
2713
2714 if (isa && add_nl_p)
2715 {
2716 opts[num++][0] = isa_other;
2717 sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)",
2718 isa);
2719 }
2720
2721 /* Add flag options. */
2722 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2723 {
2724 if ((flags & flag_opts[i].mask) != 0)
2725 {
2726 opts[num++][0] = flag_opts[i].option;
2727 flags &= ~ flag_opts[i].mask;
2728 }
2729 }
2730
2731 if (flags && add_nl_p)
2732 {
2733 opts[num++][0] = target_other;
2734 sprintf (target_other, "(other flags: %#x)", flags);
2735 }
2736
2737 /* Add -fpmath= option. */
2738 if (fpmath)
2739 {
2740 opts[num][0] = "-mfpmath=";
2741 switch ((int) fpmath)
2742 {
2743 case FPMATH_387:
2744 opts[num++][1] = "387";
2745 break;
2746
2747 case FPMATH_SSE:
2748 opts[num++][1] = "sse";
2749 break;
2750
2751 case FPMATH_387 | FPMATH_SSE:
2752 opts[num++][1] = "sse+387";
2753 break;
2754
2755 default:
2756 gcc_unreachable ();
2757 }
2758 }
2759
2760 /* Any options? */
2761 if (num == 0)
2762 return NULL;
2763
2764 gcc_assert (num < ARRAY_SIZE (opts));
2765
2766 /* Size the string. */
2767 len = 0;
2768 sep_len = (add_nl_p) ? 3 : 1;
2769 for (i = 0; i < num; i++)
2770 {
2771 len += sep_len;
2772 for (j = 0; j < 2; j++)
2773 if (opts[i][j])
2774 len += strlen (opts[i][j]);
2775 }
2776
2777 /* Build the string. */
2778 ret = ptr = (char *) xmalloc (len);
2779 line_len = 0;
2780
2781 for (i = 0; i < num; i++)
2782 {
2783 size_t len2[2];
2784
2785 for (j = 0; j < 2; j++)
2786 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2787
2788 if (i != 0)
2789 {
2790 *ptr++ = ' ';
2791 line_len++;
2792
2793 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2794 {
2795 *ptr++ = '\\';
2796 *ptr++ = '\n';
2797 line_len = 0;
2798 }
2799 }
2800
2801 for (j = 0; j < 2; j++)
2802 if (opts[i][j])
2803 {
2804 memcpy (ptr, opts[i][j], len2[j]);
2805 ptr += len2[j];
2806 line_len += len2[j];
2807 }
2808 }
2809
2810 *ptr = '\0';
2811 gcc_assert (ret + len >= ptr);
2812
2813 return ret;
2814 }
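/* A minimal worked example of the routine above: with a zero ISA mask and
   zero flag bits,

     char *s = ix86_target_string (0, 0, "nehalem", "generic",
                                   FPMATH_SSE, false);

   builds the heap-allocated string
   "-march=nehalem -mtune=generic -m32 -mfpmath=sse", which the caller is
   expected to free.  A real isa/flags combination additionally appends the
   matching ISA and flag options from the tables above.  */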
2815
2816 /* Return true if profiling code should be emitted before the
2817 prologue, false otherwise; on x86 this is the case only with -mfentry.
2818 Note: for x86 the "hotfix" case is rejected with a sorry () elsewhere. */
2819 static bool
2820 ix86_profile_before_prologue (void)
2821 {
2822 return flag_fentry != 0;
2823 }
2824
2825 /* Function that is callable from the debugger to print the current
2826 options. */
2827 void ATTRIBUTE_UNUSED
2828 ix86_debug_options (void)
2829 {
2830 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2831 ix86_arch_string, ix86_tune_string,
2832 ix86_fpmath, true);
2833
2834 if (opts)
2835 {
2836 fprintf (stderr, "%s\n\n", opts);
2837 free (opts);
2838 }
2839 else
2840 fputs ("<no options>\n\n", stderr);
2841
2842 return;
2843 }
2844
2845 static const char *stringop_alg_names[] = {
2846 #define DEF_ENUM
2847 #define DEF_ALG(alg, name) #name,
2848 #include "stringop.def"
2849 #undef DEF_ENUM
2850 #undef DEF_ALG
2851 };
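/* Analogously to the DEF_TUNE expansion above, each DEF_ALG (alg, name)
   record in stringop.def contributes its NAME string here, so the array
   indices line up with the stringop_alg enum values iterated up to
   last_alg in the parser below.  */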
2852
2853 /* Parse parameter string passed to -mmemcpy-strategy= or -mmemset-strategy=.
2854 The string has the following form (or is a comma-separated list of such):
2855
2856 strategy_alg:max_size:[align|noalign]
2857
2858 where the full size range for the strategy is either [0, max_size] or
2859 [min_size, max_size], in which min_size is the max_size + 1 of the
2860 preceding range. The last size range must have max_size == -1.
2861
2862 Examples:
2863
2864 1.
2865 -mmemcpy-strategy=libcall:-1:noalign
2866
2867 this is equivalent to (for known size memcpy) -mstringop-strategy=libcall
2868
2869
2870 2.
2871 -mmemset-strategy=rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign
2872
2873 This is to tell the compiler to use the following strategy for memset
2874 1) when the expected size is between [1, 16], use rep_8byte strategy;
2875 2) when the size is between [17, 2048], use vector_loop;
2876 3) when the size is > 2048, use libcall. */
2877
2878 struct stringop_size_range
2879 {
2880 int max;
2881 stringop_alg alg;
2882 bool noalign;
2883 };
2884
2885 static void
2886 ix86_parse_stringop_strategy_string (char *strategy_str, bool is_memset)
2887 {
2888 const struct stringop_algs *default_algs;
2889 stringop_size_range input_ranges[MAX_STRINGOP_ALGS];
2890 char *curr_range_str, *next_range_str;
2891 int i = 0, n = 0;
2892
2893 if (is_memset)
2894 default_algs = &ix86_cost->memset[TARGET_64BIT != 0];
2895 else
2896 default_algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
2897
2898 curr_range_str = strategy_str;
2899
2900 do
2901 {
2902 int maxs;
2903 char alg_name[128];
2904 char align[16];
2905 next_range_str = strchr (curr_range_str, ',');
2906 if (next_range_str)
2907 *next_range_str++ = '\0';
2908
2909 if (3 != sscanf (curr_range_str, "%20[^:]:%d:%10s",
2910 alg_name, &maxs, align))
2911 {
2912 error ("wrong arg %s to option %s", curr_range_str,
2913 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2914 return;
2915 }
2916
2917 if (n > 0 && (maxs < (input_ranges[n - 1].max + 1) && maxs != -1))
2918 {
2919 error ("size ranges of option %s should be increasing",
2920 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2921 return;
2922 }
2923
2924 for (i = 0; i < last_alg; i++)
2925 if (!strcmp (alg_name, stringop_alg_names[i]))
2926 break;
2927
2928 if (i == last_alg)
2929 {
2930 error ("wrong stringop strategy name %s specified for option %s",
2931 alg_name,
2932 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2933 return;
2934 }
2935
2936 input_ranges[n].max = maxs;
2937 input_ranges[n].alg = (stringop_alg) i;
2938 if (!strcmp (align, "align"))
2939 input_ranges[n].noalign = false;
2940 else if (!strcmp (align, "noalign"))
2941 input_ranges[n].noalign = true;
2942 else
2943 {
2944 error ("unknown alignment %s specified for option %s",
2945 align, is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2946 return;
2947 }
2948 n++;
2949 curr_range_str = next_range_str;
2950 }
2951 while (curr_range_str);
2952
2953 if (input_ranges[n - 1].max != -1)
2954 {
2955 error ("the max value for the last size range should be -1"
2956 " for option %s",
2957 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2958 return;
2959 }
2960
2961 if (n > MAX_STRINGOP_ALGS)
2962 {
2963 error ("too many size ranges specified in option %s",
 2964 is_memset ? "-mmemset-strategy=" : "-mmemcpy-strategy=");
2965 return;
2966 }
2967
2968 /* Now override the default algs array. */
2969 for (i = 0; i < n; i++)
2970 {
2971 *const_cast<int *>(&default_algs->size[i].max) = input_ranges[i].max;
2972 *const_cast<stringop_alg *>(&default_algs->size[i].alg)
2973 = input_ranges[i].alg;
2974 *const_cast<int *>(&default_algs->size[i].noalign)
2975 = input_ranges[i].noalign;
2976 }
2977 }
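
/* Worked example (illustrative only): parsing
   "-mmemcpy-strategy=vector_loop:256:align,libcall:-1:noalign" with the
   function above overrides the first two entries of the tuned memcpy
   table as if they had been initialized as

     {{256, vector_loop, false},
      {-1, libcall, true}}

   i.e. copies of up to 256 bytes use vector_loop with alignment ("align"),
   anything larger falls back to a library call, and the table's
   unknown-size algorithm is left at its tuned default.  */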
2978
2979 \f
 2980 /* Parse the -mtune-ctrl= option. When DUMP is true,
2981 print the features that are explicitly set. */
2982
2983 static void
2984 parse_mtune_ctrl_str (bool dump)
2985 {
2986 if (!ix86_tune_ctrl_string)
2987 return;
2988
2989 char *next_feature_string = NULL;
2990 char *curr_feature_string = xstrdup (ix86_tune_ctrl_string);
2991 char *orig = curr_feature_string;
2992 int i;
2993 do
2994 {
2995 bool clear = false;
2996
2997 next_feature_string = strchr (curr_feature_string, ',');
2998 if (next_feature_string)
2999 *next_feature_string++ = '\0';
3000 if (*curr_feature_string == '^')
3001 {
3002 curr_feature_string++;
3003 clear = true;
3004 }
3005 for (i = 0; i < X86_TUNE_LAST; i++)
3006 {
3007 if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
3008 {
3009 ix86_tune_features[i] = !clear;
3010 if (dump)
3011 fprintf (stderr, "Explicitly %s feature %s\n",
3012 clear ? "clear" : "set", ix86_tune_feature_names[i]);
3013 break;
3014 }
3015 }
3016 if (i == X86_TUNE_LAST)
3017 error ("Unknown parameter to option -mtune-ctrl: %s",
3018 clear ? curr_feature_string - 1 : curr_feature_string);
3019 curr_feature_string = next_feature_string;
3020 }
3021 while (curr_feature_string);
3022 free (orig);
3023 }
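
/* Example use of the parser above (feature names are the strings given in
   x86-tune.def; "accumulate_outgoing_args" is assumed here):
   -mtune-ctrl=^accumulate_outgoing_args clears just that tuning flag,
   while omitting the leading '^' would set it; all other entries of
   ix86_tune_features keep the values chosen for the selected -mtune CPU.  */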
3024
3025 /* Helper function to set ix86_tune_features. IX86_TUNE is the
3026 processor type. */
3027
3028 static void
3029 set_ix86_tune_features (enum processor_type ix86_tune, bool dump)
3030 {
3031 unsigned int ix86_tune_mask = 1u << ix86_tune;
3032 int i;
3033
3034 for (i = 0; i < X86_TUNE_LAST; ++i)
3035 {
3036 if (ix86_tune_no_default)
3037 ix86_tune_features[i] = 0;
3038 else
3039 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3040 }
3041
3042 if (dump)
3043 {
3044 fprintf (stderr, "List of x86 specific tuning parameter names:\n");
3045 for (i = 0; i < X86_TUNE_LAST; i++)
3046 fprintf (stderr, "%s : %s\n", ix86_tune_feature_names[i],
3047 ix86_tune_features[i] ? "on" : "off");
3048 }
3049
3050 parse_mtune_ctrl_str (dump);
3051 }
3052
3053
3054 /* Override various settings based on options. If MAIN_ARGS_P, the
3055 options are from the command line, otherwise they are from
3056 attributes. */
3057
3058 static void
3059 ix86_option_override_internal (bool main_args_p,
3060 struct gcc_options *opts,
3061 struct gcc_options *opts_set)
3062 {
3063 int i;
3064 unsigned int ix86_arch_mask;
3065 const bool ix86_tune_specified = (opts->x_ix86_tune_string != NULL);
3066 const char *prefix;
3067 const char *suffix;
3068 const char *sw;
3069
3070 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
3071 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
3072 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
3073 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
3074 #define PTA_AES (HOST_WIDE_INT_1 << 4)
3075 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
3076 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
3077 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
3078 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
3079 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
3080 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
3081 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
3082 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
3083 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
3084 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
3085 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
3086 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
3087 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
3088 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
3089 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
3090 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
3091 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
3092 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
3093 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
3094 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
3095 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
3096 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
3097 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
3098 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
3099 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
3100 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
3101 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
3102 #define PTA_RTM (HOST_WIDE_INT_1 << 32)
3103 #define PTA_HLE (HOST_WIDE_INT_1 << 33)
3104 #define PTA_PRFCHW (HOST_WIDE_INT_1 << 34)
3105 #define PTA_RDSEED (HOST_WIDE_INT_1 << 35)
3106 #define PTA_ADX (HOST_WIDE_INT_1 << 36)
3107 #define PTA_FXSR (HOST_WIDE_INT_1 << 37)
3108 #define PTA_XSAVE (HOST_WIDE_INT_1 << 38)
3109 #define PTA_XSAVEOPT (HOST_WIDE_INT_1 << 39)
3110 #define PTA_AVX512F (HOST_WIDE_INT_1 << 40)
3111 #define PTA_AVX512ER (HOST_WIDE_INT_1 << 41)
3112 #define PTA_AVX512PF (HOST_WIDE_INT_1 << 42)
3113 #define PTA_AVX512CD (HOST_WIDE_INT_1 << 43)
3114 #define PTA_SHA (HOST_WIDE_INT_1 << 45)
3115
3116 #define PTA_CORE2 \
3117 (PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 \
3118 | PTA_CX16 | PTA_FXSR)
3119 #define PTA_NEHALEM \
3120 (PTA_CORE2 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_POPCNT)
3121 #define PTA_WESTMERE \
3122 (PTA_NEHALEM | PTA_AES | PTA_PCLMUL)
3123 #define PTA_SANDYBRIDGE \
3124 (PTA_WESTMERE | PTA_AVX | PTA_XSAVE | PTA_XSAVEOPT)
3125 #define PTA_IVYBRIDGE \
3126 (PTA_SANDYBRIDGE | PTA_FSGSBASE | PTA_RDRND | PTA_F16C)
3127 #define PTA_HASWELL \
3128 (PTA_IVYBRIDGE | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_LZCNT \
3129 | PTA_FMA | PTA_MOVBE | PTA_RTM | PTA_HLE)
3130 #define PTA_BROADWELL \
3131 (PTA_HASWELL | PTA_ADX | PTA_PRFCHW | PTA_RDSEED)
3132 #define PTA_BONNELL \
3133 (PTA_CORE2 | PTA_MOVBE)
3134 #define PTA_SILVERMONT \
3135 (PTA_WESTMERE | PTA_MOVBE)
3136
 3137 /* If this reaches 64, we need to widen the flags field of struct pta below. */
3138
3139 static struct pta
3140 {
3141 const char *const name; /* processor name or nickname. */
3142 const enum processor_type processor;
3143 const enum attr_cpu schedule;
3144 const unsigned HOST_WIDE_INT flags;
3145 }
3146 const processor_alias_table[] =
3147 {
3148 {"i386", PROCESSOR_I386, CPU_NONE, 0},
3149 {"i486", PROCESSOR_I486, CPU_NONE, 0},
3150 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3151 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3152 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
3153 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
3154 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3155 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3156 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3157 PTA_MMX | PTA_SSE | PTA_FXSR},
3158 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3159 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3160 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_FXSR},
3161 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3162 PTA_MMX | PTA_SSE | PTA_FXSR},
3163 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3164 PTA_MMX | PTA_SSE | PTA_FXSR},
3165 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3166 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3167 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
3168 PTA_MMX |PTA_SSE | PTA_SSE2 | PTA_FXSR},
3169 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
3170 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3171 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
3172 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
3173 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
3174 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3175 | PTA_CX16 | PTA_NO_SAHF | PTA_FXSR},
3176 {"core2", PROCESSOR_CORE2, CPU_CORE2, PTA_CORE2},
3177 {"nehalem", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3178 {"corei7", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3179 {"westmere", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_WESTMERE},
3180 {"sandybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3181 PTA_SANDYBRIDGE},
3182 {"corei7-avx", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3183 PTA_SANDYBRIDGE},
3184 {"ivybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3185 PTA_IVYBRIDGE},
3186 {"core-avx-i", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3187 PTA_IVYBRIDGE},
3188 {"haswell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3189 {"core-avx2", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3190 {"broadwell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_BROADWELL},
3191 {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3192 {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3193 {"silvermont", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3194 {"slm", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3195 {"intel", PROCESSOR_INTEL, CPU_SLM, PTA_NEHALEM},
3196 {"geode", PROCESSOR_GEODE, CPU_GEODE,
3197 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3198 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
3199 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3200 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3201 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
3202 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3203 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
3204 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3205 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
3206 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3207 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
3208 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3209 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
3210 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3211 {"x86-64", PROCESSOR_K8, CPU_K8,
3212 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
3213 {"k8", PROCESSOR_K8, CPU_K8,
3214 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3215 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3216 {"k8-sse3", PROCESSOR_K8, CPU_K8,
3217 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3218 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3219 {"opteron", PROCESSOR_K8, CPU_K8,
3220 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3221 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3222 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3223 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3224 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3225 {"athlon64", PROCESSOR_K8, CPU_K8,
3226 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3227 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3228 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3229 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3230 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3231 {"athlon-fx", PROCESSOR_K8, CPU_K8,
3232 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3233 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3234 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3235 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3236 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3237 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3238 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3239 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3240 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3241 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3242 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3243 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3244 | PTA_XOP | PTA_LWP | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3245 {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
3246 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3247 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3248 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3249 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3250 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3251 {"bdver3", PROCESSOR_BDVER3, CPU_BDVER3,
3252 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3253 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3254 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3255 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3256 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE
3257 | PTA_XSAVEOPT | PTA_FSGSBASE},
3258 {"bdver4", PROCESSOR_BDVER4, CPU_BDVER4,
3259 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3260 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3261 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
3262 | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2
3263 | PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR
3264 | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE},
3265 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC,
3266 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3267 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_PRFCHW
3268 | PTA_FXSR | PTA_XSAVE},
3269 {"btver2", PROCESSOR_BTVER2, CPU_BTVER2,
3270 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3271 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_SSE4_1
3272 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
3273 | PTA_BMI | PTA_F16C | PTA_MOVBE | PTA_PRFCHW
3274 | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT},
3275
3276 {"generic", PROCESSOR_GENERIC, CPU_GENERIC,
3277 PTA_64BIT
3278 | PTA_HLE /* flags are only used for -march switch. */ },
3279 };
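
/* Worked example: -march=core2 matches the "core2" entry above, so
   ix86_arch becomes PROCESSOR_CORE2 and the matching loop further below
   turns on the ISA options implied by PTA_CORE2 (MMX, SSE, SSE2, SSE3,
   SSSE3, CX16 and FXSR, plus 64-bit capability), unless the user has
   explicitly disabled any of them.  */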
3280
3281 /* -mrecip options. */
3282 static struct
3283 {
3284 const char *string; /* option name */
3285 unsigned int mask; /* mask bits to set */
3286 }
3287 const recip_options[] =
3288 {
3289 { "all", RECIP_MASK_ALL },
3290 { "none", RECIP_MASK_NONE },
3291 { "div", RECIP_MASK_DIV },
3292 { "sqrt", RECIP_MASK_SQRT },
3293 { "vec-div", RECIP_MASK_VEC_DIV },
3294 { "vec-sqrt", RECIP_MASK_VEC_SQRT },
3295 };
3296
3297 int const pta_size = ARRAY_SIZE (processor_alias_table);
3298
3299 /* Set up prefix/suffix so the error messages refer to either the command
3300 line argument, or the attribute(target). */
3301 if (main_args_p)
3302 {
3303 prefix = "-m";
3304 suffix = "";
3305 sw = "switch";
3306 }
3307 else
3308 {
3309 prefix = "option(\"";
3310 suffix = "\")";
3311 sw = "attribute";
3312 }
3313
3314 /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
3315 TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */
3316 if (TARGET_64BIT_DEFAULT && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3317 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ABI_64 | OPTION_MASK_ABI_X32);
3318 #ifdef TARGET_BI_ARCH
3319 else
3320 {
3321 #if TARGET_BI_ARCH == 1
3322 /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
3323 is on and OPTION_MASK_ABI_X32 is off. We turn off
3324 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
3325 -mx32. */
3326 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3327 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3328 #else
3329 /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
3330 on and OPTION_MASK_ABI_64 is off. We turn off
3331 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
3332 -m64. */
3333 if (TARGET_LP64_P (opts->x_ix86_isa_flags))
3334 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3335 #endif
3336 }
3337 #endif
3338
3339 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3340 {
3341 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3342 OPTION_MASK_ABI_64 for TARGET_X32. */
3343 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3344 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3345 }
3346 else if (TARGET_16BIT_P (opts->x_ix86_isa_flags))
3347 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ISA_64BIT
3348 | OPTION_MASK_ABI_X32
3349 | OPTION_MASK_ABI_64);
3350 else if (TARGET_LP64_P (opts->x_ix86_isa_flags))
3351 {
3352 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3353 OPTION_MASK_ABI_X32 for TARGET_LP64. */
3354 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3355 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3356 }
3357
3358 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3359 SUBTARGET_OVERRIDE_OPTIONS;
3360 #endif
3361
3362 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3363 SUBSUBTARGET_OVERRIDE_OPTIONS;
3364 #endif
3365
3366 /* -fPIC is the default for x86_64. */
3367 if (TARGET_MACHO && TARGET_64BIT_P (opts->x_ix86_isa_flags))
3368 opts->x_flag_pic = 2;
3369
3370 /* Need to check -mtune=generic first. */
3371 if (opts->x_ix86_tune_string)
3372 {
3373 /* As special support for cross compilers we read -mtune=native
3374 as -mtune=generic. With native compilers we won't see the
3375 -mtune=native, as it was changed by the driver. */
3376 if (!strcmp (opts->x_ix86_tune_string, "native"))
3377 {
3378 opts->x_ix86_tune_string = "generic";
3379 }
3380 else if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3381 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
3382 "%stune=k8%s or %stune=generic%s instead as appropriate",
3383 prefix, suffix, prefix, suffix, prefix, suffix);
3384 }
3385 else
3386 {
3387 if (opts->x_ix86_arch_string)
3388 opts->x_ix86_tune_string = opts->x_ix86_arch_string;
3389 if (!opts->x_ix86_tune_string)
3390 {
3391 opts->x_ix86_tune_string
3392 = processor_target_table[TARGET_CPU_DEFAULT].name;
3393 ix86_tune_defaulted = 1;
3394 }
3395
3396 /* opts->x_ix86_tune_string is set to opts->x_ix86_arch_string
3397 or defaulted. We need to use a sensible tune option. */
3398 if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3399 {
3400 opts->x_ix86_tune_string = "generic";
3401 }
3402 }
3403
3404 if (opts->x_ix86_stringop_alg == rep_prefix_8_byte
3405 && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3406 {
3407 /* rep; movq isn't available in 32-bit code. */
3408 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3409 opts->x_ix86_stringop_alg = no_stringop;
3410 }
3411
3412 if (!opts->x_ix86_arch_string)
3413 opts->x_ix86_arch_string
3414 = TARGET_64BIT_P (opts->x_ix86_isa_flags)
3415 ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3416 else
3417 ix86_arch_specified = 1;
3418
3419 if (opts_set->x_ix86_pmode)
3420 {
3421 if ((TARGET_LP64_P (opts->x_ix86_isa_flags)
3422 && opts->x_ix86_pmode == PMODE_SI)
3423 || (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
3424 && opts->x_ix86_pmode == PMODE_DI))
3425 error ("address mode %qs not supported in the %s bit mode",
3426 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "short" : "long",
3427 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "64" : "32");
3428 }
3429 else
3430 opts->x_ix86_pmode = TARGET_LP64_P (opts->x_ix86_isa_flags)
3431 ? PMODE_DI : PMODE_SI;
3432
3433 if (!opts_set->x_ix86_abi)
3434 opts->x_ix86_abi = DEFAULT_ABI;
3435
 3436 /* For targets using the MS ABI, enable ms-extensions if it has not been
 3437 explicitly turned off. For non-MS ABI targets we turn this
 3438 option off. */
3439 if (!opts_set->x_flag_ms_extensions)
3440 opts->x_flag_ms_extensions = (MS_ABI == DEFAULT_ABI);
3441
3442 if (opts_set->x_ix86_cmodel)
3443 {
3444 switch (opts->x_ix86_cmodel)
3445 {
3446 case CM_SMALL:
3447 case CM_SMALL_PIC:
3448 if (opts->x_flag_pic)
3449 opts->x_ix86_cmodel = CM_SMALL_PIC;
3450 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3451 error ("code model %qs not supported in the %s bit mode",
3452 "small", "32");
3453 break;
3454
3455 case CM_MEDIUM:
3456 case CM_MEDIUM_PIC:
3457 if (opts->x_flag_pic)
3458 opts->x_ix86_cmodel = CM_MEDIUM_PIC;
3459 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3460 error ("code model %qs not supported in the %s bit mode",
3461 "medium", "32");
3462 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3463 error ("code model %qs not supported in x32 mode",
3464 "medium");
3465 break;
3466
3467 case CM_LARGE:
3468 case CM_LARGE_PIC:
3469 if (opts->x_flag_pic)
3470 opts->x_ix86_cmodel = CM_LARGE_PIC;
3471 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3472 error ("code model %qs not supported in the %s bit mode",
3473 "large", "32");
3474 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3475 error ("code model %qs not supported in x32 mode",
3476 "large");
3477 break;
3478
3479 case CM_32:
3480 if (opts->x_flag_pic)
3481 error ("code model %s does not support PIC mode", "32");
3482 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3483 error ("code model %qs not supported in the %s bit mode",
3484 "32", "64");
3485 break;
3486
3487 case CM_KERNEL:
3488 if (opts->x_flag_pic)
3489 {
3490 error ("code model %s does not support PIC mode", "kernel");
3491 opts->x_ix86_cmodel = CM_32;
3492 }
3493 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3494 error ("code model %qs not supported in the %s bit mode",
3495 "kernel", "32");
3496 break;
3497
3498 default:
3499 gcc_unreachable ();
3500 }
3501 }
3502 else
3503 {
3504 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3505 use of rip-relative addressing. This eliminates fixups that
3506 would otherwise be needed if this object is to be placed in a
3507 DLL, and is essentially just as efficient as direct addressing. */
3508 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3509 && (TARGET_RDOS || TARGET_PECOFF))
3510 opts->x_ix86_cmodel = CM_MEDIUM_PIC, opts->x_flag_pic = 1;
3511 else if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3512 opts->x_ix86_cmodel = opts->x_flag_pic ? CM_SMALL_PIC : CM_SMALL;
3513 else
3514 opts->x_ix86_cmodel = CM_32;
3515 }
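
/* Summary of the defaulting just above: without an explicit -mcmodel=,
   64-bit RDOS and PE-COFF targets get CM_MEDIUM_PIC with PIC forced on,
   other 64-bit targets get CM_SMALL (or CM_SMALL_PIC when PIC is
   enabled), and 32-bit targets always use CM_32.  */
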
3516 if (TARGET_MACHO && opts->x_ix86_asm_dialect == ASM_INTEL)
3517 {
3518 error ("-masm=intel not supported in this configuration");
3519 opts->x_ix86_asm_dialect = ASM_ATT;
3520 }
3521 if ((TARGET_64BIT_P (opts->x_ix86_isa_flags) != 0)
3522 != ((opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3523 sorry ("%i-bit mode not compiled in",
3524 (opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3525
3526 for (i = 0; i < pta_size; i++)
3527 if (! strcmp (opts->x_ix86_arch_string, processor_alias_table[i].name))
3528 {
3529 ix86_schedule = processor_alias_table[i].schedule;
3530 ix86_arch = processor_alias_table[i].processor;
3531 /* Default cpu tuning to the architecture. */
3532 ix86_tune = ix86_arch;
3533
3534 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3535 && !(processor_alias_table[i].flags & PTA_64BIT))
3536 error ("CPU you selected does not support x86-64 "
3537 "instruction set");
3538
3539 if (processor_alias_table[i].flags & PTA_MMX
3540 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3541 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3542 if (processor_alias_table[i].flags & PTA_3DNOW
3543 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3544 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3545 if (processor_alias_table[i].flags & PTA_3DNOW_A
3546 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3547 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3548 if (processor_alias_table[i].flags & PTA_SSE
3549 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3550 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3551 if (processor_alias_table[i].flags & PTA_SSE2
3552 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3553 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3554 if (processor_alias_table[i].flags & PTA_SSE3
3555 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3556 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3557 if (processor_alias_table[i].flags & PTA_SSSE3
3558 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3559 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3560 if (processor_alias_table[i].flags & PTA_SSE4_1
3561 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3562 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3563 if (processor_alias_table[i].flags & PTA_SSE4_2
3564 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3565 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3566 if (processor_alias_table[i].flags & PTA_AVX
3567 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3568 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3569 if (processor_alias_table[i].flags & PTA_AVX2
3570 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
3571 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
3572 if (processor_alias_table[i].flags & PTA_FMA
3573 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3574 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3575 if (processor_alias_table[i].flags & PTA_SSE4A
3576 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3577 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3578 if (processor_alias_table[i].flags & PTA_FMA4
3579 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3580 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3581 if (processor_alias_table[i].flags & PTA_XOP
3582 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3583 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3584 if (processor_alias_table[i].flags & PTA_LWP
3585 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3586 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3587 if (processor_alias_table[i].flags & PTA_ABM
3588 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3589 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3590 if (processor_alias_table[i].flags & PTA_BMI
3591 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
3592 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI;
3593 if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
3594 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
3595 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
3596 if (processor_alias_table[i].flags & PTA_TBM
3597 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
3598 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_TBM;
3599 if (processor_alias_table[i].flags & PTA_BMI2
3600 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
3601 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
3602 if (processor_alias_table[i].flags & PTA_CX16
3603 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3604 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3605 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3606 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3607 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3608 if (!(TARGET_64BIT_P (opts->x_ix86_isa_flags)
3609 && (processor_alias_table[i].flags & PTA_NO_SAHF))
3610 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3611 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3612 if (processor_alias_table[i].flags & PTA_MOVBE
3613 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3614 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3615 if (processor_alias_table[i].flags & PTA_AES
3616 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3617 ix86_isa_flags |= OPTION_MASK_ISA_AES;
3618 if (processor_alias_table[i].flags & PTA_SHA
3619 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SHA))
3620 ix86_isa_flags |= OPTION_MASK_ISA_SHA;
3621 if (processor_alias_table[i].flags & PTA_PCLMUL
3622 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3623 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3624 if (processor_alias_table[i].flags & PTA_FSGSBASE
3625 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3626 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3627 if (processor_alias_table[i].flags & PTA_RDRND
3628 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3629 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3630 if (processor_alias_table[i].flags & PTA_F16C
3631 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3632 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3633 if (processor_alias_table[i].flags & PTA_RTM
3634 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RTM))
3635 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RTM;
3636 if (processor_alias_table[i].flags & PTA_HLE
3637 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_HLE))
3638 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_HLE;
3639 if (processor_alias_table[i].flags & PTA_PRFCHW
3640 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PRFCHW))
3641 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PRFCHW;
3642 if (processor_alias_table[i].flags & PTA_RDSEED
3643 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDSEED))
3644 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDSEED;
3645 if (processor_alias_table[i].flags & PTA_ADX
3646 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ADX))
3647 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ADX;
3648 if (processor_alias_table[i].flags & PTA_FXSR
3649 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FXSR))
3650 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FXSR;
3651 if (processor_alias_table[i].flags & PTA_XSAVE
3652 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVE))
3653 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVE;
3654 if (processor_alias_table[i].flags & PTA_XSAVEOPT
3655 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEOPT))
3656 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEOPT;
3657 if (processor_alias_table[i].flags & PTA_AVX512F
3658 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F))
3659 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512F;
3660 if (processor_alias_table[i].flags & PTA_AVX512ER
3661 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512ER))
3662 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512ER;
3663 if (processor_alias_table[i].flags & PTA_AVX512PF
3664 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512PF))
3665 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512PF;
3666 if (processor_alias_table[i].flags & PTA_AVX512CD
3667 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512CD))
3668 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512CD;
3669 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3670 x86_prefetch_sse = true;
3671
3672 break;
3673 }
3674
3675 if (!strcmp (opts->x_ix86_arch_string, "generic"))
3676 error ("generic CPU can be used only for %stune=%s %s",
3677 prefix, suffix, sw);
3678 else if (!strcmp (opts->x_ix86_arch_string, "intel"))
3679 error ("intel CPU can be used only for %stune=%s %s",
3680 prefix, suffix, sw);
3681 else if (i == pta_size)
3682 error ("bad value (%s) for %sarch=%s %s",
3683 opts->x_ix86_arch_string, prefix, suffix, sw);
3684
3685 ix86_arch_mask = 1u << ix86_arch;
3686 for (i = 0; i < X86_ARCH_LAST; ++i)
3687 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3688
3689 for (i = 0; i < pta_size; i++)
3690 if (! strcmp (opts->x_ix86_tune_string, processor_alias_table[i].name))
3691 {
3692 ix86_schedule = processor_alias_table[i].schedule;
3693 ix86_tune = processor_alias_table[i].processor;
3694 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3695 {
3696 if (!(processor_alias_table[i].flags & PTA_64BIT))
3697 {
3698 if (ix86_tune_defaulted)
3699 {
3700 opts->x_ix86_tune_string = "x86-64";
3701 for (i = 0; i < pta_size; i++)
3702 if (! strcmp (opts->x_ix86_tune_string,
3703 processor_alias_table[i].name))
3704 break;
3705 ix86_schedule = processor_alias_table[i].schedule;
3706 ix86_tune = processor_alias_table[i].processor;
3707 }
3708 else
3709 error ("CPU you selected does not support x86-64 "
3710 "instruction set");
3711 }
3712 }
3713 /* Intel CPUs have always interpreted SSE prefetch instructions as
3714 NOPs; so, we can enable SSE prefetch instructions even when
3715 -mtune (rather than -march) points us to a processor that has them.
3716 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3717 higher processors. */
3718 if (TARGET_CMOV
3719 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3720 x86_prefetch_sse = true;
3721 break;
3722 }
3723
3724 if (ix86_tune_specified && i == pta_size)
3725 error ("bad value (%s) for %stune=%s %s",
3726 opts->x_ix86_tune_string, prefix, suffix, sw);
3727
3728 set_ix86_tune_features (ix86_tune, opts->x_ix86_dump_tunes);
3729
3730 #ifndef USE_IX86_FRAME_POINTER
3731 #define USE_IX86_FRAME_POINTER 0
3732 #endif
3733
3734 #ifndef USE_X86_64_FRAME_POINTER
3735 #define USE_X86_64_FRAME_POINTER 0
3736 #endif
3737
3738 /* Set the default values for switches whose default depends on TARGET_64BIT
3739 in case they weren't overwritten by command line options. */
3740 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3741 {
3742 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3743 opts->x_flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
3744 if (opts->x_flag_asynchronous_unwind_tables
3745 && !opts_set->x_flag_unwind_tables
3746 && TARGET_64BIT_MS_ABI)
3747 opts->x_flag_unwind_tables = 1;
3748 if (opts->x_flag_asynchronous_unwind_tables == 2)
3749 opts->x_flag_unwind_tables
3750 = opts->x_flag_asynchronous_unwind_tables = 1;
3751 if (opts->x_flag_pcc_struct_return == 2)
3752 opts->x_flag_pcc_struct_return = 0;
3753 }
3754 else
3755 {
3756 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3757 opts->x_flag_omit_frame_pointer
3758 = !(USE_IX86_FRAME_POINTER || opts->x_optimize_size);
3759 if (opts->x_flag_asynchronous_unwind_tables == 2)
3760 opts->x_flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3761 if (opts->x_flag_pcc_struct_return == 2)
3762 opts->x_flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3763 }
3764
3765 ix86_tune_cost = processor_target_table[ix86_tune].cost;
3766 if (opts->x_optimize_size)
3767 ix86_cost = &ix86_size_cost;
3768 else
3769 ix86_cost = ix86_tune_cost;
3770
3771 /* Arrange to set up i386_stack_locals for all functions. */
3772 init_machine_status = ix86_init_machine_status;
3773
3774 /* Validate -mregparm= value. */
3775 if (opts_set->x_ix86_regparm)
3776 {
3777 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3778 warning (0, "-mregparm is ignored in 64-bit mode");
3779 if (opts->x_ix86_regparm > REGPARM_MAX)
3780 {
3781 error ("-mregparm=%d is not between 0 and %d",
3782 opts->x_ix86_regparm, REGPARM_MAX);
3783 opts->x_ix86_regparm = 0;
3784 }
3785 }
3786 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3787 opts->x_ix86_regparm = REGPARM_MAX;
3788
3789 /* Default align_* from the processor table. */
3790 if (opts->x_align_loops == 0)
3791 {
3792 opts->x_align_loops = processor_target_table[ix86_tune].align_loop;
3793 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3794 }
3795 if (opts->x_align_jumps == 0)
3796 {
3797 opts->x_align_jumps = processor_target_table[ix86_tune].align_jump;
3798 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3799 }
3800 if (opts->x_align_functions == 0)
3801 {
3802 opts->x_align_functions = processor_target_table[ix86_tune].align_func;
3803 }
3804
3805 /* Provide default for -mbranch-cost= value. */
3806 if (!opts_set->x_ix86_branch_cost)
3807 opts->x_ix86_branch_cost = ix86_cost->branch_cost;
3808
3809 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3810 {
3811 opts->x_target_flags
3812 |= TARGET_SUBTARGET64_DEFAULT & ~opts_set->x_target_flags;
3813
3814 /* Enable by default the SSE and MMX builtins. Do allow the user to
3815 explicitly disable any of these. In particular, disabling SSE and
3816 MMX for kernel code is extremely useful. */
3817 if (!ix86_arch_specified)
3818 opts->x_ix86_isa_flags
3819 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3820 | TARGET_SUBTARGET64_ISA_DEFAULT)
3821 & ~opts->x_ix86_isa_flags_explicit);
3822
3823 if (TARGET_RTD_P (opts->x_target_flags))
 3824 warning (0, "%srtd%s is ignored in 64-bit mode", prefix, suffix);
3825 }
3826 else
3827 {
3828 opts->x_target_flags
3829 |= TARGET_SUBTARGET32_DEFAULT & ~opts_set->x_target_flags;
3830
3831 if (!ix86_arch_specified)
3832 opts->x_ix86_isa_flags
3833 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit;
3834
 3835 /* The i386 ABI does not specify a red zone. It still makes sense to use it
 3836 when the programmer takes care to keep the stack from being destroyed. */
3837 if (!(opts_set->x_target_flags & MASK_NO_RED_ZONE))
3838 opts->x_target_flags |= MASK_NO_RED_ZONE;
3839 }
3840
3841 /* Keep nonleaf frame pointers. */
3842 if (opts->x_flag_omit_frame_pointer)
3843 opts->x_target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3844 else if (TARGET_OMIT_LEAF_FRAME_POINTER_P (opts->x_target_flags))
3845 opts->x_flag_omit_frame_pointer = 1;
3846
3847 /* If we're doing fast math, we don't care about comparison order
3848 wrt NaNs. This lets us use a shorter comparison sequence. */
3849 if (opts->x_flag_finite_math_only)
3850 opts->x_target_flags &= ~MASK_IEEE_FP;
3851
3852 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3853 since the insns won't need emulation. */
3854 if (ix86_tune_features [X86_TUNE_ALWAYS_FANCY_MATH_387])
3855 opts->x_target_flags &= ~MASK_NO_FANCY_MATH_387;
3856
3857 /* Likewise, if the target doesn't have a 387, or we've specified
3858 software floating point, don't use 387 inline intrinsics. */
3859 if (!TARGET_80387_P (opts->x_target_flags))
3860 opts->x_target_flags |= MASK_NO_FANCY_MATH_387;
3861
3862 /* Turn on MMX builtins for -msse. */
3863 if (TARGET_SSE_P (opts->x_ix86_isa_flags))
3864 opts->x_ix86_isa_flags
3865 |= OPTION_MASK_ISA_MMX & ~opts->x_ix86_isa_flags_explicit;
3866
3867 /* Enable SSE prefetch. */
3868 if (TARGET_SSE_P (opts->x_ix86_isa_flags)
3869 || (TARGET_PRFCHW && !TARGET_3DNOW_P (opts->x_ix86_isa_flags)))
3870 x86_prefetch_sse = true;
3871
3872 /* Enable prefetch{,w} instructions for -m3dnow. */
3873 if (TARGET_3DNOW_P (opts->x_ix86_isa_flags))
3874 opts->x_ix86_isa_flags
3875 |= OPTION_MASK_ISA_PRFCHW & ~opts->x_ix86_isa_flags_explicit;
3876
3877 /* Enable popcnt instruction for -msse4.2 or -mabm. */
3878 if (TARGET_SSE4_2_P (opts->x_ix86_isa_flags)
3879 || TARGET_ABM_P (opts->x_ix86_isa_flags))
3880 opts->x_ix86_isa_flags
3881 |= OPTION_MASK_ISA_POPCNT & ~opts->x_ix86_isa_flags_explicit;
3882
3883 /* Enable lzcnt instruction for -mabm. */
3884 if (TARGET_ABM_P(opts->x_ix86_isa_flags))
3885 opts->x_ix86_isa_flags
3886 |= OPTION_MASK_ISA_LZCNT & ~opts->x_ix86_isa_flags_explicit;
3887
3888 /* Validate -mpreferred-stack-boundary= value or default it to
3889 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3890 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3891 if (opts_set->x_ix86_preferred_stack_boundary_arg)
3892 {
3893 int min = (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3894 ? (TARGET_SSE_P (opts->x_ix86_isa_flags) ? 4 : 3) : 2);
3895 int max = (TARGET_SEH ? 4 : 12);
3896
3897 if (opts->x_ix86_preferred_stack_boundary_arg < min
3898 || opts->x_ix86_preferred_stack_boundary_arg > max)
3899 {
3900 if (min == max)
3901 error ("-mpreferred-stack-boundary is not supported "
3902 "for this target");
3903 else
3904 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
3905 opts->x_ix86_preferred_stack_boundary_arg, min, max);
3906 }
3907 else
3908 ix86_preferred_stack_boundary
3909 = (1 << opts->x_ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
3910 }
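
/* Note that the -mpreferred-stack-boundary= argument above is the base-2
   logarithm of the alignment in bytes: e.g. -mpreferred-stack-boundary=4
   requests 2^4 = 16-byte alignment, which the conversion above turns into
   128 bits via BITS_PER_UNIT.  */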
3911
3912 /* Set the default value for -mstackrealign. */
3913 if (opts->x_ix86_force_align_arg_pointer == -1)
3914 opts->x_ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3915
3916 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3917
3918 /* Validate -mincoming-stack-boundary= value or default it to
3919 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3920 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3921 if (opts_set->x_ix86_incoming_stack_boundary_arg)
3922 {
3923 if (opts->x_ix86_incoming_stack_boundary_arg
3924 < (TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2)
3925 || opts->x_ix86_incoming_stack_boundary_arg > 12)
3926 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3927 opts->x_ix86_incoming_stack_boundary_arg,
3928 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2);
3929 else
3930 {
3931 ix86_user_incoming_stack_boundary
3932 = (1 << opts->x_ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
3933 ix86_incoming_stack_boundary
3934 = ix86_user_incoming_stack_boundary;
3935 }
3936 }
3937
3938 /* Accept -msseregparm only if at least SSE support is enabled. */
3939 if (TARGET_SSEREGPARM_P (opts->x_target_flags)
3940 && ! TARGET_SSE_P (opts->x_ix86_isa_flags))
3941 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3942
3943 if (opts_set->x_ix86_fpmath)
3944 {
3945 if (opts->x_ix86_fpmath & FPMATH_SSE)
3946 {
3947 if (!TARGET_SSE_P (opts->x_ix86_isa_flags))
3948 {
3949 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3950 opts->x_ix86_fpmath = FPMATH_387;
3951 }
3952 else if ((opts->x_ix86_fpmath & FPMATH_387)
3953 && !TARGET_80387_P (opts->x_target_flags))
3954 {
3955 warning (0, "387 instruction set disabled, using SSE arithmetics");
3956 opts->x_ix86_fpmath = FPMATH_SSE;
3957 }
3958 }
3959 }
 3960 /* For all chips supporting SSE2, -mfpmath=sse performs better than
 3961 -mfpmath=387. The latter is nevertheless the default on many targets, since
 3962 the extra 80-bit precision of temporaries is considered to be part of the ABI.
 3963 Override the default at least for -ffast-math.
 3964 TODO: -mfpmath=both seems to produce code that performs the same with slightly
 3965 smaller binaries. It is however not clear whether register allocation is
 3966 ready for this setting.
 3967 Also, -mfpmath=387 is overall a lot more compact (about 4-5%) than SSE
 3968 codegen. We may switch to 387 with -ffast-math for size-optimized
 3969 functions. */
3970 else if (fast_math_flags_set_p (&global_options)
3971 && TARGET_SSE2_P (opts->x_ix86_isa_flags))
3972 opts->x_ix86_fpmath = FPMATH_SSE;
3973 else
3974 opts->x_ix86_fpmath = TARGET_FPMATH_DEFAULT_P (opts->x_ix86_isa_flags);
3975
3976 /* If the i387 is disabled, then do not return values in it. */
3977 if (!TARGET_80387_P (opts->x_target_flags))
3978 opts->x_target_flags &= ~MASK_FLOAT_RETURNS;
3979
3980 /* Use external vectorized library in vectorizing intrinsics. */
3981 if (opts_set->x_ix86_veclibabi_type)
3982 switch (opts->x_ix86_veclibabi_type)
3983 {
3984 case ix86_veclibabi_type_svml:
3985 ix86_veclib_handler = ix86_veclibabi_svml;
3986 break;
3987
3988 case ix86_veclibabi_type_acml:
3989 ix86_veclib_handler = ix86_veclibabi_acml;
3990 break;
3991
3992 default:
3993 gcc_unreachable ();
3994 }
3995
3996 if (ix86_tune_features [X86_TUNE_ACCUMULATE_OUTGOING_ARGS]
3997 && !(opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
3998 && !opts->x_optimize_size)
3999 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4000
4001 /* If stack probes are required, the space used for large function
4002 arguments on the stack must also be probed, so enable
4003 -maccumulate-outgoing-args so this happens in the prologue. */
4004 if (TARGET_STACK_PROBE_P (opts->x_target_flags)
4005 && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4006 {
4007 if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4008 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
4009 "for correctness", prefix, suffix);
4010 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4011 }
4012
4013 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
4014 {
4015 char *p;
4016 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
4017 p = strchr (internal_label_prefix, 'X');
4018 internal_label_prefix_len = p - internal_label_prefix;
4019 *p = '\0';
4020 }
4021
 4022 /* When the scheduling description is not available, disable the scheduler pass
 4023 so it won't slow down the compilation and make x87 code slower. */
4024 if (!TARGET_SCHEDULE)
4025 opts->x_flag_schedule_insns_after_reload = opts->x_flag_schedule_insns = 0;
4026
4027 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4028 ix86_tune_cost->simultaneous_prefetches,
4029 opts->x_param_values,
4030 opts_set->x_param_values);
4031 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
4032 ix86_tune_cost->prefetch_block,
4033 opts->x_param_values,
4034 opts_set->x_param_values);
4035 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
4036 ix86_tune_cost->l1_cache_size,
4037 opts->x_param_values,
4038 opts_set->x_param_values);
4039 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
4040 ix86_tune_cost->l2_cache_size,
4041 opts->x_param_values,
4042 opts_set->x_param_values);
4043
 4044 /* Enable software prefetching at -O3 for CPUs where prefetching is helpful. */
4045 if (opts->x_flag_prefetch_loop_arrays < 0
4046 && HAVE_prefetch
4047 && (opts->x_optimize >= 3 || opts->x_flag_profile_use)
4048 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
4049 opts->x_flag_prefetch_loop_arrays = 1;
4050
4051 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
 4052 can be optimized to ap = __builtin_next_arg (0). */
4053 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && !opts->x_flag_split_stack)
4054 targetm.expand_builtin_va_start = NULL;
4055
4056 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4057 {
4058 ix86_gen_leave = gen_leave_rex64;
4059 if (Pmode == DImode)
4060 {
4061 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_di;
4062 ix86_gen_tls_local_dynamic_base_64
4063 = gen_tls_local_dynamic_base_64_di;
4064 }
4065 else
4066 {
4067 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_si;
4068 ix86_gen_tls_local_dynamic_base_64
4069 = gen_tls_local_dynamic_base_64_si;
4070 }
4071 }
4072 else
4073 ix86_gen_leave = gen_leave;
4074
4075 if (Pmode == DImode)
4076 {
4077 ix86_gen_add3 = gen_adddi3;
4078 ix86_gen_sub3 = gen_subdi3;
4079 ix86_gen_sub3_carry = gen_subdi3_carry;
4080 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
4081 ix86_gen_andsp = gen_anddi3;
4082 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
4083 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
4084 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
4085 ix86_gen_monitor = gen_sse3_monitor_di;
4086 }
4087 else
4088 {
4089 ix86_gen_add3 = gen_addsi3;
4090 ix86_gen_sub3 = gen_subsi3;
4091 ix86_gen_sub3_carry = gen_subsi3_carry;
4092 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
4093 ix86_gen_andsp = gen_andsi3;
4094 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
4095 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
4096 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
4097 ix86_gen_monitor = gen_sse3_monitor_si;
4098 }
4099
4100 #ifdef USE_IX86_CLD
4101 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
4102 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
4103 opts->x_target_flags |= MASK_CLD & ~opts_set->x_target_flags;
4104 #endif
4105
4106 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && opts->x_flag_pic)
4107 {
4108 if (opts->x_flag_fentry > 0)
4109 sorry ("-mfentry isn%'t supported for 32-bit in combination "
4110 "with -fpic");
4111 opts->x_flag_fentry = 0;
4112 }
4113 else if (TARGET_SEH)
4114 {
4115 if (opts->x_flag_fentry == 0)
4116 sorry ("-mno-fentry isn%'t compatible with SEH");
4117 opts->x_flag_fentry = 1;
4118 }
4119 else if (opts->x_flag_fentry < 0)
4120 {
4121 #if defined(PROFILE_BEFORE_PROLOGUE)
4122 opts->x_flag_fentry = 1;
4123 #else
4124 opts->x_flag_fentry = 0;
4125 #endif
4126 }
4127
 4128 /* When not optimizing for size, enable vzeroupper optimization for
4129 TARGET_AVX with -fexpensive-optimizations and split 32-byte
4130 AVX unaligned load/store. */
4131 if (!opts->x_optimize_size)
4132 {
4133 if (flag_expensive_optimizations
4134 && !(opts_set->x_target_flags & MASK_VZEROUPPER))
4135 opts->x_target_flags |= MASK_VZEROUPPER;
4136 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL]
4137 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
4138 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
4139 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL]
4140 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_STORE))
4141 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
4142 /* Enable 128-bit AVX instruction generation
4143 for the auto-vectorizer. */
4144 if (TARGET_AVX128_OPTIMAL
4145 && !(opts_set->x_target_flags & MASK_PREFER_AVX128))
4146 opts->x_target_flags |= MASK_PREFER_AVX128;
4147 }
4148
4149 if (opts->x_ix86_recip_name)
4150 {
4151 char *p = ASTRDUP (opts->x_ix86_recip_name);
4152 char *q;
4153 unsigned int mask, i;
4154 bool invert;
4155
4156 while ((q = strtok (p, ",")) != NULL)
4157 {
4158 p = NULL;
4159 if (*q == '!')
4160 {
4161 invert = true;
4162 q++;
4163 }
4164 else
4165 invert = false;
4166
4167 if (!strcmp (q, "default"))
4168 mask = RECIP_MASK_ALL;
4169 else
4170 {
4171 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4172 if (!strcmp (q, recip_options[i].string))
4173 {
4174 mask = recip_options[i].mask;
4175 break;
4176 }
4177
4178 if (i == ARRAY_SIZE (recip_options))
4179 {
4180 error ("unknown option for -mrecip=%s", q);
4181 invert = false;
4182 mask = RECIP_MASK_NONE;
4183 }
4184 }
4185
4186 opts->x_recip_mask_explicit |= mask;
4187 if (invert)
4188 opts->x_recip_mask &= ~mask;
4189 else
4190 opts->x_recip_mask |= mask;
4191 }
4192 }
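
/* Example for the -mrecip= parsing above: -mrecip=all,!sqrt processes the
   comma-separated list left to right, first setting every bit in
   RECIP_MASK_ALL and then clearing RECIP_MASK_SQRT, so all reciprocal
   approximations except the scalar square root remain enabled.  */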
4193
4194 if (TARGET_RECIP_P (opts->x_target_flags))
4195 opts->x_recip_mask |= RECIP_MASK_ALL & ~opts->x_recip_mask_explicit;
4196 else if (opts_set->x_target_flags & MASK_RECIP)
4197 opts->x_recip_mask &= ~(RECIP_MASK_ALL & ~opts->x_recip_mask_explicit);
4198
4199 /* Default long double to 64-bit for 32-bit Bionic and to __float128
4200 for 64-bit Bionic. */
4201 if (TARGET_HAS_BIONIC
4202 && !(opts_set->x_target_flags
4203 & (MASK_LONG_DOUBLE_64 | MASK_LONG_DOUBLE_128)))
4204 opts->x_target_flags |= (TARGET_64BIT
4205 ? MASK_LONG_DOUBLE_128
4206 : MASK_LONG_DOUBLE_64);
4207
4208 /* Only one of them can be active. */
4209 gcc_assert ((opts->x_target_flags & MASK_LONG_DOUBLE_64) == 0
4210 || (opts->x_target_flags & MASK_LONG_DOUBLE_128) == 0);
4211
4212 /* Save the initial options in case the user does function specific
4213 options. */
4214 if (main_args_p)
4215 target_option_default_node = target_option_current_node
4216 = build_target_option_node (opts);
4217
4218 /* Handle stack protector */
4219 if (!opts_set->x_ix86_stack_protector_guard)
4220 opts->x_ix86_stack_protector_guard
4221 = TARGET_HAS_BIONIC ? SSP_GLOBAL : SSP_TLS;
4222
4223 /* Handle -mmemcpy-strategy= and -mmemset-strategy= */
4224 if (opts->x_ix86_tune_memcpy_strategy)
4225 {
4226 char *str = xstrdup (opts->x_ix86_tune_memcpy_strategy);
4227 ix86_parse_stringop_strategy_string (str, false);
4228 free (str);
4229 }
4230
4231 if (opts->x_ix86_tune_memset_strategy)
4232 {
4233 char *str = xstrdup (opts->x_ix86_tune_memset_strategy);
4234 ix86_parse_stringop_strategy_string (str, true);
4235 free (str);
4236 }
4237 }
4238
4239 /* Implement the TARGET_OPTION_OVERRIDE hook. */
4240
4241 static void
4242 ix86_option_override (void)
4243 {
4244 opt_pass *pass_insert_vzeroupper = make_pass_insert_vzeroupper (g);
4245 static struct register_pass_info insert_vzeroupper_info
4246 = { pass_insert_vzeroupper, "reload",
4247 1, PASS_POS_INSERT_AFTER
4248 };
4249
4250 ix86_option_override_internal (true, &global_options, &global_options_set);
4251
4252
4253 /* This needs to be done at start up. It's convenient to do it here. */
4254 register_pass (&insert_vzeroupper_info);
4255 }
4256
4257 /* Update register usage after having seen the compiler flags. */
4258
4259 static void
4260 ix86_conditional_register_usage (void)
4261 {
4262 int i, c_mask;
4263 unsigned int j;
4264
4265 /* The PIC register, if it exists, is fixed. */
4266 j = PIC_OFFSET_TABLE_REGNUM;
4267 if (j != INVALID_REGNUM)
4268 fixed_regs[j] = call_used_regs[j] = 1;
4269
4270 /* For 32-bit targets, squash the REX registers. */
4271 if (! TARGET_64BIT)
4272 {
4273 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
4274 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4275 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
4276 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4277 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4278 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4279 }
4280
4281 /* See the definition of CALL_USED_REGISTERS in i386.h. */
4282 c_mask = (TARGET_64BIT_MS_ABI ? (1 << 3)
4283 : TARGET_64BIT ? (1 << 2)
4284 : (1 << 1));
4285
4286 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
4287
4288 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4289 {
4290 /* Set/reset conditionally defined registers from
4291 CALL_USED_REGISTERS initializer. */
4292 if (call_used_regs[i] > 1)
4293 call_used_regs[i] = !!(call_used_regs[i] & c_mask);
4294
4295 /* Calculate registers of CLOBBERED_REGS register set
4296 as call used registers from GENERAL_REGS register set. */
4297 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
4298 && call_used_regs[i])
4299 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
4300 }
4301
4302 /* If MMX is disabled, squash the registers. */
4303 if (! TARGET_MMX)
4304 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4305 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
4306 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4307
4308 /* If SSE is disabled, squash the registers. */
4309 if (! TARGET_SSE)
4310 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4311 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
4312 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4313
4314 /* If the FPU is disabled, squash the registers. */
4315 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
4316 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4317 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
4318 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4319
4320 /* If AVX512F is disabled, squash the registers. */
4321 if (! TARGET_AVX512F)
4322 {
4323 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4324 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4325
4326 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
4327 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4328 }
4329 }
4330
4331 \f
4332 /* Save the current options */
4333
4334 static void
4335 ix86_function_specific_save (struct cl_target_option *ptr,
4336 struct gcc_options *opts)
4337 {
4338 ptr->arch = ix86_arch;
4339 ptr->schedule = ix86_schedule;
4340 ptr->tune = ix86_tune;
4341 ptr->branch_cost = ix86_branch_cost;
4342 ptr->tune_defaulted = ix86_tune_defaulted;
4343 ptr->arch_specified = ix86_arch_specified;
4344 ptr->x_ix86_isa_flags_explicit = opts->x_ix86_isa_flags_explicit;
4345 ptr->x_ix86_target_flags_explicit = opts->x_ix86_target_flags_explicit;
4346 ptr->x_recip_mask_explicit = opts->x_recip_mask_explicit;
4347 ptr->x_ix86_arch_string = opts->x_ix86_arch_string;
4348 ptr->x_ix86_tune_string = opts->x_ix86_tune_string;
4349 ptr->x_ix86_cmodel = opts->x_ix86_cmodel;
4350 ptr->x_ix86_abi = opts->x_ix86_abi;
4351 ptr->x_ix86_asm_dialect = opts->x_ix86_asm_dialect;
4352 ptr->x_ix86_branch_cost = opts->x_ix86_branch_cost;
4353 ptr->x_ix86_dump_tunes = opts->x_ix86_dump_tunes;
4354 ptr->x_ix86_force_align_arg_pointer = opts->x_ix86_force_align_arg_pointer;
4355 ptr->x_ix86_force_drap = opts->x_ix86_force_drap;
4356 ptr->x_ix86_incoming_stack_boundary_arg = opts->x_ix86_incoming_stack_boundary_arg;
4357 ptr->x_ix86_pmode = opts->x_ix86_pmode;
4358 ptr->x_ix86_preferred_stack_boundary_arg = opts->x_ix86_preferred_stack_boundary_arg;
4359 ptr->x_ix86_recip_name = opts->x_ix86_recip_name;
4360 ptr->x_ix86_regparm = opts->x_ix86_regparm;
4361 ptr->x_ix86_section_threshold = opts->x_ix86_section_threshold;
4362 ptr->x_ix86_sse2avx = opts->x_ix86_sse2avx;
4363 ptr->x_ix86_stack_protector_guard = opts->x_ix86_stack_protector_guard;
4364 ptr->x_ix86_stringop_alg = opts->x_ix86_stringop_alg;
4365 ptr->x_ix86_tls_dialect = opts->x_ix86_tls_dialect;
4366 ptr->x_ix86_tune_ctrl_string = opts->x_ix86_tune_ctrl_string;
4367 ptr->x_ix86_tune_memcpy_strategy = opts->x_ix86_tune_memcpy_strategy;
4368 ptr->x_ix86_tune_memset_strategy = opts->x_ix86_tune_memset_strategy;
4369 ptr->x_ix86_tune_no_default = opts->x_ix86_tune_no_default;
4370 ptr->x_ix86_veclibabi_type = opts->x_ix86_veclibabi_type;
4371
4372 /* The fields are char but the variables are not; make sure the
4373 values fit in the fields. */
4374 gcc_assert (ptr->arch == ix86_arch);
4375 gcc_assert (ptr->schedule == ix86_schedule);
4376 gcc_assert (ptr->tune == ix86_tune);
4377 gcc_assert (ptr->branch_cost == ix86_branch_cost);
4378 }
4379
4380 /* Restore the current options */
4381
4382 static void
4383 ix86_function_specific_restore (struct gcc_options *opts,
4384 struct cl_target_option *ptr)
4385 {
4386 enum processor_type old_tune = ix86_tune;
4387 enum processor_type old_arch = ix86_arch;
4388 unsigned int ix86_arch_mask;
4389 int i;
4390
4391 /* We don't change -fPIC. */
4392 opts->x_flag_pic = flag_pic;
4393
4394 ix86_arch = (enum processor_type) ptr->arch;
4395 ix86_schedule = (enum attr_cpu) ptr->schedule;
4396 ix86_tune = (enum processor_type) ptr->tune;
4397 opts->x_ix86_branch_cost = ptr->branch_cost;
4398 ix86_tune_defaulted = ptr->tune_defaulted;
4399 ix86_arch_specified = ptr->arch_specified;
4400 opts->x_ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
4401 opts->x_ix86_target_flags_explicit = ptr->x_ix86_target_flags_explicit;
4402 opts->x_recip_mask_explicit = ptr->x_recip_mask_explicit;
4403 opts->x_ix86_arch_string = ptr->x_ix86_arch_string;
4404 opts->x_ix86_tune_string = ptr->x_ix86_tune_string;
4405 opts->x_ix86_cmodel = ptr->x_ix86_cmodel;
4406 opts->x_ix86_abi = ptr->x_ix86_abi;
4407 opts->x_ix86_asm_dialect = ptr->x_ix86_asm_dialect;
4408 opts->x_ix86_branch_cost = ptr->x_ix86_branch_cost;
4409 opts->x_ix86_dump_tunes = ptr->x_ix86_dump_tunes;
4410 opts->x_ix86_force_align_arg_pointer = ptr->x_ix86_force_align_arg_pointer;
4411 opts->x_ix86_force_drap = ptr->x_ix86_force_drap;
4412 opts->x_ix86_incoming_stack_boundary_arg = ptr->x_ix86_incoming_stack_boundary_arg;
4413 opts->x_ix86_pmode = ptr->x_ix86_pmode;
4414 opts->x_ix86_preferred_stack_boundary_arg = ptr->x_ix86_preferred_stack_boundary_arg;
4415 opts->x_ix86_recip_name = ptr->x_ix86_recip_name;
4416 opts->x_ix86_regparm = ptr->x_ix86_regparm;
4417 opts->x_ix86_section_threshold = ptr->x_ix86_section_threshold;
4418 opts->x_ix86_sse2avx = ptr->x_ix86_sse2avx;
4419 opts->x_ix86_stack_protector_guard = ptr->x_ix86_stack_protector_guard;
4420 opts->x_ix86_stringop_alg = ptr->x_ix86_stringop_alg;
4421 opts->x_ix86_tls_dialect = ptr->x_ix86_tls_dialect;
4422 opts->x_ix86_tune_ctrl_string = ptr->x_ix86_tune_ctrl_string;
4423 opts->x_ix86_tune_memcpy_strategy = ptr->x_ix86_tune_memcpy_strategy;
4424 opts->x_ix86_tune_memset_strategy = ptr->x_ix86_tune_memset_strategy;
4425 opts->x_ix86_tune_no_default = ptr->x_ix86_tune_no_default;
4426 opts->x_ix86_veclibabi_type = ptr->x_ix86_veclibabi_type;
4427
4428 /* Recreate the arch feature tests if the arch changed */
4429 if (old_arch != ix86_arch)
4430 {
4431 ix86_arch_mask = 1u << ix86_arch;
4432 for (i = 0; i < X86_ARCH_LAST; ++i)
4433 ix86_arch_features[i]
4434 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
4435 }
4436
4437 /* Recreate the tune optimization tests */
4438 if (old_tune != ix86_tune)
4439 set_ix86_tune_features (ix86_tune, false);
4440 }
4441
4442 /* Print the current options */
4443
4444 static void
4445 ix86_function_specific_print (FILE *file, int indent,
4446 struct cl_target_option *ptr)
4447 {
4448 char *target_string
4449 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
4450 NULL, NULL, ptr->x_ix86_fpmath, false);
4451
4452 gcc_assert (ptr->arch < PROCESSOR_max);
4453 fprintf (file, "%*sarch = %d (%s)\n",
4454 indent, "",
4455 ptr->arch, processor_target_table[ptr->arch].name);
4456
4457 gcc_assert (ptr->tune < PROCESSOR_max);
4458 fprintf (file, "%*stune = %d (%s)\n",
4459 indent, "",
4460 ptr->tune, processor_target_table[ptr->tune].name);
4461
4462 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
4463
4464 if (target_string)
4465 {
4466 fprintf (file, "%*s%s\n", indent, "", target_string);
4467 free (target_string);
4468 }
4469 }
4470
4471 \f
4472 /* Inner function to process the attribute((target(...))), take an argument and
4473 set the current options from the argument. If we have a list, recursively go
4474 over the list. */
4475
4476 static bool
4477 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
4478 struct gcc_options *opts,
4479 struct gcc_options *opts_set,
4480 struct gcc_options *enum_opts_set)
4481 {
4482 char *next_optstr;
4483 bool ret = true;
4484
4485 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4486 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4487 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
4488 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4489 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
4490
4491 enum ix86_opt_type
4492 {
4493 ix86_opt_unknown,
4494 ix86_opt_yes,
4495 ix86_opt_no,
4496 ix86_opt_str,
4497 ix86_opt_enum,
4498 ix86_opt_isa
4499 };
4500
4501 static const struct
4502 {
4503 const char *string;
4504 size_t len;
4505 enum ix86_opt_type type;
4506 int opt;
4507 int mask;
4508 } attrs[] = {
4509 /* isa options */
4510 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
4511 IX86_ATTR_ISA ("abm", OPT_mabm),
4512 IX86_ATTR_ISA ("bmi", OPT_mbmi),
4513 IX86_ATTR_ISA ("bmi2", OPT_mbmi2),
4514 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt),
4515 IX86_ATTR_ISA ("tbm", OPT_mtbm),
4516 IX86_ATTR_ISA ("aes", OPT_maes),
4517 IX86_ATTR_ISA ("sha", OPT_msha),
4518 IX86_ATTR_ISA ("avx", OPT_mavx),
4519 IX86_ATTR_ISA ("avx2", OPT_mavx2),
4520 IX86_ATTR_ISA ("avx512f", OPT_mavx512f),
4521 IX86_ATTR_ISA ("avx512pf", OPT_mavx512pf),
4522 IX86_ATTR_ISA ("avx512er", OPT_mavx512er),
4523 IX86_ATTR_ISA ("avx512cd", OPT_mavx512cd),
4524 IX86_ATTR_ISA ("mmx", OPT_mmmx),
4525 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
4526 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
4527 IX86_ATTR_ISA ("sse", OPT_msse),
4528 IX86_ATTR_ISA ("sse2", OPT_msse2),
4529 IX86_ATTR_ISA ("sse3", OPT_msse3),
4530 IX86_ATTR_ISA ("sse4", OPT_msse4),
4531 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
4532 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
4533 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
4534 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
4535 IX86_ATTR_ISA ("fma4", OPT_mfma4),
4536 IX86_ATTR_ISA ("fma", OPT_mfma),
4537 IX86_ATTR_ISA ("xop", OPT_mxop),
4538 IX86_ATTR_ISA ("lwp", OPT_mlwp),
4539 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
4540 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
4541 IX86_ATTR_ISA ("f16c", OPT_mf16c),
4542 IX86_ATTR_ISA ("rtm", OPT_mrtm),
4543 IX86_ATTR_ISA ("hle", OPT_mhle),
4544 IX86_ATTR_ISA ("prfchw", OPT_mprfchw),
4545 IX86_ATTR_ISA ("rdseed", OPT_mrdseed),
4546 IX86_ATTR_ISA ("adx", OPT_madx),
4547 IX86_ATTR_ISA ("fxsr", OPT_mfxsr),
4548 IX86_ATTR_ISA ("xsave", OPT_mxsave),
4549 IX86_ATTR_ISA ("xsaveopt", OPT_mxsaveopt),
4550
4551 /* enum options */
4552 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
4553
4554 /* string options */
4555 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
4556 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
4557
4558 /* flag options */
4559 IX86_ATTR_YES ("cld",
4560 OPT_mcld,
4561 MASK_CLD),
4562
4563 IX86_ATTR_NO ("fancy-math-387",
4564 OPT_mfancy_math_387,
4565 MASK_NO_FANCY_MATH_387),
4566
4567 IX86_ATTR_YES ("ieee-fp",
4568 OPT_mieee_fp,
4569 MASK_IEEE_FP),
4570
4571 IX86_ATTR_YES ("inline-all-stringops",
4572 OPT_minline_all_stringops,
4573 MASK_INLINE_ALL_STRINGOPS),
4574
4575 IX86_ATTR_YES ("inline-stringops-dynamically",
4576 OPT_minline_stringops_dynamically,
4577 MASK_INLINE_STRINGOPS_DYNAMICALLY),
4578
4579 IX86_ATTR_NO ("align-stringops",
4580 OPT_mno_align_stringops,
4581 MASK_NO_ALIGN_STRINGOPS),
4582
4583 IX86_ATTR_YES ("recip",
4584 OPT_mrecip,
4585 MASK_RECIP),
4586
4587 };
4588
4589 /* If this is a list, recurse to get the options. */
4590 if (TREE_CODE (args) == TREE_LIST)
4591 {
4592 bool ret = true;
4593
4594 for (; args; args = TREE_CHAIN (args))
4595 if (TREE_VALUE (args)
4596 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
4597 p_strings, opts, opts_set,
4598 enum_opts_set))
4599 ret = false;
4600
4601 return ret;
4602 }
4603
4604 else if (TREE_CODE (args) != STRING_CST)
4605 {
4606 error ("attribute %<target%> argument not a string");
4607 return false;
4608 }
4609
4610 /* Handle multiple arguments separated by commas. */
4611 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
4612
4613 while (next_optstr && *next_optstr != '\0')
4614 {
4615 char *p = next_optstr;
4616 char *orig_p = p;
4617 char *comma = strchr (next_optstr, ',');
4618 const char *opt_string;
4619 size_t len, opt_len;
4620 int opt;
4621 bool opt_set_p;
4622 char ch;
4623 unsigned i;
4624 enum ix86_opt_type type = ix86_opt_unknown;
4625 int mask = 0;
4626
4627 if (comma)
4628 {
4629 *comma = '\0';
4630 len = comma - next_optstr;
4631 next_optstr = comma + 1;
4632 }
4633 else
4634 {
4635 len = strlen (p);
4636 next_optstr = NULL;
4637 }
4638
4639 /* Recognize no-xxx. */
4640 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4641 {
4642 opt_set_p = false;
4643 p += 3;
4644 len -= 3;
4645 }
4646 else
4647 opt_set_p = true;
4648
4649 /* Find the option. */
4650 ch = *p;
4651 opt = N_OPTS;
4652 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4653 {
4654 type = attrs[i].type;
4655 opt_len = attrs[i].len;
4656 if (ch == attrs[i].string[0]
4657 && ((type != ix86_opt_str && type != ix86_opt_enum)
4658 ? len == opt_len
4659 : len > opt_len)
4660 && memcmp (p, attrs[i].string, opt_len) == 0)
4661 {
4662 opt = attrs[i].opt;
4663 mask = attrs[i].mask;
4664 opt_string = attrs[i].string;
4665 break;
4666 }
4667 }
4668
4669 /* Process the option. */
4670 if (opt == N_OPTS)
4671 {
4672 error ("attribute(target(\"%s\")) is unknown", orig_p);
4673 ret = false;
4674 }
4675
4676 else if (type == ix86_opt_isa)
4677 {
4678 struct cl_decoded_option decoded;
4679
4680 generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
4681 ix86_handle_option (opts, opts_set,
4682 &decoded, input_location);
4683 }
4684
4685 else if (type == ix86_opt_yes || type == ix86_opt_no)
4686 {
4687 if (type == ix86_opt_no)
4688 opt_set_p = !opt_set_p;
4689
4690 if (opt_set_p)
4691 opts->x_target_flags |= mask;
4692 else
4693 opts->x_target_flags &= ~mask;
4694 }
4695
4696 else if (type == ix86_opt_str)
4697 {
4698 if (p_strings[opt])
4699 {
4700 error ("option(\"%s\") was already specified", opt_string);
4701 ret = false;
4702 }
4703 else
4704 p_strings[opt] = xstrdup (p + opt_len);
4705 }
4706
4707 else if (type == ix86_opt_enum)
4708 {
4709 bool arg_ok;
4710 int value;
4711
4712 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
4713 if (arg_ok)
4714 set_option (opts, enum_opts_set, opt, value,
4715 p + opt_len, DK_UNSPECIFIED, input_location,
4716 global_dc);
4717 else
4718 {
4719 error ("attribute(target(\"%s\")) is unknown", orig_p);
4720 ret = false;
4721 }
4722 }
4723
4724 else
4725 gcc_unreachable ();
4726 }
4727
4728 return ret;
4729 }
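
/* For example (hypothetical user code, for illustration only): a string
   such as

     __attribute__((target ("avx2,no-sse4a,fpmath=sse,arch=core-avx2")))
     int hypothetical_kernel (int);

   is split on commas above; each token is looked up in the attrs[] table,
   a leading "no-" clears the ISA flag instead of setting it, and the
   "arch=" / "tune=" values are stashed in p_strings[] for later use.  */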
4730
4731 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4732
4733 tree
4734 ix86_valid_target_attribute_tree (tree args,
4735 struct gcc_options *opts,
4736 struct gcc_options *opts_set)
4737 {
4738 const char *orig_arch_string = opts->x_ix86_arch_string;
4739 const char *orig_tune_string = opts->x_ix86_tune_string;
4740 enum fpmath_unit orig_fpmath_set = opts_set->x_ix86_fpmath;
4741 int orig_tune_defaulted = ix86_tune_defaulted;
4742 int orig_arch_specified = ix86_arch_specified;
4743 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
4744 tree t = NULL_TREE;
4745 int i;
4746 struct cl_target_option *def
4747 = TREE_TARGET_OPTION (target_option_default_node);
4748 struct gcc_options enum_opts_set;
4749
4750 memset (&enum_opts_set, 0, sizeof (enum_opts_set));
4751
4752 /* Process each of the options on the chain. */
4753 if (! ix86_valid_target_attribute_inner_p (args, option_strings, opts,
4754 opts_set, &enum_opts_set))
4755 return error_mark_node;
4756
4757 /* If the changed options are different from the default, rerun
4758 ix86_option_override_internal, and then save the options away.
4759      The string options are attribute options, and will be undone
4760 when we copy the save structure. */
4761 if (opts->x_ix86_isa_flags != def->x_ix86_isa_flags
4762 || opts->x_target_flags != def->x_target_flags
4763 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4764 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4765 || enum_opts_set.x_ix86_fpmath)
4766 {
4767 /* If we are using the default tune= or arch=, undo the string assigned,
4768 and use the default. */
4769 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4770 opts->x_ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4771 else if (!orig_arch_specified)
4772 opts->x_ix86_arch_string = NULL;
4773
4774 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4775 opts->x_ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4776 else if (orig_tune_defaulted)
4777 opts->x_ix86_tune_string = NULL;
4778
4779 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4780 if (enum_opts_set.x_ix86_fpmath)
4781 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
4782 else if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
4783 && TARGET_SSE_P (opts->x_ix86_isa_flags))
4784 {
4785 opts->x_ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
4786 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
4787 }
4788
4789 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4790 ix86_option_override_internal (false, opts, opts_set);
4791
4792 /* Add any builtin functions with the new isa if any. */
4793 ix86_add_new_builtins (opts->x_ix86_isa_flags);
4794
4795 /* Save the current options unless we are validating options for
4796 #pragma. */
4797 t = build_target_option_node (opts);
4798
4799 opts->x_ix86_arch_string = orig_arch_string;
4800 opts->x_ix86_tune_string = orig_tune_string;
4801 opts_set->x_ix86_fpmath = orig_fpmath_set;
4802
4803 /* Free up memory allocated to hold the strings */
4804 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
4805 free (option_strings[i]);
4806 }
4807
4808 return t;
4809 }
4810
4811 /* Hook to validate attribute((target("string"))). */
4812
4813 static bool
4814 ix86_valid_target_attribute_p (tree fndecl,
4815 tree ARG_UNUSED (name),
4816 tree args,
4817 int ARG_UNUSED (flags))
4818 {
4819 struct gcc_options func_options;
4820 tree new_target, new_optimize;
4821 bool ret = true;
4822
4823 /* attribute((target("default"))) does nothing, beyond
4824 affecting multi-versioning. */
4825 if (TREE_VALUE (args)
4826 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
4827 && TREE_CHAIN (args) == NULL_TREE
4828 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
4829 return true;
4830
4831 tree old_optimize = build_optimization_node (&global_options);
4832
4833 /* Get the optimization options of the current function. */
4834 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
4835
4836 if (!func_optimize)
4837 func_optimize = old_optimize;
4838
4839 /* Init func_options. */
4840 memset (&func_options, 0, sizeof (func_options));
4841 init_options_struct (&func_options, NULL);
4842 lang_hooks.init_options_struct (&func_options);
4843
4844 cl_optimization_restore (&func_options,
4845 TREE_OPTIMIZATION (func_optimize));
4846
4847 /* Initialize func_options to the default before its target options can
4848 be set. */
4849 cl_target_option_restore (&func_options,
4850 TREE_TARGET_OPTION (target_option_default_node));
4851
4852 new_target = ix86_valid_target_attribute_tree (args, &func_options,
4853 &global_options_set);
4854
4855 new_optimize = build_optimization_node (&func_options);
4856
4857 if (new_target == error_mark_node)
4858 ret = false;
4859
4860 else if (fndecl && new_target)
4861 {
4862 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
4863
4864 if (old_optimize != new_optimize)
4865 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
4866 }
4867
4868 return ret;
4869 }
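
/* For example (hypothetical user code, for illustration only):

     __attribute__((target ("sse4.2"))) int hypothetical_crc (int);

   gives that one function its own DECL_FUNCTION_SPECIFIC_TARGET node via
   the hook above, while target ("default") is accepted without changing
   any options, since it only matters for function multi-versioning.  */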
4870
4871 \f
4872 /* Hook to determine if one function can safely inline another. */
4873
4874 static bool
4875 ix86_can_inline_p (tree caller, tree callee)
4876 {
4877 bool ret = false;
4878 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
4879 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
4880
4881 /* If callee has no option attributes, then it is ok to inline. */
4882 if (!callee_tree)
4883 ret = true;
4884
4885 /* If caller has no option attributes, but callee does then it is not ok to
4886 inline. */
4887 else if (!caller_tree)
4888 ret = false;
4889
4890 else
4891 {
4892 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
4893 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
4894
4895       /* Callee's isa options should be a subset of the caller's, i.e. an SSE4 function
4896 	 can inline an SSE2 function but an SSE2 function can't inline an SSE4
4897 	 function.  */
4898 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
4899 != callee_opts->x_ix86_isa_flags)
4900 ret = false;
4901
4902 /* See if we have the same non-isa options. */
4903 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
4904 ret = false;
4905
4906 /* See if arch, tune, etc. are the same. */
4907 else if (caller_opts->arch != callee_opts->arch)
4908 ret = false;
4909
4910 else if (caller_opts->tune != callee_opts->tune)
4911 ret = false;
4912
4913 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
4914 ret = false;
4915
4916 else if (caller_opts->branch_cost != callee_opts->branch_cost)
4917 ret = false;
4918
4919 else
4920 ret = true;
4921 }
4922
4923 return ret;
4924 }
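
/* For example (hypothetical user code, for illustration only): a callee
   declared as

     __attribute__((target ("avx"))) static int hypothetical_callee (void);

   cannot be inlined into a caller compiled without -mavx, because its ISA
   flags are not a subset of the caller's; the reverse direction (a plain
   callee inlined into an AVX caller) is allowed by the checks above.  */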
4925
4926 \f
4927 /* Remember the last target of ix86_set_current_function. */
4928 static GTY(()) tree ix86_previous_fndecl;
4929
4930 /* Invalidate ix86_previous_fndecl cache. */
4931 void
4932 ix86_reset_previous_fndecl (void)
4933 {
4934 ix86_previous_fndecl = NULL_TREE;
4935 }
4936
4937 /* Establish appropriate back-end context for processing the function
4938 FNDECL. The argument might be NULL to indicate processing at top
4939 level, outside of any function scope. */
4940 static void
4941 ix86_set_current_function (tree fndecl)
4942 {
4943 /* Only change the context if the function changes. This hook is called
4944 several times in the course of compiling a function, and we don't want to
4945 slow things down too much or call target_reinit when it isn't safe. */
4946 if (fndecl && fndecl != ix86_previous_fndecl)
4947 {
4948 tree old_tree = (ix86_previous_fndecl
4949 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
4950 : NULL_TREE);
4951
4952 tree new_tree = (fndecl
4953 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
4954 : NULL_TREE);
4955
4956 ix86_previous_fndecl = fndecl;
4957 if (old_tree == new_tree)
4958 ;
4959
4960 else if (new_tree)
4961 {
4962 cl_target_option_restore (&global_options,
4963 TREE_TARGET_OPTION (new_tree));
4964 if (TREE_TARGET_GLOBALS (new_tree))
4965 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
4966 else
4967 TREE_TARGET_GLOBALS (new_tree)
4968 = save_target_globals_default_opts ();
4969 }
4970
4971 else if (old_tree)
4972 {
4973 new_tree = target_option_current_node;
4974 cl_target_option_restore (&global_options,
4975 TREE_TARGET_OPTION (new_tree));
4976 if (TREE_TARGET_GLOBALS (new_tree))
4977 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
4978 else if (new_tree == target_option_default_node)
4979 restore_target_globals (&default_target_globals);
4980 else
4981 TREE_TARGET_GLOBALS (new_tree)
4982 = save_target_globals_default_opts ();
4983 }
4984 }
4985 }
4986
4987 \f
4988 /* Return true if this goes in large data/bss. */
4989
4990 static bool
4991 ix86_in_large_data_p (tree exp)
4992 {
4993 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
4994 return false;
4995
4996 /* Functions are never large data. */
4997 if (TREE_CODE (exp) == FUNCTION_DECL)
4998 return false;
4999
5000 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
5001 {
5002 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
5003 if (strcmp (section, ".ldata") == 0
5004 || strcmp (section, ".lbss") == 0)
5005 return true;
5006 return false;
5007 }
5008 else
5009 {
5010 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
5011
5012 /* If this is an incomplete type with size 0, then we can't put it
5013 in data because it might be too big when completed. */
5014 if (!size || size > ix86_section_threshold)
5015 return true;
5016 }
5017
5018 return false;
5019 }
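
/* For example (hypothetical, for illustration only): with -mcmodel=medium
   the threshold above is controlled by -mlarge-data-threshold, so

     static char hypothetical_buf[1 << 20];

   exceeds the default threshold, is considered large data, and is placed
   in .ldata/.lbss rather than .data/.bss.  */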
5020
5021 /* Switch to the appropriate section for output of DECL.
5022 DECL is either a `VAR_DECL' node or a constant of some sort.
5023 RELOC indicates whether forming the initial value of DECL requires
5024 link-time relocations. */
5025
5026 ATTRIBUTE_UNUSED static section *
5027 x86_64_elf_select_section (tree decl, int reloc,
5028 unsigned HOST_WIDE_INT align)
5029 {
5030 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5031 && ix86_in_large_data_p (decl))
5032 {
5033 const char *sname = NULL;
5034 unsigned int flags = SECTION_WRITE;
5035 switch (categorize_decl_for_section (decl, reloc))
5036 {
5037 case SECCAT_DATA:
5038 sname = ".ldata";
5039 break;
5040 case SECCAT_DATA_REL:
5041 sname = ".ldata.rel";
5042 break;
5043 case SECCAT_DATA_REL_LOCAL:
5044 sname = ".ldata.rel.local";
5045 break;
5046 case SECCAT_DATA_REL_RO:
5047 sname = ".ldata.rel.ro";
5048 break;
5049 case SECCAT_DATA_REL_RO_LOCAL:
5050 sname = ".ldata.rel.ro.local";
5051 break;
5052 case SECCAT_BSS:
5053 sname = ".lbss";
5054 flags |= SECTION_BSS;
5055 break;
5056 case SECCAT_RODATA:
5057 case SECCAT_RODATA_MERGE_STR:
5058 case SECCAT_RODATA_MERGE_STR_INIT:
5059 case SECCAT_RODATA_MERGE_CONST:
5060 sname = ".lrodata";
5061 flags = 0;
5062 break;
5063 case SECCAT_SRODATA:
5064 case SECCAT_SDATA:
5065 case SECCAT_SBSS:
5066 gcc_unreachable ();
5067 case SECCAT_TEXT:
5068 case SECCAT_TDATA:
5069 case SECCAT_TBSS:
5070 	case SECCAT_TEXT:
5071 	  /* We don't split these for the medium model.  Place them into
5072 break;
5073 }
5074 if (sname)
5075 {
5076 /* We might get called with string constants, but get_named_section
5077 doesn't like them as they are not DECLs. Also, we need to set
5078 flags in that case. */
5079 if (!DECL_P (decl))
5080 return get_section (sname, flags, NULL);
5081 return get_named_section (decl, sname, reloc);
5082 }
5083 }
5084 return default_elf_select_section (decl, reloc, align);
5085 }
5086
5087 /* Select a set of attributes for section NAME based on the properties
5088 of DECL and whether or not RELOC indicates that DECL's initializer
5089 might contain runtime relocations. */
5090
5091 static unsigned int ATTRIBUTE_UNUSED
5092 x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
5093 {
5094 unsigned int flags = default_section_type_flags (decl, name, reloc);
5095
5096 if (decl == NULL_TREE
5097 && (strcmp (name, ".ldata.rel.ro") == 0
5098 || strcmp (name, ".ldata.rel.ro.local") == 0))
5099 flags |= SECTION_RELRO;
5100
5101 if (strcmp (name, ".lbss") == 0
5102       || strncmp (name, ".lbss.", 6) == 0
5103       || strncmp (name, ".gnu.linkonce.lb.", 17) == 0)
5104 flags |= SECTION_BSS;
5105
5106 return flags;
5107 }
5108
5109 /* Build up a unique section name, expressed as a
5110 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
5111 RELOC indicates whether the initial value of EXP requires
5112 link-time relocations. */
5113
5114 static void ATTRIBUTE_UNUSED
5115 x86_64_elf_unique_section (tree decl, int reloc)
5116 {
5117 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5118 && ix86_in_large_data_p (decl))
5119 {
5120 const char *prefix = NULL;
5121 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
5122 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
5123
5124 switch (categorize_decl_for_section (decl, reloc))
5125 {
5126 case SECCAT_DATA:
5127 case SECCAT_DATA_REL:
5128 case SECCAT_DATA_REL_LOCAL:
5129 case SECCAT_DATA_REL_RO:
5130 case SECCAT_DATA_REL_RO_LOCAL:
5131 prefix = one_only ? ".ld" : ".ldata";
5132 break;
5133 case SECCAT_BSS:
5134 prefix = one_only ? ".lb" : ".lbss";
5135 break;
5136 case SECCAT_RODATA:
5137 case SECCAT_RODATA_MERGE_STR:
5138 case SECCAT_RODATA_MERGE_STR_INIT:
5139 case SECCAT_RODATA_MERGE_CONST:
5140 prefix = one_only ? ".lr" : ".lrodata";
5141 break;
5142 case SECCAT_SRODATA:
5143 case SECCAT_SDATA:
5144 case SECCAT_SBSS:
5145 gcc_unreachable ();
5146 case SECCAT_TEXT:
5147 case SECCAT_TDATA:
5148 case SECCAT_TBSS:
5149 	case SECCAT_TBSS:
5150 	  /* We don't split these for the medium model.  Place them into
5151 	     default sections and hope for the best.  */
5152 }
5153 if (prefix)
5154 {
5155 const char *name, *linkonce;
5156 char *string;
5157
5158 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
5159 name = targetm.strip_name_encoding (name);
5160
5161 /* If we're using one_only, then there needs to be a .gnu.linkonce
5162 prefix to the section name. */
5163 linkonce = one_only ? ".gnu.linkonce" : "";
5164
5165 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
5166
5167 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
5168 return;
5169 }
5170 }
5171 default_unique_section (decl, reloc);
5172 }
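
/* For example (hypothetical, for illustration only): for a large variable
   named hypothetical_buf the code above builds section names such as
   ".ldata.hypothetical_buf", or ".gnu.linkonce.ld.hypothetical_buf" when
   .gnu.linkonce has to stand in for COMDAT groups.  */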
5173
5174 #ifdef COMMON_ASM_OP
5175 /* This says how to output assembler code to declare an
5176 uninitialized external linkage data object.
5177
5178    For the medium model on x86-64 we need to use the .largecomm
5179    directive for large objects.  */
5180 void
5181 x86_elf_aligned_common (FILE *file,
5182 const char *name, unsigned HOST_WIDE_INT size,
5183 int align)
5184 {
5185 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5186 && size > (unsigned int)ix86_section_threshold)
5187 fputs (".largecomm\t", file);
5188 else
5189 fputs (COMMON_ASM_OP, file);
5190 assemble_name (file, name);
5191 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
5192 size, align / BITS_PER_UNIT);
5193 }
5194 #endif
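
/* For example (hypothetical output, for illustration only): a 2 MiB common
   symbol in the medium code model is emitted by the routine above as

     .largecomm	hypothetical_buf,2097152,32

   while objects below the threshold still use the ordinary COMMON_ASM_OP.  */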
5195
5196 /* Utility function for targets to use in implementing
5197 ASM_OUTPUT_ALIGNED_BSS. */
5198
5199 void
5200 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
5201 const char *name, unsigned HOST_WIDE_INT size,
5202 int align)
5203 {
5204 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5205 && size > (unsigned int)ix86_section_threshold)
5206 switch_to_section (get_named_section (decl, ".lbss", 0));
5207 else
5208 switch_to_section (bss_section);
5209 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
5210 #ifdef ASM_DECLARE_OBJECT_NAME
5211 last_assemble_variable_decl = decl;
5212 ASM_DECLARE_OBJECT_NAME (file, name, decl);
5213 #else
5214   /* The standard thing is just to output a label for the object.  */
5215 ASM_OUTPUT_LABEL (file, name);
5216 #endif /* ASM_DECLARE_OBJECT_NAME */
5217 ASM_OUTPUT_SKIP (file, size ? size : 1);
5218 }
5219 \f
5220 /* Decide whether we must probe the stack before any space allocation
5221 on this target. It's essentially TARGET_STACK_PROBE except when
5222 -fstack-check causes the stack to be already probed differently. */
5223
5224 bool
5225 ix86_target_stack_probe (void)
5226 {
5227 /* Do not probe the stack twice if static stack checking is enabled. */
5228 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5229 return false;
5230
5231 return TARGET_STACK_PROBE;
5232 }
5233 \f
5234 /* Decide whether we can make a sibling call to a function. DECL is the
5235 declaration of the function being targeted by the call and EXP is the
5236 CALL_EXPR representing the call. */
5237
5238 static bool
5239 ix86_function_ok_for_sibcall (tree decl, tree exp)
5240 {
5241 tree type, decl_or_type;
5242 rtx a, b;
5243
5244 /* If we are generating position-independent code, we cannot sibcall
5245 optimize any indirect call, or a direct call to a global function,
5246 as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
5247 if (!TARGET_MACHO
5248 && !TARGET_64BIT
5249 && flag_pic
5250 && (!decl || !targetm.binds_local_p (decl)))
5251 return false;
5252
5253 /* If we need to align the outgoing stack, then sibcalling would
5254 unalign the stack, which may break the called function. */
5255 if (ix86_minimum_incoming_stack_boundary (true)
5256 < PREFERRED_STACK_BOUNDARY)
5257 return false;
5258
5259 if (decl)
5260 {
5261 decl_or_type = decl;
5262 type = TREE_TYPE (decl);
5263 }
5264 else
5265 {
5266 /* We're looking at the CALL_EXPR, we need the type of the function. */
5267 type = CALL_EXPR_FN (exp); /* pointer expression */
5268 type = TREE_TYPE (type); /* pointer type */
5269 type = TREE_TYPE (type); /* function type */
5270 decl_or_type = type;
5271 }
5272
5273 /* Check that the return value locations are the same. Like
5274 if we are returning floats on the 80387 register stack, we cannot
5275 make a sibcall from a function that doesn't return a float to a
5276 function that does or, conversely, from a function that does return
5277 a float to a function that doesn't; the necessary stack adjustment
5278 would not be executed. This is also the place we notice
5279 differences in the return value ABI. Note that it is ok for one
5280 of the functions to have void return type as long as the return
5281 value of the other is passed in a register. */
5282 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
5283 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5284 cfun->decl, false);
5285 if (STACK_REG_P (a) || STACK_REG_P (b))
5286 {
5287 if (!rtx_equal_p (a, b))
5288 return false;
5289 }
5290 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5291 ;
5292 else if (!rtx_equal_p (a, b))
5293 return false;
5294
5295 if (TARGET_64BIT)
5296 {
5297 /* The SYSV ABI has more call-clobbered registers;
5298 disallow sibcalls from MS to SYSV. */
5299 if (cfun->machine->call_abi == MS_ABI
5300 && ix86_function_type_abi (type) == SYSV_ABI)
5301 return false;
5302 }
5303 else
5304 {
5305 /* If this call is indirect, we'll need to be able to use a
5306 call-clobbered register for the address of the target function.
5307 Make sure that all such registers are not used for passing
5308 parameters. Note that DLLIMPORT functions are indirect. */
5309 if (!decl
5310 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
5311 {
5312 if (ix86_function_regparm (type, NULL) >= 3)
5313 {
5314 /* ??? Need to count the actual number of registers to be used,
5315 not the possible number of registers. Fix later. */
5316 return false;
5317 }
5318 }
5319 }
5320
5321 /* Otherwise okay. That also includes certain types of indirect calls. */
5322 return true;
5323 }
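
/* For example (hypothetical user code, for illustration only): in 32-bit
   PIC code a tail call such as

     extern int hypothetical_ext (int);
     int hypothetical_wrap (int x) { return hypothetical_ext (x); }

   is rejected by the first check above, because reaching the global
   function through the PLT requires %ebx to be live at the call.  */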
5324
5325 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
5326 and "sseregparm" calling convention attributes;
5327 arguments as in struct attribute_spec.handler. */
5328
5329 static tree
5330 ix86_handle_cconv_attribute (tree *node, tree name,
5331 tree args,
5332 int flags ATTRIBUTE_UNUSED,
5333 bool *no_add_attrs)
5334 {
5335 if (TREE_CODE (*node) != FUNCTION_TYPE
5336 && TREE_CODE (*node) != METHOD_TYPE
5337 && TREE_CODE (*node) != FIELD_DECL
5338 && TREE_CODE (*node) != TYPE_DECL)
5339 {
5340 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5341 name);
5342 *no_add_attrs = true;
5343 return NULL_TREE;
5344 }
5345
5346   /* Can combine regparm with all attributes but fastcall and thiscall.  */
5347 if (is_attribute_p ("regparm", name))
5348 {
5349 tree cst;
5350
5351 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5352 {
5353 error ("fastcall and regparm attributes are not compatible");
5354 }
5355
5356 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5357 {
5358 	  error ("regparm and thiscall attributes are not compatible");
5359 }
5360
5361 cst = TREE_VALUE (args);
5362 if (TREE_CODE (cst) != INTEGER_CST)
5363 {
5364 warning (OPT_Wattributes,
5365 "%qE attribute requires an integer constant argument",
5366 name);
5367 *no_add_attrs = true;
5368 }
5369 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
5370 {
5371 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
5372 name, REGPARM_MAX);
5373 *no_add_attrs = true;
5374 }
5375
5376 return NULL_TREE;
5377 }
5378
5379 if (TARGET_64BIT)
5380 {
5381 /* Do not warn when emulating the MS ABI. */
5382 if ((TREE_CODE (*node) != FUNCTION_TYPE
5383 && TREE_CODE (*node) != METHOD_TYPE)
5384 || ix86_function_type_abi (*node) != MS_ABI)
5385 warning (OPT_Wattributes, "%qE attribute ignored",
5386 name);
5387 *no_add_attrs = true;
5388 return NULL_TREE;
5389 }
5390
5391 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
5392 if (is_attribute_p ("fastcall", name))
5393 {
5394 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5395 {
5396 error ("fastcall and cdecl attributes are not compatible");
5397 }
5398 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5399 {
5400 error ("fastcall and stdcall attributes are not compatible");
5401 }
5402 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
5403 {
5404 error ("fastcall and regparm attributes are not compatible");
5405 }
5406 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5407 {
5408 error ("fastcall and thiscall attributes are not compatible");
5409 }
5410 }
5411
5412 /* Can combine stdcall with fastcall (redundant), regparm and
5413 sseregparm. */
5414 else if (is_attribute_p ("stdcall", name))
5415 {
5416 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5417 {
5418 error ("stdcall and cdecl attributes are not compatible");
5419 }
5420 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5421 {
5422 error ("stdcall and fastcall attributes are not compatible");
5423 }
5424 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5425 {
5426 error ("stdcall and thiscall attributes are not compatible");
5427 }
5428 }
5429
5430 /* Can combine cdecl with regparm and sseregparm. */
5431 else if (is_attribute_p ("cdecl", name))
5432 {
5433 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5434 {
5435 error ("stdcall and cdecl attributes are not compatible");
5436 }
5437 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5438 {
5439 error ("fastcall and cdecl attributes are not compatible");
5440 }
5441 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5442 {
5443 error ("cdecl and thiscall attributes are not compatible");
5444 }
5445 }
5446 else if (is_attribute_p ("thiscall", name))
5447 {
5448 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
5449 	warning (OPT_Wattributes, "%qE attribute is used for non-class method",
5450 name);
5451 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5452 {
5453 error ("stdcall and thiscall attributes are not compatible");
5454 }
5455 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5456 {
5457 error ("fastcall and thiscall attributes are not compatible");
5458 }
5459 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5460 {
5461 error ("cdecl and thiscall attributes are not compatible");
5462 }
5463 }
5464
5465 /* Can combine sseregparm with all attributes. */
5466
5467 return NULL_TREE;
5468 }
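
/* For example (hypothetical user code, for illustration only):

     void hypothetical_cb (int) __attribute__((fastcall, stdcall));

   is diagnosed above with "fastcall and stdcall attributes are not
   compatible", while sseregparm may be combined with any of them.  */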
5469
5470 /* The transactional memory builtins are implicitly regparm or fastcall
5471 depending on the ABI. Override the generic do-nothing attribute that
5472 these builtins were declared with, and replace it with one of the two
5473 attributes that we expect elsewhere. */
5474
5475 static tree
5476 ix86_handle_tm_regparm_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
5477 tree args ATTRIBUTE_UNUSED,
5478 int flags, bool *no_add_attrs)
5479 {
5480 tree alt;
5481
5482 /* In no case do we want to add the placeholder attribute. */
5483 *no_add_attrs = true;
5484
5485 /* The 64-bit ABI is unchanged for transactional memory. */
5486 if (TARGET_64BIT)
5487 return NULL_TREE;
5488
5489   /* ??? Is there a better way to validate 32-bit Windows?  We have
5490 cfun->machine->call_abi, but that seems to be set only for 64-bit. */
5491 if (CHECK_STACK_LIMIT > 0)
5492 alt = tree_cons (get_identifier ("fastcall"), NULL, NULL);
5493 else
5494 {
5495 alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL);
5496 alt = tree_cons (get_identifier ("regparm"), alt, NULL);
5497 }
5498 decl_attributes (node, alt, flags);
5499
5500 return NULL_TREE;
5501 }
5502
5503 /* This function determines from TYPE the calling-convention. */
5504
5505 unsigned int
5506 ix86_get_callcvt (const_tree type)
5507 {
5508 unsigned int ret = 0;
5509 bool is_stdarg;
5510 tree attrs;
5511
5512 if (TARGET_64BIT)
5513 return IX86_CALLCVT_CDECL;
5514
5515 attrs = TYPE_ATTRIBUTES (type);
5516 if (attrs != NULL_TREE)
5517 {
5518 if (lookup_attribute ("cdecl", attrs))
5519 ret |= IX86_CALLCVT_CDECL;
5520 else if (lookup_attribute ("stdcall", attrs))
5521 ret |= IX86_CALLCVT_STDCALL;
5522 else if (lookup_attribute ("fastcall", attrs))
5523 ret |= IX86_CALLCVT_FASTCALL;
5524 else if (lookup_attribute ("thiscall", attrs))
5525 ret |= IX86_CALLCVT_THISCALL;
5526
5527       /* Regparm isn't allowed for thiscall and fastcall.  */
5528 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
5529 {
5530 if (lookup_attribute ("regparm", attrs))
5531 ret |= IX86_CALLCVT_REGPARM;
5532 if (lookup_attribute ("sseregparm", attrs))
5533 ret |= IX86_CALLCVT_SSEREGPARM;
5534 }
5535
5536 if (IX86_BASE_CALLCVT(ret) != 0)
5537 return ret;
5538 }
5539
5540 is_stdarg = stdarg_p (type);
5541 if (TARGET_RTD && !is_stdarg)
5542 return IX86_CALLCVT_STDCALL | ret;
5543
5544 if (ret != 0
5545 || is_stdarg
5546 || TREE_CODE (type) != METHOD_TYPE
5547 || ix86_function_type_abi (type) != MS_ABI)
5548 return IX86_CALLCVT_CDECL | ret;
5549
5550 return IX86_CALLCVT_THISCALL;
5551 }
5552
5553 /* Return 0 if the attributes for two types are incompatible, 1 if they
5554 are compatible, and 2 if they are nearly compatible (which causes a
5555 warning to be generated). */
5556
5557 static int
5558 ix86_comp_type_attributes (const_tree type1, const_tree type2)
5559 {
5560 unsigned int ccvt1, ccvt2;
5561
5562 if (TREE_CODE (type1) != FUNCTION_TYPE
5563 && TREE_CODE (type1) != METHOD_TYPE)
5564 return 1;
5565
5566 ccvt1 = ix86_get_callcvt (type1);
5567 ccvt2 = ix86_get_callcvt (type2);
5568 if (ccvt1 != ccvt2)
5569 return 0;
5570 if (ix86_function_regparm (type1, NULL)
5571 != ix86_function_regparm (type2, NULL))
5572 return 0;
5573
5574 return 1;
5575 }
5576 \f
5577 /* Return the regparm value for a function with the indicated TYPE and DECL.
5578 DECL may be NULL when calling function indirectly
5579 or considering a libcall. */
5580
5581 static int
5582 ix86_function_regparm (const_tree type, const_tree decl)
5583 {
5584 tree attr;
5585 int regparm;
5586 unsigned int ccvt;
5587
5588 if (TARGET_64BIT)
5589 return (ix86_function_type_abi (type) == SYSV_ABI
5590 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
5591 ccvt = ix86_get_callcvt (type);
5592 regparm = ix86_regparm;
5593
5594 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
5595 {
5596 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
5597 if (attr)
5598 {
5599 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
5600 return regparm;
5601 }
5602 }
5603 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5604 return 2;
5605 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5606 return 1;
5607
5608 /* Use register calling convention for local functions when possible. */
5609 if (decl
5610 && TREE_CODE (decl) == FUNCTION_DECL
5611 /* Caller and callee must agree on the calling convention, so
5612 	 checking just `optimize' here would mean that with
5613 	 __attribute__((optimize (...))) the caller could use the regparm convention
5614 	 and the callee not, or vice versa.  Instead look at whether the callee
5615 is optimized or not. */
5616 && opt_for_fn (decl, optimize)
5617 && !(profile_flag && !flag_fentry))
5618 {
5619 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5620 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
5621 if (i && i->local && i->can_change_signature)
5622 {
5623 int local_regparm, globals = 0, regno;
5624
5625 /* Make sure no regparm register is taken by a
5626 fixed register variable. */
5627 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
5628 if (fixed_regs[local_regparm])
5629 break;
5630
5631 /* We don't want to use regparm(3) for nested functions as
5632 these use a static chain pointer in the third argument. */
5633 if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
5634 local_regparm = 2;
5635
5636 /* In 32-bit mode save a register for the split stack. */
5637 if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
5638 local_regparm = 2;
5639
5640 /* Each fixed register usage increases register pressure,
5641 	     so fewer registers should be used for argument passing.
5642 	     This functionality can be overridden by an explicit
5643 regparm value. */
5644 for (regno = AX_REG; regno <= DI_REG; regno++)
5645 if (fixed_regs[regno])
5646 globals++;
5647
5648 local_regparm
5649 = globals < local_regparm ? local_regparm - globals : 0;
5650
5651 if (local_regparm > regparm)
5652 regparm = local_regparm;
5653 }
5654 }
5655
5656 return regparm;
5657 }
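
/* For example (hypothetical user code, for illustration only): in 32-bit
   code

     __attribute__((regparm (3))) int hypothetical_add3 (int a, int b, int c);

   passes A, B and C in %eax, %edx and %ecx instead of on the stack; 3 is
   the value the function above returns for such a TYPE.  */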
5658
5659 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5660 DFmode (2) arguments in SSE registers for a function with the
5661 indicated TYPE and DECL. DECL may be NULL when calling function
5662 indirectly or considering a libcall. Otherwise return 0. */
5663
5664 static int
5665 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
5666 {
5667 gcc_assert (!TARGET_64BIT);
5668
5669 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5670 by the sseregparm attribute. */
5671 if (TARGET_SSEREGPARM
5672 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
5673 {
5674 if (!TARGET_SSE)
5675 {
5676 if (warn)
5677 {
5678 if (decl)
5679 error ("calling %qD with attribute sseregparm without "
5680 "SSE/SSE2 enabled", decl);
5681 else
5682 error ("calling %qT with attribute sseregparm without "
5683 "SSE/SSE2 enabled", type);
5684 }
5685 return 0;
5686 }
5687
5688 return 2;
5689 }
5690
5691 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5692 (and DFmode for SSE2) arguments in SSE registers. */
5693 if (decl && TARGET_SSE_MATH && optimize
5694 && !(profile_flag && !flag_fentry))
5695 {
5696 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5697 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
5698 if (i && i->local && i->can_change_signature)
5699 return TARGET_SSE2 ? 2 : 1;
5700 }
5701
5702 return 0;
5703 }
5704
5705 /* Return true if EAX is live at the start of the function. Used by
5706 ix86_expand_prologue to determine if we need special help before
5707 calling allocate_stack_worker. */
5708
5709 static bool
5710 ix86_eax_live_at_start_p (void)
5711 {
5712 /* Cheat. Don't bother working forward from ix86_function_regparm
5713 to the function type to whether an actual argument is located in
5714 eax. Instead just look at cfg info, which is still close enough
5715 to correct at this point. This gives false positives for broken
5716 functions that might use uninitialized data that happens to be
5717 allocated in eax, but who cares? */
5718 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
5719 }
5720
5721 static bool
5722 ix86_keep_aggregate_return_pointer (tree fntype)
5723 {
5724 tree attr;
5725
5726 if (!TARGET_64BIT)
5727 {
5728 attr = lookup_attribute ("callee_pop_aggregate_return",
5729 TYPE_ATTRIBUTES (fntype));
5730 if (attr)
5731 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
5732
5733 /* For 32-bit MS-ABI the default is to keep aggregate
5734 return pointer. */
5735 if (ix86_function_type_abi (fntype) == MS_ABI)
5736 return true;
5737 }
5738 return KEEP_AGGREGATE_RETURN_POINTER != 0;
5739 }
5740
5741 /* Value is the number of bytes of arguments automatically
5742 popped when returning from a subroutine call.
5743 FUNDECL is the declaration node of the function (as a tree),
5744 FUNTYPE is the data type of the function (as a tree),
5745 or for a library call it is an identifier node for the subroutine name.
5746 SIZE is the number of bytes of arguments passed on the stack.
5747
5748 On the 80386, the RTD insn may be used to pop them if the number
5749 of args is fixed, but if the number is variable then the caller
5750 must pop them all. RTD can't be used for library calls now
5751 because the library is compiled with the Unix compiler.
5752 Use of RTD is a selectable option, since it is incompatible with
5753 standard Unix calling sequences. If the option is not selected,
5754 the caller must always pop the args.
5755
5756 The attribute stdcall is equivalent to RTD on a per module basis. */
5757
5758 static int
5759 ix86_return_pops_args (tree fundecl, tree funtype, int size)
5760 {
5761 unsigned int ccvt;
5762
5763 /* None of the 64-bit ABIs pop arguments. */
5764 if (TARGET_64BIT)
5765 return 0;
5766
5767 ccvt = ix86_get_callcvt (funtype);
5768
5769 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
5770 | IX86_CALLCVT_THISCALL)) != 0
5771 && ! stdarg_p (funtype))
5772 return size;
5773
5774 /* Lose any fake structure return argument if it is passed on the stack. */
5775 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
5776 && !ix86_keep_aggregate_return_pointer (funtype))
5777 {
5778 int nregs = ix86_function_regparm (funtype, fundecl);
5779 if (nregs == 0)
5780 return GET_MODE_SIZE (Pmode);
5781 }
5782
5783 return 0;
5784 }
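
/* For example (hypothetical user code, for illustration only): a 32-bit
   stdcall function

     __attribute__((stdcall)) void hypothetical_cb (int a, int b);

   pops its 8 bytes of stack arguments itself ("ret $8"), so the function
   above returns SIZE; for a cdecl function it returns 0 and the caller
   pops the arguments.  */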
5785
5786 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
5787
5788 static bool
5789 ix86_legitimate_combined_insn (rtx insn)
5790 {
5791 /* Check operand constraints in case hard registers were propagated
5792 into insn pattern. This check prevents combine pass from
5793 generating insn patterns with invalid hard register operands.
5794 These invalid insns can eventually confuse reload to error out
5795 with a spill failure. See also PRs 46829 and 46843. */
5796 if ((INSN_CODE (insn) = recog (PATTERN (insn), insn, 0)) >= 0)
5797 {
5798 int i;
5799
5800 extract_insn (insn);
5801 preprocess_constraints ();
5802
5803 for (i = 0; i < recog_data.n_operands; i++)
5804 {
5805 rtx op = recog_data.operand[i];
5806 enum machine_mode mode = GET_MODE (op);
5807 struct operand_alternative *op_alt;
5808 int offset = 0;
5809 bool win;
5810 int j;
5811
5812 /* For pre-AVX disallow unaligned loads/stores where the
5813 instructions don't support it. */
5814 if (!TARGET_AVX
5815 && VECTOR_MODE_P (GET_MODE (op))
5816 && misaligned_operand (op, GET_MODE (op)))
5817 {
5818 int min_align = get_attr_ssememalign (insn);
5819 if (min_align == 0)
5820 return false;
5821 }
5822
5823 /* A unary operator may be accepted by the predicate, but it
5824 is irrelevant for matching constraints. */
5825 if (UNARY_P (op))
5826 op = XEXP (op, 0);
5827
5828 if (GET_CODE (op) == SUBREG)
5829 {
5830 if (REG_P (SUBREG_REG (op))
5831 && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
5832 offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
5833 GET_MODE (SUBREG_REG (op)),
5834 SUBREG_BYTE (op),
5835 GET_MODE (op));
5836 op = SUBREG_REG (op);
5837 }
5838
5839 if (!(REG_P (op) && HARD_REGISTER_P (op)))
5840 continue;
5841
5842 op_alt = recog_op_alt[i];
5843
5844 /* Operand has no constraints, anything is OK. */
5845 win = !recog_data.n_alternatives;
5846
5847 for (j = 0; j < recog_data.n_alternatives; j++)
5848 {
5849 if (op_alt[j].anything_ok
5850 || (op_alt[j].matches != -1
5851 && operands_match_p
5852 (recog_data.operand[i],
5853 recog_data.operand[op_alt[j].matches]))
5854 || reg_fits_class_p (op, op_alt[j].cl, offset, mode))
5855 {
5856 win = true;
5857 break;
5858 }
5859 }
5860
5861 if (!win)
5862 return false;
5863 }
5864 }
5865
5866 return true;
5867 }
5868 \f
5869 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
5870
5871 static unsigned HOST_WIDE_INT
5872 ix86_asan_shadow_offset (void)
5873 {
5874 return TARGET_LP64 ? (TARGET_MACHO ? (HOST_WIDE_INT_1 << 44)
5875 : HOST_WIDE_INT_C (0x7fff8000))
5876 : (HOST_WIDE_INT_1 << 29);
5877 }
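
/* For example (hypothetical address, for illustration only): AddressSanitizer
   checks the shadow byte at (addr >> 3) + offset, so with the LP64 offset
   above an access to 0x7fffffffe000 is checked at
   (0x7fffffffe000 >> 3) + 0x7fff8000.  */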
5878 \f
5879 /* Argument support functions. */
5880
5881 /* Return true when a register may be used to pass function parameters.  */
5882 bool
5883 ix86_function_arg_regno_p (int regno)
5884 {
5885 int i;
5886 const int *parm_regs;
5887
5888 if (!TARGET_64BIT)
5889 {
5890 if (TARGET_MACHO)
5891 return (regno < REGPARM_MAX
5892 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
5893 else
5894 return (regno < REGPARM_MAX
5895 || (TARGET_MMX && MMX_REGNO_P (regno)
5896 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
5897 || (TARGET_SSE && SSE_REGNO_P (regno)
5898 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
5899 }
5900
5901 if (TARGET_SSE && SSE_REGNO_P (regno)
5902 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
5903 return true;
5904
5905 /* TODO: The function should depend on current function ABI but
5906 builtins.c would need updating then. Therefore we use the
5907 default ABI. */
5908
5909 /* RAX is used as hidden argument to va_arg functions. */
5910 if (ix86_abi == SYSV_ABI && regno == AX_REG)
5911 return true;
5912
5913 if (ix86_abi == MS_ABI)
5914 parm_regs = x86_64_ms_abi_int_parameter_registers;
5915 else
5916 parm_regs = x86_64_int_parameter_registers;
5917 for (i = 0; i < (ix86_abi == MS_ABI
5918 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
5919 if (regno == parm_regs[i])
5920 return true;
5921 return false;
5922 }
5923
5924 /* Return true if we do not know how to pass TYPE solely in registers.  */
5925
5926 static bool
5927 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
5928 {
5929 if (must_pass_in_stack_var_size_or_pad (mode, type))
5930 return true;
5931
5932 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
5933 The layout_type routine is crafty and tries to trick us into passing
5934 currently unsupported vector types on the stack by using TImode. */
5935 return (!TARGET_64BIT && mode == TImode
5936 && type && TREE_CODE (type) != VECTOR_TYPE);
5937 }
5938
5939 /* Return the size, in bytes, of the area reserved for arguments passed
5940    in registers for the function represented by FNDECL, which depends on
5941    the ABI in use.  */
5942 int
5943 ix86_reg_parm_stack_space (const_tree fndecl)
5944 {
5945 enum calling_abi call_abi = SYSV_ABI;
5946 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
5947 call_abi = ix86_function_abi (fndecl);
5948 else
5949 call_abi = ix86_function_type_abi (fndecl);
5950 if (TARGET_64BIT && call_abi == MS_ABI)
5951 return 32;
5952 return 0;
5953 }
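
/* For example (for illustration only): the 32 bytes returned above for the
   64-bit MS ABI are the "home" (shadow) area for the four register
   parameters; a caller typically emits something like "subq $32, %rsp"
   before a call even when fewer than four arguments are passed.  */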
5954
5955 /* Return SYSV_ABI or MS_ABI, depending on FNTYPE, specifying the
5956    call ABI used.  */
5957 enum calling_abi
5958 ix86_function_type_abi (const_tree fntype)
5959 {
5960 if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
5961 {
5962 enum calling_abi abi = ix86_abi;
5963 if (abi == SYSV_ABI)
5964 {
5965 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
5966 abi = MS_ABI;
5967 }
5968 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
5969 abi = SYSV_ABI;
5970 return abi;
5971 }
5972 return ix86_abi;
5973 }
5974
5975 /* We add this as a workaround in order to use libc_has_function
5976 hook in i386.md. */
5977 bool
5978 ix86_libc_has_function (enum function_class fn_class)
5979 {
5980 return targetm.libc_has_function (fn_class);
5981 }
5982
5983 static bool
5984 ix86_function_ms_hook_prologue (const_tree fn)
5985 {
5986 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
5987 {
5988 if (decl_function_context (fn) != NULL_TREE)
5989 error_at (DECL_SOURCE_LOCATION (fn),
5990 "ms_hook_prologue is not compatible with nested function");
5991 else
5992 return true;
5993 }
5994 return false;
5995 }
5996
5997 static enum calling_abi
5998 ix86_function_abi (const_tree fndecl)
5999 {
6000 if (! fndecl)
6001 return ix86_abi;
6002 return ix86_function_type_abi (TREE_TYPE (fndecl));
6003 }
6004
6005 /* Return SYSV_ABI or MS_ABI, depending on cfun, specifying the
6006    call ABI used.  */
6007 enum calling_abi
6008 ix86_cfun_abi (void)
6009 {
6010 if (! cfun)
6011 return ix86_abi;
6012 return cfun->machine->call_abi;
6013 }
6014
6015 /* Write the extra assembler code needed to declare a function properly. */
6016
6017 void
6018 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
6019 tree decl)
6020 {
6021 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
6022
6023 if (is_ms_hook)
6024 {
6025 int i, filler_count = (TARGET_64BIT ? 32 : 16);
6026 unsigned int filler_cc = 0xcccccccc;
6027
6028 for (i = 0; i < filler_count; i += 4)
6029 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
6030 }
6031
6032 #ifdef SUBTARGET_ASM_UNWIND_INIT
6033 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
6034 #endif
6035
6036 ASM_OUTPUT_LABEL (asm_out_file, fname);
6037
6038 /* Output magic byte marker, if hot-patch attribute is set. */
6039 if (is_ms_hook)
6040 {
6041 if (TARGET_64BIT)
6042 {
6043 /* leaq [%rsp + 0], %rsp */
6044 asm_fprintf (asm_out_file, ASM_BYTE
6045 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
6046 }
6047 else
6048 {
6049 /* movl.s %edi, %edi
6050 push %ebp
6051 movl.s %esp, %ebp */
6052 asm_fprintf (asm_out_file, ASM_BYTE
6053 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
6054 }
6055 }
6056 }
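
/* For example (hypothetical user code, for illustration only):

     __attribute__((ms_hook_prologue)) void hypothetical_hook (void);

   gets the 0xcc filler bytes before its label and, in 32-bit code, the
   "movl.s %edi, %edi; push %ebp; movl.s %esp, %ebp" byte sequence emitted
   above, leaving room for a runtime patcher to insert a jump.  */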
6057
6058 /* regclass.c */
6059 extern void init_regs (void);
6060
6061 /* Implementation of the call ABI switching target hook.  The call-used
6062    register sets specific to FNDECL are selected.  See also
6063    ix86_conditional_register_usage for more details.  */
6064 void
6065 ix86_call_abi_override (const_tree fndecl)
6066 {
6067 if (fndecl == NULL_TREE)
6068 cfun->machine->call_abi = ix86_abi;
6069 else
6070 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
6071 }
6072
6073 /* The 64-bit MS and SYSV ABIs have different sets of call-used registers.  Avoid
6074 expensive re-initialization of init_regs each time we switch function context
6075 since this is needed only during RTL expansion. */
6076 static void
6077 ix86_maybe_switch_abi (void)
6078 {
6079   if (TARGET_64BIT
6080       && call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
6081 reinit_regs ();
6082 }
6083
6084 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6085 for a call to a function whose data type is FNTYPE.
6086 For a library call, FNTYPE is 0. */
6087
6088 void
6089 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
6090 tree fntype, /* tree ptr for function decl */
6091 rtx libname, /* SYMBOL_REF of library name or 0 */
6092 tree fndecl,
6093 int caller)
6094 {
6095 struct cgraph_local_info *i;
6096
6097 memset (cum, 0, sizeof (*cum));
6098
6099 if (fndecl)
6100 {
6101 i = cgraph_local_info (fndecl);
6102 cum->call_abi = ix86_function_abi (fndecl);
6103 }
6104 else
6105 {
6106 i = NULL;
6107 cum->call_abi = ix86_function_type_abi (fntype);
6108 }
6109
6110 cum->caller = caller;
6111
6112 /* Set up the number of registers to use for passing arguments. */
6113 cum->nregs = ix86_regparm;
6114 if (TARGET_64BIT)
6115 {
6116 cum->nregs = (cum->call_abi == SYSV_ABI
6117 ? X86_64_REGPARM_MAX
6118 : X86_64_MS_REGPARM_MAX);
6119 }
6120 if (TARGET_SSE)
6121 {
6122 cum->sse_nregs = SSE_REGPARM_MAX;
6123 if (TARGET_64BIT)
6124 {
6125 cum->sse_nregs = (cum->call_abi == SYSV_ABI
6126 ? X86_64_SSE_REGPARM_MAX
6127 : X86_64_MS_SSE_REGPARM_MAX);
6128 }
6129 }
6130 if (TARGET_MMX)
6131 cum->mmx_nregs = MMX_REGPARM_MAX;
6132 cum->warn_avx = true;
6133 cum->warn_sse = true;
6134 cum->warn_mmx = true;
6135
6136   /* Because types might mismatch between caller and callee, we need to
6137      use the actual type of the function for local calls.
6138 FIXME: cgraph_analyze can be told to actually record if function uses
6139 va_start so for local functions maybe_vaarg can be made aggressive
6140 helping K&R code.
6141      FIXME: once the type system is fixed, we won't need this code anymore.  */
6142 if (i && i->local && i->can_change_signature)
6143 fntype = TREE_TYPE (fndecl);
6144 cum->maybe_vaarg = (fntype
6145 ? (!prototype_p (fntype) || stdarg_p (fntype))
6146 : !libname);
6147
6148 if (!TARGET_64BIT)
6149 {
6150 /* If there are variable arguments, then we won't pass anything
6151 in registers in 32-bit mode. */
6152 if (stdarg_p (fntype))
6153 {
6154 cum->nregs = 0;
6155 cum->sse_nregs = 0;
6156 cum->mmx_nregs = 0;
6157 cum->warn_avx = 0;
6158 cum->warn_sse = 0;
6159 cum->warn_mmx = 0;
6160 return;
6161 }
6162
6163 /* Use ecx and edx registers if function has fastcall attribute,
6164 else look for regparm information. */
6165 if (fntype)
6166 {
6167 unsigned int ccvt = ix86_get_callcvt (fntype);
6168 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
6169 {
6170 cum->nregs = 1;
6171 cum->fastcall = 1; /* Same first register as in fastcall. */
6172 }
6173 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
6174 {
6175 cum->nregs = 2;
6176 cum->fastcall = 1;
6177 }
6178 else
6179 cum->nregs = ix86_function_regparm (fntype, fndecl);
6180 }
6181
6182 /* Set up the number of SSE registers used for passing SFmode
6183 and DFmode arguments. Warn for mismatching ABI. */
6184 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
6185 }
6186 }
6187
6188 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
6189 But in the case of vector types, it is some vector mode.
6190
6191 When we have only some of our vector isa extensions enabled, then there
6192 are some modes for which vector_mode_supported_p is false. For these
6193 modes, the generic vector support in gcc will choose some non-vector mode
6194 in order to implement the type. By computing the natural mode, we'll
6195 select the proper ABI location for the operand and not depend on whatever
6196 the middle-end decides to do with these vector types.
6197
6198    The middle-end can't deal with vector types > 16 bytes.  In this
6199    case, we return the original mode and warn about the ABI change if CUM isn't
6200 NULL.
6201
6202    If IN_RETURN is true, warn about the ABI change if the vector mode isn't
6203    available for the function return value.  */
6204
6205 static enum machine_mode
6206 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
6207 bool in_return)
6208 {
6209 enum machine_mode mode = TYPE_MODE (type);
6210
6211 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
6212 {
6213 HOST_WIDE_INT size = int_size_in_bytes (type);
6214 if ((size == 8 || size == 16 || size == 32)
6215 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
6216 && TYPE_VECTOR_SUBPARTS (type) > 1)
6217 {
6218 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
6219
6220 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6221 mode = MIN_MODE_VECTOR_FLOAT;
6222 else
6223 mode = MIN_MODE_VECTOR_INT;
6224
6225 /* Get the mode which has this inner mode and number of units. */
6226 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
6227 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
6228 && GET_MODE_INNER (mode) == innermode)
6229 {
6230 if (size == 32 && !TARGET_AVX)
6231 {
6232 static bool warnedavx;
6233 static bool warnedavx_ret;
6234
6235 if (cum
6236 && !warnedavx
6237 && cum->warn_avx)
6238 {
6239 warnedavx = true;
6240 warning (0, "AVX vector argument without AVX "
6241 "enabled changes the ABI");
6242 }
 6243 else if (in_return && !warnedavx_ret)
6244 {
6245 warnedavx_ret = true;
6246 warning (0, "AVX vector return without AVX "
6247 "enabled changes the ABI");
6248 }
6249
6250 return TYPE_MODE (type);
6251 }
6252 else if (((size == 8 && TARGET_64BIT) || size == 16)
6253 && !TARGET_SSE)
6254 {
6255 static bool warnedsse;
6256 static bool warnedsse_ret;
6257
6258 if (cum
6259 && !warnedsse
6260 && cum->warn_sse)
6261 {
6262 warnedsse = true;
6263 warning (0, "SSE vector argument without SSE "
6264 "enabled changes the ABI");
6265 }
6266 else if (!TARGET_64BIT
6267 && in_return
 6268 && !warnedsse_ret)
6269 {
6270 warnedsse_ret = true;
6271 warning (0, "SSE vector return without SSE "
6272 "enabled changes the ABI");
6273 }
6274 }
6275 else if ((size == 8 && !TARGET_64BIT) && !TARGET_MMX)
6276 {
6277 static bool warnedmmx;
6278 static bool warnedmmx_ret;
6279
6280 if (cum
6281 && !warnedmmx
6282 && cum->warn_mmx)
6283 {
6284 warnedmmx = true;
6285 warning (0, "MMX vector argument without MMX "
6286 "enabled changes the ABI");
6287 }
 6288 else if (in_return && !warnedmmx_ret)
6289 {
6290 warnedmmx_ret = true;
6291 warning (0, "MMX vector return without MMX "
6292 "enabled changes the ABI");
6293 }
6294 }
6295 return mode;
6296 }
6297
6298 gcc_unreachable ();
6299 }
6300 }
6301
6302 return mode;
6303 }
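
/* Illustrative sketch of type_natural_mode above: for a GCC vector type
   such as float __attribute__ ((vector_size (32))), the natural mode is
   V8SFmode when AVX is enabled; without AVX the original TYPE_MODE is
   returned instead and a one-time "changes the ABI" warning may be issued,
   so argument passing follows the psABI rather than whatever mode the
   middle-end falls back to.  */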
6304
6305 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
6306 this may not agree with the mode that the type system has chosen for the
6307 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
6308 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
6309
6310 static rtx
6311 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
6312 unsigned int regno)
6313 {
6314 rtx tmp;
6315
6316 if (orig_mode != BLKmode)
6317 tmp = gen_rtx_REG (orig_mode, regno);
6318 else
6319 {
6320 tmp = gen_rtx_REG (mode, regno);
6321 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
6322 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
6323 }
6324
6325 return tmp;
6326 }
6327
6328 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
6329 of this code is to classify each 8bytes of incoming argument by the register
6330 class and assign registers accordingly. */
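
/* As a minimal illustration of the classification below, consider

     struct s { double d; long l; };

   which occupies 16 bytes: the first eightbyte (the double) is classified
   as SSE and the second eightbyte (the long) as INTEGER, so the struct is
   passed in one SSE register and one integer register (e.g. %xmm0 and
   %rdi when it is the first argument).  */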
6331
6332 /* Return the union class of CLASS1 and CLASS2.
6333 See the x86-64 PS ABI for details. */
6334
6335 static enum x86_64_reg_class
6336 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
6337 {
6338 /* Rule #1: If both classes are equal, this is the resulting class. */
6339 if (class1 == class2)
6340 return class1;
6341
6342 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
6343 the other class. */
6344 if (class1 == X86_64_NO_CLASS)
6345 return class2;
6346 if (class2 == X86_64_NO_CLASS)
6347 return class1;
6348
6349 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
6350 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
6351 return X86_64_MEMORY_CLASS;
6352
6353 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
6354 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
6355 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
6356 return X86_64_INTEGERSI_CLASS;
6357 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
6358 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
6359 return X86_64_INTEGER_CLASS;
6360
6361 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
6362 MEMORY is used. */
6363 if (class1 == X86_64_X87_CLASS
6364 || class1 == X86_64_X87UP_CLASS
6365 || class1 == X86_64_COMPLEX_X87_CLASS
6366 || class2 == X86_64_X87_CLASS
6367 || class2 == X86_64_X87UP_CLASS
6368 || class2 == X86_64_COMPLEX_X87_CLASS)
6369 return X86_64_MEMORY_CLASS;
6370
6371 /* Rule #6: Otherwise class SSE is used. */
6372 return X86_64_SSE_CLASS;
6373 }
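
/* A few illustrative results of the rules above:

     merge_classes (X86_64_NO_CLASS, X86_64_SSE_CLASS)      -> X86_64_SSE_CLASS
     merge_classes (X86_64_INTEGERSI_CLASS,
                    X86_64_SSESF_CLASS)                     -> X86_64_INTEGERSI_CLASS
     merge_classes (X86_64_INTEGER_CLASS, X86_64_SSE_CLASS) -> X86_64_INTEGER_CLASS
     merge_classes (X86_64_X87_CLASS, X86_64_SSE_CLASS)     -> X86_64_MEMORY_CLASS  */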
6374
6375 /* Classify the argument of type TYPE and mode MODE.
6376 CLASSES will be filled by the register class used to pass each word
6377 of the operand. The number of words is returned. In case the parameter
6378 should be passed in memory, 0 is returned. As a special case for zero
6379 sized containers, classes[0] will be NO_CLASS and 1 is returned.
6380
 6381 BIT_OFFSET is used internally for handling records and specifies the
 6382 offset in bits modulo 512 to avoid overflow cases.
6383
6384 See the x86-64 PS ABI for details.
6385 */
6386
6387 static int
6388 classify_argument (enum machine_mode mode, const_tree type,
6389 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
6390 {
6391 HOST_WIDE_INT bytes =
6392 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6393 int words
6394 = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6395
6396 /* Variable sized entities are always passed/returned in memory. */
6397 if (bytes < 0)
6398 return 0;
6399
6400 if (mode != VOIDmode
6401 && targetm.calls.must_pass_in_stack (mode, type))
6402 return 0;
6403
6404 if (type && AGGREGATE_TYPE_P (type))
6405 {
6406 int i;
6407 tree field;
6408 enum x86_64_reg_class subclasses[MAX_CLASSES];
6409
6410 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
6411 if (bytes > 32)
6412 return 0;
6413
6414 for (i = 0; i < words; i++)
6415 classes[i] = X86_64_NO_CLASS;
6416
 6417 /* Zero-sized arrays or structures are NO_CLASS. We return 0 to
 6418 signal the memory class, so handle it as a special case. */
6419 if (!words)
6420 {
6421 classes[0] = X86_64_NO_CLASS;
6422 return 1;
6423 }
6424
6425 /* Classify each field of record and merge classes. */
6426 switch (TREE_CODE (type))
6427 {
6428 case RECORD_TYPE:
6429 /* And now merge the fields of structure. */
6430 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6431 {
6432 if (TREE_CODE (field) == FIELD_DECL)
6433 {
6434 int num;
6435
6436 if (TREE_TYPE (field) == error_mark_node)
6437 continue;
6438
6439 /* Bitfields are always classified as integer. Handle them
6440 early, since later code would consider them to be
6441 misaligned integers. */
6442 if (DECL_BIT_FIELD (field))
6443 {
6444 for (i = (int_bit_position (field)
6445 + (bit_offset % 64)) / 8 / 8;
6446 i < ((int_bit_position (field) + (bit_offset % 64))
6447 + tree_to_shwi (DECL_SIZE (field))
6448 + 63) / 8 / 8; i++)
6449 classes[i] =
6450 merge_classes (X86_64_INTEGER_CLASS,
6451 classes[i]);
6452 }
6453 else
6454 {
6455 int pos;
6456
6457 type = TREE_TYPE (field);
6458
6459 /* Flexible array member is ignored. */
6460 if (TYPE_MODE (type) == BLKmode
6461 && TREE_CODE (type) == ARRAY_TYPE
6462 && TYPE_SIZE (type) == NULL_TREE
6463 && TYPE_DOMAIN (type) != NULL_TREE
6464 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
6465 == NULL_TREE))
6466 {
6467 static bool warned;
6468
6469 if (!warned && warn_psabi)
6470 {
6471 warned = true;
6472 inform (input_location,
6473 "the ABI of passing struct with"
6474 " a flexible array member has"
6475 " changed in GCC 4.4");
6476 }
6477 continue;
6478 }
6479 num = classify_argument (TYPE_MODE (type), type,
6480 subclasses,
6481 (int_bit_position (field)
6482 + bit_offset) % 512);
6483 if (!num)
6484 return 0;
6485 pos = (int_bit_position (field)
6486 + (bit_offset % 64)) / 8 / 8;
6487 for (i = 0; i < num && (i + pos) < words; i++)
6488 classes[i + pos] =
6489 merge_classes (subclasses[i], classes[i + pos]);
6490 }
6491 }
6492 }
6493 break;
6494
6495 case ARRAY_TYPE:
6496 /* Arrays are handled as small records. */
6497 {
6498 int num;
6499 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
6500 TREE_TYPE (type), subclasses, bit_offset);
6501 if (!num)
6502 return 0;
6503
6504 /* The partial classes are now full classes. */
6505 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
6506 subclasses[0] = X86_64_SSE_CLASS;
6507 if (subclasses[0] == X86_64_INTEGERSI_CLASS
6508 && !((bit_offset % 64) == 0 && bytes == 4))
6509 subclasses[0] = X86_64_INTEGER_CLASS;
6510
6511 for (i = 0; i < words; i++)
6512 classes[i] = subclasses[i % num];
6513
6514 break;
6515 }
6516 case UNION_TYPE:
6517 case QUAL_UNION_TYPE:
 6518 /* Unions are similar to RECORD_TYPE but the offset is always 0. */
6520 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6521 {
6522 if (TREE_CODE (field) == FIELD_DECL)
6523 {
6524 int num;
6525
6526 if (TREE_TYPE (field) == error_mark_node)
6527 continue;
6528
6529 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
6530 TREE_TYPE (field), subclasses,
6531 bit_offset);
6532 if (!num)
6533 return 0;
6534 for (i = 0; i < num; i++)
6535 classes[i] = merge_classes (subclasses[i], classes[i]);
6536 }
6537 }
6538 break;
6539
6540 default:
6541 gcc_unreachable ();
6542 }
6543
6544 if (words > 2)
6545 {
 6546 /* When size > 16 bytes, if the first class isn't
 6547 X86_64_SSE_CLASS or any of the remaining classes isn't
 6548 X86_64_SSEUP_CLASS, everything should be passed in
 6549 memory. */
6550 if (classes[0] != X86_64_SSE_CLASS)
6551 return 0;
6552
6553 for (i = 1; i < words; i++)
6554 if (classes[i] != X86_64_SSEUP_CLASS)
6555 return 0;
6556 }
6557
6558 /* Final merger cleanup. */
6559 for (i = 0; i < words; i++)
6560 {
6561 /* If one class is MEMORY, everything should be passed in
6562 memory. */
6563 if (classes[i] == X86_64_MEMORY_CLASS)
6564 return 0;
6565
6566 /* The X86_64_SSEUP_CLASS should be always preceded by
6567 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
6568 if (classes[i] == X86_64_SSEUP_CLASS
6569 && classes[i - 1] != X86_64_SSE_CLASS
6570 && classes[i - 1] != X86_64_SSEUP_CLASS)
6571 {
6572 /* The first one should never be X86_64_SSEUP_CLASS. */
6573 gcc_assert (i != 0);
6574 classes[i] = X86_64_SSE_CLASS;
6575 }
6576
6577 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
6578 everything should be passed in memory. */
6579 if (classes[i] == X86_64_X87UP_CLASS
6580 && (classes[i - 1] != X86_64_X87_CLASS))
6581 {
6582 static bool warned;
6583
6584 /* The first one should never be X86_64_X87UP_CLASS. */
6585 gcc_assert (i != 0);
6586 if (!warned && warn_psabi)
6587 {
6588 warned = true;
6589 inform (input_location,
6590 "the ABI of passing union with long double"
6591 " has changed in GCC 4.4");
6592 }
6593 return 0;
6594 }
6595 }
6596 return words;
6597 }
6598
 6599 /* Compute the alignment needed. We align all types to natural boundaries,
 6600 with the exception of XFmode, which is aligned to 64 bits. */
6601 if (mode != VOIDmode && mode != BLKmode)
6602 {
6603 int mode_alignment = GET_MODE_BITSIZE (mode);
6604
6605 if (mode == XFmode)
6606 mode_alignment = 128;
6607 else if (mode == XCmode)
6608 mode_alignment = 256;
6609 if (COMPLEX_MODE_P (mode))
6610 mode_alignment /= 2;
6611 /* Misaligned fields are always returned in memory. */
6612 if (bit_offset % mode_alignment)
6613 return 0;
6614 }
6615
 6616 /* For V1xx modes, just use the base mode. */
6617 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
6618 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
6619 mode = GET_MODE_INNER (mode);
6620
6621 /* Classification of atomic types. */
6622 switch (mode)
6623 {
6624 case SDmode:
6625 case DDmode:
6626 classes[0] = X86_64_SSE_CLASS;
6627 return 1;
6628 case TDmode:
6629 classes[0] = X86_64_SSE_CLASS;
6630 classes[1] = X86_64_SSEUP_CLASS;
6631 return 2;
6632 case DImode:
6633 case SImode:
6634 case HImode:
6635 case QImode:
6636 case CSImode:
6637 case CHImode:
6638 case CQImode:
6639 {
6640 int size = bit_offset + (int) GET_MODE_BITSIZE (mode);
6641
6642 /* Analyze last 128 bits only. */
6643 size = (size - 1) & 0x7f;
6644
6645 if (size < 32)
6646 {
6647 classes[0] = X86_64_INTEGERSI_CLASS;
6648 return 1;
6649 }
6650 else if (size < 64)
6651 {
6652 classes[0] = X86_64_INTEGER_CLASS;
6653 return 1;
6654 }
6655 else if (size < 64+32)
6656 {
6657 classes[0] = X86_64_INTEGER_CLASS;
6658 classes[1] = X86_64_INTEGERSI_CLASS;
6659 return 2;
6660 }
6661 else if (size < 64+64)
6662 {
6663 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6664 return 2;
6665 }
6666 else
6667 gcc_unreachable ();
6668 }
6669 case CDImode:
6670 case TImode:
6671 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6672 return 2;
6673 case COImode:
6674 case OImode:
6675 /* OImode shouldn't be used directly. */
6676 gcc_unreachable ();
6677 case CTImode:
6678 return 0;
6679 case SFmode:
6680 if (!(bit_offset % 64))
6681 classes[0] = X86_64_SSESF_CLASS;
6682 else
6683 classes[0] = X86_64_SSE_CLASS;
6684 return 1;
6685 case DFmode:
6686 classes[0] = X86_64_SSEDF_CLASS;
6687 return 1;
6688 case XFmode:
6689 classes[0] = X86_64_X87_CLASS;
6690 classes[1] = X86_64_X87UP_CLASS;
6691 return 2;
6692 case TFmode:
6693 classes[0] = X86_64_SSE_CLASS;
6694 classes[1] = X86_64_SSEUP_CLASS;
6695 return 2;
6696 case SCmode:
6697 classes[0] = X86_64_SSE_CLASS;
6698 if (!(bit_offset % 64))
6699 return 1;
6700 else
6701 {
6702 static bool warned;
6703
6704 if (!warned && warn_psabi)
6705 {
6706 warned = true;
6707 inform (input_location,
6708 "the ABI of passing structure with complex float"
6709 " member has changed in GCC 4.4");
6710 }
6711 classes[1] = X86_64_SSESF_CLASS;
6712 return 2;
6713 }
6714 case DCmode:
6715 classes[0] = X86_64_SSEDF_CLASS;
6716 classes[1] = X86_64_SSEDF_CLASS;
6717 return 2;
6718 case XCmode:
6719 classes[0] = X86_64_COMPLEX_X87_CLASS;
6720 return 1;
6721 case TCmode:
 6722 /* This mode is larger than 16 bytes. */
6723 return 0;
6724 case V8SFmode:
6725 case V8SImode:
6726 case V32QImode:
6727 case V16HImode:
6728 case V4DFmode:
6729 case V4DImode:
6730 classes[0] = X86_64_SSE_CLASS;
6731 classes[1] = X86_64_SSEUP_CLASS;
6732 classes[2] = X86_64_SSEUP_CLASS;
6733 classes[3] = X86_64_SSEUP_CLASS;
6734 return 4;
6735 case V8DFmode:
6736 case V16SFmode:
6737 case V8DImode:
6738 case V16SImode:
6739 case V32HImode:
6740 case V64QImode:
6741 classes[0] = X86_64_SSE_CLASS;
6742 classes[1] = X86_64_SSEUP_CLASS;
6743 classes[2] = X86_64_SSEUP_CLASS;
6744 classes[3] = X86_64_SSEUP_CLASS;
6745 classes[4] = X86_64_SSEUP_CLASS;
6746 classes[5] = X86_64_SSEUP_CLASS;
6747 classes[6] = X86_64_SSEUP_CLASS;
6748 classes[7] = X86_64_SSEUP_CLASS;
6749 return 8;
6750 case V4SFmode:
6751 case V4SImode:
6752 case V16QImode:
6753 case V8HImode:
6754 case V2DFmode:
6755 case V2DImode:
6756 classes[0] = X86_64_SSE_CLASS;
6757 classes[1] = X86_64_SSEUP_CLASS;
6758 return 2;
6759 case V1TImode:
6760 case V1DImode:
6761 case V2SFmode:
6762 case V2SImode:
6763 case V4HImode:
6764 case V8QImode:
6765 classes[0] = X86_64_SSE_CLASS;
6766 return 1;
6767 case BLKmode:
6768 case VOIDmode:
6769 return 0;
6770 default:
6771 gcc_assert (VECTOR_MODE_P (mode));
6772
6773 if (bytes > 16)
6774 return 0;
6775
6776 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
6777
6778 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
6779 classes[0] = X86_64_INTEGERSI_CLASS;
6780 else
6781 classes[0] = X86_64_INTEGER_CLASS;
6782 classes[1] = X86_64_INTEGER_CLASS;
6783 return 1 + (bytes > 8);
6784 }
6785 }
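
/* Illustrative sketch of classify_argument results (an informal sample,
   not an exhaustive list):

     struct { double d; long l; }  -> returns 2, classes = { SSEDF, INTEGER }
     struct { char c[24]; }        -> returns 0 (passed in memory)
     __int128                      -> returns 2, classes = { INTEGER, INTEGER }  */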
6786
 6787 /* Examine the argument and set the number of registers required in each
 6788 class. Return 0 iff the parameter should be passed in memory. */
6789 static int
6790 examine_argument (enum machine_mode mode, const_tree type, int in_return,
6791 int *int_nregs, int *sse_nregs)
6792 {
6793 enum x86_64_reg_class regclass[MAX_CLASSES];
6794 int n = classify_argument (mode, type, regclass, 0);
6795
6796 *int_nregs = 0;
6797 *sse_nregs = 0;
6798 if (!n)
6799 return 0;
6800 for (n--; n >= 0; n--)
6801 switch (regclass[n])
6802 {
6803 case X86_64_INTEGER_CLASS:
6804 case X86_64_INTEGERSI_CLASS:
6805 (*int_nregs)++;
6806 break;
6807 case X86_64_SSE_CLASS:
6808 case X86_64_SSESF_CLASS:
6809 case X86_64_SSEDF_CLASS:
6810 (*sse_nregs)++;
6811 break;
6812 case X86_64_NO_CLASS:
6813 case X86_64_SSEUP_CLASS:
6814 break;
6815 case X86_64_X87_CLASS:
6816 case X86_64_X87UP_CLASS:
6817 if (!in_return)
6818 return 0;
6819 break;
6820 case X86_64_COMPLEX_X87_CLASS:
6821 return in_return ? 2 : 0;
6822 case X86_64_MEMORY_CLASS:
6823 gcc_unreachable ();
6824 }
6825 return 1;
6826 }
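
/* Illustrative sketch: for struct { double d; long l; } the classification
   above yields one INTEGER and one SSE eightbyte, so examine_argument
   returns 1 with *int_nregs == 1 and *sse_nregs == 1; for a type that must
   live in memory it returns 0 and both counts stay 0.  */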
6827
6828 /* Construct container for the argument used by GCC interface. See
6829 FUNCTION_ARG for the detailed description. */
6830
6831 static rtx
6832 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
6833 const_tree type, int in_return, int nintregs, int nsseregs,
6834 const int *intreg, int sse_regno)
6835 {
6836 /* The following variables hold the static issued_error state. */
6837 static bool issued_sse_arg_error;
6838 static bool issued_sse_ret_error;
6839 static bool issued_x87_ret_error;
6840
6841 enum machine_mode tmpmode;
6842 int bytes =
6843 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6844 enum x86_64_reg_class regclass[MAX_CLASSES];
6845 int n;
6846 int i;
6847 int nexps = 0;
6848 int needed_sseregs, needed_intregs;
6849 rtx exp[MAX_CLASSES];
6850 rtx ret;
6851
6852 n = classify_argument (mode, type, regclass, 0);
6853 if (!n)
6854 return NULL;
6855 if (!examine_argument (mode, type, in_return, &needed_intregs,
6856 &needed_sseregs))
6857 return NULL;
6858 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
6859 return NULL;
6860
6861 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
6862 some less clueful developer tries to use floating-point anyway. */
6863 if (needed_sseregs && !TARGET_SSE)
6864 {
6865 if (in_return)
6866 {
6867 if (!issued_sse_ret_error)
6868 {
6869 error ("SSE register return with SSE disabled");
6870 issued_sse_ret_error = true;
6871 }
6872 }
6873 else if (!issued_sse_arg_error)
6874 {
6875 error ("SSE register argument with SSE disabled");
6876 issued_sse_arg_error = true;
6877 }
6878 return NULL;
6879 }
6880
6881 /* Likewise, error if the ABI requires us to return values in the
6882 x87 registers and the user specified -mno-80387. */
6883 if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
6884 for (i = 0; i < n; i++)
6885 if (regclass[i] == X86_64_X87_CLASS
6886 || regclass[i] == X86_64_X87UP_CLASS
6887 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
6888 {
6889 if (!issued_x87_ret_error)
6890 {
6891 error ("x87 register return with x87 disabled");
6892 issued_x87_ret_error = true;
6893 }
6894 return NULL;
6895 }
6896
 6897 /* First construct simple cases. Avoid SCmode, since we want to use
 6898 a single register to pass this type. */
6899 if (n == 1 && mode != SCmode)
6900 switch (regclass[0])
6901 {
6902 case X86_64_INTEGER_CLASS:
6903 case X86_64_INTEGERSI_CLASS:
6904 return gen_rtx_REG (mode, intreg[0]);
6905 case X86_64_SSE_CLASS:
6906 case X86_64_SSESF_CLASS:
6907 case X86_64_SSEDF_CLASS:
6908 if (mode != BLKmode)
6909 return gen_reg_or_parallel (mode, orig_mode,
6910 SSE_REGNO (sse_regno));
6911 break;
6912 case X86_64_X87_CLASS:
6913 case X86_64_COMPLEX_X87_CLASS:
6914 return gen_rtx_REG (mode, FIRST_STACK_REG);
6915 case X86_64_NO_CLASS:
6916 /* Zero sized array, struct or class. */
6917 return NULL;
6918 default:
6919 gcc_unreachable ();
6920 }
6921 if (n == 2
6922 && regclass[0] == X86_64_SSE_CLASS
6923 && regclass[1] == X86_64_SSEUP_CLASS
6924 && mode != BLKmode)
6925 return gen_reg_or_parallel (mode, orig_mode,
6926 SSE_REGNO (sse_regno));
6927 if (n == 4
6928 && regclass[0] == X86_64_SSE_CLASS
6929 && regclass[1] == X86_64_SSEUP_CLASS
6930 && regclass[2] == X86_64_SSEUP_CLASS
6931 && regclass[3] == X86_64_SSEUP_CLASS
6932 && mode != BLKmode)
6933 return gen_reg_or_parallel (mode, orig_mode,
6934 SSE_REGNO (sse_regno));
6935 if (n == 8
6936 && regclass[0] == X86_64_SSE_CLASS
6937 && regclass[1] == X86_64_SSEUP_CLASS
6938 && regclass[2] == X86_64_SSEUP_CLASS
6939 && regclass[3] == X86_64_SSEUP_CLASS
6940 && regclass[4] == X86_64_SSEUP_CLASS
6941 && regclass[5] == X86_64_SSEUP_CLASS
6942 && regclass[6] == X86_64_SSEUP_CLASS
6943 && regclass[7] == X86_64_SSEUP_CLASS
6944 && mode != BLKmode)
6945 return gen_reg_or_parallel (mode, orig_mode,
6946 SSE_REGNO (sse_regno));
6947 if (n == 2
6948 && regclass[0] == X86_64_X87_CLASS
6949 && regclass[1] == X86_64_X87UP_CLASS)
6950 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
6951
6952 if (n == 2
6953 && regclass[0] == X86_64_INTEGER_CLASS
6954 && regclass[1] == X86_64_INTEGER_CLASS
6955 && (mode == CDImode || mode == TImode || mode == TFmode)
6956 && intreg[0] + 1 == intreg[1])
6957 return gen_rtx_REG (mode, intreg[0]);
6958
6959 /* Otherwise figure out the entries of the PARALLEL. */
6960 for (i = 0; i < n; i++)
6961 {
6962 int pos;
6963
6964 switch (regclass[i])
6965 {
6966 case X86_64_NO_CLASS:
6967 break;
6968 case X86_64_INTEGER_CLASS:
6969 case X86_64_INTEGERSI_CLASS:
6970 /* Merge TImodes on aligned occasions here too. */
6971 if (i * 8 + 8 > bytes)
6972 tmpmode
6973 = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
6974 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
6975 tmpmode = SImode;
6976 else
6977 tmpmode = DImode;
 6978 /* We've requested 24 bytes for which
 6979 we have no mode. Use DImode. */
6980 if (tmpmode == BLKmode)
6981 tmpmode = DImode;
6982 exp [nexps++]
6983 = gen_rtx_EXPR_LIST (VOIDmode,
6984 gen_rtx_REG (tmpmode, *intreg),
6985 GEN_INT (i*8));
6986 intreg++;
6987 break;
6988 case X86_64_SSESF_CLASS:
6989 exp [nexps++]
6990 = gen_rtx_EXPR_LIST (VOIDmode,
6991 gen_rtx_REG (SFmode,
6992 SSE_REGNO (sse_regno)),
6993 GEN_INT (i*8));
6994 sse_regno++;
6995 break;
6996 case X86_64_SSEDF_CLASS:
6997 exp [nexps++]
6998 = gen_rtx_EXPR_LIST (VOIDmode,
6999 gen_rtx_REG (DFmode,
7000 SSE_REGNO (sse_regno)),
7001 GEN_INT (i*8));
7002 sse_regno++;
7003 break;
7004 case X86_64_SSE_CLASS:
7005 pos = i;
7006 switch (n)
7007 {
7008 case 1:
7009 tmpmode = DImode;
7010 break;
7011 case 2:
7012 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
7013 {
7014 tmpmode = TImode;
7015 i++;
7016 }
7017 else
7018 tmpmode = DImode;
7019 break;
7020 case 4:
7021 gcc_assert (i == 0
7022 && regclass[1] == X86_64_SSEUP_CLASS
7023 && regclass[2] == X86_64_SSEUP_CLASS
7024 && regclass[3] == X86_64_SSEUP_CLASS);
7025 tmpmode = OImode;
7026 i += 3;
7027 break;
7028 case 8:
7029 gcc_assert (i == 0
7030 && regclass[1] == X86_64_SSEUP_CLASS
7031 && regclass[2] == X86_64_SSEUP_CLASS
7032 && regclass[3] == X86_64_SSEUP_CLASS
7033 && regclass[4] == X86_64_SSEUP_CLASS
7034 && regclass[5] == X86_64_SSEUP_CLASS
7035 && regclass[6] == X86_64_SSEUP_CLASS
7036 && regclass[7] == X86_64_SSEUP_CLASS);
7037 tmpmode = XImode;
7038 i += 7;
7039 break;
7040 default:
7041 gcc_unreachable ();
7042 }
7043 exp [nexps++]
7044 = gen_rtx_EXPR_LIST (VOIDmode,
7045 gen_rtx_REG (tmpmode,
7046 SSE_REGNO (sse_regno)),
7047 GEN_INT (pos*8));
7048 sse_regno++;
7049 break;
7050 default:
7051 gcc_unreachable ();
7052 }
7053 }
7054
7055 /* Empty aligned struct, union or class. */
7056 if (nexps == 0)
7057 return NULL;
7058
7059 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
7060 for (i = 0; i < nexps; i++)
7061 XVECEXP (ret, 0, i) = exp [i];
7062 return ret;
7063 }
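
/* Illustrative sketch of the container built above for
   struct { double d; long l; } passed as the first argument: a PARALLEL
   of two EXPR_LIST entries, roughly

     (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                (expr_list (reg:DI rdi)  (const_int 8))])

   i.e. the double half lives in an SSE register at byte offset 0 and the
   long half in an integer register at byte offset 8.  */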
7064
7065 /* Update the data in CUM to advance over an argument of mode MODE
7066 and data type TYPE. (TYPE is null for libcalls where that information
7067 may not be available.) */
7068
7069 static void
7070 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7071 const_tree type, HOST_WIDE_INT bytes,
7072 HOST_WIDE_INT words)
7073 {
7074 switch (mode)
7075 {
7076 default:
7077 break;
7078
7079 case BLKmode:
7080 if (bytes < 0)
7081 break;
7082 /* FALLTHRU */
7083
7084 case DImode:
7085 case SImode:
7086 case HImode:
7087 case QImode:
7088 cum->words += words;
7089 cum->nregs -= words;
7090 cum->regno += words;
7091
7092 if (cum->nregs <= 0)
7093 {
7094 cum->nregs = 0;
7095 cum->regno = 0;
7096 }
7097 break;
7098
7099 case OImode:
7100 /* OImode shouldn't be used directly. */
7101 gcc_unreachable ();
7102
7103 case DFmode:
7104 if (cum->float_in_sse < 2)
7105 break;
7106 case SFmode:
7107 if (cum->float_in_sse < 1)
7108 break;
7109 /* FALLTHRU */
7110
7111 case V8SFmode:
7112 case V8SImode:
7113 case V64QImode:
7114 case V32HImode:
7115 case V16SImode:
7116 case V8DImode:
7117 case V16SFmode:
7118 case V8DFmode:
7119 case V32QImode:
7120 case V16HImode:
7121 case V4DFmode:
7122 case V4DImode:
7123 case TImode:
7124 case V16QImode:
7125 case V8HImode:
7126 case V4SImode:
7127 case V2DImode:
7128 case V4SFmode:
7129 case V2DFmode:
7130 if (!type || !AGGREGATE_TYPE_P (type))
7131 {
7132 cum->sse_words += words;
7133 cum->sse_nregs -= 1;
7134 cum->sse_regno += 1;
7135 if (cum->sse_nregs <= 0)
7136 {
7137 cum->sse_nregs = 0;
7138 cum->sse_regno = 0;
7139 }
7140 }
7141 break;
7142
7143 case V8QImode:
7144 case V4HImode:
7145 case V2SImode:
7146 case V2SFmode:
7147 case V1TImode:
7148 case V1DImode:
7149 if (!type || !AGGREGATE_TYPE_P (type))
7150 {
7151 cum->mmx_words += words;
7152 cum->mmx_nregs -= 1;
7153 cum->mmx_regno += 1;
7154 if (cum->mmx_nregs <= 0)
7155 {
7156 cum->mmx_nregs = 0;
7157 cum->mmx_regno = 0;
7158 }
7159 }
7160 break;
7161 }
7162 }
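
/* Illustrative sketch, assuming a 32-bit regparm(3) function: advancing
   over an 8-byte integer argument (two words) drops cum->nregs from 3 to
   1 and moves cum->regno forward by 2, so the next integer argument can
   still use the third and last parameter register.  */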
7163
7164 static void
7165 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7166 const_tree type, HOST_WIDE_INT words, bool named)
7167 {
7168 int int_nregs, sse_nregs;
7169
 7170 /* Unnamed 512- and 256-bit vector mode parameters are passed on the stack. */
7171 if (!named && (VALID_AVX512F_REG_MODE (mode)
7172 || VALID_AVX256_REG_MODE (mode)))
7173 return;
7174
7175 if (examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
7176 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
7177 {
7178 cum->nregs -= int_nregs;
7179 cum->sse_nregs -= sse_nregs;
7180 cum->regno += int_nregs;
7181 cum->sse_regno += sse_nregs;
7182 }
7183 else
7184 {
7185 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
7186 cum->words = (cum->words + align - 1) & ~(align - 1);
7187 cum->words += words;
7188 }
7189 }
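
/* Illustrative sketch of the stack path above: with a 16-byte-aligned
   argument on a 64-bit target, align is 2 words, so cum->words is first
   rounded up to an even number of words before the argument's own word
   count is added, mirroring the padding the argument gets on the stack.  */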
7190
7191 static void
7192 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
7193 HOST_WIDE_INT words)
7194 {
 7195 /* Otherwise, this should have been passed indirectly. */
7196 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
7197
7198 cum->words += words;
7199 if (cum->nregs > 0)
7200 {
7201 cum->nregs -= 1;
7202 cum->regno += 1;
7203 }
7204 }
7205
7206 /* Update the data in CUM to advance over an argument of mode MODE and
7207 data type TYPE. (TYPE is null for libcalls where that information
7208 may not be available.) */
7209
7210 static void
7211 ix86_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
7212 const_tree type, bool named)
7213 {
7214 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7215 HOST_WIDE_INT bytes, words;
7216
7217 if (mode == BLKmode)
7218 bytes = int_size_in_bytes (type);
7219 else
7220 bytes = GET_MODE_SIZE (mode);
7221 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7222
7223 if (type)
7224 mode = type_natural_mode (type, NULL, false);
7225
7226 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7227 function_arg_advance_ms_64 (cum, bytes, words);
7228 else if (TARGET_64BIT)
7229 function_arg_advance_64 (cum, mode, type, words, named);
7230 else
7231 function_arg_advance_32 (cum, mode, type, bytes, words);
7232 }
7233
7234 /* Define where to put the arguments to a function.
7235 Value is zero to push the argument on the stack,
7236 or a hard register in which to store the argument.
7237
7238 MODE is the argument's machine mode.
7239 TYPE is the data type of the argument (as a tree).
7240 This is null for libcalls where that information may
7241 not be available.
7242 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7243 the preceding args and about the function being called.
7244 NAMED is nonzero if this argument is a named parameter
7245 (otherwise it is an extra parameter matching an ellipsis). */
7246
7247 static rtx
7248 function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7249 enum machine_mode orig_mode, const_tree type,
7250 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
7251 {
7252 /* Avoid the AL settings for the Unix64 ABI. */
7253 if (mode == VOIDmode)
7254 return constm1_rtx;
7255
7256 switch (mode)
7257 {
7258 default:
7259 break;
7260
7261 case BLKmode:
7262 if (bytes < 0)
7263 break;
7264 /* FALLTHRU */
7265 case DImode:
7266 case SImode:
7267 case HImode:
7268 case QImode:
7269 if (words <= cum->nregs)
7270 {
7271 int regno = cum->regno;
7272
 7273 /* Fastcall allocates the first two DWORD (SImode) or
 7274 smaller arguments to ECX and EDX if the argument isn't
 7275 an aggregate type. */
7276 if (cum->fastcall)
7277 {
7278 if (mode == BLKmode
7279 || mode == DImode
7280 || (type && AGGREGATE_TYPE_P (type)))
7281 break;
7282
 7283 /* ECX, not EAX, is the first allocated register. */
7284 if (regno == AX_REG)
7285 regno = CX_REG;
7286 }
7287 return gen_rtx_REG (mode, regno);
7288 }
7289 break;
7290
7291 case DFmode:
7292 if (cum->float_in_sse < 2)
7293 break;
7294 case SFmode:
7295 if (cum->float_in_sse < 1)
7296 break;
7297 /* FALLTHRU */
7298 case TImode:
7299 /* In 32bit, we pass TImode in xmm registers. */
7300 case V16QImode:
7301 case V8HImode:
7302 case V4SImode:
7303 case V2DImode:
7304 case V4SFmode:
7305 case V2DFmode:
7306 if (!type || !AGGREGATE_TYPE_P (type))
7307 {
7308 if (cum->sse_nregs)
7309 return gen_reg_or_parallel (mode, orig_mode,
7310 cum->sse_regno + FIRST_SSE_REG);
7311 }
7312 break;
7313
7314 case OImode:
7315 case XImode:
7316 /* OImode and XImode shouldn't be used directly. */
7317 gcc_unreachable ();
7318
7319 case V64QImode:
7320 case V32HImode:
7321 case V16SImode:
7322 case V8DImode:
7323 case V16SFmode:
7324 case V8DFmode:
7325 case V8SFmode:
7326 case V8SImode:
7327 case V32QImode:
7328 case V16HImode:
7329 case V4DFmode:
7330 case V4DImode:
7331 if (!type || !AGGREGATE_TYPE_P (type))
7332 {
7333 if (cum->sse_nregs)
7334 return gen_reg_or_parallel (mode, orig_mode,
7335 cum->sse_regno + FIRST_SSE_REG);
7336 }
7337 break;
7338
7339 case V8QImode:
7340 case V4HImode:
7341 case V2SImode:
7342 case V2SFmode:
7343 case V1TImode:
7344 case V1DImode:
7345 if (!type || !AGGREGATE_TYPE_P (type))
7346 {
7347 if (cum->mmx_nregs)
7348 return gen_reg_or_parallel (mode, orig_mode,
7349 cum->mmx_regno + FIRST_MMX_REG);
7350 }
7351 break;
7352 }
7353
7354 return NULL_RTX;
7355 }
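
/* Illustrative sketch: for a 32-bit fastcall function, the first two
   integral arguments of at most 4 bytes land in %ecx and %edx (cum->regno
   starts at AX_REG and is remapped to CX_REG above), while DImode values
   and aggregates fall through and are pushed on the stack.  */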
7356
7357 static rtx
7358 function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7359 enum machine_mode orig_mode, const_tree type, bool named)
7360 {
7361 /* Handle a hidden AL argument containing number of registers
7362 for varargs x86-64 functions. */
7363 if (mode == VOIDmode)
7364 return GEN_INT (cum->maybe_vaarg
7365 ? (cum->sse_nregs < 0
7366 ? X86_64_SSE_REGPARM_MAX
7367 : cum->sse_regno)
7368 : -1);
7369
7370 switch (mode)
7371 {
7372 default:
7373 break;
7374
7375 case V8SFmode:
7376 case V8SImode:
7377 case V32QImode:
7378 case V16HImode:
7379 case V4DFmode:
7380 case V4DImode:
7381 case V16SFmode:
7382 case V16SImode:
7383 case V64QImode:
7384 case V32HImode:
7385 case V8DFmode:
7386 case V8DImode:
 7387 /* Unnamed 256- and 512-bit vector mode parameters are passed on the stack. */
7388 if (!named)
7389 return NULL;
7390 break;
7391 }
7392
7393 return construct_container (mode, orig_mode, type, 0, cum->nregs,
7394 cum->sse_nregs,
7395 &x86_64_int_parameter_registers [cum->regno],
7396 cum->sse_regno);
7397 }
7398
7399 static rtx
7400 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7401 enum machine_mode orig_mode, bool named,
7402 HOST_WIDE_INT bytes)
7403 {
7404 unsigned int regno;
7405
 7406 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
 7407 We use the value -2 to specify that the current function call is MSABI. */
7408 if (mode == VOIDmode)
7409 return GEN_INT (-2);
7410
7411 /* If we've run out of registers, it goes on the stack. */
7412 if (cum->nregs == 0)
7413 return NULL_RTX;
7414
7415 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
7416
7417 /* Only floating point modes are passed in anything but integer regs. */
7418 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
7419 {
7420 if (named)
7421 regno = cum->regno + FIRST_SSE_REG;
7422 else
7423 {
7424 rtx t1, t2;
7425
7426 /* Unnamed floating parameters are passed in both the
7427 SSE and integer registers. */
7428 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
7429 t2 = gen_rtx_REG (mode, regno);
7430 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
7431 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
7432 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
7433 }
7434 }
 7435 /* Handle aggregate types passed in a register. */
7436 if (orig_mode == BLKmode)
7437 {
7438 if (bytes > 0 && bytes <= 8)
7439 mode = (bytes > 4 ? DImode : SImode);
7440 if (mode == BLKmode)
7441 mode = DImode;
7442 }
7443
7444 return gen_reg_or_parallel (mode, orig_mode, regno);
7445 }
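
/* Illustrative sketch of the MS ABI path above: the first four argument
   slots map to rcx/rdx/r8/r9; a named double in slot 0 is passed in
   %xmm0, while an unnamed double is described by the two-element PARALLEL
   so that it is passed in both %xmm0 and %rcx and a varargs callee can
   find it in the integer register.  */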
7446
7447 /* Return where to put the arguments to a function.
7448 Return zero to push the argument on the stack, or a hard register in which to store the argument.
7449
7450 MODE is the argument's machine mode. TYPE is the data type of the
7451 argument. It is null for libcalls where that information may not be
7452 available. CUM gives information about the preceding args and about
7453 the function being called. NAMED is nonzero if this argument is a
7454 named parameter (otherwise it is an extra parameter matching an
7455 ellipsis). */
7456
7457 static rtx
7458 ix86_function_arg (cumulative_args_t cum_v, enum machine_mode omode,
7459 const_tree type, bool named)
7460 {
7461 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7462 enum machine_mode mode = omode;
7463 HOST_WIDE_INT bytes, words;
7464 rtx arg;
7465
7466 if (mode == BLKmode)
7467 bytes = int_size_in_bytes (type);
7468 else
7469 bytes = GET_MODE_SIZE (mode);
7470 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7471
7472 /* To simplify the code below, represent vector types with a vector mode
7473 even if MMX/SSE are not active. */
7474 if (type && TREE_CODE (type) == VECTOR_TYPE)
7475 mode = type_natural_mode (type, cum, false);
7476
7477 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7478 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
7479 else if (TARGET_64BIT)
7480 arg = function_arg_64 (cum, mode, omode, type, named);
7481 else
7482 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
7483
7484 return arg;
7485 }
7486
7487 /* A C expression that indicates when an argument must be passed by
7488 reference. If nonzero for an argument, a copy of that argument is
7489 made in memory and a pointer to the argument is passed instead of
7490 the argument itself. The pointer is passed in whatever way is
7491 appropriate for passing a pointer to that type. */
7492
7493 static bool
7494 ix86_pass_by_reference (cumulative_args_t cum_v, enum machine_mode mode,
7495 const_tree type, bool named ATTRIBUTE_UNUSED)
7496 {
7497 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7498
7499 /* See Windows x64 Software Convention. */
7500 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7501 {
7502 int msize = (int) GET_MODE_SIZE (mode);
7503 if (type)
7504 {
7505 /* Arrays are passed by reference. */
7506 if (TREE_CODE (type) == ARRAY_TYPE)
7507 return true;
7508
7509 if (AGGREGATE_TYPE_P (type))
7510 {
7511 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
7512 are passed by reference. */
7513 msize = int_size_in_bytes (type);
7514 }
7515 }
7516
7517 /* __m128 is passed by reference. */
7518 switch (msize) {
7519 case 1: case 2: case 4: case 8:
7520 break;
7521 default:
7522 return true;
7523 }
7524 }
7525 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
7526 return 1;
7527
7528 return 0;
7529 }
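
/* Illustrative sketch of the rules above: on the MS ABI an 8-byte struct
   is passed by value while a 12-byte struct or a 16-byte __m128 is passed
   by reference; on the 64-bit SysV side only variable-sized types (where
   int_size_in_bytes returns -1) are forced through this hook.  */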
7530
7531 /* Return true when TYPE should be 128bit aligned for 32bit argument
7532 passing ABI. XXX: This function is obsolete and is only used for
7533 checking psABI compatibility with previous versions of GCC. */
7534
7535 static bool
7536 ix86_compat_aligned_value_p (const_tree type)
7537 {
7538 enum machine_mode mode = TYPE_MODE (type);
7539 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
7540 || mode == TDmode
7541 || mode == TFmode
7542 || mode == TCmode)
7543 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
7544 return true;
7545 if (TYPE_ALIGN (type) < 128)
7546 return false;
7547
7548 if (AGGREGATE_TYPE_P (type))
7549 {
7550 /* Walk the aggregates recursively. */
7551 switch (TREE_CODE (type))
7552 {
7553 case RECORD_TYPE:
7554 case UNION_TYPE:
7555 case QUAL_UNION_TYPE:
7556 {
7557 tree field;
7558
7559 /* Walk all the structure fields. */
7560 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7561 {
7562 if (TREE_CODE (field) == FIELD_DECL
7563 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
7564 return true;
7565 }
7566 break;
7567 }
7568
7569 case ARRAY_TYPE:
 7570 /* Just for use if some languages pass arrays by value. */
7571 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
7572 return true;
7573 break;
7574
7575 default:
7576 gcc_unreachable ();
7577 }
7578 }
7579 return false;
7580 }
7581
7582 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
7583 XXX: This function is obsolete and is only used for checking psABI
7584 compatibility with previous versions of GCC. */
7585
7586 static unsigned int
7587 ix86_compat_function_arg_boundary (enum machine_mode mode,
7588 const_tree type, unsigned int align)
7589 {
7590 /* In 32bit, only _Decimal128 and __float128 are aligned to their
7591 natural boundaries. */
7592 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
7593 {
7594 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
7595 make an exception for SSE modes since these require 128bit
7596 alignment.
7597
7598 The handling here differs from field_alignment. ICC aligns MMX
7599 arguments to 4 byte boundaries, while structure fields are aligned
7600 to 8 byte boundaries. */
7601 if (!type)
7602 {
7603 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
7604 align = PARM_BOUNDARY;
7605 }
7606 else
7607 {
7608 if (!ix86_compat_aligned_value_p (type))
7609 align = PARM_BOUNDARY;
7610 }
7611 }
7612 if (align > BIGGEST_ALIGNMENT)
7613 align = BIGGEST_ALIGNMENT;
7614 return align;
7615 }
7616
7617 /* Return true when TYPE should be 128bit aligned for 32bit argument
7618 passing ABI. */
7619
7620 static bool
7621 ix86_contains_aligned_value_p (const_tree type)
7622 {
7623 enum machine_mode mode = TYPE_MODE (type);
7624
7625 if (mode == XFmode || mode == XCmode)
7626 return false;
7627
7628 if (TYPE_ALIGN (type) < 128)
7629 return false;
7630
7631 if (AGGREGATE_TYPE_P (type))
7632 {
7633 /* Walk the aggregates recursively. */
7634 switch (TREE_CODE (type))
7635 {
7636 case RECORD_TYPE:
7637 case UNION_TYPE:
7638 case QUAL_UNION_TYPE:
7639 {
7640 tree field;
7641
7642 /* Walk all the structure fields. */
7643 for (field = TYPE_FIELDS (type);
7644 field;
7645 field = DECL_CHAIN (field))
7646 {
7647 if (TREE_CODE (field) == FIELD_DECL
7648 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
7649 return true;
7650 }
7651 break;
7652 }
7653
7654 case ARRAY_TYPE:
 7655 /* Just for use if some languages pass arrays by value. */
7656 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
7657 return true;
7658 break;
7659
7660 default:
7661 gcc_unreachable ();
7662 }
7663 }
7664 else
7665 return TYPE_ALIGN (type) >= 128;
7666
7667 return false;
7668 }
7669
7670 /* Gives the alignment boundary, in bits, of an argument with the
7671 specified mode and type. */
7672
7673 static unsigned int
7674 ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
7675 {
7676 unsigned int align;
7677 if (type)
7678 {
 7679 /* Since the main variant type is used for the call, convert TYPE
 7680 to its main variant. */
7681 type = TYPE_MAIN_VARIANT (type);
7682 align = TYPE_ALIGN (type);
7683 }
7684 else
7685 align = GET_MODE_ALIGNMENT (mode);
7686 if (align < PARM_BOUNDARY)
7687 align = PARM_BOUNDARY;
7688 else
7689 {
7690 static bool warned;
7691 unsigned int saved_align = align;
7692
7693 if (!TARGET_64BIT)
7694 {
7695 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
7696 if (!type)
7697 {
7698 if (mode == XFmode || mode == XCmode)
7699 align = PARM_BOUNDARY;
7700 }
7701 else if (!ix86_contains_aligned_value_p (type))
7702 align = PARM_BOUNDARY;
7703
7704 if (align < 128)
7705 align = PARM_BOUNDARY;
7706 }
7707
7708 if (warn_psabi
7709 && !warned
7710 && align != ix86_compat_function_arg_boundary (mode, type,
7711 saved_align))
7712 {
7713 warned = true;
7714 inform (input_location,
7715 "The ABI for passing parameters with %d-byte"
7716 " alignment has changed in GCC 4.6",
7717 align / BITS_PER_UNIT);
7718 }
7719 }
7720
7721 return align;
7722 }
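
/* Illustrative sketch: in 32-bit mode a double argument ends up with the
   default PARM_BOUNDARY alignment, while a 16-byte-aligned vector such as
   __m128 keeps its 128-bit boundary; whenever the result differs from what
   the pre-GCC 4.6 rules in ix86_compat_function_arg_boundary would give,
   the one-time psABI note above is emitted.  */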
7723
7724 /* Return true if N is a possible register number of function value. */
7725
7726 static bool
7727 ix86_function_value_regno_p (const unsigned int regno)
7728 {
7729 switch (regno)
7730 {
7731 case AX_REG:
7732 case DX_REG:
7733 return true;
7734 case DI_REG:
7735 case SI_REG:
7736 return TARGET_64BIT && ix86_abi != MS_ABI;
7737
7738 /* Complex values are returned in %st(0)/%st(1) pair. */
7739 case ST0_REG:
7740 case ST1_REG:
7741 /* TODO: The function should depend on current function ABI but
7742 builtins.c would need updating then. Therefore we use the
7743 default ABI. */
7744 if (TARGET_64BIT && ix86_abi == MS_ABI)
7745 return false;
7746 return TARGET_FLOAT_RETURNS_IN_80387;
7747
7748 /* Complex values are returned in %xmm0/%xmm1 pair. */
7749 case XMM0_REG:
7750 case XMM1_REG:
7751 return TARGET_SSE;
7752
7753 case MM0_REG:
7754 if (TARGET_MACHO || TARGET_64BIT)
7755 return false;
7756 return TARGET_MMX;
7757 }
7758
7759 return false;
7760 }
7761
7762 /* Define how to find the value returned by a function.
7763 VALTYPE is the data type of the value (as a tree).
7764 If the precise function being called is known, FUNC is its FUNCTION_DECL;
7765 otherwise, FUNC is 0. */
7766
7767 static rtx
7768 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
7769 const_tree fntype, const_tree fn)
7770 {
7771 unsigned int regno;
7772
7773 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
7774 we normally prevent this case when mmx is not available. However
7775 some ABIs may require the result to be returned like DImode. */
7776 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7777 regno = FIRST_MMX_REG;
7778
7779 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
7780 we prevent this case when sse is not available. However some ABIs
7781 may require the result to be returned like integer TImode. */
7782 else if (mode == TImode
7783 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7784 regno = FIRST_SSE_REG;
7785
7786 /* 32-byte vector modes in %ymm0. */
7787 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
7788 regno = FIRST_SSE_REG;
7789
7790 /* 64-byte vector modes in %zmm0. */
7791 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
7792 regno = FIRST_SSE_REG;
7793
7794 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
7795 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
7796 regno = FIRST_FLOAT_REG;
7797 else
7798 /* Most things go in %eax. */
7799 regno = AX_REG;
7800
7801 /* Override FP return register with %xmm0 for local functions when
7802 SSE math is enabled or for functions with sseregparm attribute. */
7803 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
7804 {
7805 int sse_level = ix86_function_sseregparm (fntype, fn, false);
7806 if ((sse_level >= 1 && mode == SFmode)
7807 || (sse_level == 2 && mode == DFmode))
7808 regno = FIRST_SSE_REG;
7809 }
7810
7811 /* OImode shouldn't be used directly. */
7812 gcc_assert (mode != OImode);
7813
7814 return gen_rtx_REG (orig_mode, regno);
7815 }
7816
7817 static rtx
7818 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
7819 const_tree valtype)
7820 {
7821 rtx ret;
7822
7823 /* Handle libcalls, which don't provide a type node. */
7824 if (valtype == NULL)
7825 {
7826 unsigned int regno;
7827
7828 switch (mode)
7829 {
7830 case SFmode:
7831 case SCmode:
7832 case DFmode:
7833 case DCmode:
7834 case TFmode:
7835 case SDmode:
7836 case DDmode:
7837 case TDmode:
7838 regno = FIRST_SSE_REG;
7839 break;
7840 case XFmode:
7841 case XCmode:
7842 regno = FIRST_FLOAT_REG;
7843 break;
7844 case TCmode:
7845 return NULL;
7846 default:
7847 regno = AX_REG;
7848 }
7849
7850 return gen_rtx_REG (mode, regno);
7851 }
7852 else if (POINTER_TYPE_P (valtype))
7853 {
7854 /* Pointers are always returned in word_mode. */
7855 mode = word_mode;
7856 }
7857
7858 ret = construct_container (mode, orig_mode, valtype, 1,
7859 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
7860 x86_64_int_return_registers, 0);
7861
 7862 /* For zero-sized structures, construct_container returns NULL, but we
 7863 need to keep the rest of the compiler happy by returning a meaningful value. */
7864 if (!ret)
7865 ret = gen_rtx_REG (orig_mode, AX_REG);
7866
7867 return ret;
7868 }
7869
7870 static rtx
7871 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode,
7872 const_tree valtype)
7873 {
7874 unsigned int regno = AX_REG;
7875
7876 if (TARGET_SSE)
7877 {
7878 switch (GET_MODE_SIZE (mode))
7879 {
7880 case 16:
7881 if (valtype != NULL_TREE
 7882 && !VECTOR_INTEGER_TYPE_P (valtype)
7884 && !INTEGRAL_TYPE_P (valtype)
7885 && !VECTOR_FLOAT_TYPE_P (valtype))
7886 break;
7887 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
7888 && !COMPLEX_MODE_P (mode))
7889 regno = FIRST_SSE_REG;
7890 break;
7891 case 8:
7892 case 4:
7893 if (mode == SFmode || mode == DFmode)
7894 regno = FIRST_SSE_REG;
7895 break;
7896 default:
7897 break;
7898 }
7899 }
7900 return gen_rtx_REG (orig_mode, regno);
7901 }
7902
7903 static rtx
7904 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
7905 enum machine_mode orig_mode, enum machine_mode mode)
7906 {
7907 const_tree fn, fntype;
7908
7909 fn = NULL_TREE;
7910 if (fntype_or_decl && DECL_P (fntype_or_decl))
7911 fn = fntype_or_decl;
7912 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
7913
7914 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
7915 return function_value_ms_64 (orig_mode, mode, valtype);
7916 else if (TARGET_64BIT)
7917 return function_value_64 (orig_mode, mode, valtype);
7918 else
7919 return function_value_32 (orig_mode, mode, fntype, fn);
7920 }
7921
7922 static rtx
7923 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
7924 bool outgoing ATTRIBUTE_UNUSED)
7925 {
7926 enum machine_mode mode, orig_mode;
7927
7928 orig_mode = TYPE_MODE (valtype);
7929 mode = type_natural_mode (valtype, NULL, true);
7930 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
7931 }
7932
7933 /* Pointer function arguments and return values are promoted to
7934 word_mode. */
7935
7936 static enum machine_mode
7937 ix86_promote_function_mode (const_tree type, enum machine_mode mode,
7938 int *punsignedp, const_tree fntype,
7939 int for_return)
7940 {
7941 if (type != NULL_TREE && POINTER_TYPE_P (type))
7942 {
7943 *punsignedp = POINTERS_EXTEND_UNSIGNED;
7944 return word_mode;
7945 }
7946 return default_promote_function_mode (type, mode, punsignedp, fntype,
7947 for_return);
7948 }
7949
7950 /* Return true if a structure, union or array with MODE containing FIELD
7951 should be accessed using BLKmode. */
7952
7953 static bool
7954 ix86_member_type_forces_blk (const_tree field, enum machine_mode mode)
7955 {
7956 /* Union with XFmode must be in BLKmode. */
7957 return (mode == XFmode
7958 && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
7959 || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
7960 }
7961
7962 rtx
7963 ix86_libcall_value (enum machine_mode mode)
7964 {
7965 return ix86_function_value_1 (NULL, NULL, mode, mode);
7966 }
7967
7968 /* Return true iff type is returned in memory. */
7969
7970 static bool ATTRIBUTE_UNUSED
7971 return_in_memory_32 (const_tree type, enum machine_mode mode)
7972 {
7973 HOST_WIDE_INT size;
7974
7975 if (mode == BLKmode)
7976 return true;
7977
7978 size = int_size_in_bytes (type);
7979
7980 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
7981 return false;
7982
7983 if (VECTOR_MODE_P (mode) || mode == TImode)
7984 {
7985 /* User-created vectors small enough to fit in EAX. */
7986 if (size < 8)
7987 return false;
7988
 7989 /* MMX/3dNow values are returned in MM0,
 7990 except when it doesn't exist or the ABI prescribes otherwise. */
7991 if (size == 8)
7992 return !TARGET_MMX || TARGET_VECT8_RETURNS;
7993
7994 /* SSE values are returned in XMM0, except when it doesn't exist. */
7995 if (size == 16)
7996 return !TARGET_SSE;
7997
7998 /* AVX values are returned in YMM0, except when it doesn't exist. */
7999 if (size == 32)
8000 return !TARGET_AVX;
8001
8002 /* AVX512F values are returned in ZMM0, except when it doesn't exist. */
8003 if (size == 64)
8004 return !TARGET_AVX512F;
8005 }
8006
8007 if (mode == XFmode)
8008 return false;
8009
8010 if (size > 12)
8011 return true;
8012
8013 /* OImode shouldn't be used directly. */
8014 gcc_assert (mode != OImode);
8015
8016 return false;
8017 }
8018
8019 static bool ATTRIBUTE_UNUSED
8020 return_in_memory_64 (const_tree type, enum machine_mode mode)
8021 {
8022 int needed_intregs, needed_sseregs;
8023 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
8024 }
8025
8026 static bool ATTRIBUTE_UNUSED
8027 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
8028 {
8029 HOST_WIDE_INT size = int_size_in_bytes (type);
8030
8031 /* __m128 is returned in xmm0. */
8032 if ((!type || VECTOR_INTEGER_TYPE_P (type) || INTEGRAL_TYPE_P (type)
8033 || VECTOR_FLOAT_TYPE_P (type))
8034 && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
8035 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
8036 return false;
8037
 8038 /* Otherwise, the size must be exactly 1, 2, 4, or 8 bytes. */
8039 return size != 1 && size != 2 && size != 4 && size != 8;
8040 }
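
/* Illustrative sketch of the MS ABI rule above: scalar and vector values
   of 1, 2, 4, 8 or 16 bytes come back in a register (16-byte vectors in
   %xmm0), while anything else -- e.g. a 12-byte struct or a complex
   mode -- is returned in memory through a hidden pointer.  */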
8041
8042 static bool
8043 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
8044 {
8045 #ifdef SUBTARGET_RETURN_IN_MEMORY
8046 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
8047 #else
8048 const enum machine_mode mode = type_natural_mode (type, NULL, true);
8049
8050 if (TARGET_64BIT)
8051 {
8052 if (ix86_function_type_abi (fntype) == MS_ABI)
8053 return return_in_memory_ms_64 (type, mode);
8054 else
8055 return return_in_memory_64 (type, mode);
8056 }
8057 else
8058 return return_in_memory_32 (type, mode);
8059 #endif
8060 }
8061
8062 \f
8063 /* Create the va_list data type. */
8064
 8065 /* Returns the calling-convention-specific va_list data type.
 8066 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
8067
8068 static tree
8069 ix86_build_builtin_va_list_abi (enum calling_abi abi)
8070 {
8071 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
8072
8073 /* For i386 we use plain pointer to argument area. */
8074 if (!TARGET_64BIT || abi == MS_ABI)
8075 return build_pointer_type (char_type_node);
8076
8077 record = lang_hooks.types.make_type (RECORD_TYPE);
8078 type_decl = build_decl (BUILTINS_LOCATION,
8079 TYPE_DECL, get_identifier ("__va_list_tag"), record);
8080
8081 f_gpr = build_decl (BUILTINS_LOCATION,
8082 FIELD_DECL, get_identifier ("gp_offset"),
8083 unsigned_type_node);
8084 f_fpr = build_decl (BUILTINS_LOCATION,
8085 FIELD_DECL, get_identifier ("fp_offset"),
8086 unsigned_type_node);
8087 f_ovf = build_decl (BUILTINS_LOCATION,
8088 FIELD_DECL, get_identifier ("overflow_arg_area"),
8089 ptr_type_node);
8090 f_sav = build_decl (BUILTINS_LOCATION,
8091 FIELD_DECL, get_identifier ("reg_save_area"),
8092 ptr_type_node);
8093
8094 va_list_gpr_counter_field = f_gpr;
8095 va_list_fpr_counter_field = f_fpr;
8096
8097 DECL_FIELD_CONTEXT (f_gpr) = record;
8098 DECL_FIELD_CONTEXT (f_fpr) = record;
8099 DECL_FIELD_CONTEXT (f_ovf) = record;
8100 DECL_FIELD_CONTEXT (f_sav) = record;
8101
8102 TYPE_STUB_DECL (record) = type_decl;
8103 TYPE_NAME (record) = type_decl;
8104 TYPE_FIELDS (record) = f_gpr;
8105 DECL_CHAIN (f_gpr) = f_fpr;
8106 DECL_CHAIN (f_fpr) = f_ovf;
8107 DECL_CHAIN (f_ovf) = f_sav;
8108
8109 layout_type (record);
8110
8111 /* The correct type is an array type of one element. */
8112 return build_array_type (record, build_index_type (size_zero_node));
8113 }
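
/* Illustrative sketch: the SysV va_list record built above corresponds to

     typedef struct {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } __va_list_tag[1];

   gp_offset and fp_offset index into reg_save_area for integer and SSE
   registers respectively, overflow_arg_area points at stack-passed
   arguments, and the one-element array wrapper makes the type decay to a
   pointer when passed around.  */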
8114
8115 /* Setup the builtin va_list data type and for 64-bit the additional
8116 calling convention specific va_list data types. */
8117
8118 static tree
8119 ix86_build_builtin_va_list (void)
8120 {
8121 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
8122
8123 /* Initialize abi specific va_list builtin types. */
8124 if (TARGET_64BIT)
8125 {
8126 tree t;
8127 if (ix86_abi == MS_ABI)
8128 {
8129 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
8130 if (TREE_CODE (t) != RECORD_TYPE)
8131 t = build_variant_type_copy (t);
8132 sysv_va_list_type_node = t;
8133 }
8134 else
8135 {
8136 t = ret;
8137 if (TREE_CODE (t) != RECORD_TYPE)
8138 t = build_variant_type_copy (t);
8139 sysv_va_list_type_node = t;
8140 }
8141 if (ix86_abi != MS_ABI)
8142 {
8143 t = ix86_build_builtin_va_list_abi (MS_ABI);
8144 if (TREE_CODE (t) != RECORD_TYPE)
8145 t = build_variant_type_copy (t);
8146 ms_va_list_type_node = t;
8147 }
8148 else
8149 {
8150 t = ret;
8151 if (TREE_CODE (t) != RECORD_TYPE)
8152 t = build_variant_type_copy (t);
8153 ms_va_list_type_node = t;
8154 }
8155 }
8156
8157 return ret;
8158 }
8159
8160 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
8161
8162 static void
8163 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
8164 {
8165 rtx save_area, mem;
8166 alias_set_type set;
8167 int i, max;
8168
8169 /* GPR size of varargs save area. */
8170 if (cfun->va_list_gpr_size)
8171 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
8172 else
8173 ix86_varargs_gpr_size = 0;
8174
8175 /* FPR size of varargs save area. We don't need it if we don't pass
8176 anything in SSE registers. */
8177 if (TARGET_SSE && cfun->va_list_fpr_size)
8178 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
8179 else
8180 ix86_varargs_fpr_size = 0;
8181
8182 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
8183 return;
8184
8185 save_area = frame_pointer_rtx;
8186 set = get_varargs_alias_set ();
8187
8188 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
8189 if (max > X86_64_REGPARM_MAX)
8190 max = X86_64_REGPARM_MAX;
8191
8192 for (i = cum->regno; i < max; i++)
8193 {
8194 mem = gen_rtx_MEM (word_mode,
8195 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
8196 MEM_NOTRAP_P (mem) = 1;
8197 set_mem_alias_set (mem, set);
8198 emit_move_insn (mem,
8199 gen_rtx_REG (word_mode,
8200 x86_64_int_parameter_registers[i]));
8201 }
8202
8203 if (ix86_varargs_fpr_size)
8204 {
8205 enum machine_mode smode;
8206 rtx label, test;
8207
 8208 /* Now emit code to save SSE registers. The AX parameter contains the
 8209 number of SSE parameter registers used to call this function, though
 8210 all we actually check here is the zero/non-zero status. */
8211
8212 label = gen_label_rtx ();
8213 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
8214 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
8215 label));
8216
8217 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
8218 we used movdqa (i.e. TImode) instead? Perhaps even better would
8219 be if we could determine the real mode of the data, via a hook
8220 into pass_stdarg. Ignore all that for now. */
8221 smode = V4SFmode;
8222 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
8223 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
8224
8225 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
8226 if (max > X86_64_SSE_REGPARM_MAX)
8227 max = X86_64_SSE_REGPARM_MAX;
8228
8229 for (i = cum->sse_regno; i < max; ++i)
8230 {
8231 mem = plus_constant (Pmode, save_area,
8232 i * 16 + ix86_varargs_gpr_size);
8233 mem = gen_rtx_MEM (smode, mem);
8234 MEM_NOTRAP_P (mem) = 1;
8235 set_mem_alias_set (mem, set);
8236 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
8237
8238 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
8239 }
8240
8241 emit_label (label);
8242 }
8243 }
8244
8245 static void
8246 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
8247 {
8248 alias_set_type set = get_varargs_alias_set ();
8249 int i;
8250
8251 /* Reset to zero, as there might have been a sysv va_arg used
8252 before. */
8253 ix86_varargs_gpr_size = 0;
8254 ix86_varargs_fpr_size = 0;
8255
8256 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
8257 {
8258 rtx reg, mem;
8259
8260 mem = gen_rtx_MEM (Pmode,
8261 plus_constant (Pmode, virtual_incoming_args_rtx,
8262 i * UNITS_PER_WORD));
8263 MEM_NOTRAP_P (mem) = 1;
8264 set_mem_alias_set (mem, set);
8265
8266 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
8267 emit_move_insn (mem, reg);
8268 }
8269 }
8270
8271 static void
8272 ix86_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
8273 tree type, int *pretend_size ATTRIBUTE_UNUSED,
8274 int no_rtl)
8275 {
8276 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8277 CUMULATIVE_ARGS next_cum;
8278 tree fntype;
8279
8280 /* This argument doesn't appear to be used anymore, which is good,
8281 because the old code here didn't suppress rtl generation. */
8282 gcc_assert (!no_rtl);
8283
8284 if (!TARGET_64BIT)
8285 return;
8286
8287 fntype = TREE_TYPE (current_function_decl);
8288
8289 /* For varargs, we do not want to skip the dummy va_dcl argument.
8290 For stdargs, we do want to skip the last named argument. */
8291 next_cum = *cum;
8292 if (stdarg_p (fntype))
8293 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
8294 true);
8295
8296 if (cum->call_abi == MS_ABI)
8297 setup_incoming_varargs_ms_64 (&next_cum);
8298 else
8299 setup_incoming_varargs_64 (&next_cum);
8300 }
8301
8302 /* Return true if TYPE is a va_list of kind char *. */
8303
8304 static bool
8305 is_va_list_char_pointer (tree type)
8306 {
8307 tree canonic;
8308
8309 /* For 32-bit it is always true. */
8310 if (!TARGET_64BIT)
8311 return true;
8312 canonic = ix86_canonical_va_list_type (type);
8313 return (canonic == ms_va_list_type_node
8314 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
8315 }
8316
8317 /* Implement va_start. */
8318
8319 static void
8320 ix86_va_start (tree valist, rtx nextarg)
8321 {
8322 HOST_WIDE_INT words, n_gpr, n_fpr;
8323 tree f_gpr, f_fpr, f_ovf, f_sav;
8324 tree gpr, fpr, ovf, sav, t;
8325 tree type;
8326 rtx ovf_rtx;
8327
8328 if (flag_split_stack
8329 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8330 {
8331 unsigned int scratch_regno;
8332
8333 /* When we are splitting the stack, we can't refer to the stack
8334 arguments using internal_arg_pointer, because they may be on
8335 the old stack. The split stack prologue will arrange to
8336 leave a pointer to the old stack arguments in a scratch
8337 register, which we here copy to a pseudo-register. The split
8338 stack prologue can't set the pseudo-register directly because
8339 it (the prologue) runs before any registers have been saved. */
8340
8341 scratch_regno = split_stack_prologue_scratch_regno ();
8342 if (scratch_regno != INVALID_REGNUM)
8343 {
8344 rtx reg, seq;
8345
8346 reg = gen_reg_rtx (Pmode);
8347 cfun->machine->split_stack_varargs_pointer = reg;
8348
8349 start_sequence ();
8350 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
8351 seq = get_insns ();
8352 end_sequence ();
8353
8354 push_topmost_sequence ();
8355 emit_insn_after (seq, entry_of_function ());
8356 pop_topmost_sequence ();
8357 }
8358 }
8359
8360 /* Only 64bit target needs something special. */
8361 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8362 {
8363 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8364 std_expand_builtin_va_start (valist, nextarg);
8365 else
8366 {
8367 rtx va_r, next;
8368
8369 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
8370 next = expand_binop (ptr_mode, add_optab,
8371 cfun->machine->split_stack_varargs_pointer,
8372 crtl->args.arg_offset_rtx,
8373 NULL_RTX, 0, OPTAB_LIB_WIDEN);
8374 convert_move (va_r, next, 0);
8375 }
8376 return;
8377 }
8378
8379 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8380 f_fpr = DECL_CHAIN (f_gpr);
8381 f_ovf = DECL_CHAIN (f_fpr);
8382 f_sav = DECL_CHAIN (f_ovf);
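 /* The four fields of the SysV va_list record, in order: the byte offset
    of the next unused GP register slot, the byte offset of the next
    unused FP (SSE) register slot, the overflow (stack) argument pointer,
    and the register save area pointer.  They are initialized below from
    the counts of named arguments already consumed.  */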
8383
8384 valist = build_simple_mem_ref (valist);
8385 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
8386 /* The following should be folded into the MEM_REF offset. */
8387 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
8388 f_gpr, NULL_TREE);
8389 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
8390 f_fpr, NULL_TREE);
8391 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
8392 f_ovf, NULL_TREE);
8393 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
8394 f_sav, NULL_TREE);
8395
8396 /* Count number of gp and fp argument registers used. */
8397 words = crtl->args.info.words;
8398 n_gpr = crtl->args.info.regno;
8399 n_fpr = crtl->args.info.sse_regno;
8400
8401 if (cfun->va_list_gpr_size)
8402 {
8403 type = TREE_TYPE (gpr);
8404 t = build2 (MODIFY_EXPR, type,
8405 gpr, build_int_cst (type, n_gpr * 8));
8406 TREE_SIDE_EFFECTS (t) = 1;
8407 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8408 }
8409
8410 if (TARGET_SSE && cfun->va_list_fpr_size)
8411 {
8412 type = TREE_TYPE (fpr);
8413 t = build2 (MODIFY_EXPR, type, fpr,
8414 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
8415 TREE_SIDE_EFFECTS (t) = 1;
8416 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8417 }
8418
8419 /* Find the overflow area. */
8420 type = TREE_TYPE (ovf);
8421 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8422 ovf_rtx = crtl->args.internal_arg_pointer;
8423 else
8424 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
8425 t = make_tree (type, ovf_rtx);
8426 if (words != 0)
8427 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
8428 t = build2 (MODIFY_EXPR, type, ovf, t);
8429 TREE_SIDE_EFFECTS (t) = 1;
8430 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8431
8432 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
8433 {
8434 /* Find the register save area.
8435 The function prologue saves it right above the stack frame. */
8436 type = TREE_TYPE (sav);
8437 t = make_tree (type, frame_pointer_rtx);
8438 if (!ix86_varargs_gpr_size)
8439 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
8440 t = build2 (MODIFY_EXPR, type, sav, t);
8441 TREE_SIDE_EFFECTS (t) = 1;
8442 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8443 }
8444 }
8445
8446 /* Implement va_arg. */
8447
8448 static tree
8449 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
8450 gimple_seq *post_p)
8451 {
8452 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
8453 tree f_gpr, f_fpr, f_ovf, f_sav;
8454 tree gpr, fpr, ovf, sav, t;
8455 int size, rsize;
8456 tree lab_false, lab_over = NULL_TREE;
8457 tree addr, t2;
8458 rtx container;
8459 int indirect_p = 0;
8460 tree ptrtype;
8461 enum machine_mode nat_mode;
8462 unsigned int arg_boundary;
8463
8464 /* Only 64bit target needs something special. */
8465 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8466 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
8467
8468 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8469 f_fpr = DECL_CHAIN (f_gpr);
8470 f_ovf = DECL_CHAIN (f_fpr);
8471 f_sav = DECL_CHAIN (f_ovf);
8472
8473 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
8474 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
8475 valist = build_va_arg_indirect_ref (valist);
8476 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
8477 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
8478 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
8479
8480 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
8481 if (indirect_p)
8482 type = build_pointer_type (type);
8483 size = int_size_in_bytes (type);
8484 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
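 /* RSIZE is SIZE rounded up to a whole number of words; it is the amount
    by which the overflow area pointer is advanced below.  */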
8485
8486 nat_mode = type_natural_mode (type, NULL, false);
8487 switch (nat_mode)
8488 {
8489 case V8SFmode:
8490 case V8SImode:
8491 case V32QImode:
8492 case V16HImode:
8493 case V4DFmode:
8494 case V4DImode:
8495 case V16SFmode:
8496 case V16SImode:
8497 case V64QImode:
8498 case V32HImode:
8499 case V8DFmode:
8500 case V8DImode:
8501 /* Unnamed 256-bit and 512-bit vector mode parameters are passed on the stack. */
8502 if (!TARGET_64BIT_MS_ABI)
8503 {
8504 container = NULL;
8505 break;
8506 }
8507
8508 default:
8509 container = construct_container (nat_mode, TYPE_MODE (type),
8510 type, 0, X86_64_REGPARM_MAX,
8511 X86_64_SSE_REGPARM_MAX, intreg,
8512 0);
8513 break;
8514 }
8515
8516 /* Pull the value out of the saved registers. */
8517
8518 addr = create_tmp_var (ptr_type_node, "addr");
8519
8520 if (container)
8521 {
8522 int needed_intregs, needed_sseregs;
8523 bool need_temp;
8524 tree int_addr, sse_addr;
8525
8526 lab_false = create_artificial_label (UNKNOWN_LOCATION);
8527 lab_over = create_artificial_label (UNKNOWN_LOCATION);
8528
8529 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
8530
8531 need_temp = (!REG_P (container)
8532 && ((needed_intregs && TYPE_ALIGN (type) > 64)
8533 || TYPE_ALIGN (type) > 128));
8534
8535 /* In case we are passing a structure, verify that it is a consecutive
8536 block in the register save area. If not, we need to do moves. */
8537 if (!need_temp && !REG_P (container))
8538 {
8539 /* Verify that all registers are strictly consecutive */
8540 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
8541 {
8542 int i;
8543
8544 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
8545 {
8546 rtx slot = XVECEXP (container, 0, i);
8547 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
8548 || INTVAL (XEXP (slot, 1)) != i * 16)
8549 need_temp = 1;
8550 }
8551 }
8552 else
8553 {
8554 int i;
8555
8556 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
8557 {
8558 rtx slot = XVECEXP (container, 0, i);
8559 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
8560 || INTVAL (XEXP (slot, 1)) != i * 8)
8561 need_temp = 1;
8562 }
8563 }
8564 }
8565 if (!need_temp)
8566 {
8567 int_addr = addr;
8568 sse_addr = addr;
8569 }
8570 else
8571 {
8572 int_addr = create_tmp_var (ptr_type_node, "int_addr");
8573 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
8574 }
8575
8576 /* First ensure that we fit completely in registers. */
8577 if (needed_intregs)
8578 {
8579 t = build_int_cst (TREE_TYPE (gpr),
8580 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
8581 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
8582 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
8583 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
8584 gimplify_and_add (t, pre_p);
8585 }
8586 if (needed_sseregs)
8587 {
8588 t = build_int_cst (TREE_TYPE (fpr),
8589 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
8590 + X86_64_REGPARM_MAX * 8);
8591 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
8592 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
8593 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
8594 gimplify_and_add (t, pre_p);
8595 }
8596
8597 /* Compute index to start of area used for integer regs. */
8598 if (needed_intregs)
8599 {
8600 /* int_addr = gpr + sav; */
8601 t = fold_build_pointer_plus (sav, gpr);
8602 gimplify_assign (int_addr, t, pre_p);
8603 }
8604 if (needed_sseregs)
8605 {
8606 /* sse_addr = fpr + sav; */
8607 t = fold_build_pointer_plus (sav, fpr);
8608 gimplify_assign (sse_addr, t, pre_p);
8609 }
8610 if (need_temp)
8611 {
8612 int i, prev_size = 0;
8613 tree temp = create_tmp_var (type, "va_arg_tmp");
8614
8615 /* addr = &temp; */
8616 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
8617 gimplify_assign (addr, t, pre_p);
8618
8619 for (i = 0; i < XVECLEN (container, 0); i++)
8620 {
8621 rtx slot = XVECEXP (container, 0, i);
8622 rtx reg = XEXP (slot, 0);
8623 enum machine_mode mode = GET_MODE (reg);
8624 tree piece_type;
8625 tree addr_type;
8626 tree daddr_type;
8627 tree src_addr, src;
8628 int src_offset;
8629 tree dest_addr, dest;
8630 int cur_size = GET_MODE_SIZE (mode);
8631
8632 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
8633 prev_size = INTVAL (XEXP (slot, 1));
8634 if (prev_size + cur_size > size)
8635 {
8636 cur_size = size - prev_size;
8637 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
8638 if (mode == BLKmode)
8639 mode = QImode;
8640 }
8641 piece_type = lang_hooks.types.type_for_mode (mode, 1);
8642 if (mode == GET_MODE (reg))
8643 addr_type = build_pointer_type (piece_type);
8644 else
8645 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8646 true);
8647 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8648 true);
8649
8650 if (SSE_REGNO_P (REGNO (reg)))
8651 {
8652 src_addr = sse_addr;
8653 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
8654 }
8655 else
8656 {
8657 src_addr = int_addr;
8658 src_offset = REGNO (reg) * 8;
8659 }
8660 src_addr = fold_convert (addr_type, src_addr);
8661 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
8662
8663 dest_addr = fold_convert (daddr_type, addr);
8664 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
8665 if (cur_size == GET_MODE_SIZE (mode))
8666 {
8667 src = build_va_arg_indirect_ref (src_addr);
8668 dest = build_va_arg_indirect_ref (dest_addr);
8669
8670 gimplify_assign (dest, src, pre_p);
8671 }
8672 else
8673 {
8674 tree copy
8675 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
8676 3, dest_addr, src_addr,
8677 size_int (cur_size));
8678 gimplify_and_add (copy, pre_p);
8679 }
8680 prev_size += cur_size;
8681 }
8682 }
8683
8684 if (needed_intregs)
8685 {
8686 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
8687 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
8688 gimplify_assign (gpr, t, pre_p);
8689 }
8690
8691 if (needed_sseregs)
8692 {
8693 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
8694 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
8695 gimplify_assign (fpr, t, pre_p);
8696 }
8697
8698 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
8699
8700 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
8701 }
8702
8703 /* ... otherwise out of the overflow area. */
8704
8705 /* When we align parameter on stack for caller, if the parameter
8706 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
8707 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. We will match callee
8708 here with caller. */
8709 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
8710 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
8711 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
8712
8713 /* Care for on-stack alignment if needed. */
8714 if (arg_boundary <= 64 || size == 0)
8715 t = ovf;
8716 else
8717 {
8718 HOST_WIDE_INT align = arg_boundary / 8;
8719 t = fold_build_pointer_plus_hwi (ovf, align - 1);
8720 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
8721 build_int_cst (TREE_TYPE (t), -align));
8722 }
8723
8724 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
8725 gimplify_assign (addr, t, pre_p);
8726
8727 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
8728 gimplify_assign (unshare_expr (ovf), t, pre_p);
8729
8730 if (container)
8731 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
8732
8733 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
8734 addr = fold_convert (ptrtype, addr);
8735
8736 if (indirect_p)
8737 addr = build_va_arg_indirect_ref (addr);
8738 return build_va_arg_indirect_ref (addr);
8739 }
8740 \f
8741 /* Return true if OPNUM's MEM should be matched
8742 in movabs* patterns. */
8743
8744 bool
8745 ix86_check_movabs (rtx insn, int opnum)
8746 {
8747 rtx set, mem;
8748
8749 set = PATTERN (insn);
8750 if (GET_CODE (set) == PARALLEL)
8751 set = XVECEXP (set, 0, 0);
8752 gcc_assert (GET_CODE (set) == SET);
8753 mem = XEXP (set, opnum);
8754 while (GET_CODE (mem) == SUBREG)
8755 mem = SUBREG_REG (mem);
8756 gcc_assert (MEM_P (mem));
8757 return volatile_ok || !MEM_VOLATILE_P (mem);
8758 }
8759 \f
8760 /* Initialize the table of extra 80387 mathematical constants. */
8761
8762 static void
8763 init_ext_80387_constants (void)
8764 {
8765 static const char * cst[5] =
8766 {
8767 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
8768 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
8769 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
8770 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
8771 "3.1415926535897932385128089594061862044", /* 4: fldpi */
8772 };
8773 int i;
8774
8775 for (i = 0; i < 5; i++)
8776 {
8777 real_from_string (&ext_80387_constants_table[i], cst[i]);
8778 /* Ensure each constant is rounded to XFmode precision. */
8779 real_convert (&ext_80387_constants_table[i],
8780 XFmode, &ext_80387_constants_table[i]);
8781 }
8782
8783 ext_80387_constants_init = 1;
8784 }
8785
8786 /* Return non-zero if the constant is something that
8787 can be loaded with a special instruction. */
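 /* The return value encodes the instruction: 1 for fldz, 2 for fld1,
    3 through 7 for the ext_80387_constants_table entries (fldlg2, fldln2,
    fldl2e, fldl2t, fldpi), and 8 and 9 for -0.0 and -1.0, which are loaded
    as fldz;fchs and fld1;fchs.  0 means no special instruction applies,
    and -1 means X is not an 80387 constant at all.  */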
8788
8789 int
8790 standard_80387_constant_p (rtx x)
8791 {
8792 enum machine_mode mode = GET_MODE (x);
8793
8794 REAL_VALUE_TYPE r;
8795
8796 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
8797 return -1;
8798
8799 if (x == CONST0_RTX (mode))
8800 return 1;
8801 if (x == CONST1_RTX (mode))
8802 return 2;
8803
8804 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8805
8806 /* For XFmode constants, try to find a special 80387 instruction when
8807 optimizing for size or on those CPUs that benefit from them. */
8808 if (mode == XFmode
8809 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
8810 {
8811 int i;
8812
8813 if (! ext_80387_constants_init)
8814 init_ext_80387_constants ();
8815
8816 for (i = 0; i < 5; i++)
8817 if (real_identical (&r, &ext_80387_constants_table[i]))
8818 return i + 3;
8819 }
8820
8821 /* A load of the constant -0.0 or -1.0 will be split into an
8822 fldz;fchs or fld1;fchs sequence. */
8823 if (real_isnegzero (&r))
8824 return 8;
8825 if (real_identical (&r, &dconstm1))
8826 return 9;
8827
8828 return 0;
8829 }
8830
8831 /* Return the opcode of the special instruction to be used to load
8832 the constant X. */
8833
8834 const char *
8835 standard_80387_constant_opcode (rtx x)
8836 {
8837 switch (standard_80387_constant_p (x))
8838 {
8839 case 1:
8840 return "fldz";
8841 case 2:
8842 return "fld1";
8843 case 3:
8844 return "fldlg2";
8845 case 4:
8846 return "fldln2";
8847 case 5:
8848 return "fldl2e";
8849 case 6:
8850 return "fldl2t";
8851 case 7:
8852 return "fldpi";
8853 case 8:
8854 case 9:
8855 return "#";
8856 default:
8857 gcc_unreachable ();
8858 }
8859 }
8860
8861 /* Return the CONST_DOUBLE representing the 80387 constant that is
8862 loaded by the specified special instruction. The argument IDX
8863 matches the return value from standard_80387_constant_p. */
8864
8865 rtx
8866 standard_80387_constant_rtx (int idx)
8867 {
8868 int i;
8869
8870 if (! ext_80387_constants_init)
8871 init_ext_80387_constants ();
8872
8873 switch (idx)
8874 {
8875 case 3:
8876 case 4:
8877 case 5:
8878 case 6:
8879 case 7:
8880 i = idx - 3;
8881 break;
8882
8883 default:
8884 gcc_unreachable ();
8885 }
8886
8887 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
8888 XFmode);
8889 }
8890
8891 /* Return 1 if X is all zeros and 2 if X is all ones,
8892 in a supported SSE/AVX vector mode. */
8893
8894 int
8895 standard_sse_constant_p (rtx x)
8896 {
8897 enum machine_mode mode = GET_MODE (x);
8898
8899 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
8900 return 1;
8901 if (vector_all_ones_operand (x, mode))
8902 switch (mode)
8903 {
8904 case V16QImode:
8905 case V8HImode:
8906 case V4SImode:
8907 case V2DImode:
8908 if (TARGET_SSE2)
8909 return 2;
8910 case V32QImode:
8911 case V16HImode:
8912 case V8SImode:
8913 case V4DImode:
8914 if (TARGET_AVX2)
8915 return 2;
8916 case V64QImode:
8917 case V32HImode:
8918 case V16SImode:
8919 case V8DImode:
8920 if (TARGET_AVX512F)
8921 return 2;
8922 default:
8923 break;
8924 }
8925
8926 return 0;
8927 }
8928
8929 /* Return the opcode of the special instruction to be used to load
8930 the constant X. */
8931
8932 const char *
8933 standard_sse_constant_opcode (rtx insn, rtx x)
8934 {
8935 switch (standard_sse_constant_p (x))
8936 {
8937 case 1:
8938 switch (get_attr_mode (insn))
8939 {
8940 case MODE_XI:
8941 case MODE_V16SF:
8942 return "vpxord\t%g0, %g0, %g0";
8943 case MODE_V8DF:
8944 return "vpxorq\t%g0, %g0, %g0";
8945 case MODE_TI:
8946 return "%vpxor\t%0, %d0";
8947 case MODE_V2DF:
8948 return "%vxorpd\t%0, %d0";
8949 case MODE_V4SF:
8950 return "%vxorps\t%0, %d0";
8951
8952 case MODE_OI:
8953 return "vpxor\t%x0, %x0, %x0";
8954 case MODE_V4DF:
8955 return "vxorpd\t%x0, %x0, %x0";
8956 case MODE_V8SF:
8957 return "vxorps\t%x0, %x0, %x0";
8958
8959 default:
8960 break;
8961 }
8962
8963 case 2:
8964 if (get_attr_mode (insn) == MODE_XI
8965 || get_attr_mode (insn) == MODE_V8DF
8966 || get_attr_mode (insn) == MODE_V16SF)
8967 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
8968 if (TARGET_AVX)
8969 return "vpcmpeqd\t%0, %0, %0";
8970 else
8971 return "pcmpeqd\t%0, %0";
8972
8973 default:
8974 break;
8975 }
8976 gcc_unreachable ();
8977 }
8978
8979 /* Return true if OP contains a symbol reference. */
8980
8981 bool
8982 symbolic_reference_mentioned_p (rtx op)
8983 {
8984 const char *fmt;
8985 int i;
8986
8987 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
8988 return true;
8989
8990 fmt = GET_RTX_FORMAT (GET_CODE (op));
8991 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
8992 {
8993 if (fmt[i] == 'E')
8994 {
8995 int j;
8996
8997 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
8998 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
8999 return true;
9000 }
9001
9002 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
9003 return true;
9004 }
9005
9006 return false;
9007 }
9008
9009 /* Return true if it is appropriate to emit `ret' instructions in the
9010 body of a function. Do this only if the epilogue is simple, needing a
9011 couple of insns. Prior to reloading, we can't tell how many registers
9012 must be saved, so return false then. Return false if there is no frame
9013 marker to de-allocate. */
9014
9015 bool
9016 ix86_can_use_return_insn_p (void)
9017 {
9018 struct ix86_frame frame;
9019
9020 if (! reload_completed || frame_pointer_needed)
9021 return 0;
9022
9023 /* Don't allow more than 32k pop, since that's all we can do
9024 with one instruction. */
9025 if (crtl->args.pops_args && crtl->args.size >= 32768)
9026 return 0;
9027
9028 ix86_compute_frame_layout (&frame);
9029 return (frame.stack_pointer_offset == UNITS_PER_WORD
9030 && (frame.nregs + frame.nsseregs) == 0);
9031 }
9032 \f
9033 /* Value should be nonzero if functions must have frame pointers.
9034 Zero means the frame pointer need not be set up (and parms may
9035 be accessed via the stack pointer) in functions that seem suitable. */
9036
9037 static bool
9038 ix86_frame_pointer_required (void)
9039 {
9040 /* If we accessed previous frames, then the generated code expects
9041 to be able to access the saved ebp value in our frame. */
9042 if (cfun->machine->accesses_prev_frame)
9043 return true;
9044
9045 /* Several x86 OSes need a frame pointer for other reasons,
9046 usually pertaining to setjmp. */
9047 if (SUBTARGET_FRAME_POINTER_REQUIRED)
9048 return true;
9049
9050 /* For older 32-bit runtimes setjmp requires valid frame-pointer. */
9051 if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
9052 return true;
9053
9054 /* For Win64 SEH, very large frames need a frame pointer, as the maximum
9055 stack allocation is 4GB. */
9056 if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
9057 return true;
9058
9059 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
9060 turns off the frame pointer by default. Turn it back on now if
9061 we've not got a leaf function. */
9062 if (TARGET_OMIT_LEAF_FRAME_POINTER
9063 && (!crtl->is_leaf
9064 || ix86_current_function_calls_tls_descriptor))
9065 return true;
9066
9067 if (crtl->profile && !flag_fentry)
9068 return true;
9069
9070 return false;
9071 }
9072
9073 /* Record that the current function accesses previous call frames. */
9074
9075 void
9076 ix86_setup_frame_addresses (void)
9077 {
9078 cfun->machine->accesses_prev_frame = 1;
9079 }
9080 \f
9081 #ifndef USE_HIDDEN_LINKONCE
9082 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
9083 # define USE_HIDDEN_LINKONCE 1
9084 # else
9085 # define USE_HIDDEN_LINKONCE 0
9086 # endif
9087 #endif
9088
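 /* Bitmask of the registers for which a PC thunk still has to be emitted
    by ix86_code_end; bit N is set by output_set_got when it emits a call
    to the thunk for hard register N.  */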
9089 static int pic_labels_used;
9090
9091 /* Fills in the label name that should be used for a pc thunk for
9092 the given register. */
9093
9094 static void
9095 get_pc_thunk_name (char name[32], unsigned int regno)
9096 {
9097 gcc_assert (!TARGET_64BIT);
9098
9099 if (USE_HIDDEN_LINKONCE)
9100 sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
9101 else
9102 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
9103 }
9104
9105
9106 /* This function generates code for -fpic that loads %ebx with
9107 the return address of the caller and then returns. */
9108
9109 static void
9110 ix86_code_end (void)
9111 {
9112 rtx xops[2];
9113 int regno;
9114
9115 for (regno = AX_REG; regno <= SP_REG; regno++)
9116 {
9117 char name[32];
9118 tree decl;
9119
9120 if (!(pic_labels_used & (1 << regno)))
9121 continue;
9122
9123 get_pc_thunk_name (name, regno);
9124
9125 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
9126 get_identifier (name),
9127 build_function_type_list (void_type_node, NULL_TREE));
9128 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
9129 NULL_TREE, void_type_node);
9130 TREE_PUBLIC (decl) = 1;
9131 TREE_STATIC (decl) = 1;
9132 DECL_IGNORED_P (decl) = 1;
9133
9134 #if TARGET_MACHO
9135 if (TARGET_MACHO)
9136 {
9137 switch_to_section (darwin_sections[text_coal_section]);
9138 fputs ("\t.weak_definition\t", asm_out_file);
9139 assemble_name (asm_out_file, name);
9140 fputs ("\n\t.private_extern\t", asm_out_file);
9141 assemble_name (asm_out_file, name);
9142 putc ('\n', asm_out_file);
9143 ASM_OUTPUT_LABEL (asm_out_file, name);
9144 DECL_WEAK (decl) = 1;
9145 }
9146 else
9147 #endif
9148 if (USE_HIDDEN_LINKONCE)
9149 {
9150 DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl);
9151
9152 targetm.asm_out.unique_section (decl, 0);
9153 switch_to_section (get_named_section (decl, NULL, 0));
9154
9155 targetm.asm_out.globalize_label (asm_out_file, name);
9156 fputs ("\t.hidden\t", asm_out_file);
9157 assemble_name (asm_out_file, name);
9158 putc ('\n', asm_out_file);
9159 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
9160 }
9161 else
9162 {
9163 switch_to_section (text_section);
9164 ASM_OUTPUT_LABEL (asm_out_file, name);
9165 }
9166
9167 DECL_INITIAL (decl) = make_node (BLOCK);
9168 current_function_decl = decl;
9169 init_function_start (decl);
9170 first_function_block_is_cold = false;
9171 /* Make sure unwind info is emitted for the thunk if needed. */
9172 final_start_function (emit_barrier (), asm_out_file, 1);
9173
9174 /* Pad stack IP move with 4 instructions (two NOPs count
9175 as one instruction). */
9176 if (TARGET_PAD_SHORT_FUNCTION)
9177 {
9178 int i = 8;
9179
9180 while (i--)
9181 fputs ("\tnop\n", asm_out_file);
9182 }
9183
9184 xops[0] = gen_rtx_REG (Pmode, regno);
9185 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
9186 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
9187 fputs ("\tret\n", asm_out_file);
9188 final_end_function ();
9189 init_insn_lengths ();
9190 free_after_compilation (cfun);
9191 set_cfun (NULL);
9192 current_function_decl = NULL;
9193 }
9194
9195 if (flag_split_stack)
9196 file_end_indicate_split_stack ();
9197 }
9198
9199 /* Emit code for the SET_GOT patterns. */
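 /* For the usual ELF -fpic case the code emitted below is, roughly,
	call	__x86.get_pc_thunk.reg
	add	$_GLOBAL_OFFSET_TABLE_, %reg
    i.e. a call to the per-register thunk emitted by ix86_code_end,
    followed by an add of the GOT symbol that leaves the GOT pointer
    in DEST.  */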
9200
9201 const char *
9202 output_set_got (rtx dest, rtx label)
9203 {
9204 rtx xops[3];
9205
9206 xops[0] = dest;
9207
9208 if (TARGET_VXWORKS_RTP && flag_pic)
9209 {
9210 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
9211 xops[2] = gen_rtx_MEM (Pmode,
9212 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
9213 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
9214
9215 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
9216 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
9217 an unadorned address. */
9218 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
9219 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
9220 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
9221 return "";
9222 }
9223
9224 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
9225
9226 if (!flag_pic)
9227 {
9228 if (TARGET_MACHO)
9229 /* We don't need a pic base, we're not producing pic. */
9230 gcc_unreachable ();
9231
9232 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
9233 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
9234 targetm.asm_out.internal_label (asm_out_file, "L",
9235 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
9236 }
9237 else
9238 {
9239 char name[32];
9240 get_pc_thunk_name (name, REGNO (dest));
9241 pic_labels_used |= 1 << REGNO (dest);
9242
9243 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
9244 xops[2] = gen_rtx_MEM (QImode, xops[2]);
9245 output_asm_insn ("call\t%X2", xops);
9246
9247 #if TARGET_MACHO
9248 /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
9249 This is what will be referenced by the Mach-O PIC subsystem. */
9250 if (machopic_should_output_picbase_label () || !label)
9251 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
9252
9253 /* When we are restoring the pic base at the site of a nonlocal label,
9254 and we decided to emit the pic base above, we will still output a
9255 local label used for calculating the correction offset (even though
9256 the offset will be 0 in that case). */
9257 if (label)
9258 targetm.asm_out.internal_label (asm_out_file, "L",
9259 CODE_LABEL_NUMBER (label));
9260 #endif
9261 }
9262
9263 if (!TARGET_MACHO)
9264 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
9265
9266 return "";
9267 }
9268
9269 /* Generate a "push" pattern for input ARG. */
9270
9271 static rtx
9272 gen_push (rtx arg)
9273 {
9274 struct machine_function *m = cfun->machine;
9275
9276 if (m->fs.cfa_reg == stack_pointer_rtx)
9277 m->fs.cfa_offset += UNITS_PER_WORD;
9278 m->fs.sp_offset += UNITS_PER_WORD;
9279
9280 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9281 arg = gen_rtx_REG (word_mode, REGNO (arg));
9282
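 /* The pattern built here is essentially
      (set (mem:word_mode (pre_dec:Pmode (reg sp))) arg)
    so the caller gets a plain push of ARG.  */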
9283 return gen_rtx_SET (VOIDmode,
9284 gen_rtx_MEM (word_mode,
9285 gen_rtx_PRE_DEC (Pmode,
9286 stack_pointer_rtx)),
9287 arg);
9288 }
9289
9290 /* Generate a "pop" pattern for input ARG. */
9291
9292 static rtx
9293 gen_pop (rtx arg)
9294 {
9295 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9296 arg = gen_rtx_REG (word_mode, REGNO (arg));
9297
9298 return gen_rtx_SET (VOIDmode,
9299 arg,
9300 gen_rtx_MEM (word_mode,
9301 gen_rtx_POST_INC (Pmode,
9302 stack_pointer_rtx)));
9303 }
9304
9305 /* Return >= 0 if there is an unused call-clobbered register available
9306 for the entire function. */
9307
9308 static unsigned int
9309 ix86_select_alt_pic_regnum (void)
9310 {
9311 if (crtl->is_leaf
9312 && !crtl->profile
9313 && !ix86_current_function_calls_tls_descriptor)
9314 {
9315 int i, drap;
9316 /* Can't use the same register for both PIC and DRAP. */
9317 if (crtl->drap_reg)
9318 drap = REGNO (crtl->drap_reg);
9319 else
9320 drap = -1;
9321 for (i = 2; i >= 0; --i)
9322 if (i != drap && !df_regs_ever_live_p (i))
9323 return i;
9324 }
9325
9326 return INVALID_REGNUM;
9327 }
9328
9329 /* Return TRUE if we need to save REGNO. */
9330
9331 static bool
9332 ix86_save_reg (unsigned int regno, bool maybe_eh_return)
9333 {
9334 if (pic_offset_table_rtx
9335 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
9336 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
9337 || crtl->profile
9338 || crtl->calls_eh_return
9339 || crtl->uses_const_pool
9340 || cfun->has_nonlocal_label))
9341 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
9342
9343 if (crtl->calls_eh_return && maybe_eh_return)
9344 {
9345 unsigned i;
9346 for (i = 0; ; i++)
9347 {
9348 unsigned test = EH_RETURN_DATA_REGNO (i);
9349 if (test == INVALID_REGNUM)
9350 break;
9351 if (test == regno)
9352 return true;
9353 }
9354 }
9355
9356 if (crtl->drap_reg
9357 && regno == REGNO (crtl->drap_reg)
9358 && !cfun->machine->no_drap_save_restore)
9359 return true;
9360
9361 return (df_regs_ever_live_p (regno)
9362 && !call_used_regs[regno]
9363 && !fixed_regs[regno]
9364 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
9365 }
9366
9367 /* Return the number of saved general purpose registers. */
9368
9369 static int
9370 ix86_nsaved_regs (void)
9371 {
9372 int nregs = 0;
9373 int regno;
9374
9375 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9376 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9377 nregs ++;
9378 return nregs;
9379 }
9380
9381 /* Return the number of saved SSE registers. */
9382
9383 static int
9384 ix86_nsaved_sseregs (void)
9385 {
9386 int nregs = 0;
9387 int regno;
9388
9389 if (!TARGET_64BIT_MS_ABI)
9390 return 0;
9391 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9392 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9393 nregs ++;
9394 return nregs;
9395 }
9396
9397 /* Given FROM and TO register numbers, say whether this elimination is
9398 allowed. If stack alignment is needed, we can only replace argument
9399 pointer with hard frame pointer, or replace frame pointer with stack
9400 pointer. Otherwise, frame pointer elimination is automatically
9401 handled and all other eliminations are valid. */
9402
9403 static bool
9404 ix86_can_eliminate (const int from, const int to)
9405 {
9406 if (stack_realign_fp)
9407 return ((from == ARG_POINTER_REGNUM
9408 && to == HARD_FRAME_POINTER_REGNUM)
9409 || (from == FRAME_POINTER_REGNUM
9410 && to == STACK_POINTER_REGNUM));
9411 else
9412 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
9413 }
9414
9415 /* Return the offset between two registers, one to be eliminated, and the other
9416 its replacement, at the start of a routine. */
9417
9418 HOST_WIDE_INT
9419 ix86_initial_elimination_offset (int from, int to)
9420 {
9421 struct ix86_frame frame;
9422 ix86_compute_frame_layout (&frame);
9423
9424 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
9425 return frame.hard_frame_pointer_offset;
9426 else if (from == FRAME_POINTER_REGNUM
9427 && to == HARD_FRAME_POINTER_REGNUM)
9428 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
9429 else
9430 {
9431 gcc_assert (to == STACK_POINTER_REGNUM);
9432
9433 if (from == ARG_POINTER_REGNUM)
9434 return frame.stack_pointer_offset;
9435
9436 gcc_assert (from == FRAME_POINTER_REGNUM);
9437 return frame.stack_pointer_offset - frame.frame_pointer_offset;
9438 }
9439 }
9440
9441 /* In a dynamically-aligned function, we can't know the offset from
9442 stack pointer to frame pointer, so we must ensure that setjmp
9443 eliminates fp against the hard fp (%ebp) rather than trying to
9444 index from %esp up to the top of the frame across a gap that is
9445 of unknown (at compile-time) size. */
9446 static rtx
9447 ix86_builtin_setjmp_frame_value (void)
9448 {
9449 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
9450 }
9451
9452 /* When using -fsplit-stack, the allocation routines set a field in
9453 the TCB to the bottom of the stack plus this much space, measured
9454 in bytes. */
9455
9456 #define SPLIT_STACK_AVAILABLE 256
9457
9458 /* Fill the ix86_frame structure describing the frame of the current function. */
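 /* A rough sketch of the layout computed below, going from the incoming
    return address towards lower addresses (SEH and stack realignment
    adjust some of these points):

	return address
	[pushed static chain]
	[saved frame pointer]           <- hard_frame_pointer_offset
	GP register save area           <- reg_save_offset
	SSE register save area          <- sse_reg_save_offset
	va_arg register save area
	local stack frame               <- frame_pointer_offset
	outgoing arguments
	                                <- stack_pointer_offset  */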
9459
9460 static void
9461 ix86_compute_frame_layout (struct ix86_frame *frame)
9462 {
9463 unsigned HOST_WIDE_INT stack_alignment_needed;
9464 HOST_WIDE_INT offset;
9465 unsigned HOST_WIDE_INT preferred_alignment;
9466 HOST_WIDE_INT size = get_frame_size ();
9467 HOST_WIDE_INT to_allocate;
9468
9469 frame->nregs = ix86_nsaved_regs ();
9470 frame->nsseregs = ix86_nsaved_sseregs ();
9471
9472 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
9473 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
9474
9475 /* The 64-bit MS ABI seems to require the stack alignment to always be 16,
9476 except for function prologues and leaf functions. */
9477 if ((TARGET_64BIT_MS_ABI && preferred_alignment < 16)
9478 && (!crtl->is_leaf || cfun->calls_alloca != 0
9479 || ix86_current_function_calls_tls_descriptor))
9480 {
9481 preferred_alignment = 16;
9482 stack_alignment_needed = 16;
9483 crtl->preferred_stack_boundary = 128;
9484 crtl->stack_alignment_needed = 128;
9485 }
9486
9487 gcc_assert (!size || stack_alignment_needed);
9488 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
9489 gcc_assert (preferred_alignment <= stack_alignment_needed);
9490
9491 /* For SEH we have to limit the amount of code movement into the prologue.
9492 At present we do this via a BLOCKAGE, at which point there's very little
9493 scheduling that can be done, which means that there's very little point
9494 in doing anything except PUSHs. */
9495 if (TARGET_SEH)
9496 cfun->machine->use_fast_prologue_epilogue = false;
9497
9498 /* During the reload iteration the number of registers saved can change.
9499 Recompute the value as needed. Do not recompute when the number of registers
9500 didn't change, as reload does multiple calls to the function and does not
9501 expect the decision to change within a single iteration. */
9502 else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun))
9503 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
9504 {
9505 int count = frame->nregs;
9506 struct cgraph_node *node = cgraph_get_node (current_function_decl);
9507
9508 cfun->machine->use_fast_prologue_epilogue_nregs = count;
9509
9510 /* The fast prologue uses move instead of push to save registers. This
9511 is significantly longer, but also executes faster as modern hardware
9512 can execute the moves in parallel, but can't do that for push/pop.
9513
9514 Be careful about choosing which prologue to emit: when the function takes
9515 many instructions to execute, we may use the slow version, and likewise
9516 when the function is known to be outside a hot spot (this is known with
9517 feedback only). Weight the size of the function by the number of registers
9518 to save, as it is cheap to use one or two push instructions but very
9519 slow to use many of them. */
9520 if (count)
9521 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
9522 if (node->frequency < NODE_FREQUENCY_NORMAL
9523 || (flag_branch_probabilities
9524 && node->frequency < NODE_FREQUENCY_HOT))
9525 cfun->machine->use_fast_prologue_epilogue = false;
9526 else
9527 cfun->machine->use_fast_prologue_epilogue
9528 = !expensive_function_p (count);
9529 }
9530
9531 frame->save_regs_using_mov
9532 = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
9533 /* If static stack checking is enabled and done with probes,
9534 the registers need to be saved before allocating the frame. */
9535 && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
9536
9537 /* Skip return address. */
9538 offset = UNITS_PER_WORD;
9539
9540 /* Skip pushed static chain. */
9541 if (ix86_static_chain_on_stack)
9542 offset += UNITS_PER_WORD;
9543
9544 /* Skip saved base pointer. */
9545 if (frame_pointer_needed)
9546 offset += UNITS_PER_WORD;
9547 frame->hfp_save_offset = offset;
9548
9549 /* The traditional frame pointer location is at the top of the frame. */
9550 frame->hard_frame_pointer_offset = offset;
9551
9552 /* Register save area */
9553 offset += frame->nregs * UNITS_PER_WORD;
9554 frame->reg_save_offset = offset;
9555
9556 /* On SEH target, registers are pushed just before the frame pointer
9557 location. */
9558 if (TARGET_SEH)
9559 frame->hard_frame_pointer_offset = offset;
9560
9561 /* Align and set SSE register save area. */
9562 if (frame->nsseregs)
9563 {
9564 /* The only ABI that has saved SSE registers (Win64) also has a
9565 16-byte aligned default stack, and thus we don't need to be
9566 within the re-aligned local stack frame to save them. */
9567 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
9568 offset = (offset + 16 - 1) & -16;
9569 offset += frame->nsseregs * 16;
9570 }
9571 frame->sse_reg_save_offset = offset;
9572
9573 /* The re-aligned stack starts here. Values before this point are not
9574 directly comparable with values below this point. In order to make
9575 sure that no value happens to be the same before and after, force
9576 the alignment computation below to add a non-zero value. */
9577 if (stack_realign_fp)
9578 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
9579
9580 /* Va-arg area */
9581 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
9582 offset += frame->va_arg_size;
9583
9584 /* Align start of frame for local function. */
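 /* The (X + A - 1) & -A idiom below rounds X up to the next multiple of
    the power-of-two alignment A.  */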
9585 if (stack_realign_fp
9586 || offset != frame->sse_reg_save_offset
9587 || size != 0
9588 || !crtl->is_leaf
9589 || cfun->calls_alloca
9590 || ix86_current_function_calls_tls_descriptor)
9591 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
9592
9593 /* Frame pointer points here. */
9594 frame->frame_pointer_offset = offset;
9595
9596 offset += size;
9597
9598 /* Add the outgoing arguments area. It can be skipped if we eliminated
9599 all the function calls as dead code.
9600 Skipping is however impossible when the function calls alloca. The alloca
9601 expander assumes that the last crtl->outgoing_args_size bytes
9602 of the stack frame are unused. */
9603 if (ACCUMULATE_OUTGOING_ARGS
9604 && (!crtl->is_leaf || cfun->calls_alloca
9605 || ix86_current_function_calls_tls_descriptor))
9606 {
9607 offset += crtl->outgoing_args_size;
9608 frame->outgoing_arguments_size = crtl->outgoing_args_size;
9609 }
9610 else
9611 frame->outgoing_arguments_size = 0;
9612
9613 /* Align stack boundary. Only needed if we're calling another function
9614 or using alloca. */
9615 if (!crtl->is_leaf || cfun->calls_alloca
9616 || ix86_current_function_calls_tls_descriptor)
9617 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
9618
9619 /* We've reached end of stack frame. */
9620 frame->stack_pointer_offset = offset;
9621
9622 /* Size prologue needs to allocate. */
9623 to_allocate = offset - frame->sse_reg_save_offset;
9624
9625 if ((!to_allocate && frame->nregs <= 1)
9626 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
9627 frame->save_regs_using_mov = false;
9628
9629 if (ix86_using_red_zone ()
9630 && crtl->sp_is_unchanging
9631 && crtl->is_leaf
9632 && !ix86_current_function_calls_tls_descriptor)
9633 {
9634 frame->red_zone_size = to_allocate;
9635 if (frame->save_regs_using_mov)
9636 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
9637 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
9638 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
9639 }
9640 else
9641 frame->red_zone_size = 0;
9642 frame->stack_pointer_offset -= frame->red_zone_size;
9643
9644 /* The SEH frame pointer location is near the bottom of the frame.
9645 This is enforced by the fact that the difference between the
9646 stack pointer and the frame pointer is limited to 240 bytes in
9647 the unwind data structure. */
9648 if (TARGET_SEH)
9649 {
9650 HOST_WIDE_INT diff;
9651
9652 /* If we can leave the frame pointer where it is, do so. This also
9653 returns the establisher frame for __builtin_frame_address (0). */
9654 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
9655 if (diff <= SEH_MAX_FRAME_SIZE
9656 && (diff > 240 || (diff & 15) != 0)
9657 && !crtl->accesses_prior_frames)
9658 {
9659 /* Ideally we'd determine what portion of the local stack frame
9660 (within the constraint of the lowest 240) is most heavily used.
9661 But without that complication, simply bias the frame pointer
9662 by 128 bytes so as to maximize the amount of the local stack
9663 frame that is addressable with 8-bit offsets. */
9664 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
9665 }
9666 }
9667 }
9668
9669 /* This is semi-inlined memory_address_length, but simplified
9670 since we know that we're always dealing with reg+offset, and
9671 to avoid having to create and discard all that rtl. */
9672
9673 static inline int
9674 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
9675 {
9676 int len = 4;
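 /* Assume a 4-byte displacement by default; it is shrunk below when the
    offset fits in a signed byte or can be omitted entirely.  */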
9677
9678 if (offset == 0)
9679 {
9680 /* EBP and R13 cannot be encoded without an offset. */
9681 len = (regno == BP_REG || regno == R13_REG);
9682 }
9683 else if (IN_RANGE (offset, -128, 127))
9684 len = 1;
9685
9686 /* ESP and R12 must be encoded with a SIB byte. */
9687 if (regno == SP_REG || regno == R12_REG)
9688 len++;
9689
9690 return len;
9691 }
9692
9693 /* Return an RTX that points to CFA_OFFSET within the stack frame.
9694 The valid base registers are taken from CFUN->MACHINE->FS. */
9695
9696 static rtx
9697 choose_baseaddr (HOST_WIDE_INT cfa_offset)
9698 {
9699 const struct machine_function *m = cfun->machine;
9700 rtx base_reg = NULL;
9701 HOST_WIDE_INT base_offset = 0;
9702
9703 if (m->use_fast_prologue_epilogue)
9704 {
9705 /* Choose the base register most likely to allow the most scheduling
9706 opportunities. Generally FP is valid throughout the function,
9707 while DRAP must be reloaded within the epilogue. But choose either
9708 over the SP due to increased encoding size. */
9709
9710 if (m->fs.fp_valid)
9711 {
9712 base_reg = hard_frame_pointer_rtx;
9713 base_offset = m->fs.fp_offset - cfa_offset;
9714 }
9715 else if (m->fs.drap_valid)
9716 {
9717 base_reg = crtl->drap_reg;
9718 base_offset = 0 - cfa_offset;
9719 }
9720 else if (m->fs.sp_valid)
9721 {
9722 base_reg = stack_pointer_rtx;
9723 base_offset = m->fs.sp_offset - cfa_offset;
9724 }
9725 }
9726 else
9727 {
9728 HOST_WIDE_INT toffset;
9729 int len = 16, tlen;
9730
9731 /* Choose the base register with the smallest address encoding.
9732 With a tie, choose FP > DRAP > SP. */
9733 if (m->fs.sp_valid)
9734 {
9735 base_reg = stack_pointer_rtx;
9736 base_offset = m->fs.sp_offset - cfa_offset;
9737 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
9738 }
9739 if (m->fs.drap_valid)
9740 {
9741 toffset = 0 - cfa_offset;
9742 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
9743 if (tlen <= len)
9744 {
9745 base_reg = crtl->drap_reg;
9746 base_offset = toffset;
9747 len = tlen;
9748 }
9749 }
9750 if (m->fs.fp_valid)
9751 {
9752 toffset = m->fs.fp_offset - cfa_offset;
9753 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
9754 if (tlen <= len)
9755 {
9756 base_reg = hard_frame_pointer_rtx;
9757 base_offset = toffset;
9758 len = tlen;
9759 }
9760 }
9761 }
9762 gcc_assert (base_reg != NULL);
9763
9764 return plus_constant (Pmode, base_reg, base_offset);
9765 }
9766
9767 /* Emit code to save registers in the prologue. */
9768
9769 static void
9770 ix86_emit_save_regs (void)
9771 {
9772 unsigned int regno;
9773 rtx insn;
9774
9775 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
9776 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9777 {
9778 insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
9779 RTX_FRAME_RELATED_P (insn) = 1;
9780 }
9781 }
9782
9783 /* Emit a single register save at CFA - CFA_OFFSET. */
9784
9785 static void
9786 ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno,
9787 HOST_WIDE_INT cfa_offset)
9788 {
9789 struct machine_function *m = cfun->machine;
9790 rtx reg = gen_rtx_REG (mode, regno);
9791 rtx mem, addr, base, insn;
9792
9793 addr = choose_baseaddr (cfa_offset);
9794 mem = gen_frame_mem (mode, addr);
9795
9796 /* For SSE saves, we need to indicate the 128-bit alignment. */
9797 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
9798
9799 insn = emit_move_insn (mem, reg);
9800 RTX_FRAME_RELATED_P (insn) = 1;
9801
9802 base = addr;
9803 if (GET_CODE (base) == PLUS)
9804 base = XEXP (base, 0);
9805 gcc_checking_assert (REG_P (base));
9806
9807 /* When saving registers into a re-aligned local stack frame, avoid
9808 any tricky guessing by dwarf2out. */
9809 if (m->fs.realigned)
9810 {
9811 gcc_checking_assert (stack_realign_drap);
9812
9813 if (regno == REGNO (crtl->drap_reg))
9814 {
9815 /* A bit of a hack. We force the DRAP register to be saved in
9816 the re-aligned stack frame, which provides us with a copy
9817 of the CFA that will last past the prologue. Install it. */
9818 gcc_checking_assert (cfun->machine->fs.fp_valid);
9819 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
9820 cfun->machine->fs.fp_offset - cfa_offset);
9821 mem = gen_rtx_MEM (mode, addr);
9822 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
9823 }
9824 else
9825 {
9826 /* The frame pointer is a stable reference within the
9827 aligned frame. Use it. */
9828 gcc_checking_assert (cfun->machine->fs.fp_valid);
9829 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
9830 cfun->machine->fs.fp_offset - cfa_offset);
9831 mem = gen_rtx_MEM (mode, addr);
9832 add_reg_note (insn, REG_CFA_EXPRESSION,
9833 gen_rtx_SET (VOIDmode, mem, reg));
9834 }
9835 }
9836
9837 /* The memory may not be relative to the current CFA register,
9838 which means that we may need to generate a new pattern for
9839 use by the unwind info. */
9840 else if (base != m->fs.cfa_reg)
9841 {
9842 addr = plus_constant (Pmode, m->fs.cfa_reg,
9843 m->fs.cfa_offset - cfa_offset);
9844 mem = gen_rtx_MEM (mode, addr);
9845 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
9846 }
9847 }
9848
9849 /* Emit code to save registers using MOV insns.
9850 First register is stored at CFA - CFA_OFFSET. */
9851 static void
9852 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
9853 {
9854 unsigned int regno;
9855
9856 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9857 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9858 {
9859 ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
9860 cfa_offset -= UNITS_PER_WORD;
9861 }
9862 }
9863
9864 /* Emit code to save SSE registers using MOV insns.
9865 First register is stored at CFA - CFA_OFFSET. */
9866 static void
9867 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
9868 {
9869 unsigned int regno;
9870
9871 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9872 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9873 {
9874 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
9875 cfa_offset -= 16;
9876 }
9877 }
9878
9879 static GTY(()) rtx queued_cfa_restores;
9880
9881 /* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack
9882 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
9883 Don't add the note if the previously saved value will be left untouched
9884 within stack red-zone till return, as unwinders can find the same value
9885 in the register and on the stack. */
9886
9887 static void
9888 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
9889 {
9890 if (!crtl->shrink_wrapped
9891 && cfa_offset <= cfun->machine->fs.red_zone_offset)
9892 return;
9893
9894 if (insn)
9895 {
9896 add_reg_note (insn, REG_CFA_RESTORE, reg);
9897 RTX_FRAME_RELATED_P (insn) = 1;
9898 }
9899 else
9900 queued_cfa_restores
9901 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
9902 }
9903
9904 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
9905
9906 static void
9907 ix86_add_queued_cfa_restore_notes (rtx insn)
9908 {
9909 rtx last;
9910 if (!queued_cfa_restores)
9911 return;
9912 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
9913 ;
9914 XEXP (last, 1) = REG_NOTES (insn);
9915 REG_NOTES (insn) = queued_cfa_restores;
9916 queued_cfa_restores = NULL_RTX;
9917 RTX_FRAME_RELATED_P (insn) = 1;
9918 }
9919
9920 /* Expand a prologue or epilogue stack adjustment.
9921 The pattern exists to put a dependency on all ebp-based memory accesses.
9922 STYLE should be negative if instructions should be marked as frame related,
9923 zero if the %r11 register is live and cannot be freely used, and positive
9924 otherwise. */
9925
9926 static void
9927 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
9928 int style, bool set_cfa)
9929 {
9930 struct machine_function *m = cfun->machine;
9931 rtx insn;
9932 bool add_frame_related_expr = false;
9933
9934 if (Pmode == SImode)
9935 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
9936 else if (x86_64_immediate_operand (offset, DImode))
9937 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
9938 else
9939 {
9940 rtx tmp;
9941 /* r11 is used by indirect sibcall return as well, set before the
9942 epilogue and used after the epilogue. */
9943 if (style)
9944 tmp = gen_rtx_REG (DImode, R11_REG);
9945 else
9946 {
9947 gcc_assert (src != hard_frame_pointer_rtx
9948 && dest != hard_frame_pointer_rtx);
9949 tmp = hard_frame_pointer_rtx;
9950 }
9951 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
9952 if (style < 0)
9953 add_frame_related_expr = true;
9954
9955 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
9956 }
9957
9958 insn = emit_insn (insn);
9959 if (style >= 0)
9960 ix86_add_queued_cfa_restore_notes (insn);
9961
9962 if (set_cfa)
9963 {
9964 rtx r;
9965
9966 gcc_assert (m->fs.cfa_reg == src);
9967 m->fs.cfa_offset += INTVAL (offset);
9968 m->fs.cfa_reg = dest;
9969
9970 r = gen_rtx_PLUS (Pmode, src, offset);
9971 r = gen_rtx_SET (VOIDmode, dest, r);
9972 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
9973 RTX_FRAME_RELATED_P (insn) = 1;
9974 }
9975 else if (style < 0)
9976 {
9977 RTX_FRAME_RELATED_P (insn) = 1;
9978 if (add_frame_related_expr)
9979 {
9980 rtx r = gen_rtx_PLUS (Pmode, src, offset);
9981 r = gen_rtx_SET (VOIDmode, dest, r);
9982 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
9983 }
9984 }
9985
9986 if (dest == stack_pointer_rtx)
9987 {
9988 HOST_WIDE_INT ooffset = m->fs.sp_offset;
9989 bool valid = m->fs.sp_valid;
9990
9991 if (src == hard_frame_pointer_rtx)
9992 {
9993 valid = m->fs.fp_valid;
9994 ooffset = m->fs.fp_offset;
9995 }
9996 else if (src == crtl->drap_reg)
9997 {
9998 valid = m->fs.drap_valid;
9999 ooffset = 0;
10000 }
10001 else
10002 {
10003 /* Else there are two possibilities: SP itself, which we set
10004 up as the default above, or EH_RETURN_STACKADJ_RTX, which is
10005 taken care of by hand along the eh_return path. */
10006 gcc_checking_assert (src == stack_pointer_rtx
10007 || offset == const0_rtx);
10008 }
10009
10010 m->fs.sp_offset = ooffset - INTVAL (offset);
10011 m->fs.sp_valid = valid;
10012 }
10013 }
10014
10015 /* Find an available register to be used as the dynamic realign argument
10016 pointer register. Such a register will be written in the prologue and
10017 used at the beginning of the body, so it must not be
10018 1. a parameter passing register.
10019 2. the GOT pointer.
10020 We reuse the static-chain register if it is available. Otherwise, we
10021 use DI for i386 and R13 for x86-64. We chose R13 since it has a
10022 shorter encoding.
10023
10024 Return: the regno of the chosen register. */
10025
10026 static unsigned int
10027 find_drap_reg (void)
10028 {
10029 tree decl = cfun->decl;
10030
10031 if (TARGET_64BIT)
10032 {
10033 /* Use R13 for a nested function or a function that needs a static chain.
10034 Since a function with a tail call may use any caller-saved
10035 register in the epilogue, DRAP must not use a caller-saved
10036 register in that case. */
10037 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10038 return R13_REG;
10039
10040 return R10_REG;
10041 }
10042 else
10043 {
10044 /* Use DI for a nested function or a function that needs a static chain.
10045 Since a function with a tail call may use any caller-saved
10046 register in the epilogue, DRAP must not use a caller-saved
10047 register in that case. */
10048 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10049 return DI_REG;
10050
10051 /* Reuse static chain register if it isn't used for parameter
10052 passing. */
10053 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
10054 {
10055 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
10056 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
10057 return CX_REG;
10058 }
10059 return DI_REG;
10060 }
10061 }
10062
10063 /* Return minimum incoming stack alignment. */
10064
10065 static unsigned int
10066 ix86_minimum_incoming_stack_boundary (bool sibcall)
10067 {
10068 unsigned int incoming_stack_boundary;
10069
10070 /* Prefer the one specified at command line. */
10071 if (ix86_user_incoming_stack_boundary)
10072 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
10073 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
10074 when -mstackrealign is used, this is not the sibcall check, and the
10075 estimated stack alignment is 128 bits. */
10076 else if (!sibcall
10077 && !TARGET_64BIT
10078 && ix86_force_align_arg_pointer
10079 && crtl->stack_alignment_estimated == 128)
10080 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10081 else
10082 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
10083
10084 /* Incoming stack alignment can be changed on individual functions
10085 via force_align_arg_pointer attribute. We use the smallest
10086 incoming stack boundary. */
10087 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
10088 && lookup_attribute (ix86_force_align_arg_pointer_string,
10089 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
10090 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10091
10092 /* The incoming stack frame has to be aligned at least at
10093 parm_stack_boundary. */
10094 if (incoming_stack_boundary < crtl->parm_stack_boundary)
10095 incoming_stack_boundary = crtl->parm_stack_boundary;
10096
10097 /* Stack at entrance of main is aligned by runtime. We use the
10098 smallest incoming stack boundary. */
10099 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
10100 && DECL_NAME (current_function_decl)
10101 && MAIN_NAME_P (DECL_NAME (current_function_decl))
10102 && DECL_FILE_SCOPE_P (current_function_decl))
10103 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
10104
10105 return incoming_stack_boundary;
10106 }
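/* Worked example (illustration only; the boundary values named here are
   assumptions that depend on the target headers): for an ordinary 32-bit
   function compiled with -mstackrealign whose estimated stack alignment
   is 128 bits, the code above starts from MIN_STACK_BOUNDARY, raises it
   to crtl->parm_stack_boundary if the parameter area demands more, and
   for main clamps any value larger than MAIN_STACK_BOUNDARY down to
   MAIN_STACK_BOUNDARY.  */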
10107
10108 /* Update incoming stack boundary and estimated stack alignment. */
10109
10110 static void
10111 ix86_update_stack_boundary (void)
10112 {
10113 ix86_incoming_stack_boundary
10114 = ix86_minimum_incoming_stack_boundary (false);
10115
10116 /* x86_64 varargs functions need 16-byte stack alignment for the
10117 register save area. */
10118 if (TARGET_64BIT
10119 && cfun->stdarg
10120 && crtl->stack_alignment_estimated < 128)
10121 crtl->stack_alignment_estimated = 128;
10122 }
10123
10124 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
10125 needed or an rtx for DRAP otherwise. */
10126
10127 static rtx
10128 ix86_get_drap_rtx (void)
10129 {
10130 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
10131 crtl->need_drap = true;
10132
10133 if (stack_realign_drap)
10134 {
10135 /* Assign DRAP to vDRAP and return vDRAP. */
10136 unsigned int regno = find_drap_reg ();
10137 rtx drap_vreg;
10138 rtx arg_ptr;
10139 rtx seq, insn;
10140
10141 arg_ptr = gen_rtx_REG (Pmode, regno);
10142 crtl->drap_reg = arg_ptr;
10143
10144 start_sequence ();
10145 drap_vreg = copy_to_reg (arg_ptr);
10146 seq = get_insns ();
10147 end_sequence ();
10148
10149 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
10150 if (!optimize)
10151 {
10152 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
10153 RTX_FRAME_RELATED_P (insn) = 1;
10154 }
10155 return drap_vreg;
10156 }
10157 else
10158 return NULL;
10159 }
10160
10161 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
10162
10163 static rtx
10164 ix86_internal_arg_pointer (void)
10165 {
10166 return virtual_incoming_args_rtx;
10167 }
10168
10169 struct scratch_reg {
10170 rtx reg;
10171 bool saved;
10172 };
10173
10174 /* Return a short-lived scratch register for use on function entry.
10175 In 32-bit mode, it is valid only after the registers are saved
10176 in the prologue. This register must be released by means of
10177 release_scratch_register_on_entry once it is dead. */
10178
10179 static void
10180 get_scratch_register_on_entry (struct scratch_reg *sr)
10181 {
10182 int regno;
10183
10184 sr->saved = false;
10185
10186 if (TARGET_64BIT)
10187 {
10188 /* We always use R11 in 64-bit mode. */
10189 regno = R11_REG;
10190 }
10191 else
10192 {
10193 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
10194 bool fastcall_p
10195 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10196 bool thiscall_p
10197 = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10198 bool static_chain_p = DECL_STATIC_CHAIN (decl);
10199 int regparm = ix86_function_regparm (fntype, decl);
10200 int drap_regno
10201 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
10202
10203 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
10204 for the static chain register. */
10205 if ((regparm < 1 || (fastcall_p && !static_chain_p))
10206 && drap_regno != AX_REG)
10207 regno = AX_REG;
10208 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
10209 for the static chain register. */
10210 else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
10211 regno = AX_REG;
10212 else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
10213 regno = DX_REG;
10214 /* ecx is the static chain register. */
10215 else if (regparm < 3 && !fastcall_p && !thiscall_p
10216 && !static_chain_p
10217 && drap_regno != CX_REG)
10218 regno = CX_REG;
10219 else if (ix86_save_reg (BX_REG, true))
10220 regno = BX_REG;
10221 /* esi is the static chain register. */
10222 else if (!(regparm == 3 && static_chain_p)
10223 && ix86_save_reg (SI_REG, true))
10224 regno = SI_REG;
10225 else if (ix86_save_reg (DI_REG, true))
10226 regno = DI_REG;
10227 else
10228 {
10229 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
10230 sr->saved = true;
10231 }
10232 }
10233
10234 sr->reg = gen_rtx_REG (Pmode, regno);
10235 if (sr->saved)
10236 {
10237 rtx insn = emit_insn (gen_push (sr->reg));
10238 RTX_FRAME_RELATED_P (insn) = 1;
10239 }
10240 }
10241
10242 /* Release a scratch register obtained from the preceding function. */
10243
10244 static void
10245 release_scratch_register_on_entry (struct scratch_reg *sr)
10246 {
10247 if (sr->saved)
10248 {
10249 struct machine_function *m = cfun->machine;
10250 rtx x, insn = emit_insn (gen_pop (sr->reg));
10251
10252 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
10253 RTX_FRAME_RELATED_P (insn) = 1;
10254 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
10255 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
10256 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
10257 m->fs.sp_offset -= UNITS_PER_WORD;
10258 }
10259 }
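/* Typical usage of the two helpers above (sketch only; it mirrors the
   calls made by ix86_adjust_stack_and_probe and
   ix86_emit_probe_stack_range below):

     struct scratch_reg sr;

     get_scratch_register_on_entry (&sr);
     ... emit insns that may clobber sr.reg ...
     release_scratch_register_on_entry (&sr);  */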
10260
10261 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
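/* With the usual definition of STACK_CHECK_PROBE_INTERVAL_EXP (12 is an
   assumption; see defaults.h), PROBE_INTERVAL is 4096 bytes, i.e. one
   probe per page.  */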
10262
10263 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
10264
10265 static void
10266 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
10267 {
10268 /* We skip the probe for the first interval + a small dope of 4 words and
10269 probe that many bytes past the specified size to maintain a protection
10270 area at the bottom of the stack. */
10271 const int dope = 4 * UNITS_PER_WORD;
10272 rtx size_rtx = GEN_INT (size), last;
10273
10274 /* See if we have a constant small number of probes to generate. If so,
10275 that's the easy case. The run-time loop is made up of 11 insns in the
10276 generic case while the compile-time loop is made up of 3+2*(n-1) insns
10277 for n # of intervals. */
10278 if (size <= 5 * PROBE_INTERVAL)
10279 {
10280 HOST_WIDE_INT i, adjust;
10281 bool first_probe = true;
10282
10283 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
10284 values of N from 1 until it exceeds SIZE. If only one probe is
10285 needed, this will not generate any code. Then adjust and probe
10286 to PROBE_INTERVAL + SIZE. */
10287 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10288 {
10289 if (first_probe)
10290 {
10291 adjust = 2 * PROBE_INTERVAL + dope;
10292 first_probe = false;
10293 }
10294 else
10295 adjust = PROBE_INTERVAL;
10296
10297 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10298 plus_constant (Pmode, stack_pointer_rtx,
10299 -adjust)));
10300 emit_stack_probe (stack_pointer_rtx);
10301 }
10302
10303 if (first_probe)
10304 adjust = size + PROBE_INTERVAL + dope;
10305 else
10306 adjust = size + PROBE_INTERVAL - i;
10307
10308 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10309 plus_constant (Pmode, stack_pointer_rtx,
10310 -adjust)));
10311 emit_stack_probe (stack_pointer_rtx);
10312
10313 /* Adjust back to account for the additional first interval. */
10314 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10315 plus_constant (Pmode, stack_pointer_rtx,
10316 PROBE_INTERVAL + dope)));
10317 }
10318
10319 /* Otherwise, do the same as above, but in a loop. Note that we must be
10320 extra careful with variables wrapping around because we might be at
10321 the very top (or the very bottom) of the address space and we have
10322 to be able to handle this case properly; in particular, we use an
10323 equality test for the loop condition. */
10324 else
10325 {
10326 HOST_WIDE_INT rounded_size;
10327 struct scratch_reg sr;
10328
10329 get_scratch_register_on_entry (&sr);
10330
10331
10332 /* Step 1: round SIZE to the previous multiple of the interval. */
10333
10334 rounded_size = size & -PROBE_INTERVAL;
10335
10336
10337 /* Step 2: compute initial and final value of the loop counter. */
10338
10339 /* SP = SP_0 + PROBE_INTERVAL. */
10340 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10341 plus_constant (Pmode, stack_pointer_rtx,
10342 - (PROBE_INTERVAL + dope))));
10343
10344 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
10345 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
10346 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
10347 gen_rtx_PLUS (Pmode, sr.reg,
10348 stack_pointer_rtx)));
10349
10350
10351 /* Step 3: the loop
10352
10353 while (SP != LAST_ADDR)
10354 {
10355 SP = SP + PROBE_INTERVAL
10356 probe at SP
10357 }
10358
10359 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
10360 values of N from 1 until it is equal to ROUNDED_SIZE. */
10361
10362 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
10363
10364
10365 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
10366 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
10367
10368 if (size != rounded_size)
10369 {
10370 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10371 plus_constant (Pmode, stack_pointer_rtx,
10372 rounded_size - size)));
10373 emit_stack_probe (stack_pointer_rtx);
10374 }
10375
10376 /* Adjust back to account for the additional first interval. */
10377 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10378 plus_constant (Pmode, stack_pointer_rtx,
10379 PROBE_INTERVAL + dope)));
10380
10381 release_scratch_register_on_entry (&sr);
10382 }
10383
10384 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
10385
10386 /* Even if the stack pointer isn't the CFA register, we need to correctly
10387 describe the adjustments made to it, in particular differentiate the
10388 frame-related ones from the frame-unrelated ones. */
10389 if (size > 0)
10390 {
10391 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
10392 XVECEXP (expr, 0, 0)
10393 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10394 plus_constant (Pmode, stack_pointer_rtx, -size));
10395 XVECEXP (expr, 0, 1)
10396 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10397 plus_constant (Pmode, stack_pointer_rtx,
10398 PROBE_INTERVAL + dope + size));
10399 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
10400 RTX_FRAME_RELATED_P (last) = 1;
10401
10402 cfun->machine->fs.sp_offset += size;
10403 }
10404
10405 /* Make sure nothing is scheduled before we are done. */
10406 emit_insn (gen_blockage ());
10407 }
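/* Worked example (illustration only; assumes PROBE_INTERVAL == 4096 and
   UNITS_PER_WORD == 8, hence dope == 32).  For a constant SIZE of 12288
   bytes the unrolled path above emits, in effect:

     sub  $8224, %rsp          # 2*PROBE_INTERVAL + dope
     or   $0, (%rsp)           # probe
     sub  $4096, %rsp
     or   $0, (%rsp)           # probe
     sub  $4096, %rsp
     or   $0, (%rsp)           # probe
     add  $4128, %rsp          # give back PROBE_INTERVAL + dope

   for a net adjustment of -12288; the probes run PROBE_INTERVAL + dope
   bytes ahead of the allocation, maintaining the protection area
   described above.  */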
10408
10409 /* Adjust the stack pointer up to REG while probing it. */
10410
10411 const char *
10412 output_adjust_stack_and_probe (rtx reg)
10413 {
10414 static int labelno = 0;
10415 char loop_lab[32], end_lab[32];
10416 rtx xops[2];
10417
10418 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
10419 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
10420
10421 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
10422
10423 /* Jump to END_LAB if SP == LAST_ADDR. */
10424 xops[0] = stack_pointer_rtx;
10425 xops[1] = reg;
10426 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
10427 fputs ("\tje\t", asm_out_file);
10428 assemble_name_raw (asm_out_file, end_lab);
10429 fputc ('\n', asm_out_file);
10430
10431 /* SP = SP + PROBE_INTERVAL. */
10432 xops[1] = GEN_INT (PROBE_INTERVAL);
10433 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
10434
10435 /* Probe at SP. */
10436 xops[1] = const0_rtx;
10437 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
10438
10439 fprintf (asm_out_file, "\tjmp\t");
10440 assemble_name_raw (asm_out_file, loop_lab);
10441 fputc ('\n', asm_out_file);
10442
10443 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
10444
10445 return "";
10446 }
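/* For reference (illustration only), the loop emitted by the routine
   above looks roughly like this in AT&T syntax on x86-64, with %r11
   standing in for the scratch register holding LAST_ADDR:

   .LPSRL0:
     cmpq  %r11, %rsp
     je    .LPSRE0
     subq  $4096, %rsp         # assumes PROBE_INTERVAL == 4096
     orq   $0, (%rsp)          # probe
     jmp   .LPSRL0
   .LPSRE0:                                                            */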
10447
10448 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
10449 inclusive. These are offsets from the current stack pointer. */
10450
10451 static void
10452 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
10453 {
10454 /* See if we have a constant small number of probes to generate. If so,
10455 that's the easy case. The run-time loop is made up of 7 insns in the
10456 generic case while the compile-time loop is made up of n insns for n #
10457 of intervals. */
10458 if (size <= 7 * PROBE_INTERVAL)
10459 {
10460 HOST_WIDE_INT i;
10461
10462 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
10463 it exceeds SIZE. If only one probe is needed, this will not
10464 generate any code. Then probe at FIRST + SIZE. */
10465 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10466 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
10467 -(first + i)));
10468
10469 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
10470 -(first + size)));
10471 }
10472
10473 /* Otherwise, do the same as above, but in a loop. Note that we must be
10474 extra careful with variables wrapping around because we might be at
10475 the very top (or the very bottom) of the address space and we have
10476 to be able to handle this case properly; in particular, we use an
10477 equality test for the loop condition. */
10478 else
10479 {
10480 HOST_WIDE_INT rounded_size, last;
10481 struct scratch_reg sr;
10482
10483 get_scratch_register_on_entry (&sr);
10484
10485
10486 /* Step 1: round SIZE to the previous multiple of the interval. */
10487
10488 rounded_size = size & -PROBE_INTERVAL;
10489
10490
10491 /* Step 2: compute initial and final value of the loop counter. */
10492
10493 /* TEST_OFFSET = FIRST. */
10494 emit_move_insn (sr.reg, GEN_INT (-first));
10495
10496 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
10497 last = first + rounded_size;
10498
10499
10500 /* Step 3: the loop
10501
10502 while (TEST_ADDR != LAST_ADDR)
10503 {
10504 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
10505 probe at TEST_ADDR
10506 }
10507
10508 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
10509 until it is equal to ROUNDED_SIZE. */
10510
10511 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
10512
10513
10514 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
10515 that SIZE is equal to ROUNDED_SIZE. */
10516
10517 if (size != rounded_size)
10518 emit_stack_probe (plus_constant (Pmode,
10519 gen_rtx_PLUS (Pmode,
10520 stack_pointer_rtx,
10521 sr.reg),
10522 rounded_size - size));
10523
10524 release_scratch_register_on_entry (&sr);
10525 }
10526
10527 /* Make sure nothing is scheduled before we are done. */
10528 emit_insn (gen_blockage ());
10529 }
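/* Worked example (illustration only; assumes PROBE_INTERVAL == 4096).
   For FIRST == 4096 and SIZE == 10000 the unrolled path above emits
   probes at sp-8192, sp-12288 and finally sp-14096, without moving the
   stack pointer itself.  */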
10530
10531 /* Probe a range of stack addresses from REG to END, inclusive. These are
10532 offsets from the current stack pointer. */
10533
10534 const char *
10535 output_probe_stack_range (rtx reg, rtx end)
10536 {
10537 static int labelno = 0;
10538 char loop_lab[32], end_lab[32];
10539 rtx xops[3];
10540
10541 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
10542 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
10543
10544 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
10545
10546 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
10547 xops[0] = reg;
10548 xops[1] = end;
10549 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
10550 fputs ("\tje\t", asm_out_file);
10551 assemble_name_raw (asm_out_file, end_lab);
10552 fputc ('\n', asm_out_file);
10553
10554 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
10555 xops[1] = GEN_INT (PROBE_INTERVAL);
10556 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
10557
10558 /* Probe at TEST_ADDR. */
10559 xops[0] = stack_pointer_rtx;
10560 xops[1] = reg;
10561 xops[2] = const0_rtx;
10562 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
10563
10564 fprintf (asm_out_file, "\tjmp\t");
10565 assemble_name_raw (asm_out_file, loop_lab);
10566 fputc ('\n', asm_out_file);
10567
10568 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
10569
10570 return "";
10571 }
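/* For reference (illustration only), the loop emitted above differs from
   output_adjust_stack_and_probe in that the stack pointer is left alone
   and the probe goes through an index register.  In AT&T syntax on
   x86-64, with hypothetical %rax = TEST_ADDR and %r11 = LAST_ADDR:

   .LPSRL1:
     cmpq  %r11, %rax
     je    .LPSRE1
     subq  $4096, %rax         # assumes PROBE_INTERVAL == 4096
     orq   $0, (%rsp,%rax)     # probe
     jmp   .LPSRL1
   .LPSRE1:                                                            */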
10572
10573 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
10574 to be generated in correct form. */
10575 static void
10576 ix86_finalize_stack_realign_flags (void)
10577 {
10578 /* Check if stack realignment is really needed after reload, and
10579 store the result in cfun. */
10580 unsigned int incoming_stack_boundary
10581 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
10582 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
10583 unsigned int stack_realign = (incoming_stack_boundary
10584 < (crtl->is_leaf
10585 ? crtl->max_used_stack_slot_alignment
10586 : crtl->stack_alignment_needed));
10587
10588 if (crtl->stack_realign_finalized)
10589 {
10590 /* After stack_realign_needed is finalized, we can no longer
10591 change it. */
10592 gcc_assert (crtl->stack_realign_needed == stack_realign);
10593 return;
10594 }
10595
10596 /* If the only reason for frame_pointer_needed is that we conservatively
10597 assumed stack realignment might be needed, but in the end nothing that
10598 needed the stack alignment had been spilled, clear frame_pointer_needed
10599 and say we don't need stack realignment. */
10600 if (stack_realign
10601 && frame_pointer_needed
10602 && crtl->is_leaf
10603 && flag_omit_frame_pointer
10604 && crtl->sp_is_unchanging
10605 && !ix86_current_function_calls_tls_descriptor
10606 && !crtl->accesses_prior_frames
10607 && !cfun->calls_alloca
10608 && !crtl->calls_eh_return
10609 && !(flag_stack_check && STACK_CHECK_MOVING_SP)
10610 && !ix86_frame_pointer_required ()
10611 && get_frame_size () == 0
10612 && ix86_nsaved_sseregs () == 0
10613 && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
10614 {
10615 HARD_REG_SET set_up_by_prologue, prologue_used;
10616 basic_block bb;
10617
10618 CLEAR_HARD_REG_SET (prologue_used);
10619 CLEAR_HARD_REG_SET (set_up_by_prologue);
10620 add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
10621 add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
10622 add_to_hard_reg_set (&set_up_by_prologue, Pmode,
10623 HARD_FRAME_POINTER_REGNUM);
10624 FOR_EACH_BB_FN (bb, cfun)
10625 {
10626 rtx insn;
10627 FOR_BB_INSNS (bb, insn)
10628 if (NONDEBUG_INSN_P (insn)
10629 && requires_stack_frame_p (insn, prologue_used,
10630 set_up_by_prologue))
10631 {
10632 crtl->stack_realign_needed = stack_realign;
10633 crtl->stack_realign_finalized = true;
10634 return;
10635 }
10636 }
10637
10638 /* If drap has been set, but it actually isn't live at the start
10639 of the function, there is no reason to set it up. */
10640 if (crtl->drap_reg)
10641 {
10642 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
10643 if (! REGNO_REG_SET_P (DF_LR_IN (bb), REGNO (crtl->drap_reg)))
10644 {
10645 crtl->drap_reg = NULL_RTX;
10646 crtl->need_drap = false;
10647 }
10648 }
10649 else
10650 cfun->machine->no_drap_save_restore = true;
10651
10652 frame_pointer_needed = false;
10653 stack_realign = false;
10654 crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
10655 crtl->stack_alignment_needed = incoming_stack_boundary;
10656 crtl->stack_alignment_estimated = incoming_stack_boundary;
10657 if (crtl->preferred_stack_boundary > incoming_stack_boundary)
10658 crtl->preferred_stack_boundary = incoming_stack_boundary;
10659 df_finish_pass (true);
10660 df_scan_alloc (NULL);
10661 df_scan_blocks ();
10662 df_compute_regs_ever_live (true);
10663 df_analyze ();
10664 }
10665
10666 crtl->stack_realign_needed = stack_realign;
10667 crtl->stack_realign_finalized = true;
10668 }
10669
10670 /* Expand the prologue into a bunch of separate insns. */
10671
10672 void
10673 ix86_expand_prologue (void)
10674 {
10675 struct machine_function *m = cfun->machine;
10676 rtx insn, t;
10677 bool pic_reg_used;
10678 struct ix86_frame frame;
10679 HOST_WIDE_INT allocate;
10680 bool int_registers_saved;
10681 bool sse_registers_saved;
10682
10683 ix86_finalize_stack_realign_flags ();
10684
10685 /* DRAP should not coexist with stack_realign_fp */
10686 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
10687
10688 memset (&m->fs, 0, sizeof (m->fs));
10689
10690 /* Initialize CFA state for before the prologue. */
10691 m->fs.cfa_reg = stack_pointer_rtx;
10692 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
10693
10694 /* Track SP offset to the CFA. We continue tracking this after we've
10695 swapped the CFA register away from SP. In the case of re-alignment
10696 this is fudged; we're interested in offsets within the local frame. */
10697 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10698 m->fs.sp_valid = true;
10699
10700 ix86_compute_frame_layout (&frame);
10701
10702 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
10703 {
10704 /* We should have already generated an error for any use of
10705 ms_hook on a nested function. */
10706 gcc_checking_assert (!ix86_static_chain_on_stack);
10707
10708 /* Check if profiling is active and whether we shall use the
10709 profiling-before-prologue variant. If so, sorry. */
10710 if (crtl->profile && flag_fentry != 0)
10711 sorry ("ms_hook_prologue attribute isn%'t compatible "
10712 "with -mfentry for 32-bit");
10713
10714 /* In ix86_asm_output_function_label we emitted:
10715 8b ff movl.s %edi,%edi
10716 55 push %ebp
10717 8b ec movl.s %esp,%ebp
10718
10719 This matches the hookable function prologue in Win32 API
10720 functions in Microsoft Windows XP Service Pack 2 and newer.
10721 Wine uses this to enable Windows apps to hook the Win32 API
10722 functions provided by Wine.
10723
10724 What that means is that we've already set up the frame pointer. */
10725
10726 if (frame_pointer_needed
10727 && !(crtl->drap_reg && crtl->stack_realign_needed))
10728 {
10729 rtx push, mov;
10730
10731 /* We've decided to use the frame pointer already set up.
10732 Describe this to the unwinder by pretending that both
10733 push and mov insns happen right here.
10734
10735 Putting the unwind info here at the end of the ms_hook
10736 is done so that we can make absolutely certain we get
10737 the required byte sequence at the start of the function,
10738 rather than relying on an assembler that can produce
10739 the exact encoding required.
10740
10741 However it does mean (in the unpatched case) that we have
10742 a 1 insn window where the asynchronous unwind info is
10743 incorrect. However, if we placed the unwind info at
10744 its correct location we would have incorrect unwind info
10745 in the patched case. Which is probably all moot since
10746 I don't expect Wine generates dwarf2 unwind info for the
10747 system libraries that use this feature. */
10748
10749 insn = emit_insn (gen_blockage ());
10750
10751 push = gen_push (hard_frame_pointer_rtx);
10752 mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
10753 stack_pointer_rtx);
10754 RTX_FRAME_RELATED_P (push) = 1;
10755 RTX_FRAME_RELATED_P (mov) = 1;
10756
10757 RTX_FRAME_RELATED_P (insn) = 1;
10758 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10759 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
10760
10761 /* Note that gen_push incremented m->fs.cfa_offset, even
10762 though we didn't emit the push insn here. */
10763 m->fs.cfa_reg = hard_frame_pointer_rtx;
10764 m->fs.fp_offset = m->fs.cfa_offset;
10765 m->fs.fp_valid = true;
10766 }
10767 else
10768 {
10769 /* The frame pointer is not needed so pop %ebp again.
10770 This leaves us with a pristine state. */
10771 emit_insn (gen_pop (hard_frame_pointer_rtx));
10772 }
10773 }
10774
10775 /* The first insn of a function that accepts its static chain on the
10776 stack is to push the register that would be filled in by a direct
10777 call. This insn will be skipped by the trampoline. */
10778 else if (ix86_static_chain_on_stack)
10779 {
10780 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
10781 emit_insn (gen_blockage ());
10782
10783 /* We don't want to interpret this push insn as a register save,
10784 only as a stack adjustment. The real copy of the register as
10785 a save will be done later, if needed. */
10786 t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
10787 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
10788 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
10789 RTX_FRAME_RELATED_P (insn) = 1;
10790 }
10791
10792 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
10793 DRAP is needed and stack realignment is really needed after reload. */
10794 if (stack_realign_drap)
10795 {
10796 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10797
10798 /* Only need to push the parameter pointer reg if it is callee-saved. */
10799 if (!call_used_regs[REGNO (crtl->drap_reg)])
10800 {
10801 /* Push arg pointer reg */
10802 insn = emit_insn (gen_push (crtl->drap_reg));
10803 RTX_FRAME_RELATED_P (insn) = 1;
10804 }
10805
10806 /* Grab the argument pointer. */
10807 t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
10808 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
10809 RTX_FRAME_RELATED_P (insn) = 1;
10810 m->fs.cfa_reg = crtl->drap_reg;
10811 m->fs.cfa_offset = 0;
10812
10813 /* Align the stack. */
10814 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10815 stack_pointer_rtx,
10816 GEN_INT (-align_bytes)));
10817 RTX_FRAME_RELATED_P (insn) = 1;
10818
10819 /* Replicate the return address on the stack so that return
10820 address can be reached via (argp - 1) slot. This is needed
10821 to implement macro RETURN_ADDR_RTX and intrinsic function
10822 expand_builtin_return_addr etc. */
10823 t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
10824 t = gen_frame_mem (word_mode, t);
10825 insn = emit_insn (gen_push (t));
10826 RTX_FRAME_RELATED_P (insn) = 1;
10827
10828 /* For the purposes of frame and register save area addressing,
10829 we've started over with a new frame. */
10830 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10831 m->fs.realigned = true;
10832 }
10833
10834 int_registers_saved = (frame.nregs == 0);
10835 sse_registers_saved = (frame.nsseregs == 0);
10836
10837 if (frame_pointer_needed && !m->fs.fp_valid)
10838 {
10839 /* Note: AT&T enter does NOT have reversed args. Enter is probably
10840 slower on all targets. Also sdb doesn't like it. */
10841 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
10842 RTX_FRAME_RELATED_P (insn) = 1;
10843
10844 /* Push registers now, before setting the frame pointer
10845 on SEH target. */
10846 if (!int_registers_saved
10847 && TARGET_SEH
10848 && !frame.save_regs_using_mov)
10849 {
10850 ix86_emit_save_regs ();
10851 int_registers_saved = true;
10852 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
10853 }
10854
10855 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
10856 {
10857 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
10858 RTX_FRAME_RELATED_P (insn) = 1;
10859
10860 if (m->fs.cfa_reg == stack_pointer_rtx)
10861 m->fs.cfa_reg = hard_frame_pointer_rtx;
10862 m->fs.fp_offset = m->fs.sp_offset;
10863 m->fs.fp_valid = true;
10864 }
10865 }
10866
10867 if (!int_registers_saved)
10868 {
10869 /* If saving registers via PUSH, do so now. */
10870 if (!frame.save_regs_using_mov)
10871 {
10872 ix86_emit_save_regs ();
10873 int_registers_saved = true;
10874 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
10875 }
10876
10877 /* When using red zone we may start register saving before allocating
10878 the stack frame, saving one cycle of the prologue. However, avoid
10879 doing this if we have to probe the stack; at least on x86_64 the
10880 stack probe can turn into a call that clobbers a red zone location. */
10881 else if (ix86_using_red_zone ()
10882 && (! TARGET_STACK_PROBE
10883 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
10884 {
10885 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
10886 int_registers_saved = true;
10887 }
10888 }
10889
10890 if (stack_realign_fp)
10891 {
10892 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10893 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
10894
10895 /* The computation of the size of the re-aligned stack frame means
10896 that we must allocate the size of the register save area before
10897 performing the actual alignment. Otherwise we cannot guarantee
10898 that there's enough storage above the realignment point. */
10899 if (m->fs.sp_offset != frame.sse_reg_save_offset)
10900 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10901 GEN_INT (m->fs.sp_offset
10902 - frame.sse_reg_save_offset),
10903 -1, false);
10904
10905 /* Align the stack. */
10906 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10907 stack_pointer_rtx,
10908 GEN_INT (-align_bytes)));
10909
10910 /* For the purposes of register save area addressing, the stack
10911 pointer is no longer valid. As for the value of sp_offset,
10912 see ix86_compute_frame_layout, which we need to match in order
10913 to pass verification of stack_pointer_offset at the end. */
10914 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
10915 m->fs.sp_valid = false;
10916 }
10917
10918 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
10919
10920 if (flag_stack_usage_info)
10921 {
10922 /* We start to count from ARG_POINTER. */
10923 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
10924
10925 /* If it was realigned, take into account the fake frame. */
10926 if (stack_realign_drap)
10927 {
10928 if (ix86_static_chain_on_stack)
10929 stack_size += UNITS_PER_WORD;
10930
10931 if (!call_used_regs[REGNO (crtl->drap_reg)])
10932 stack_size += UNITS_PER_WORD;
10933
10934 /* This over-estimates by 1 minimal-stack-alignment-unit but
10935 mitigates that by counting in the new return address slot. */
10936 current_function_dynamic_stack_size
10937 += crtl->stack_alignment_needed / BITS_PER_UNIT;
10938 }
10939
10940 current_function_static_stack_size = stack_size;
10941 }
10942
10943 /* On SEH target with very large frame size, allocate an area to save
10944 SSE registers (as the very large allocation won't be described). */
10945 if (TARGET_SEH
10946 && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
10947 && !sse_registers_saved)
10948 {
10949 HOST_WIDE_INT sse_size =
10950 frame.sse_reg_save_offset - frame.reg_save_offset;
10951
10952 gcc_assert (int_registers_saved);
10953
10954 /* No need to do stack checking as the area will be immediately
10955 written. */
10956 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10957 GEN_INT (-sse_size), -1,
10958 m->fs.cfa_reg == stack_pointer_rtx);
10959 allocate -= sse_size;
10960 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
10961 sse_registers_saved = true;
10962 }
10963
10964 /* The stack has already been decremented by the instruction calling us
10965 so probe if the size is non-negative to preserve the protection area. */
10966 if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
10967 {
10968 /* We expect the registers to be saved when probes are used. */
10969 gcc_assert (int_registers_saved);
10970
10971 if (STACK_CHECK_MOVING_SP)
10972 {
10973 if (!(crtl->is_leaf && !cfun->calls_alloca
10974 && allocate <= PROBE_INTERVAL))
10975 {
10976 ix86_adjust_stack_and_probe (allocate);
10977 allocate = 0;
10978 }
10979 }
10980 else
10981 {
10982 HOST_WIDE_INT size = allocate;
10983
10984 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
10985 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
10986
10987 if (TARGET_STACK_PROBE)
10988 {
10989 if (crtl->is_leaf && !cfun->calls_alloca)
10990 {
10991 if (size > PROBE_INTERVAL)
10992 ix86_emit_probe_stack_range (0, size);
10993 }
10994 else
10995 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
10996 }
10997 else
10998 {
10999 if (crtl->is_leaf && !cfun->calls_alloca)
11000 {
11001 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
11002 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT,
11003 size - STACK_CHECK_PROTECT);
11004 }
11005 else
11006 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
11007 }
11008 }
11009 }
11010
11011 if (allocate == 0)
11012 ;
11013 else if (!ix86_target_stack_probe ()
11014 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
11015 {
11016 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11017 GEN_INT (-allocate), -1,
11018 m->fs.cfa_reg == stack_pointer_rtx);
11019 }
11020 else
11021 {
11022 rtx eax = gen_rtx_REG (Pmode, AX_REG);
11023 rtx r10 = NULL;
11024 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
11025 const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
11026 bool eax_live = ix86_eax_live_at_start_p ();
11027 bool r10_live = false;
11028
11029 if (TARGET_64BIT)
11030 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
11031
11032 if (eax_live)
11033 {
11034 insn = emit_insn (gen_push (eax));
11035 allocate -= UNITS_PER_WORD;
11036 /* Note that SEH directives need to continue tracking the stack
11037 pointer even after the frame pointer has been set up. */
11038 if (sp_is_cfa_reg || TARGET_SEH)
11039 {
11040 if (sp_is_cfa_reg)
11041 m->fs.cfa_offset += UNITS_PER_WORD;
11042 RTX_FRAME_RELATED_P (insn) = 1;
11043 }
11044 }
11045
11046 if (r10_live)
11047 {
11048 r10 = gen_rtx_REG (Pmode, R10_REG);
11049 insn = emit_insn (gen_push (r10));
11050 allocate -= UNITS_PER_WORD;
11051 if (sp_is_cfa_reg || TARGET_SEH)
11052 {
11053 if (sp_is_cfa_reg)
11054 m->fs.cfa_offset += UNITS_PER_WORD;
11055 RTX_FRAME_RELATED_P (insn) = 1;
11056 }
11057 }
11058
11059 emit_move_insn (eax, GEN_INT (allocate));
11060 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
11061
11062 /* Use the fact that AX still contains ALLOCATE. */
11063 adjust_stack_insn = (Pmode == DImode
11064 ? gen_pro_epilogue_adjust_stack_di_sub
11065 : gen_pro_epilogue_adjust_stack_si_sub);
11066
11067 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
11068 stack_pointer_rtx, eax));
11069
11070 if (sp_is_cfa_reg || TARGET_SEH)
11071 {
11072 if (sp_is_cfa_reg)
11073 m->fs.cfa_offset += allocate;
11074 RTX_FRAME_RELATED_P (insn) = 1;
11075 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11076 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11077 plus_constant (Pmode, stack_pointer_rtx,
11078 -allocate)));
11079 }
11080 m->fs.sp_offset += allocate;
11081
11082 /* Use stack_pointer_rtx for relative addressing so that code
11083 works for realigned stack, too. */
11084 if (r10_live && eax_live)
11085 {
11086 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11087 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
11088 gen_frame_mem (word_mode, t));
11089 t = plus_constant (Pmode, t, UNITS_PER_WORD);
11090 emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
11091 gen_frame_mem (word_mode, t));
11092 }
11093 else if (eax_live || r10_live)
11094 {
11095 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11096 emit_move_insn (gen_rtx_REG (word_mode,
11097 (eax_live ? AX_REG : R10_REG)),
11098 gen_frame_mem (word_mode, t));
11099 }
11100 }
11101 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
11102
11103 /* If we haven't already set up the frame pointer, do so now. */
11104 if (frame_pointer_needed && !m->fs.fp_valid)
11105 {
11106 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
11107 GEN_INT (frame.stack_pointer_offset
11108 - frame.hard_frame_pointer_offset));
11109 insn = emit_insn (insn);
11110 RTX_FRAME_RELATED_P (insn) = 1;
11111 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
11112
11113 if (m->fs.cfa_reg == stack_pointer_rtx)
11114 m->fs.cfa_reg = hard_frame_pointer_rtx;
11115 m->fs.fp_offset = frame.hard_frame_pointer_offset;
11116 m->fs.fp_valid = true;
11117 }
11118
11119 if (!int_registers_saved)
11120 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
11121 if (!sse_registers_saved)
11122 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11123
11124 pic_reg_used = false;
11125 /* We don't use pic-register for pe-coff target. */
11126 if (pic_offset_table_rtx
11127 && !TARGET_PECOFF
11128 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
11129 || crtl->profile))
11130 {
11131 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
11132
11133 if (alt_pic_reg_used != INVALID_REGNUM)
11134 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
11135
11136 pic_reg_used = true;
11137 }
11138
11139 if (pic_reg_used)
11140 {
11141 if (TARGET_64BIT)
11142 {
11143 if (ix86_cmodel == CM_LARGE_PIC)
11144 {
11145 rtx label, tmp_reg;
11146
11147 gcc_assert (Pmode == DImode);
11148 label = gen_label_rtx ();
11149 emit_label (label);
11150 LABEL_PRESERVE_P (label) = 1;
11151 tmp_reg = gen_rtx_REG (Pmode, R11_REG);
11152 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
11153 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
11154 label));
11155 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
11156 insn = emit_insn (ix86_gen_add3 (pic_offset_table_rtx,
11157 pic_offset_table_rtx, tmp_reg));
11158 }
11159 else
11160 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
11161 }
11162 else
11163 {
11164 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
11165 RTX_FRAME_RELATED_P (insn) = 1;
11166 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
11167 }
11168 }
11169
11170 /* In the pic_reg_used case, make sure that the got load isn't deleted
11171 when mcount needs it. Blockage to avoid call movement across mcount
11172 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
11173 note. */
11174 if (crtl->profile && !flag_fentry && pic_reg_used)
11175 emit_insn (gen_prologue_use (pic_offset_table_rtx));
11176
11177 if (crtl->drap_reg && !crtl->stack_realign_needed)
11178 {
11179 /* vDRAP is set up, but after reload it turns out stack realignment
11180 isn't necessary; here we emit prologue code to set up DRAP
11181 without the stack realignment adjustment. */
11182 t = choose_baseaddr (0);
11183 emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
11184 }
11185
11186 /* Prevent instructions from being scheduled into register save push
11187 sequence when access to the redzone area is done through frame pointer.
11188 The offset between the frame pointer and the stack pointer is calculated
11189 relative to the value of the stack pointer at the end of the function
11190 prologue, and moving instructions that access redzone area via frame
11191 pointer inside push sequence violates this assumption. */
11192 if (frame_pointer_needed && frame.red_zone_size)
11193 emit_insn (gen_memory_blockage ());
11194
11195 /* Emit cld instruction if stringops are used in the function. */
11196 if (TARGET_CLD && ix86_current_function_needs_cld)
11197 emit_insn (gen_cld ());
11198
11199 /* SEH requires that the prologue end within 256 bytes of the start of
11200 the function. Prevent instruction schedules that would extend that.
11201 Further, prevent alloca modifications to the stack pointer from being
11202 combined with prologue modifications. */
11203 if (TARGET_SEH)
11204 emit_insn (gen_prologue_use (stack_pointer_rtx));
11205 }
11206
11207 /* Emit code to restore REG using a POP insn. */
11208
11209 static void
11210 ix86_emit_restore_reg_using_pop (rtx reg)
11211 {
11212 struct machine_function *m = cfun->machine;
11213 rtx insn = emit_insn (gen_pop (reg));
11214
11215 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
11216 m->fs.sp_offset -= UNITS_PER_WORD;
11217
11218 if (m->fs.cfa_reg == crtl->drap_reg
11219 && REGNO (reg) == REGNO (crtl->drap_reg))
11220 {
11221 /* Previously we'd represented the CFA as an expression
11222 like *(%ebp - 8). We've just popped that value from
11223 the stack, which means we need to reset the CFA to
11224 the drap register. This will remain until we restore
11225 the stack pointer. */
11226 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11227 RTX_FRAME_RELATED_P (insn) = 1;
11228
11229 /* This means that the DRAP register is valid for addressing too. */
11230 m->fs.drap_valid = true;
11231 return;
11232 }
11233
11234 if (m->fs.cfa_reg == stack_pointer_rtx)
11235 {
11236 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
11237 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
11238 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
11239 RTX_FRAME_RELATED_P (insn) = 1;
11240
11241 m->fs.cfa_offset -= UNITS_PER_WORD;
11242 }
11243
11244 /* When the frame pointer is the CFA, and we pop it, we are
11245 swapping back to the stack pointer as the CFA. This happens
11246 for stack frames that don't allocate other data, so we assume
11247 the stack pointer is now pointing at the return address, i.e.
11248 the function entry state, which makes the offset be 1 word. */
11249 if (reg == hard_frame_pointer_rtx)
11250 {
11251 m->fs.fp_valid = false;
11252 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11253 {
11254 m->fs.cfa_reg = stack_pointer_rtx;
11255 m->fs.cfa_offset -= UNITS_PER_WORD;
11256
11257 add_reg_note (insn, REG_CFA_DEF_CFA,
11258 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11259 GEN_INT (m->fs.cfa_offset)));
11260 RTX_FRAME_RELATED_P (insn) = 1;
11261 }
11262 }
11263 }
11264
11265 /* Emit code to restore saved registers using POP insns. */
11266
11267 static void
11268 ix86_emit_restore_regs_using_pop (void)
11269 {
11270 unsigned int regno;
11271
11272 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11273 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
11274 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
11275 }
11276
11277 /* Emit code and notes for the LEAVE instruction. */
11278
11279 static void
11280 ix86_emit_leave (void)
11281 {
11282 struct machine_function *m = cfun->machine;
11283 rtx insn = emit_insn (ix86_gen_leave ());
11284
11285 ix86_add_queued_cfa_restore_notes (insn);
11286
11287 gcc_assert (m->fs.fp_valid);
11288 m->fs.sp_valid = true;
11289 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
11290 m->fs.fp_valid = false;
11291
11292 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11293 {
11294 m->fs.cfa_reg = stack_pointer_rtx;
11295 m->fs.cfa_offset = m->fs.sp_offset;
11296
11297 add_reg_note (insn, REG_CFA_DEF_CFA,
11298 plus_constant (Pmode, stack_pointer_rtx,
11299 m->fs.sp_offset));
11300 RTX_FRAME_RELATED_P (insn) = 1;
11301 }
11302 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
11303 m->fs.fp_offset);
11304 }
11305
11306 /* Emit code to restore saved registers using MOV insns.
11307 First register is restored from CFA - CFA_OFFSET. */
11308 static void
11309 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
11310 bool maybe_eh_return)
11311 {
11312 struct machine_function *m = cfun->machine;
11313 unsigned int regno;
11314
11315 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11316 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11317 {
11318 rtx reg = gen_rtx_REG (word_mode, regno);
11319 rtx insn, mem;
11320
11321 mem = choose_baseaddr (cfa_offset);
11322 mem = gen_frame_mem (word_mode, mem);
11323 insn = emit_move_insn (reg, mem);
11324
11325 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
11326 {
11327 /* Previously we'd represented the CFA as an expression
11328 like *(%ebp - 8). We've just popped that value from
11329 the stack, which means we need to reset the CFA to
11330 the drap register. This will remain until we restore
11331 the stack pointer. */
11332 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11333 RTX_FRAME_RELATED_P (insn) = 1;
11334
11335 /* This means that the DRAP register is valid for addressing. */
11336 m->fs.drap_valid = true;
11337 }
11338 else
11339 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
11340
11341 cfa_offset -= UNITS_PER_WORD;
11342 }
11343 }
11344
11345 /* Emit code to restore saved registers using MOV insns.
11346 First register is restored from CFA - CFA_OFFSET. */
11347 static void
11348 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
11349 bool maybe_eh_return)
11350 {
11351 unsigned int regno;
11352
11353 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11354 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11355 {
11356 rtx reg = gen_rtx_REG (V4SFmode, regno);
11357 rtx mem;
11358
11359 mem = choose_baseaddr (cfa_offset);
11360 mem = gen_rtx_MEM (V4SFmode, mem);
11361 set_mem_align (mem, 128);
11362 emit_move_insn (reg, mem);
11363
11364 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
11365
11366 cfa_offset -= 16;
11367 }
11368 }
11369
11370 /* Restore function stack, frame, and registers. */
11371
11372 void
11373 ix86_expand_epilogue (int style)
11374 {
11375 struct machine_function *m = cfun->machine;
11376 struct machine_frame_state frame_state_save = m->fs;
11377 struct ix86_frame frame;
11378 bool restore_regs_via_mov;
11379 bool using_drap;
11380
11381 ix86_finalize_stack_realign_flags ();
11382 ix86_compute_frame_layout (&frame);
11383
11384 m->fs.sp_valid = (!frame_pointer_needed
11385 || (crtl->sp_is_unchanging
11386 && !stack_realign_fp));
11387 gcc_assert (!m->fs.sp_valid
11388 || m->fs.sp_offset == frame.stack_pointer_offset);
11389
11390 /* The FP must be valid if the frame pointer is present. */
11391 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
11392 gcc_assert (!m->fs.fp_valid
11393 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
11394
11395 /* We must have *some* valid pointer to the stack frame. */
11396 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
11397
11398 /* The DRAP is never valid at this point. */
11399 gcc_assert (!m->fs.drap_valid);
11400
11401 /* See the comment about red zone and frame
11402 pointer usage in ix86_expand_prologue. */
11403 if (frame_pointer_needed && frame.red_zone_size)
11404 emit_insn (gen_memory_blockage ());
11405
11406 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
11407 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
11408
11409 /* Determine the CFA offset of the end of the red-zone. */
11410 m->fs.red_zone_offset = 0;
11411 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
11412 {
11413 /* The red-zone begins below the return address. */
11414 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
11415
11416 /* When the register save area is in the aligned portion of
11417 the stack, determine the maximum runtime displacement that
11418 matches up with the aligned frame. */
11419 if (stack_realign_drap)
11420 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
11421 + UNITS_PER_WORD);
11422 }
11423
11424 /* Special care must be taken for the normal return case of a function
11425 using eh_return: the eax and edx registers are marked as saved, but
11426 not restored along this path. Adjust the save location to match. */
11427 if (crtl->calls_eh_return && style != 2)
11428 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
11429
11430 /* EH_RETURN requires the use of moves to function properly. */
11431 if (crtl->calls_eh_return)
11432 restore_regs_via_mov = true;
11433 /* SEH requires the use of pops to identify the epilogue. */
11434 else if (TARGET_SEH)
11435 restore_regs_via_mov = false;
11436 /* If we're only restoring one register and sp is not valid, then
11437 use a move instruction to restore the register, since it's
11438 less work than reloading sp and popping the register. */
11439 else if (!m->fs.sp_valid && frame.nregs <= 1)
11440 restore_regs_via_mov = true;
11441 else if (TARGET_EPILOGUE_USING_MOVE
11442 && cfun->machine->use_fast_prologue_epilogue
11443 && (frame.nregs > 1
11444 || m->fs.sp_offset != frame.reg_save_offset))
11445 restore_regs_via_mov = true;
11446 else if (frame_pointer_needed
11447 && !frame.nregs
11448 && m->fs.sp_offset != frame.reg_save_offset)
11449 restore_regs_via_mov = true;
11450 else if (frame_pointer_needed
11451 && TARGET_USE_LEAVE
11452 && cfun->machine->use_fast_prologue_epilogue
11453 && frame.nregs == 1)
11454 restore_regs_via_mov = true;
11455 else
11456 restore_regs_via_mov = false;
11457
11458 if (restore_regs_via_mov || frame.nsseregs)
11459 {
11460 /* Ensure that the entire register save area is addressable via
11461 the stack pointer, if we will restore via sp. */
11462 if (TARGET_64BIT
11463 && m->fs.sp_offset > 0x7fffffff
11464 && !(m->fs.fp_valid || m->fs.drap_valid)
11465 && (frame.nsseregs + frame.nregs) != 0)
11466 {
11467 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11468 GEN_INT (m->fs.sp_offset
11469 - frame.sse_reg_save_offset),
11470 style,
11471 m->fs.cfa_reg == stack_pointer_rtx);
11472 }
11473 }
11474
11475 /* If there are any SSE registers to restore, then we have to do it
11476 via moves, since there's obviously no pop for SSE regs. */
11477 if (frame.nsseregs)
11478 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
11479 style == 2);
11480
11481 if (restore_regs_via_mov)
11482 {
11483 rtx t;
11484
11485 if (frame.nregs)
11486 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
11487
11488 /* eh_return epilogues need %ecx added to the stack pointer. */
11489 if (style == 2)
11490 {
11491 rtx insn, sa = EH_RETURN_STACKADJ_RTX;
11492
11493 /* Stack align doesn't work with eh_return. */
11494 gcc_assert (!stack_realign_drap);
11495 /* Neither do regparm nested functions. */
11496 gcc_assert (!ix86_static_chain_on_stack);
11497
11498 if (frame_pointer_needed)
11499 {
11500 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
11501 t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
11502 emit_insn (gen_rtx_SET (VOIDmode, sa, t));
11503
11504 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
11505 insn = emit_move_insn (hard_frame_pointer_rtx, t);
11506
11507 /* Note that we use SA as a temporary CFA, as the return
11508 address is at the proper place relative to it. We
11509 pretend this happens at the FP restore insn because
11510 prior to this insn the FP would be stored at the wrong
11511 offset relative to SA, and after this insn we have no
11512 other reasonable register to use for the CFA. We don't
11513 bother resetting the CFA to the SP for the duration of
11514 the return insn. */
11515 add_reg_note (insn, REG_CFA_DEF_CFA,
11516 plus_constant (Pmode, sa, UNITS_PER_WORD));
11517 ix86_add_queued_cfa_restore_notes (insn);
11518 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
11519 RTX_FRAME_RELATED_P (insn) = 1;
11520
11521 m->fs.cfa_reg = sa;
11522 m->fs.cfa_offset = UNITS_PER_WORD;
11523 m->fs.fp_valid = false;
11524
11525 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
11526 const0_rtx, style, false);
11527 }
11528 else
11529 {
11530 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
11531 t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
11532 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
11533 ix86_add_queued_cfa_restore_notes (insn);
11534
11535 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
11536 if (m->fs.cfa_offset != UNITS_PER_WORD)
11537 {
11538 m->fs.cfa_offset = UNITS_PER_WORD;
11539 add_reg_note (insn, REG_CFA_DEF_CFA,
11540 plus_constant (Pmode, stack_pointer_rtx,
11541 UNITS_PER_WORD));
11542 RTX_FRAME_RELATED_P (insn) = 1;
11543 }
11544 }
11545 m->fs.sp_offset = UNITS_PER_WORD;
11546 m->fs.sp_valid = true;
11547 }
11548 }
11549 else
11550 {
11551 /* SEH requires that the function end with (1) a stack adjustment
11552 if necessary, (2) a sequence of pops, and (3) a return or
11553 jump instruction. Prevent insns from the function body from
11554 being scheduled into this sequence. */
11555 if (TARGET_SEH)
11556 {
11557 /* Prevent a catch region from being adjacent to the standard
11558 epilogue sequence. Unfortunately, neither crtl->uses_eh_lsda nor
11559 several other flags that would be interesting to test are
11560 yet set up. */
11561 if (flag_non_call_exceptions)
11562 emit_insn (gen_nops (const1_rtx));
11563 else
11564 emit_insn (gen_blockage ());
11565 }
11566
11567 /* First step is to deallocate the stack frame so that we can
11568 pop the registers. Also do it on SEH target for very large
11569 frame as the emitted instructions aren't allowed by the ABI in
11570 epilogues. */
11571 if (!m->fs.sp_valid
11572 || (TARGET_SEH
11573 && (m->fs.sp_offset - frame.reg_save_offset
11574 >= SEH_MAX_FRAME_SIZE)))
11575 {
11576 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
11577 GEN_INT (m->fs.fp_offset
11578 - frame.reg_save_offset),
11579 style, false);
11580 }
11581 else if (m->fs.sp_offset != frame.reg_save_offset)
11582 {
11583 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11584 GEN_INT (m->fs.sp_offset
11585 - frame.reg_save_offset),
11586 style,
11587 m->fs.cfa_reg == stack_pointer_rtx);
11588 }
11589
11590 ix86_emit_restore_regs_using_pop ();
11591 }
11592
11593 /* If we used a frame pointer and haven't already got rid of it,
11594 then do so now. */
11595 if (m->fs.fp_valid)
11596 {
11597 /* If the stack pointer is valid and pointing at the frame
11598 pointer store address, then we only need a pop. */
11599 if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
11600 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
11601 /* Leave results in shorter dependency chains on CPUs that are
11602 able to grok it fast. */
11603 else if (TARGET_USE_LEAVE
11604 || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
11605 || !cfun->machine->use_fast_prologue_epilogue)
11606 ix86_emit_leave ();
11607 else
11608 {
11609 pro_epilogue_adjust_stack (stack_pointer_rtx,
11610 hard_frame_pointer_rtx,
11611 const0_rtx, style, !using_drap);
11612 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
11613 }
11614 }
11615
11616 if (using_drap)
11617 {
11618 int param_ptr_offset = UNITS_PER_WORD;
11619 rtx insn;
11620
11621 gcc_assert (stack_realign_drap);
11622
11623 if (ix86_static_chain_on_stack)
11624 param_ptr_offset += UNITS_PER_WORD;
11625 if (!call_used_regs[REGNO (crtl->drap_reg)])
11626 param_ptr_offset += UNITS_PER_WORD;
11627
11628 insn = emit_insn (gen_rtx_SET
11629 (VOIDmode, stack_pointer_rtx,
11630 gen_rtx_PLUS (Pmode,
11631 crtl->drap_reg,
11632 GEN_INT (-param_ptr_offset))));
11633 m->fs.cfa_reg = stack_pointer_rtx;
11634 m->fs.cfa_offset = param_ptr_offset;
11635 m->fs.sp_offset = param_ptr_offset;
11636 m->fs.realigned = false;
11637
11638 add_reg_note (insn, REG_CFA_DEF_CFA,
11639 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11640 GEN_INT (param_ptr_offset)));
11641 RTX_FRAME_RELATED_P (insn) = 1;
11642
11643 if (!call_used_regs[REGNO (crtl->drap_reg)])
11644 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
11645 }
11646
11647 /* At this point the stack pointer must be valid, and we must have
11648 restored all of the registers. We may not have deallocated the
11649 entire stack frame. We've delayed this until now because it may
11650 be possible to merge the local stack deallocation with the
11651 deallocation forced by ix86_static_chain_on_stack. */
11652 gcc_assert (m->fs.sp_valid);
11653 gcc_assert (!m->fs.fp_valid);
11654 gcc_assert (!m->fs.realigned);
11655 if (m->fs.sp_offset != UNITS_PER_WORD)
11656 {
11657 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11658 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
11659 style, true);
11660 }
11661 else
11662 ix86_add_queued_cfa_restore_notes (get_last_insn ());
11663
11664 /* Sibcall epilogues don't want a return instruction. */
11665 if (style == 0)
11666 {
11667 m->fs = frame_state_save;
11668 return;
11669 }
11670
11671 if (crtl->args.pops_args && crtl->args.size)
11672 {
11673 rtx popc = GEN_INT (crtl->args.pops_args);
11674
11675 /* i386 can only pop 64K bytes. If asked to pop more, pop return
11676 address, do explicit add, and jump indirectly to the caller. */
11677
11678 if (crtl->args.pops_args >= 65536)
11679 {
11680 rtx ecx = gen_rtx_REG (SImode, CX_REG);
11681 rtx insn;
11682
11683 /* There is no "pascal" calling convention in any 64bit ABI. */
11684 gcc_assert (!TARGET_64BIT);
11685
11686 insn = emit_insn (gen_pop (ecx));
11687 m->fs.cfa_offset -= UNITS_PER_WORD;
11688 m->fs.sp_offset -= UNITS_PER_WORD;
11689
11690 add_reg_note (insn, REG_CFA_ADJUST_CFA,
11691 copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
11692 add_reg_note (insn, REG_CFA_REGISTER,
11693 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
11694 RTX_FRAME_RELATED_P (insn) = 1;
11695
11696 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11697 popc, -1, true);
11698 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
11699 }
11700 else
11701 emit_jump_insn (gen_simple_return_pop_internal (popc));
11702 }
11703 else
11704 emit_jump_insn (gen_simple_return_internal ());
11705
11706 /* Restore the state back to the state from the prologue,
11707 so that it's correct for the next epilogue. */
11708 m->fs = frame_state_save;
11709 }
11710
11711 /* Reset from the function's potential modifications. */
11712
11713 static void
11714 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
11715 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
11716 {
11717 if (pic_offset_table_rtx)
11718 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
11719 #if TARGET_MACHO
11720 /* Mach-O doesn't support labels at the end of objects, so if
11721 it looks like we might want one, insert a NOP. */
11722 {
11723 rtx insn = get_last_insn ();
11724 rtx deleted_debug_label = NULL_RTX;
11725 while (insn
11726 && NOTE_P (insn)
11727 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
11728 {
11729 /* For NOTE_INSN_DELETED_DEBUG_LABEL notes, don't insert a nop;
11730 only set their CODE_LABEL_NUMBER to -1, otherwise there
11731 would be code generation differences
11732 between -g and -g0. */
11733 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
11734 deleted_debug_label = insn;
11735 insn = PREV_INSN (insn);
11736 }
11737 if (insn
11738 && (LABEL_P (insn)
11739 || (NOTE_P (insn)
11740 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
11741 fputs ("\tnop\n", file);
11742 else if (deleted_debug_label)
11743 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
11744 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
11745 CODE_LABEL_NUMBER (insn) = -1;
11746 }
11747 #endif
11748
11749 }
11750
11751 /* Return a scratch register to use in the split stack prologue. The
11752 split stack prologue is used for -fsplit-stack. It consists of the first
11753 instructions in the function, even before the regular prologue.
11754 The scratch register can be any caller-saved register which is not
11755 used for parameters or for the static chain. */
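/* For illustration: in 64-bit mode this is %r11; in 32-bit mode it is
   normally %ecx, with %eax or %edx used instead for the fastcall, thiscall,
   regparm and nested-function combinations handled below.  */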
11756
11757 static unsigned int
11758 split_stack_prologue_scratch_regno (void)
11759 {
11760 if (TARGET_64BIT)
11761 return R11_REG;
11762 else
11763 {
11764 bool is_fastcall, is_thiscall;
11765 int regparm;
11766
11767 is_fastcall = (lookup_attribute ("fastcall",
11768 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
11769 != NULL);
11770 is_thiscall = (lookup_attribute ("thiscall",
11771 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
11772 != NULL);
11773 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
11774
11775 if (is_fastcall)
11776 {
11777 if (DECL_STATIC_CHAIN (cfun->decl))
11778 {
11779 sorry ("-fsplit-stack does not support fastcall with "
11780 "nested function");
11781 return INVALID_REGNUM;
11782 }
11783 return AX_REG;
11784 }
11785 else if (is_thiscall)
11786 {
11787 if (!DECL_STATIC_CHAIN (cfun->decl))
11788 return DX_REG;
11789 return AX_REG;
11790 }
11791 else if (regparm < 3)
11792 {
11793 if (!DECL_STATIC_CHAIN (cfun->decl))
11794 return CX_REG;
11795 else
11796 {
11797 if (regparm >= 2)
11798 {
11799 sorry ("-fsplit-stack does not support 2 register "
11800 "parameters for a nested function");
11801 return INVALID_REGNUM;
11802 }
11803 return DX_REG;
11804 }
11805 }
11806 else
11807 {
11808 /* FIXME: We could make this work by pushing a register
11809 around the addition and comparison. */
11810 sorry ("-fsplit-stack does not support 3 register parameters");
11811 return INVALID_REGNUM;
11812 }
11813 }
11814 }
11815
11816 /* A SYMBOL_REF for the function which allocates new stack space for
11817 -fsplit-stack. */
11818
11819 static GTY(()) rtx split_stack_fn;
11820
11821 /* A SYMBOL_REF for the function which allocates new stack space when
11822 using the large code model. */
11823
11824 static GTY(()) rtx split_stack_fn_large;
11825
11826 /* Handle -fsplit-stack. These are the first instructions in the
11827 function, even before the regular prologue. */
11828
11829 void
11830 ix86_expand_split_stack_prologue (void)
11831 {
11832 struct ix86_frame frame;
11833 HOST_WIDE_INT allocate;
11834 unsigned HOST_WIDE_INT args_size;
11835 rtx label, limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
11836 rtx scratch_reg = NULL_RTX;
11837 rtx varargs_label = NULL_RTX;
11838 rtx fn;
11839
11840 gcc_assert (flag_split_stack && reload_completed);
11841
11842 ix86_finalize_stack_realign_flags ();
11843 ix86_compute_frame_layout (&frame);
11844 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
11845
11846 /* This is the label we will branch to if we have enough stack
11847 space. We expect the basic block reordering pass to reverse this
11848 branch if optimizing, so that we branch in the unlikely case. */
11849 label = gen_label_rtx ();
11850
11851 /* We need to compare the stack pointer minus the frame size with
11852 the stack boundary in the TCB. The stack boundary always gives
11853 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
11854 can compare directly. Otherwise we need to do an addition. */
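/* For illustration, on x86-64 the comparison below typically assembles to
   something like
     cmpq %fs:<split-stack offset>, %rsp  (or a scratch register)
     jae <enough-stack label>
   where the UNSPEC_STACK_CHECK address selects the TCB field that holds
   the stack boundary.  */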
11855
11856 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
11857 UNSPEC_STACK_CHECK);
11858 limit = gen_rtx_CONST (Pmode, limit);
11859 limit = gen_rtx_MEM (Pmode, limit);
11860 if (allocate < SPLIT_STACK_AVAILABLE)
11861 current = stack_pointer_rtx;
11862 else
11863 {
11864 unsigned int scratch_regno;
11865 rtx offset;
11866
11867 /* We need a scratch register to hold the stack pointer minus
11868 the required frame size. Since this is the very start of the
11869 function, the scratch register can be any caller-saved
11870 register which is not used for parameters. */
11871 offset = GEN_INT (- allocate);
11872 scratch_regno = split_stack_prologue_scratch_regno ();
11873 if (scratch_regno == INVALID_REGNUM)
11874 return;
11875 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
11876 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
11877 {
11878 /* We don't use ix86_gen_add3 in this case because it will
11879 want to split to lea, but when not optimizing the insn
11880 will not be split after this point. */
11881 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11882 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11883 offset)));
11884 }
11885 else
11886 {
11887 emit_move_insn (scratch_reg, offset);
11888 emit_insn (ix86_gen_add3 (scratch_reg, scratch_reg,
11889 stack_pointer_rtx));
11890 }
11891 current = scratch_reg;
11892 }
11893
11894 ix86_expand_branch (GEU, current, limit, label);
11895 jump_insn = get_last_insn ();
11896 JUMP_LABEL (jump_insn) = label;
11897
11898 /* Mark the jump as very likely to be taken. */
11899 add_int_reg_note (jump_insn, REG_BR_PROB,
11900 REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100);
11901
11902 if (split_stack_fn == NULL_RTX)
11903 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
11904 fn = split_stack_fn;
11905
11906 /* Get more stack space. We pass in the desired stack space and the
11907 size of the arguments to copy to the new stack. In 32-bit mode
11908 we push the parameters; __morestack will return on a new stack
11909 anyhow. In 64-bit mode we pass the parameters in r10 and
11910 r11. */
11911 allocate_rtx = GEN_INT (allocate);
11912 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
11913 call_fusage = NULL_RTX;
11914 if (TARGET_64BIT)
11915 {
11916 rtx reg10, reg11;
11917
11918 reg10 = gen_rtx_REG (Pmode, R10_REG);
11919 reg11 = gen_rtx_REG (Pmode, R11_REG);
11920
11921 /* If this function uses a static chain, it will be in %r10.
11922 Preserve it across the call to __morestack. */
11923 if (DECL_STATIC_CHAIN (cfun->decl))
11924 {
11925 rtx rax;
11926
11927 rax = gen_rtx_REG (word_mode, AX_REG);
11928 emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
11929 use_reg (&call_fusage, rax);
11930 }
11931
11932 if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
11933 && !TARGET_PECOFF)
11934 {
11935 HOST_WIDE_INT argval;
11936
11937 gcc_assert (Pmode == DImode);
11938 /* When using the large model we need to load the address
11939 into a register, and we've run out of registers. So we
11940 switch to a different calling convention, and we call a
11941 different function: __morestack_large_model. We pass the
11942 argument size in the upper 32 bits of r10 and pass the
11943 frame size in the lower 32 bits. */
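/* For example (illustrative values): with args_size == 32 and
   allocate == 0x1000, argval computed below is
   (32 << 32) + 0x1000 == 0x2000001000.  */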
11944 gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
11945 gcc_assert ((args_size & 0xffffffff) == args_size);
11946
11947 if (split_stack_fn_large == NULL_RTX)
11948 split_stack_fn_large =
11949 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
11950
11951 if (ix86_cmodel == CM_LARGE_PIC)
11952 {
11953 rtx label, x;
11954
11955 label = gen_label_rtx ();
11956 emit_label (label);
11957 LABEL_PRESERVE_P (label) = 1;
11958 emit_insn (gen_set_rip_rex64 (reg10, label));
11959 emit_insn (gen_set_got_offset_rex64 (reg11, label));
11960 emit_insn (ix86_gen_add3 (reg10, reg10, reg11));
11961 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
11962 UNSPEC_GOT);
11963 x = gen_rtx_CONST (Pmode, x);
11964 emit_move_insn (reg11, x);
11965 x = gen_rtx_PLUS (Pmode, reg10, reg11);
11966 x = gen_const_mem (Pmode, x);
11967 emit_move_insn (reg11, x);
11968 }
11969 else
11970 emit_move_insn (reg11, split_stack_fn_large);
11971
11972 fn = reg11;
11973
11974 argval = ((args_size << 16) << 16) + allocate;
11975 emit_move_insn (reg10, GEN_INT (argval));
11976 }
11977 else
11978 {
11979 emit_move_insn (reg10, allocate_rtx);
11980 emit_move_insn (reg11, GEN_INT (args_size));
11981 use_reg (&call_fusage, reg11);
11982 }
11983
11984 use_reg (&call_fusage, reg10);
11985 }
11986 else
11987 {
11988 emit_insn (gen_push (GEN_INT (args_size)));
11989 emit_insn (gen_push (allocate_rtx));
11990 }
11991 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
11992 GEN_INT (UNITS_PER_WORD), constm1_rtx,
11993 NULL_RTX, false);
11994 add_function_usage_to (call_insn, call_fusage);
11995
11996 /* In order to make call/return prediction work right, we now need
11997 to execute a return instruction. See
11998 libgcc/config/i386/morestack.S for the details on how this works.
11999
12000 For flow purposes gcc must not see this as a return
12001 instruction--we need control flow to continue at the subsequent
12002 label. Therefore, we use an unspec. */
12003 gcc_assert (crtl->args.pops_args < 65536);
12004 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
12005
12006 /* If we are in 64-bit mode and this function uses a static chain,
12007 we saved %r10 in %rax before calling __morestack. */
12008 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
12009 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
12010 gen_rtx_REG (word_mode, AX_REG));
12011
12012 /* If this function calls va_start, we need to store a pointer to
12013 the arguments on the old stack, because they may not have been
12014 all copied to the new stack. At this point the old stack can be
12015 found at the frame pointer value used by __morestack, because
12016 __morestack has set that up before calling back to us. Here we
12017 store that pointer in a scratch register, and in
12018 ix86_expand_prologue we store the scratch register in a stack
12019 slot. */
12020 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12021 {
12022 unsigned int scratch_regno;
12023 rtx frame_reg;
12024 int words;
12025
12026 scratch_regno = split_stack_prologue_scratch_regno ();
12027 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
12028 frame_reg = gen_rtx_REG (Pmode, BP_REG);
12029
12030 /* 64-bit:
12031 fp -> old fp value
12032 return address within this function
12033 return address of caller of this function
12034 stack arguments
12035 So we add three words to get to the stack arguments.
12036
12037 32-bit:
12038 fp -> old fp value
12039 return address within this function
12040 first argument to __morestack
12041 second argument to __morestack
12042 return address of caller of this function
12043 stack arguments
12044 So we add five words to get to the stack arguments.
12045 */
12046 words = TARGET_64BIT ? 3 : 5;
12047 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12048 gen_rtx_PLUS (Pmode, frame_reg,
12049 GEN_INT (words * UNITS_PER_WORD))));
12050
12051 varargs_label = gen_label_rtx ();
12052 emit_jump_insn (gen_jump (varargs_label));
12053 JUMP_LABEL (get_last_insn ()) = varargs_label;
12054
12055 emit_barrier ();
12056 }
12057
12058 emit_label (label);
12059 LABEL_NUSES (label) = 1;
12060
12061 /* If this function calls va_start, we now have to set the scratch
12062 register for the case where we do not call __morestack. In this
12063 case we need to set it based on the stack pointer. */
12064 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12065 {
12066 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12067 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12068 GEN_INT (UNITS_PER_WORD))));
12069
12070 emit_label (varargs_label);
12071 LABEL_NUSES (varargs_label) = 1;
12072 }
12073 }
12074
12075 /* We may have to tell the dataflow pass that the split stack prologue
12076 is initializing a scratch register. */
12077
12078 static void
12079 ix86_live_on_entry (bitmap regs)
12080 {
12081 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12082 {
12083 gcc_assert (flag_split_stack);
12084 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
12085 }
12086 }
12087 \f
12088 /* Extract the parts of an RTL expression that is a valid memory address
12089 for an instruction. Return 0 if the structure of the address is
12090 grossly off. Return -1 if the address contains ASHIFT, so it is not
12091 strictly valid, but is still used for computing the length of a lea instruction. */
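/* For illustration, an address such as
     (plus:SI (plus:SI (mult:SI (reg:SI %eax) (const_int 4))
                       (reg:SI %ebx))
              (const_int 12))
   decomposes into base = %ebx, index = %eax, scale = 4, disp = 12,
   i.e. the memory operand written as 12(%ebx,%eax,4).  */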
12092
12093 int
12094 ix86_decompose_address (rtx addr, struct ix86_address *out)
12095 {
12096 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
12097 rtx base_reg, index_reg;
12098 HOST_WIDE_INT scale = 1;
12099 rtx scale_rtx = NULL_RTX;
12100 rtx tmp;
12101 int retval = 1;
12102 enum ix86_address_seg seg = SEG_DEFAULT;
12103
12104 /* Allow zero-extended SImode addresses;
12105 they will be emitted with the addr32 prefix. */
12106 if (TARGET_64BIT && GET_MODE (addr) == DImode)
12107 {
12108 if (GET_CODE (addr) == ZERO_EXTEND
12109 && GET_MODE (XEXP (addr, 0)) == SImode)
12110 {
12111 addr = XEXP (addr, 0);
12112 if (CONST_INT_P (addr))
12113 return 0;
12114 }
12115 else if (GET_CODE (addr) == AND
12116 && const_32bit_mask (XEXP (addr, 1), DImode))
12117 {
12118 addr = simplify_gen_subreg (SImode, XEXP (addr, 0), DImode, 0);
12119 if (addr == NULL_RTX)
12120 return 0;
12121
12122 if (CONST_INT_P (addr))
12123 return 0;
12124 }
12125 }
12126
12127 /* Allow SImode subregs of DImode addresses;
12128 they will be emitted with the addr32 prefix. */
12129 if (TARGET_64BIT && GET_MODE (addr) == SImode)
12130 {
12131 if (GET_CODE (addr) == SUBREG
12132 && GET_MODE (SUBREG_REG (addr)) == DImode)
12133 {
12134 addr = SUBREG_REG (addr);
12135 if (CONST_INT_P (addr))
12136 return 0;
12137 }
12138 }
12139
12140 if (REG_P (addr))
12141 base = addr;
12142 else if (GET_CODE (addr) == SUBREG)
12143 {
12144 if (REG_P (SUBREG_REG (addr)))
12145 base = addr;
12146 else
12147 return 0;
12148 }
12149 else if (GET_CODE (addr) == PLUS)
12150 {
12151 rtx addends[4], op;
12152 int n = 0, i;
12153
12154 op = addr;
12155 do
12156 {
12157 if (n >= 4)
12158 return 0;
12159 addends[n++] = XEXP (op, 1);
12160 op = XEXP (op, 0);
12161 }
12162 while (GET_CODE (op) == PLUS);
12163 if (n >= 4)
12164 return 0;
12165 addends[n] = op;
12166
12167 for (i = n; i >= 0; --i)
12168 {
12169 op = addends[i];
12170 switch (GET_CODE (op))
12171 {
12172 case MULT:
12173 if (index)
12174 return 0;
12175 index = XEXP (op, 0);
12176 scale_rtx = XEXP (op, 1);
12177 break;
12178
12179 case ASHIFT:
12180 if (index)
12181 return 0;
12182 index = XEXP (op, 0);
12183 tmp = XEXP (op, 1);
12184 if (!CONST_INT_P (tmp))
12185 return 0;
12186 scale = INTVAL (tmp);
12187 if ((unsigned HOST_WIDE_INT) scale > 3)
12188 return 0;
12189 scale = 1 << scale;
12190 break;
12191
12192 case ZERO_EXTEND:
12193 op = XEXP (op, 0);
12194 if (GET_CODE (op) != UNSPEC)
12195 return 0;
12196 /* FALLTHRU */
12197
12198 case UNSPEC:
12199 if (XINT (op, 1) == UNSPEC_TP
12200 && TARGET_TLS_DIRECT_SEG_REFS
12201 && seg == SEG_DEFAULT)
12202 seg = DEFAULT_TLS_SEG_REG;
12203 else
12204 return 0;
12205 break;
12206
12207 case SUBREG:
12208 if (!REG_P (SUBREG_REG (op)))
12209 return 0;
12210 /* FALLTHRU */
12211
12212 case REG:
12213 if (!base)
12214 base = op;
12215 else if (!index)
12216 index = op;
12217 else
12218 return 0;
12219 break;
12220
12221 case CONST:
12222 case CONST_INT:
12223 case SYMBOL_REF:
12224 case LABEL_REF:
12225 if (disp)
12226 return 0;
12227 disp = op;
12228 break;
12229
12230 default:
12231 return 0;
12232 }
12233 }
12234 }
12235 else if (GET_CODE (addr) == MULT)
12236 {
12237 index = XEXP (addr, 0); /* index*scale */
12238 scale_rtx = XEXP (addr, 1);
12239 }
12240 else if (GET_CODE (addr) == ASHIFT)
12241 {
12242 /* We're called for lea too, which implements ashift on occasion. */
12243 index = XEXP (addr, 0);
12244 tmp = XEXP (addr, 1);
12245 if (!CONST_INT_P (tmp))
12246 return 0;
12247 scale = INTVAL (tmp);
12248 if ((unsigned HOST_WIDE_INT) scale > 3)
12249 return 0;
12250 scale = 1 << scale;
12251 retval = -1;
12252 }
12253 else
12254 disp = addr; /* displacement */
12255
12256 if (index)
12257 {
12258 if (REG_P (index))
12259 ;
12260 else if (GET_CODE (index) == SUBREG
12261 && REG_P (SUBREG_REG (index)))
12262 ;
12263 else
12264 return 0;
12265 }
12266
12267 /* Extract the integral value of scale. */
12268 if (scale_rtx)
12269 {
12270 if (!CONST_INT_P (scale_rtx))
12271 return 0;
12272 scale = INTVAL (scale_rtx);
12273 }
12274
12275 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
12276 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
12277
12278 /* Avoid useless 0 displacement. */
12279 if (disp == const0_rtx && (base || index))
12280 disp = NULL_RTX;
12281
12282 /* Allow arg pointer and stack pointer as index if there is no scaling. */
12283 if (base_reg && index_reg && scale == 1
12284 && (index_reg == arg_pointer_rtx
12285 || index_reg == frame_pointer_rtx
12286 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
12287 {
12288 rtx tmp;
12289 tmp = base, base = index, index = tmp;
12290 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
12291 }
12292
12293 /* Special case: %ebp cannot be encoded as a base without a displacement.
12294 Similarly %r13. */
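/* In the ModR/M encoding, mod 00 with a base of 101b means "disp32,
   no base", so plain (%ebp) cannot be expressed and is emitted as 0(%ebp)
   instead; %r13 shares the same low three bits in 64-bit mode.  */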
12295 if (!disp
12296 && base_reg
12297 && (base_reg == hard_frame_pointer_rtx
12298 || base_reg == frame_pointer_rtx
12299 || base_reg == arg_pointer_rtx
12300 || (REG_P (base_reg)
12301 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
12302 || REGNO (base_reg) == R13_REG))))
12303 disp = const0_rtx;
12304
12305 /* Special case: on K6, [%esi] forces the instruction to be vector decoded.
12306 Avoid this by transforming it to [%esi+0].
12307 Reload calls address legitimization without cfun defined, so we need
12308 to test cfun for being non-NULL. */
12309 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
12310 && base_reg && !index_reg && !disp
12311 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
12312 disp = const0_rtx;
12313
12314 /* Special case: encode reg+reg instead of reg*2. */
12315 if (!base && index && scale == 2)
12316 base = index, base_reg = index_reg, scale = 1;
12317
12318 /* Special case: scaling cannot be encoded without base or displacement. */
12319 if (!base && !disp && index && scale != 1)
12320 disp = const0_rtx;
12321
12322 out->base = base;
12323 out->index = index;
12324 out->disp = disp;
12325 out->scale = scale;
12326 out->seg = seg;
12327
12328 return retval;
12329 }
12330 \f
12331 /* Return cost of the memory address x.
12332 For i386, it is better to use a complex address than let gcc copy
12333 the address into a reg and make a new pseudo. But not if the address
12334 requires two regs - that would mean more pseudos with longer
12335 lifetimes. */
12336 static int
12337 ix86_address_cost (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED,
12338 addr_space_t as ATTRIBUTE_UNUSED,
12339 bool speed ATTRIBUTE_UNUSED)
12340 {
12341 struct ix86_address parts;
12342 int cost = 1;
12343 int ok = ix86_decompose_address (x, &parts);
12344
12345 gcc_assert (ok);
12346
12347 if (parts.base && GET_CODE (parts.base) == SUBREG)
12348 parts.base = SUBREG_REG (parts.base);
12349 if (parts.index && GET_CODE (parts.index) == SUBREG)
12350 parts.index = SUBREG_REG (parts.index);
12351
12352 /* Attempt to minimize number of registers in the address. */
12353 if ((parts.base
12354 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
12355 || (parts.index
12356 && (!REG_P (parts.index)
12357 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
12358 cost++;
12359
12360 if (parts.base
12361 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
12362 && parts.index
12363 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
12364 && parts.base != parts.index)
12365 cost++;
12366
12367 /* The AMD K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
12368 since its predecode logic can't detect the length of instructions
12369 and decoding degenerates to vector decoded. Increase the cost of such
12370 addresses here. The penalty is at least 2 cycles. It may be worthwhile
12371 to split such addresses or even to refuse them entirely.
12372
12373 The following addressing modes are affected:
12374 [base+scale*index]
12375 [scale*index+disp]
12376 [base+index]
12377
12378 The first and last cases may be avoidable by explicitly coding the zero
12379 in the memory address, but I don't have an AMD-K6 machine handy to check
12380 this theory. */
12381
12382 if (TARGET_K6
12383 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
12384 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
12385 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
12386 cost += 10;
12387
12388 return cost;
12389 }
12390 \f
12391 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
12392 this is used to form addresses to local data when -fPIC is in
12393 use. */
12394
12395 static bool
12396 darwin_local_data_pic (rtx disp)
12397 {
12398 return (GET_CODE (disp) == UNSPEC
12399 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
12400 }
12401
12402 /* Determine if a given RTX is a valid constant. We already know this
12403 satisfies CONSTANT_P. */
12404
12405 static bool
12406 ix86_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
12407 {
12408 switch (GET_CODE (x))
12409 {
12410 case CONST:
12411 x = XEXP (x, 0);
12412
12413 if (GET_CODE (x) == PLUS)
12414 {
12415 if (!CONST_INT_P (XEXP (x, 1)))
12416 return false;
12417 x = XEXP (x, 0);
12418 }
12419
12420 if (TARGET_MACHO && darwin_local_data_pic (x))
12421 return true;
12422
12423 /* Only some unspecs are valid as "constants". */
12424 if (GET_CODE (x) == UNSPEC)
12425 switch (XINT (x, 1))
12426 {
12427 case UNSPEC_GOT:
12428 case UNSPEC_GOTOFF:
12429 case UNSPEC_PLTOFF:
12430 return TARGET_64BIT;
12431 case UNSPEC_TPOFF:
12432 case UNSPEC_NTPOFF:
12433 x = XVECEXP (x, 0, 0);
12434 return (GET_CODE (x) == SYMBOL_REF
12435 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
12436 case UNSPEC_DTPOFF:
12437 x = XVECEXP (x, 0, 0);
12438 return (GET_CODE (x) == SYMBOL_REF
12439 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
12440 default:
12441 return false;
12442 }
12443
12444 /* We must have drilled down to a symbol. */
12445 if (GET_CODE (x) == LABEL_REF)
12446 return true;
12447 if (GET_CODE (x) != SYMBOL_REF)
12448 return false;
12449 /* FALLTHRU */
12450
12451 case SYMBOL_REF:
12452 /* TLS symbols are never valid. */
12453 if (SYMBOL_REF_TLS_MODEL (x))
12454 return false;
12455
12456 /* DLLIMPORT symbols are never valid. */
12457 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12458 && SYMBOL_REF_DLLIMPORT_P (x))
12459 return false;
12460
12461 #if TARGET_MACHO
12462 /* mdynamic-no-pic */
12463 if (MACHO_DYNAMIC_NO_PIC_P)
12464 return machopic_symbol_defined_p (x);
12465 #endif
12466 break;
12467
12468 case CONST_DOUBLE:
12469 if (GET_MODE (x) == TImode
12470 && x != CONST0_RTX (TImode)
12471 && !TARGET_64BIT)
12472 return false;
12473 break;
12474
12475 case CONST_VECTOR:
12476 if (!standard_sse_constant_p (x))
12477 return false;
12478
12479 default:
12480 break;
12481 }
12482
12483 /* Otherwise we handle everything else in the move patterns. */
12484 return true;
12485 }
12486
12487 /* Determine if it's legal to put X into the constant pool. This
12488 is not possible for the address of thread-local symbols, which
12489 is checked above. */
12490
12491 static bool
12492 ix86_cannot_force_const_mem (enum machine_mode mode, rtx x)
12493 {
12494 /* We can always put integral constants and vectors in memory. */
12495 switch (GET_CODE (x))
12496 {
12497 case CONST_INT:
12498 case CONST_DOUBLE:
12499 case CONST_VECTOR:
12500 return false;
12501
12502 default:
12503 break;
12504 }
12505 return !ix86_legitimate_constant_p (mode, x);
12506 }
12507
12508 /* Nonzero if the symbol is marked as dllimport, or as stub-variable,
12509 otherwise zero. */
12510
12511 static bool
12512 is_imported_p (rtx x)
12513 {
12514 if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
12515 || GET_CODE (x) != SYMBOL_REF)
12516 return false;
12517
12518 return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x);
12519 }
12520
12521
12522 /* Nonzero if the constant value X is a legitimate general operand
12523 when generating PIC code. It is given that flag_pic is on and
12524 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
12525
12526 bool
12527 legitimate_pic_operand_p (rtx x)
12528 {
12529 rtx inner;
12530
12531 switch (GET_CODE (x))
12532 {
12533 case CONST:
12534 inner = XEXP (x, 0);
12535 if (GET_CODE (inner) == PLUS
12536 && CONST_INT_P (XEXP (inner, 1)))
12537 inner = XEXP (inner, 0);
12538
12539 /* Only some unspecs are valid as "constants". */
12540 if (GET_CODE (inner) == UNSPEC)
12541 switch (XINT (inner, 1))
12542 {
12543 case UNSPEC_GOT:
12544 case UNSPEC_GOTOFF:
12545 case UNSPEC_PLTOFF:
12546 return TARGET_64BIT;
12547 case UNSPEC_TPOFF:
12548 x = XVECEXP (inner, 0, 0);
12549 return (GET_CODE (x) == SYMBOL_REF
12550 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
12551 case UNSPEC_MACHOPIC_OFFSET:
12552 return legitimate_pic_address_disp_p (x);
12553 default:
12554 return false;
12555 }
12556 /* FALLTHRU */
12557
12558 case SYMBOL_REF:
12559 case LABEL_REF:
12560 return legitimate_pic_address_disp_p (x);
12561
12562 default:
12563 return true;
12564 }
12565 }
12566
12567 /* Determine if a given CONST RTX is a valid memory displacement
12568 in PIC mode. */
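/* For illustration, in 32-bit PIC code a displacement such as
     (const (unspec [foo] UNSPEC_GOTOFF))
   is accepted here and typically yields the address foo@GOTOFF(%ebx).  */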
12569
12570 bool
12571 legitimate_pic_address_disp_p (rtx disp)
12572 {
12573 bool saw_plus;
12574
12575 /* In 64bit mode we can allow direct addresses of symbols and labels
12576 when they are not dynamic symbols. */
12577 if (TARGET_64BIT)
12578 {
12579 rtx op0 = disp, op1;
12580
12581 switch (GET_CODE (disp))
12582 {
12583 case LABEL_REF:
12584 return true;
12585
12586 case CONST:
12587 if (GET_CODE (XEXP (disp, 0)) != PLUS)
12588 break;
12589 op0 = XEXP (XEXP (disp, 0), 0);
12590 op1 = XEXP (XEXP (disp, 0), 1);
12591 if (!CONST_INT_P (op1)
12592 || INTVAL (op1) >= 16*1024*1024
12593 || INTVAL (op1) < -16*1024*1024)
12594 break;
12595 if (GET_CODE (op0) == LABEL_REF)
12596 return true;
12597 if (GET_CODE (op0) == CONST
12598 && GET_CODE (XEXP (op0, 0)) == UNSPEC
12599 && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
12600 return true;
12601 if (GET_CODE (op0) == UNSPEC
12602 && XINT (op0, 1) == UNSPEC_PCREL)
12603 return true;
12604 if (GET_CODE (op0) != SYMBOL_REF)
12605 break;
12606 /* FALLTHRU */
12607
12608 case SYMBOL_REF:
12609 /* TLS references should always be enclosed in UNSPEC.
12610 A dllimported symbol always needs to be resolved. */
12611 if (SYMBOL_REF_TLS_MODEL (op0)
12612 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
12613 return false;
12614
12615 if (TARGET_PECOFF)
12616 {
12617 if (is_imported_p (op0))
12618 return true;
12619
12620 if (SYMBOL_REF_FAR_ADDR_P (op0)
12621 || !SYMBOL_REF_LOCAL_P (op0))
12622 break;
12623
12624 /* Function symbols need to be resolved only for
12625 the large model.
12626 For the small model we don't need to resolve anything
12627 here. */
12628 if ((ix86_cmodel != CM_LARGE_PIC
12629 && SYMBOL_REF_FUNCTION_P (op0))
12630 || ix86_cmodel == CM_SMALL_PIC)
12631 return true;
12632 /* Non-external symbols don't need to be resolved for
12633 the large and medium models. */
12634 if ((ix86_cmodel == CM_LARGE_PIC
12635 || ix86_cmodel == CM_MEDIUM_PIC)
12636 && !SYMBOL_REF_EXTERNAL_P (op0))
12637 return true;
12638 }
12639 else if (!SYMBOL_REF_FAR_ADDR_P (op0)
12640 && SYMBOL_REF_LOCAL_P (op0)
12641 && ix86_cmodel != CM_LARGE_PIC)
12642 return true;
12643 break;
12644
12645 default:
12646 break;
12647 }
12648 }
12649 if (GET_CODE (disp) != CONST)
12650 return false;
12651 disp = XEXP (disp, 0);
12652
12653 if (TARGET_64BIT)
12654 {
12655 /* It is unsafe to allow PLUS expressions. This limits the allowed
12656 distance of GOT tables. We should not need these anyway. */
12657 if (GET_CODE (disp) != UNSPEC
12658 || (XINT (disp, 1) != UNSPEC_GOTPCREL
12659 && XINT (disp, 1) != UNSPEC_GOTOFF
12660 && XINT (disp, 1) != UNSPEC_PCREL
12661 && XINT (disp, 1) != UNSPEC_PLTOFF))
12662 return false;
12663
12664 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
12665 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
12666 return false;
12667 return true;
12668 }
12669
12670 saw_plus = false;
12671 if (GET_CODE (disp) == PLUS)
12672 {
12673 if (!CONST_INT_P (XEXP (disp, 1)))
12674 return false;
12675 disp = XEXP (disp, 0);
12676 saw_plus = true;
12677 }
12678
12679 if (TARGET_MACHO && darwin_local_data_pic (disp))
12680 return true;
12681
12682 if (GET_CODE (disp) != UNSPEC)
12683 return false;
12684
12685 switch (XINT (disp, 1))
12686 {
12687 case UNSPEC_GOT:
12688 if (saw_plus)
12689 return false;
12690 /* We need to check for both symbols and labels because VxWorks loads
12691 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
12692 details. */
12693 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
12694 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
12695 case UNSPEC_GOTOFF:
12696 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
12697 While the ABI also specifies a 32bit relocation, we don't produce it
12698 in the small PIC model at all. */
12699 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
12700 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
12701 && !TARGET_64BIT)
12702 return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
12703 return false;
12704 case UNSPEC_GOTTPOFF:
12705 case UNSPEC_GOTNTPOFF:
12706 case UNSPEC_INDNTPOFF:
12707 if (saw_plus)
12708 return false;
12709 disp = XVECEXP (disp, 0, 0);
12710 return (GET_CODE (disp) == SYMBOL_REF
12711 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
12712 case UNSPEC_NTPOFF:
12713 disp = XVECEXP (disp, 0, 0);
12714 return (GET_CODE (disp) == SYMBOL_REF
12715 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
12716 case UNSPEC_DTPOFF:
12717 disp = XVECEXP (disp, 0, 0);
12718 return (GET_CODE (disp) == SYMBOL_REF
12719 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
12720 }
12721
12722 return false;
12723 }
12724
12725 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
12726 replace the input X, or the original X if no replacement is called for.
12727 The output parameter *WIN is 1 if the calling macro should goto WIN,
12728 0 if it should not. */
12729
12730 bool
12731 ix86_legitimize_reload_address (rtx x,
12732 enum machine_mode mode ATTRIBUTE_UNUSED,
12733 int opnum, int type,
12734 int ind_levels ATTRIBUTE_UNUSED)
12735 {
12736 /* Reload can generate:
12737
12738 (plus:DI (plus:DI (unspec:DI [(const_int 0 [0])] UNSPEC_TP)
12739 (reg:DI 97))
12740 (reg:DI 2 cx))
12741
12742 This RTX is rejected by ix86_legitimate_address_p due to
12743 non-strictness of base register 97. Following this rejection,
12744 reload pushes all three components into separate registers,
12745 creating an invalid memory address RTX.
12746
12747 The following code reloads only the invalid part of the
12748 memory address RTX. */
12749
12750 if (GET_CODE (x) == PLUS
12751 && REG_P (XEXP (x, 1))
12752 && GET_CODE (XEXP (x, 0)) == PLUS
12753 && REG_P (XEXP (XEXP (x, 0), 1)))
12754 {
12755 rtx base, index;
12756 bool something_reloaded = false;
12757
12758 base = XEXP (XEXP (x, 0), 1);
12759 if (!REG_OK_FOR_BASE_STRICT_P (base))
12760 {
12761 push_reload (base, NULL_RTX, &XEXP (XEXP (x, 0), 1), NULL,
12762 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
12763 opnum, (enum reload_type) type);
12764 something_reloaded = true;
12765 }
12766
12767 index = XEXP (x, 1);
12768 if (!REG_OK_FOR_INDEX_STRICT_P (index))
12769 {
12770 push_reload (index, NULL_RTX, &XEXP (x, 1), NULL,
12771 INDEX_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
12772 opnum, (enum reload_type) type);
12773 something_reloaded = true;
12774 }
12775
12776 gcc_assert (something_reloaded);
12777 return true;
12778 }
12779
12780 return false;
12781 }
12782
12783 /* Determine if OP is a suitable RTX for an address register.
12784 Return the naked register if a register or a register subreg is
12785 found, otherwise return NULL_RTX. */
12786
12787 static rtx
12788 ix86_validate_address_register (rtx op)
12789 {
12790 enum machine_mode mode = GET_MODE (op);
12791
12792 /* Only SImode or DImode registers can form the address. */
12793 if (mode != SImode && mode != DImode)
12794 return NULL_RTX;
12795
12796 if (REG_P (op))
12797 return op;
12798 else if (GET_CODE (op) == SUBREG)
12799 {
12800 rtx reg = SUBREG_REG (op);
12801
12802 if (!REG_P (reg))
12803 return NULL_RTX;
12804
12805 mode = GET_MODE (reg);
12806
12807 /* Don't allow SUBREGs that span more than a word. It can
12808 lead to spill failures when the register is one word out
12809 of a two word structure. */
12810 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
12811 return NULL_RTX;
12812
12813 /* Allow only SUBREGs of non-eliminable hard registers. */
12814 if (register_no_elim_operand (reg, mode))
12815 return reg;
12816 }
12817
12818 /* Op is not a register. */
12819 return NULL_RTX;
12820 }
12821
12822 /* Recognizes RTL expressions that are valid memory addresses for an
12823 instruction. The MODE argument is the machine mode for the MEM
12824 expression that wants to use this address.
12825
12826 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
12827 convert common non-canonical forms to canonical form so that they will
12828 be recognized. */
12829
12830 static bool
12831 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
12832 rtx addr, bool strict)
12833 {
12834 struct ix86_address parts;
12835 rtx base, index, disp;
12836 HOST_WIDE_INT scale;
12837 enum ix86_address_seg seg;
12838
12839 if (ix86_decompose_address (addr, &parts) <= 0)
12840 /* Decomposition failed. */
12841 return false;
12842
12843 base = parts.base;
12844 index = parts.index;
12845 disp = parts.disp;
12846 scale = parts.scale;
12847 seg = parts.seg;
12848
12849 /* Validate base register. */
12850 if (base)
12851 {
12852 rtx reg = ix86_validate_address_register (base);
12853
12854 if (reg == NULL_RTX)
12855 return false;
12856
12857 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
12858 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
12859 /* Base is not valid. */
12860 return false;
12861 }
12862
12863 /* Validate index register. */
12864 if (index)
12865 {
12866 rtx reg = ix86_validate_address_register (index);
12867
12868 if (reg == NULL_RTX)
12869 return false;
12870
12871 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
12872 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
12873 /* Index is not valid. */
12874 return false;
12875 }
12876
12877 /* Index and base should have the same mode. */
12878 if (base && index
12879 && GET_MODE (base) != GET_MODE (index))
12880 return false;
12881
12882 /* Address override works only on the (%reg) part of %fs:(%reg). */
12883 if (seg != SEG_DEFAULT
12884 && ((base && GET_MODE (base) != word_mode)
12885 || (index && GET_MODE (index) != word_mode)))
12886 return false;
12887
12888 /* Validate scale factor. */
12889 if (scale != 1)
12890 {
12891 if (!index)
12892 /* Scale without index. */
12893 return false;
12894
12895 if (scale != 2 && scale != 4 && scale != 8)
12896 /* Scale is not a valid multiplier. */
12897 return false;
12898 }
12899
12900 /* Validate displacement. */
12901 if (disp)
12902 {
12903 if (GET_CODE (disp) == CONST
12904 && GET_CODE (XEXP (disp, 0)) == UNSPEC
12905 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
12906 switch (XINT (XEXP (disp, 0), 1))
12907 {
12908 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
12909 used. While the ABI also specifies 32bit relocations, we don't produce
12910 them at all and use IP-relative addressing instead. */
12911 case UNSPEC_GOT:
12912 case UNSPEC_GOTOFF:
12913 gcc_assert (flag_pic);
12914 if (!TARGET_64BIT)
12915 goto is_legitimate_pic;
12916
12917 /* 64bit address unspec. */
12918 return false;
12919
12920 case UNSPEC_GOTPCREL:
12921 case UNSPEC_PCREL:
12922 gcc_assert (flag_pic);
12923 goto is_legitimate_pic;
12924
12925 case UNSPEC_GOTTPOFF:
12926 case UNSPEC_GOTNTPOFF:
12927 case UNSPEC_INDNTPOFF:
12928 case UNSPEC_NTPOFF:
12929 case UNSPEC_DTPOFF:
12930 break;
12931
12932 case UNSPEC_STACK_CHECK:
12933 gcc_assert (flag_split_stack);
12934 break;
12935
12936 default:
12937 /* Invalid address unspec. */
12938 return false;
12939 }
12940
12941 else if (SYMBOLIC_CONST (disp)
12942 && (flag_pic
12943 || (TARGET_MACHO
12944 #if TARGET_MACHO
12945 && MACHOPIC_INDIRECT
12946 && !machopic_operand_p (disp)
12947 #endif
12948 )))
12949 {
12950
12951 is_legitimate_pic:
12952 if (TARGET_64BIT && (index || base))
12953 {
12954 /* foo@dtpoff(%rX) is ok. */
12955 if (GET_CODE (disp) != CONST
12956 || GET_CODE (XEXP (disp, 0)) != PLUS
12957 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
12958 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
12959 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
12960 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
12961 /* Non-constant pic memory reference. */
12962 return false;
12963 }
12964 else if ((!TARGET_MACHO || flag_pic)
12965 && ! legitimate_pic_address_disp_p (disp))
12966 /* Displacement is an invalid pic construct. */
12967 return false;
12968 #if TARGET_MACHO
12969 else if (MACHO_DYNAMIC_NO_PIC_P
12970 && !ix86_legitimate_constant_p (Pmode, disp))
12971 /* displacement must be referenced via non_lazy_pointer */
12972 return false;
12973 #endif
12974
12975 /* This code used to verify that a symbolic pic displacement
12976 includes the pic_offset_table_rtx register.
12977
12978 While this is a good idea, unfortunately these constructs may
12979 be created by the "adds using lea" optimization for incorrect
12980 code like:
12981
12982 int a;
12983 int foo(int i)
12984 {
12985 return *(&a+i);
12986 }
12987
12988 This code is nonsensical, but it results in addressing the
12989 GOT table with a pic_offset_table_rtx base. We can't
12990 easily refuse it, since it gets matched by the
12991 "addsi3" pattern, which is later split to lea when the
12992 output register differs from the input. While this
12993 could be handled by a separate addsi pattern for this case
12994 that never results in lea, disabling this test seems to be
12995 the easier and correct fix for the crash. */
12996 }
12997 else if (GET_CODE (disp) != LABEL_REF
12998 && !CONST_INT_P (disp)
12999 && (GET_CODE (disp) != CONST
13000 || !ix86_legitimate_constant_p (Pmode, disp))
13001 && (GET_CODE (disp) != SYMBOL_REF
13002 || !ix86_legitimate_constant_p (Pmode, disp)))
13003 /* Displacement is not constant. */
13004 return false;
13005 else if (TARGET_64BIT
13006 && !x86_64_immediate_operand (disp, VOIDmode))
13007 /* Displacement is out of range. */
13008 return false;
13009 /* In x32 mode, constant addresses are sign extended to 64bit, so
13010 we have to prevent addresses from 0x80000000 to 0xffffffff. */
13011 else if (TARGET_X32 && !(index || base)
13012 && CONST_INT_P (disp)
13013 && val_signbit_known_set_p (SImode, INTVAL (disp)))
13014 return false;
13015 }
13016
13017 /* Everything looks valid. */
13018 return true;
13019 }
13020
13021 /* Determine if a given RTX is a valid constant address. */
13022
13023 bool
13024 constant_address_p (rtx x)
13025 {
13026 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
13027 }
13028 \f
13029 /* Return a unique alias set for the GOT. */
13030
13031 static alias_set_type
13032 ix86_GOT_alias_set (void)
13033 {
13034 static alias_set_type set = -1;
13035 if (set == -1)
13036 set = new_alias_set ();
13037 return set;
13038 }
13039
13040 /* Return a legitimate reference for ORIG (an address) using the
13041 register REG. If REG is 0, a new pseudo is generated.
13042
13043 There are two types of references that must be handled:
13044
13045 1. Global data references must load the address from the GOT, via
13046 the PIC reg. An insn is emitted to do this load, and the reg is
13047 returned.
13048
13049 2. Static data references, constant pool addresses, and code labels
13050 compute the address as an offset from the GOT, whose base is in
13051 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
13052 differentiate them from global data objects. The returned
13053 address is the PIC reg + an unspec constant.
13054
13055 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
13056 reg also appears in the address. */
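/* For illustration, with 32-bit PIC a global symbol FOO is typically
   loaded through the GOT as
     (mem (plus pic_reg (const (unspec [FOO] UNSPEC_GOT))))
   while a local symbol is addressed relative to the GOT base as
     (plus pic_reg (const (unspec [FOO] UNSPEC_GOTOFF))).  */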
13057
13058 static rtx
13059 legitimize_pic_address (rtx orig, rtx reg)
13060 {
13061 rtx addr = orig;
13062 rtx new_rtx = orig;
13063
13064 #if TARGET_MACHO
13065 if (TARGET_MACHO && !TARGET_64BIT)
13066 {
13067 if (reg == 0)
13068 reg = gen_reg_rtx (Pmode);
13069 /* Use the generic Mach-O PIC machinery. */
13070 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
13071 }
13072 #endif
13073
13074 if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13075 {
13076 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13077 if (tmp)
13078 return tmp;
13079 }
13080
13081 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
13082 new_rtx = addr;
13083 else if (TARGET_64BIT && !TARGET_PECOFF
13084 && ix86_cmodel != CM_SMALL_PIC && gotoff_operand (addr, Pmode))
13085 {
13086 rtx tmpreg;
13087 /* This symbol may be referenced via a displacement from the PIC
13088 base address (@GOTOFF). */
13089
13090 if (reload_in_progress)
13091 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13092 if (GET_CODE (addr) == CONST)
13093 addr = XEXP (addr, 0);
13094 if (GET_CODE (addr) == PLUS)
13095 {
13096 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13097 UNSPEC_GOTOFF);
13098 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13099 }
13100 else
13101 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13102 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13103 if (!reg)
13104 tmpreg = gen_reg_rtx (Pmode);
13105 else
13106 tmpreg = reg;
13107 emit_move_insn (tmpreg, new_rtx);
13108
13109 if (reg != 0)
13110 {
13111 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
13112 tmpreg, 1, OPTAB_DIRECT);
13113 new_rtx = reg;
13114 }
13115 else
13116 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
13117 }
13118 else if (!TARGET_64BIT && !TARGET_PECOFF && gotoff_operand (addr, Pmode))
13119 {
13120 /* This symbol may be referenced via a displacement from the PIC
13121 base address (@GOTOFF). */
13122
13123 if (reload_in_progress)
13124 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13125 if (GET_CODE (addr) == CONST)
13126 addr = XEXP (addr, 0);
13127 if (GET_CODE (addr) == PLUS)
13128 {
13129 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13130 UNSPEC_GOTOFF);
13131 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13132 }
13133 else
13134 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13135 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13136 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13137
13138 if (reg != 0)
13139 {
13140 emit_move_insn (reg, new_rtx);
13141 new_rtx = reg;
13142 }
13143 }
13144 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
13145 /* We can't use @GOTOFF for text labels on VxWorks;
13146 see gotoff_operand. */
13147 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
13148 {
13149 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13150 if (tmp)
13151 return tmp;
13152
13153 /* For x64 PE-COFF there is no GOT table, so we use the address
13154 directly. */
13155 if (TARGET_64BIT && TARGET_PECOFF)
13156 {
13157 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
13158 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13159
13160 if (reg == 0)
13161 reg = gen_reg_rtx (Pmode);
13162 emit_move_insn (reg, new_rtx);
13163 new_rtx = reg;
13164 }
13165 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
13166 {
13167 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
13168 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13169 new_rtx = gen_const_mem (Pmode, new_rtx);
13170 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13171
13172 if (reg == 0)
13173 reg = gen_reg_rtx (Pmode);
13174 /* Use gen_movsi directly, otherwise the address is loaded
13175 into a register for CSE. We don't want to CSE these addresses;
13176 instead we CSE addresses from the GOT table, so skip this. */
13177 emit_insn (gen_movsi (reg, new_rtx));
13178 new_rtx = reg;
13179 }
13180 else
13181 {
13182 /* This symbol must be referenced via a load from the
13183 Global Offset Table (@GOT). */
13184
13185 if (reload_in_progress)
13186 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13187 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
13188 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13189 if (TARGET_64BIT)
13190 new_rtx = force_reg (Pmode, new_rtx);
13191 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13192 new_rtx = gen_const_mem (Pmode, new_rtx);
13193 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13194
13195 if (reg == 0)
13196 reg = gen_reg_rtx (Pmode);
13197 emit_move_insn (reg, new_rtx);
13198 new_rtx = reg;
13199 }
13200 }
13201 else
13202 {
13203 if (CONST_INT_P (addr)
13204 && !x86_64_immediate_operand (addr, VOIDmode))
13205 {
13206 if (reg)
13207 {
13208 emit_move_insn (reg, addr);
13209 new_rtx = reg;
13210 }
13211 else
13212 new_rtx = force_reg (Pmode, addr);
13213 }
13214 else if (GET_CODE (addr) == CONST)
13215 {
13216 addr = XEXP (addr, 0);
13217
13218 /* We must match stuff we generate before. Assume the only
13219 unspecs that can get here are ours. Not that we could do
13220 anything with them anyway.... */
13221 if (GET_CODE (addr) == UNSPEC
13222 || (GET_CODE (addr) == PLUS
13223 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
13224 return orig;
13225 gcc_assert (GET_CODE (addr) == PLUS);
13226 }
13227 if (GET_CODE (addr) == PLUS)
13228 {
13229 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
13230
13231 /* Check first to see if this is a constant offset from a @GOTOFF
13232 symbol reference. */
13233 if (!TARGET_PECOFF && gotoff_operand (op0, Pmode)
13234 && CONST_INT_P (op1))
13235 {
13236 if (!TARGET_64BIT)
13237 {
13238 if (reload_in_progress)
13239 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13240 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
13241 UNSPEC_GOTOFF);
13242 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
13243 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13244 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13245
13246 if (reg != 0)
13247 {
13248 emit_move_insn (reg, new_rtx);
13249 new_rtx = reg;
13250 }
13251 }
13252 else
13253 {
13254 if (INTVAL (op1) < -16*1024*1024
13255 || INTVAL (op1) >= 16*1024*1024)
13256 {
13257 if (!x86_64_immediate_operand (op1, Pmode))
13258 op1 = force_reg (Pmode, op1);
13259 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
13260 }
13261 }
13262 }
13263 else
13264 {
13265 rtx base = legitimize_pic_address (op0, reg);
13266 enum machine_mode mode = GET_MODE (base);
13267 new_rtx
13268 = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);
13269
13270 if (CONST_INT_P (new_rtx))
13271 {
13272 if (INTVAL (new_rtx) < -16*1024*1024
13273 || INTVAL (new_rtx) >= 16*1024*1024)
13274 {
13275 if (!x86_64_immediate_operand (new_rtx, mode))
13276 new_rtx = force_reg (mode, new_rtx);
13277 new_rtx
13278 = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
13279 }
13280 else
13281 new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
13282 }
13283 else
13284 {
13285 if (GET_CODE (new_rtx) == PLUS
13286 && CONSTANT_P (XEXP (new_rtx, 1)))
13287 {
13288 base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
13289 new_rtx = XEXP (new_rtx, 1);
13290 }
13291 new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
13292 }
13293 }
13294 }
13295 }
13296 return new_rtx;
13297 }
13298 \f
13299 /* Load the thread pointer. If TO_REG is true, force it into a register. */
13300
13301 static rtx
13302 get_thread_pointer (enum machine_mode tp_mode, bool to_reg)
13303 {
13304 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
13305
13306 if (GET_MODE (tp) != tp_mode)
13307 {
13308 gcc_assert (GET_MODE (tp) == SImode);
13309 gcc_assert (tp_mode == DImode);
13310
13311 tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
13312 }
13313
13314 if (to_reg)
13315 tp = copy_to_mode_reg (tp_mode, tp);
13316
13317 return tp;
13318 }
13319
13320 /* Construct the SYMBOL_REF for the tls_get_addr function. */
13321
13322 static GTY(()) rtx ix86_tls_symbol;
13323
13324 static rtx
13325 ix86_tls_get_addr (void)
13326 {
13327 if (!ix86_tls_symbol)
13328 {
13329 const char *sym
13330 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
13331 ? "___tls_get_addr" : "__tls_get_addr");
13332
13333 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
13334 }
13335
13336 if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
13337 {
13338 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
13339 UNSPEC_PLTOFF);
13340 return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
13341 gen_rtx_CONST (Pmode, unspec));
13342 }
13343
13344 return ix86_tls_symbol;
13345 }
13346
13347 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
13348
13349 static GTY(()) rtx ix86_tls_module_base_symbol;
13350
13351 rtx
13352 ix86_tls_module_base (void)
13353 {
13354 if (!ix86_tls_module_base_symbol)
13355 {
13356 ix86_tls_module_base_symbol
13357 = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
13358
13359 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
13360 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
13361 }
13362
13363 return ix86_tls_module_base_symbol;
13364 }
13365
13366 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
13367 false if we expect this to be used for a memory address and true if
13368 we expect to load the address into a register. */
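/* For illustration, under the GNU TLS ABI a local-exec access to VAR on
   x86-64 typically ends up as the address %fs:VAR@tpoff, i.e. the thread
   pointer plus a link-time constant offset.  */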
13369
13370 static rtx
13371 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
13372 {
13373 rtx dest, base, off;
13374 rtx pic = NULL_RTX, tp = NULL_RTX;
13375 enum machine_mode tp_mode = Pmode;
13376 int type;
13377
13378 switch (model)
13379 {
13380 case TLS_MODEL_GLOBAL_DYNAMIC:
13381 dest = gen_reg_rtx (Pmode);
13382
13383 if (!TARGET_64BIT)
13384 {
13385 if (flag_pic && !TARGET_PECOFF)
13386 pic = pic_offset_table_rtx;
13387 else
13388 {
13389 pic = gen_reg_rtx (Pmode);
13390 emit_insn (gen_set_got (pic));
13391 }
13392 }
13393
13394 if (TARGET_GNU2_TLS)
13395 {
13396 if (TARGET_64BIT)
13397 emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
13398 else
13399 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
13400
13401 tp = get_thread_pointer (Pmode, true);
13402 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
13403
13404 if (GET_MODE (x) != Pmode)
13405 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13406
13407 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
13408 }
13409 else
13410 {
13411 rtx caddr = ix86_tls_get_addr ();
13412
13413 if (TARGET_64BIT)
13414 {
13415 rtx rax = gen_rtx_REG (Pmode, AX_REG);
13416 rtx insns;
13417
13418 start_sequence ();
13419 emit_call_insn
13420 (ix86_gen_tls_global_dynamic_64 (rax, x, caddr));
13421 insns = get_insns ();
13422 end_sequence ();
13423
13424 if (GET_MODE (x) != Pmode)
13425 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13426
13427 RTL_CONST_CALL_P (insns) = 1;
13428 emit_libcall_block (insns, dest, rax, x);
13429 }
13430 else
13431 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
13432 }
13433 break;
13434
13435 case TLS_MODEL_LOCAL_DYNAMIC:
13436 base = gen_reg_rtx (Pmode);
13437
13438 if (!TARGET_64BIT)
13439 {
13440 if (flag_pic)
13441 pic = pic_offset_table_rtx;
13442 else
13443 {
13444 pic = gen_reg_rtx (Pmode);
13445 emit_insn (gen_set_got (pic));
13446 }
13447 }
13448
13449 if (TARGET_GNU2_TLS)
13450 {
13451 rtx tmp = ix86_tls_module_base ();
13452
13453 if (TARGET_64BIT)
13454 emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
13455 else
13456 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
13457
13458 tp = get_thread_pointer (Pmode, true);
13459 set_unique_reg_note (get_last_insn (), REG_EQUAL,
13460 gen_rtx_MINUS (Pmode, tmp, tp));
13461 }
13462 else
13463 {
13464 rtx caddr = ix86_tls_get_addr ();
13465
13466 if (TARGET_64BIT)
13467 {
13468 rtx rax = gen_rtx_REG (Pmode, AX_REG);
13469 rtx insns, eqv;
13470
13471 start_sequence ();
13472 emit_call_insn
13473 (ix86_gen_tls_local_dynamic_base_64 (rax, caddr));
13474 insns = get_insns ();
13475 end_sequence ();
13476
13477 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
13478 share the LD_BASE result with other LD model accesses. */
13479 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
13480 UNSPEC_TLS_LD_BASE);
13481
13482 RTL_CONST_CALL_P (insns) = 1;
13483 emit_libcall_block (insns, base, rax, eqv);
13484 }
13485 else
13486 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
13487 }
13488
13489 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
13490 off = gen_rtx_CONST (Pmode, off);
13491
13492 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
13493
13494 if (TARGET_GNU2_TLS)
13495 {
13496 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
13497
13498 if (GET_MODE (x) != Pmode)
13499 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13500
13501 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
13502 }
13503 break;
13504
13505 case TLS_MODEL_INITIAL_EXEC:
13506 if (TARGET_64BIT)
13507 {
13508 if (TARGET_SUN_TLS && !TARGET_X32)
13509 {
13510 /* The Sun linker took the AMD64 TLS spec literally
13511 and can only handle %rax as destination of the
13512 initial executable code sequence. */
13513
13514 dest = gen_reg_rtx (DImode);
13515 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
13516 return dest;
13517 }
13518
13519 /* Generate DImode references to avoid %fs:(%reg32)
13520 problems and the linker IE->LE relaxation bug. */
13521 tp_mode = DImode;
13522 pic = NULL;
13523 type = UNSPEC_GOTNTPOFF;
13524 }
13525 else if (flag_pic)
13526 {
13527 if (reload_in_progress)
13528 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13529 pic = pic_offset_table_rtx;
13530 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
13531 }
13532 else if (!TARGET_ANY_GNU_TLS)
13533 {
13534 pic = gen_reg_rtx (Pmode);
13535 emit_insn (gen_set_got (pic));
13536 type = UNSPEC_GOTTPOFF;
13537 }
13538 else
13539 {
13540 pic = NULL;
13541 type = UNSPEC_INDNTPOFF;
13542 }
13543
13544 off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
13545 off = gen_rtx_CONST (tp_mode, off);
13546 if (pic)
13547 off = gen_rtx_PLUS (tp_mode, pic, off);
13548 off = gen_const_mem (tp_mode, off);
13549 set_mem_alias_set (off, ix86_GOT_alias_set ());
13550
13551 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
13552 {
13553 base = get_thread_pointer (tp_mode,
13554 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
13555 off = force_reg (tp_mode, off);
13556 return gen_rtx_PLUS (tp_mode, base, off);
13557 }
13558 else
13559 {
13560 base = get_thread_pointer (Pmode, true);
13561 dest = gen_reg_rtx (Pmode);
13562 emit_insn (ix86_gen_sub3 (dest, base, off));
13563 }
13564 break;
13565
13566 case TLS_MODEL_LOCAL_EXEC:
13567 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
13568 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
13569 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
13570 off = gen_rtx_CONST (Pmode, off);
13571
13572 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
13573 {
13574 base = get_thread_pointer (Pmode,
13575 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
13576 return gen_rtx_PLUS (Pmode, base, off);
13577 }
13578 else
13579 {
13580 base = get_thread_pointer (Pmode, true);
13581 dest = gen_reg_rtx (Pmode);
13582 emit_insn (ix86_gen_sub3 (dest, base, off));
13583 }
13584 break;
13585
13586 default:
13587 gcc_unreachable ();
13588 }
13589
13590 return dest;
13591 }
13592
13593 /* Create or return the unique __imp_DECL dllimport symbol corresponding
13594 to symbol DECL if BEIMPORT is true. Otherwise create or return the
13595 unique refptr-DECL symbol corresponding to symbol DECL. */
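/* For example, for a dllimported symbol "foo" this yields the "*__imp_foo"
   (or "*__imp__foo") indirection symbol, and in the refptr case the
   "*.refptr.foo" / "*refptr.foo" form, depending on the user label prefix.  */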
13596
13597 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
13598 htab_t dllimport_map;
13599
13600 static tree
13601 get_dllimport_decl (tree decl, bool beimport)
13602 {
13603 struct tree_map *h, in;
13604 void **loc;
13605 const char *name;
13606 const char *prefix;
13607 size_t namelen, prefixlen;
13608 char *imp_name;
13609 tree to;
13610 rtx rtl;
13611
13612 if (!dllimport_map)
13613 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
13614
13615 in.hash = htab_hash_pointer (decl);
13616 in.base.from = decl;
13617 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
13618 h = (struct tree_map *) *loc;
13619 if (h)
13620 return h->to;
13621
13622 *loc = h = ggc_alloc_tree_map ();
13623 h->hash = in.hash;
13624 h->base.from = decl;
13625 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
13626 VAR_DECL, NULL, ptr_type_node);
13627 DECL_ARTIFICIAL (to) = 1;
13628 DECL_IGNORED_P (to) = 1;
13629 DECL_EXTERNAL (to) = 1;
13630 TREE_READONLY (to) = 1;
13631
13632 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
13633 name = targetm.strip_name_encoding (name);
13634 if (beimport)
13635 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
13636 ? "*__imp_" : "*__imp__";
13637 else
13638 prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr.";
13639 namelen = strlen (name);
13640 prefixlen = strlen (prefix);
13641 imp_name = (char *) alloca (namelen + prefixlen + 1);
13642 memcpy (imp_name, prefix, prefixlen);
13643 memcpy (imp_name + prefixlen, name, namelen + 1);
13644
13645 name = ggc_alloc_string (imp_name, namelen + prefixlen);
13646 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
13647 SET_SYMBOL_REF_DECL (rtl, to);
13648 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR;
13649 if (!beimport)
13650 {
13651 SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL;
13652 #ifdef SUB_TARGET_RECORD_STUB
13653 SUB_TARGET_RECORD_STUB (name);
13654 #endif
13655 }
13656
13657 rtl = gen_const_mem (Pmode, rtl);
13658 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
13659
13660 SET_DECL_RTL (to, rtl);
13661 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
13662
13663 return to;
13664 }
13665
13666 /* Expand SYMBOL into its corresponding far-address symbol.
13667 WANT_REG is true if we require the result be a register. */
13668
13669 static rtx
13670 legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg)
13671 {
13672 tree imp_decl;
13673 rtx x;
13674
13675 gcc_assert (SYMBOL_REF_DECL (symbol));
13676 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false);
13677
13678 x = DECL_RTL (imp_decl);
13679 if (want_reg)
13680 x = force_reg (Pmode, x);
13681 return x;
13682 }
13683
13684 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
13685 true if we require the result be a register. */
13686
13687 static rtx
13688 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
13689 {
13690 tree imp_decl;
13691 rtx x;
13692
13693 gcc_assert (SYMBOL_REF_DECL (symbol));
13694 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true);
13695
13696 x = DECL_RTL (imp_decl);
13697 if (want_reg)
13698 x = force_reg (Pmode, x);
13699 return x;
13700 }
13701
13702 /* Expand SYMBOL into its corresponding dllimport or refptr symbol. WANT_REG
13703 is true if we require the result be a register. */
13704
13705 static rtx
13706 legitimize_pe_coff_symbol (rtx addr, bool inreg)
13707 {
13708 if (!TARGET_PECOFF)
13709 return NULL_RTX;
13710
13711 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13712 {
13713 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
13714 return legitimize_dllimport_symbol (addr, inreg);
13715 if (GET_CODE (addr) == CONST
13716 && GET_CODE (XEXP (addr, 0)) == PLUS
13717 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
13718 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
13719 {
13720 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), inreg);
13721 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
13722 }
13723 }
13724
13725 if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC)
13726 return NULL_RTX;
13727 if (GET_CODE (addr) == SYMBOL_REF
13728 && !is_imported_p (addr)
13729 && SYMBOL_REF_EXTERNAL_P (addr)
13730 && SYMBOL_REF_DECL (addr))
13731 return legitimize_pe_coff_extern_decl (addr, inreg);
13732
13733 if (GET_CODE (addr) == CONST
13734 && GET_CODE (XEXP (addr, 0)) == PLUS
13735 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
13736 && !is_imported_p (XEXP (XEXP (addr, 0), 0))
13737 && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0))
13738 && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0)))
13739 {
13740 rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), inreg);
13741 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
13742 }
13743 return NULL_RTX;
13744 }
13745
13746 /* Try machine-dependent ways of modifying an illegitimate address
13747 to be legitimate. If we find one, return the new, valid address.
13748 This macro is used in only one place: `memory_address' in explow.c.
13749
13750 OLDX is the address as it was before break_out_memory_refs was called.
13751 In some cases it is useful to look at this to decide what needs to be done.
13752
13753 It is always safe for this macro to do nothing. It exists to recognize
13754 opportunities to optimize the output.
13755
13756 For the 80386, we handle X+REG by loading X into a register R and
13757 using R+REG. R will go in a general reg and indexing will be used.
13758 However, if REG is a broken-out memory address or multiplication,
13759 nothing needs to be done because REG can certainly go in a general reg.
13760
13761 When -fpic is used, special handling is needed for symbolic references.
13762 See comments by legitimize_pic_address in i386.c for details. */
13763
13764 static rtx
13765 ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
13766 enum machine_mode mode)
13767 {
13768 int changed = 0;
13769 unsigned log;
13770
13771 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
13772 if (log)
13773 return legitimize_tls_address (x, (enum tls_model) log, false);
13774 if (GET_CODE (x) == CONST
13775 && GET_CODE (XEXP (x, 0)) == PLUS
13776 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
13777 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
13778 {
13779 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
13780 (enum tls_model) log, false);
13781 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
13782 }
13783
13784 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13785 {
13786 rtx tmp = legitimize_pe_coff_symbol (x, true);
13787 if (tmp)
13788 return tmp;
13789 }
13790
13791 if (flag_pic && SYMBOLIC_CONST (x))
13792 return legitimize_pic_address (x, 0);
13793
13794 #if TARGET_MACHO
13795 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
13796 return machopic_indirect_data_reference (x, 0);
13797 #endif
13798
13799 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
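/* For example, (ashift R 2) becomes (mult R 4), matching the scale factor
   of an x86 indexed address.  */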
13800 if (GET_CODE (x) == ASHIFT
13801 && CONST_INT_P (XEXP (x, 1))
13802 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
13803 {
13804 changed = 1;
13805 log = INTVAL (XEXP (x, 1));
13806 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
13807 GEN_INT (1 << log));
13808 }
13809
13810 if (GET_CODE (x) == PLUS)
13811 {
13812 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
13813
13814 if (GET_CODE (XEXP (x, 0)) == ASHIFT
13815 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
13816 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
13817 {
13818 changed = 1;
13819 log = INTVAL (XEXP (XEXP (x, 0), 1));
13820 XEXP (x, 0) = gen_rtx_MULT (Pmode,
13821 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
13822 GEN_INT (1 << log));
13823 }
13824
13825 if (GET_CODE (XEXP (x, 1)) == ASHIFT
13826 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
13827 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
13828 {
13829 changed = 1;
13830 log = INTVAL (XEXP (XEXP (x, 1), 1));
13831 XEXP (x, 1) = gen_rtx_MULT (Pmode,
13832 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
13833 GEN_INT (1 << log));
13834 }
13835
13836 /* Put multiply first if it isn't already. */
13837 if (GET_CODE (XEXP (x, 1)) == MULT)
13838 {
13839 rtx tmp = XEXP (x, 0);
13840 XEXP (x, 0) = XEXP (x, 1);
13841 XEXP (x, 1) = tmp;
13842 changed = 1;
13843 }
13844
13845 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
13846 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
13847 created by virtual register instantiation, register elimination, and
13848 similar optimizations. */
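/* For example, (plus (mult %eax 4) (plus %ebp 16)) becomes
   (plus (plus (mult %eax 4) %ebp) 16), i.e. base + index*scale + disp.  */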
13849 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
13850 {
13851 changed = 1;
13852 x = gen_rtx_PLUS (Pmode,
13853 gen_rtx_PLUS (Pmode, XEXP (x, 0),
13854 XEXP (XEXP (x, 1), 0)),
13855 XEXP (XEXP (x, 1), 1));
13856 }
13857
13858 /* Canonicalize
13859 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
13860 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
13861 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
13862 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
13863 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
13864 && CONSTANT_P (XEXP (x, 1)))
13865 {
13866 rtx constant;
13867 rtx other = NULL_RTX;
13868
13869 if (CONST_INT_P (XEXP (x, 1)))
13870 {
13871 constant = XEXP (x, 1);
13872 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
13873 }
13874 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
13875 {
13876 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
13877 other = XEXP (x, 1);
13878 }
13879 else
13880 constant = 0;
13881
13882 if (constant)
13883 {
13884 changed = 1;
13885 x = gen_rtx_PLUS (Pmode,
13886 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
13887 XEXP (XEXP (XEXP (x, 0), 1), 0)),
13888 plus_constant (Pmode, other,
13889 INTVAL (constant)));
13890 }
13891 }
13892
13893 if (changed && ix86_legitimate_address_p (mode, x, false))
13894 return x;
13895
13896 if (GET_CODE (XEXP (x, 0)) == MULT)
13897 {
13898 changed = 1;
13899 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
13900 }
13901
13902 if (GET_CODE (XEXP (x, 1)) == MULT)
13903 {
13904 changed = 1;
13905 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
13906 }
13907
13908 if (changed
13909 && REG_P (XEXP (x, 1))
13910 && REG_P (XEXP (x, 0)))
13911 return x;
13912
13913 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
13914 {
13915 changed = 1;
13916 x = legitimize_pic_address (x, 0);
13917 }
13918
13919 if (changed && ix86_legitimate_address_p (mode, x, false))
13920 return x;
13921
13922 if (REG_P (XEXP (x, 0)))
13923 {
13924 rtx temp = gen_reg_rtx (Pmode);
13925 rtx val = force_operand (XEXP (x, 1), temp);
13926 if (val != temp)
13927 {
13928 val = convert_to_mode (Pmode, val, 1);
13929 emit_move_insn (temp, val);
13930 }
13931
13932 XEXP (x, 1) = temp;
13933 return x;
13934 }
13935
13936 else if (REG_P (XEXP (x, 1)))
13937 {
13938 rtx temp = gen_reg_rtx (Pmode);
13939 rtx val = force_operand (XEXP (x, 0), temp);
13940 if (val != temp)
13941 {
13942 val = convert_to_mode (Pmode, val, 1);
13943 emit_move_insn (temp, val);
13944 }
13945
13946 XEXP (x, 0) = temp;
13947 return x;
13948 }
13949 }
13950
13951 return x;
13952 }
13953 \f
13954 /* Print an integer constant expression in assembler syntax. Addition
13955 and subtraction are the only arithmetic that may appear in these
13956 expressions. FILE is the stdio stream to write to, X is the rtx, and
13957 CODE is the operand print code from the output string. */
13958
13959 static void
13960 output_pic_addr_const (FILE *file, rtx x, int code)
13961 {
13962 char buf[256];
13963
13964 switch (GET_CODE (x))
13965 {
13966 case PC:
13967 gcc_assert (flag_pic);
13968 putc ('.', file);
13969 break;
13970
13971 case SYMBOL_REF:
13972 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
13973 output_addr_const (file, x);
13974 else
13975 {
13976 const char *name = XSTR (x, 0);
13977
13978 /* Mark the decl as referenced so that cgraph will
13979 output the function. */
13980 if (SYMBOL_REF_DECL (x))
13981 mark_decl_referenced (SYMBOL_REF_DECL (x));
13982
13983 #if TARGET_MACHO
13984 if (MACHOPIC_INDIRECT
13985 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
13986 name = machopic_indirection_name (x, /*stub_p=*/true);
13987 #endif
13988 assemble_name (file, name);
13989 }
13990 if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
13991 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
13992 fputs ("@PLT", file);
13993 break;
13994
13995 case LABEL_REF:
13996 x = XEXP (x, 0);
13997 /* FALLTHRU */
13998 case CODE_LABEL:
13999 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
14000 assemble_name (asm_out_file, buf);
14001 break;
14002
14003 case CONST_INT:
14004 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14005 break;
14006
14007 case CONST:
14008 /* This used to output parentheses around the expression,
14009 but that does not work on the 386 (either ATT or BSD assembler). */
14010 output_pic_addr_const (file, XEXP (x, 0), code);
14011 break;
14012
14013 case CONST_DOUBLE:
14014 if (GET_MODE (x) == VOIDmode)
14015 {
14016 /* We can use %d if the number is <32 bits and positive. */
14017 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
14018 fprintf (file, "0x%lx%08lx",
14019 (unsigned long) CONST_DOUBLE_HIGH (x),
14020 (unsigned long) CONST_DOUBLE_LOW (x));
14021 else
14022 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
14023 }
14024 else
14025 /* We can't handle floating point constants;
14026 TARGET_PRINT_OPERAND must handle them. */
14027 output_operand_lossage ("floating constant misused");
14028 break;
14029
14030 case PLUS:
14031 /* Some assemblers need integer constants to appear first. */
14032 if (CONST_INT_P (XEXP (x, 0)))
14033 {
14034 output_pic_addr_const (file, XEXP (x, 0), code);
14035 putc ('+', file);
14036 output_pic_addr_const (file, XEXP (x, 1), code);
14037 }
14038 else
14039 {
14040 gcc_assert (CONST_INT_P (XEXP (x, 1)));
14041 output_pic_addr_const (file, XEXP (x, 1), code);
14042 putc ('+', file);
14043 output_pic_addr_const (file, XEXP (x, 0), code);
14044 }
14045 break;
14046
14047 case MINUS:
14048 if (!TARGET_MACHO)
14049 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
14050 output_pic_addr_const (file, XEXP (x, 0), code);
14051 putc ('-', file);
14052 output_pic_addr_const (file, XEXP (x, 1), code);
14053 if (!TARGET_MACHO)
14054 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
14055 break;
14056
14057 case UNSPEC:
14058 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
14059 {
14060 bool f = i386_asm_output_addr_const_extra (file, x);
14061 gcc_assert (f);
14062 break;
14063 }
14064
14065 gcc_assert (XVECLEN (x, 0) == 1);
14066 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
14067 switch (XINT (x, 1))
14068 {
14069 case UNSPEC_GOT:
14070 fputs ("@GOT", file);
14071 break;
14072 case UNSPEC_GOTOFF:
14073 fputs ("@GOTOFF", file);
14074 break;
14075 case UNSPEC_PLTOFF:
14076 fputs ("@PLTOFF", file);
14077 break;
14078 case UNSPEC_PCREL:
14079 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14080 "(%rip)" : "[rip]", file);
14081 break;
14082 case UNSPEC_GOTPCREL:
14083 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14084 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
14085 break;
14086 case UNSPEC_GOTTPOFF:
14087 /* FIXME: This might be @TPOFF in Sun ld too. */
14088 fputs ("@gottpoff", file);
14089 break;
14090 case UNSPEC_TPOFF:
14091 fputs ("@tpoff", file);
14092 break;
14093 case UNSPEC_NTPOFF:
14094 if (TARGET_64BIT)
14095 fputs ("@tpoff", file);
14096 else
14097 fputs ("@ntpoff", file);
14098 break;
14099 case UNSPEC_DTPOFF:
14100 fputs ("@dtpoff", file);
14101 break;
14102 case UNSPEC_GOTNTPOFF:
14103 if (TARGET_64BIT)
14104 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14105 "@gottpoff(%rip)": "@gottpoff[rip]", file);
14106 else
14107 fputs ("@gotntpoff", file);
14108 break;
14109 case UNSPEC_INDNTPOFF:
14110 fputs ("@indntpoff", file);
14111 break;
14112 #if TARGET_MACHO
14113 case UNSPEC_MACHOPIC_OFFSET:
14114 putc ('-', file);
14115 machopic_output_function_base_name (file);
14116 break;
14117 #endif
14118 default:
14119 output_operand_lossage ("invalid UNSPEC as operand");
14120 break;
14121 }
14122 break;
14123
14124 default:
14125 output_operand_lossage ("invalid expression as operand");
14126 }
14127 }
14128
14129 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
14130 We need to emit DTP-relative relocations. */
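/* For example, with SIZE 4 this emits something like ".long foo@dtpoff";
   with SIZE 8 a ", 0" is appended for the upper half.  */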
14131
14132 static void ATTRIBUTE_UNUSED
14133 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
14134 {
14135 fputs (ASM_LONG, file);
14136 output_addr_const (file, x);
14137 fputs ("@dtpoff", file);
14138 switch (size)
14139 {
14140 case 4:
14141 break;
14142 case 8:
14143 fputs (", 0", file);
14144 break;
14145 default:
14146 gcc_unreachable ();
14147 }
14148 }
14149
14150 /* Return true if X is a representation of the PIC register. This copes
14151 with calls from ix86_find_base_term, where the register might have
14152 been replaced by a cselib value. */
14153
14154 static bool
14155 ix86_pic_register_p (rtx x)
14156 {
14157 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
14158 return (pic_offset_table_rtx
14159 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
14160 else
14161 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
14162 }
14163
14164 /* Helper function for ix86_delegitimize_address.
14165 Attempt to delegitimize TLS local-exec accesses. */
14166
14167 static rtx
14168 ix86_delegitimize_tls_address (rtx orig_x)
14169 {
14170 rtx x = orig_x, unspec;
14171 struct ix86_address addr;
14172
14173 if (!TARGET_TLS_DIRECT_SEG_REFS)
14174 return orig_x;
14175 if (MEM_P (x))
14176 x = XEXP (x, 0);
14177 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
14178 return orig_x;
14179 if (ix86_decompose_address (x, &addr) == 0
14180 || addr.seg != DEFAULT_TLS_SEG_REG
14181 || addr.disp == NULL_RTX
14182 || GET_CODE (addr.disp) != CONST)
14183 return orig_x;
14184 unspec = XEXP (addr.disp, 0);
14185 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
14186 unspec = XEXP (unspec, 0);
14187 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
14188 return orig_x;
14189 x = XVECEXP (unspec, 0, 0);
14190 gcc_assert (GET_CODE (x) == SYMBOL_REF);
14191 if (unspec != XEXP (addr.disp, 0))
14192 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
14193 if (addr.index)
14194 {
14195 rtx idx = addr.index;
14196 if (addr.scale != 1)
14197 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
14198 x = gen_rtx_PLUS (Pmode, idx, x);
14199 }
14200 if (addr.base)
14201 x = gen_rtx_PLUS (Pmode, addr.base, x);
14202 if (MEM_P (orig_x))
14203 x = replace_equiv_address_nv (orig_x, x);
14204 return x;
14205 }
14206
14207 /* In the name of slightly smaller debug output, and to cater to
14208 general assembler lossage, recognize PIC+GOTOFF and turn it back
14209 into a direct symbol reference.
14210
14211 On Darwin, this is necessary to avoid a crash, because Darwin
14212 has a different PIC label for each routine but the DWARF debugging
14213 information is not associated with any particular routine, so it's
14214 necessary to remove references to the PIC label from RTL stored by
14215 the DWARF output code. */
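/* For example, a 32-bit access of the form
   (plus %ebx (const (unspec [foo] UNSPEC_GOTOFF))) is turned back into
   the bare symbol foo.  */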
14216
14217 static rtx
14218 ix86_delegitimize_address (rtx x)
14219 {
14220 rtx orig_x = delegitimize_mem_from_attrs (x);
14221 /* addend is NULL or some rtx if x is something+GOTOFF where
14222 something doesn't include the PIC register. */
14223 rtx addend = NULL_RTX;
14224 /* reg_addend is NULL or a multiple of some register. */
14225 rtx reg_addend = NULL_RTX;
14226 /* const_addend is NULL or a const_int. */
14227 rtx const_addend = NULL_RTX;
14228 /* This is the result, or NULL. */
14229 rtx result = NULL_RTX;
14230
14231 x = orig_x;
14232
14233 if (MEM_P (x))
14234 x = XEXP (x, 0);
14235
14236 if (TARGET_64BIT)
14237 {
14238 if (GET_CODE (x) == CONST
14239 && GET_CODE (XEXP (x, 0)) == PLUS
14240 && GET_MODE (XEXP (x, 0)) == Pmode
14241 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
14242 && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
14243 && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
14244 {
14245 rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
14246 x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
14247 if (MEM_P (orig_x))
14248 x = replace_equiv_address_nv (orig_x, x);
14249 return x;
14250 }
14251
14252 if (GET_CODE (x) == CONST
14253 && GET_CODE (XEXP (x, 0)) == UNSPEC
14254 && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
14255 || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
14256 && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
14257 {
14258 x = XVECEXP (XEXP (x, 0), 0, 0);
14259 if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
14260 {
14261 x = simplify_gen_subreg (GET_MODE (orig_x), x,
14262 GET_MODE (x), 0);
14263 if (x == NULL_RTX)
14264 return orig_x;
14265 }
14266 return x;
14267 }
14268
14269 if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
14270 return ix86_delegitimize_tls_address (orig_x);
14271
14272 /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
14273 and -mcmodel=medium -fpic. */
14274 }
14275
14276 if (GET_CODE (x) != PLUS
14277 || GET_CODE (XEXP (x, 1)) != CONST)
14278 return ix86_delegitimize_tls_address (orig_x);
14279
14280 if (ix86_pic_register_p (XEXP (x, 0)))
14281 /* %ebx + GOT/GOTOFF */
14282 ;
14283 else if (GET_CODE (XEXP (x, 0)) == PLUS)
14284 {
14285 /* %ebx + %reg * scale + GOT/GOTOFF */
14286 reg_addend = XEXP (x, 0);
14287 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
14288 reg_addend = XEXP (reg_addend, 1);
14289 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
14290 reg_addend = XEXP (reg_addend, 0);
14291 else
14292 {
14293 reg_addend = NULL_RTX;
14294 addend = XEXP (x, 0);
14295 }
14296 }
14297 else
14298 addend = XEXP (x, 0);
14299
14300 x = XEXP (XEXP (x, 1), 0);
14301 if (GET_CODE (x) == PLUS
14302 && CONST_INT_P (XEXP (x, 1)))
14303 {
14304 const_addend = XEXP (x, 1);
14305 x = XEXP (x, 0);
14306 }
14307
14308 if (GET_CODE (x) == UNSPEC
14309 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
14310 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
14311 || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
14312 && !MEM_P (orig_x) && !addend)))
14313 result = XVECEXP (x, 0, 0);
14314
14315 if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
14316 && !MEM_P (orig_x))
14317 result = XVECEXP (x, 0, 0);
14318
14319 if (! result)
14320 return ix86_delegitimize_tls_address (orig_x);
14321
14322 if (const_addend)
14323 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
14324 if (reg_addend)
14325 result = gen_rtx_PLUS (Pmode, reg_addend, result);
14326 if (addend)
14327 {
14328 /* If the rest of original X doesn't involve the PIC register, add
14329 addend and subtract pic_offset_table_rtx. This can happen e.g.
14330 for code like:
14331 leal (%ebx, %ecx, 4), %ecx
14332 ...
14333 movl foo@GOTOFF(%ecx), %edx
14334 in which case we return (%ecx - %ebx) + foo. */
14335 if (pic_offset_table_rtx)
14336 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
14337 pic_offset_table_rtx),
14338 result);
14339 else
14340 return orig_x;
14341 }
14342 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
14343 {
14344 result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
14345 if (result == NULL_RTX)
14346 return orig_x;
14347 }
14348 return result;
14349 }
14350
14351 /* If X is a machine specific address (i.e. a symbol or label being
14352 referenced as a displacement from the GOT implemented using an
14353 UNSPEC), then return the base term. Otherwise return X. */
14354
14355 rtx
14356 ix86_find_base_term (rtx x)
14357 {
14358 rtx term;
14359
14360 if (TARGET_64BIT)
14361 {
14362 if (GET_CODE (x) != CONST)
14363 return x;
14364 term = XEXP (x, 0);
14365 if (GET_CODE (term) == PLUS
14366 && (CONST_INT_P (XEXP (term, 1))
14367 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
14368 term = XEXP (term, 0);
14369 if (GET_CODE (term) != UNSPEC
14370 || (XINT (term, 1) != UNSPEC_GOTPCREL
14371 && XINT (term, 1) != UNSPEC_PCREL))
14372 return x;
14373
14374 return XVECEXP (term, 0, 0);
14375 }
14376
14377 return ix86_delegitimize_address (x);
14378 }
14379 \f
14380 static void
14381 put_condition_code (enum rtx_code code, enum machine_mode mode, bool reverse,
14382 bool fp, FILE *file)
14383 {
14384 const char *suffix;
14385
14386 if (mode == CCFPmode || mode == CCFPUmode)
14387 {
14388 code = ix86_fp_compare_code_to_integer (code);
14389 mode = CCmode;
14390 }
14391 if (reverse)
14392 code = reverse_condition (code);
14393
14394 switch (code)
14395 {
14396 case EQ:
14397 switch (mode)
14398 {
14399 case CCAmode:
14400 suffix = "a";
14401 break;
14402
14403 case CCCmode:
14404 suffix = "c";
14405 break;
14406
14407 case CCOmode:
14408 suffix = "o";
14409 break;
14410
14411 case CCSmode:
14412 suffix = "s";
14413 break;
14414
14415 default:
14416 suffix = "e";
14417 }
14418 break;
14419 case NE:
14420 switch (mode)
14421 {
14422 case CCAmode:
14423 suffix = "na";
14424 break;
14425
14426 case CCCmode:
14427 suffix = "nc";
14428 break;
14429
14430 case CCOmode:
14431 suffix = "no";
14432 break;
14433
14434 case CCSmode:
14435 suffix = "ns";
14436 break;
14437
14438 default:
14439 suffix = "ne";
14440 }
14441 break;
14442 case GT:
14443 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
14444 suffix = "g";
14445 break;
14446 case GTU:
14447 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
14448 Those same assemblers have the same but opposite lossage on cmov. */
14449 if (mode == CCmode)
14450 suffix = fp ? "nbe" : "a";
14451 else
14452 gcc_unreachable ();
14453 break;
14454 case LT:
14455 switch (mode)
14456 {
14457 case CCNOmode:
14458 case CCGOCmode:
14459 suffix = "s";
14460 break;
14461
14462 case CCmode:
14463 case CCGCmode:
14464 suffix = "l";
14465 break;
14466
14467 default:
14468 gcc_unreachable ();
14469 }
14470 break;
14471 case LTU:
14472 if (mode == CCmode)
14473 suffix = "b";
14474 else if (mode == CCCmode)
14475 suffix = "c";
14476 else
14477 gcc_unreachable ();
14478 break;
14479 case GE:
14480 switch (mode)
14481 {
14482 case CCNOmode:
14483 case CCGOCmode:
14484 suffix = "ns";
14485 break;
14486
14487 case CCmode:
14488 case CCGCmode:
14489 suffix = "ge";
14490 break;
14491
14492 default:
14493 gcc_unreachable ();
14494 }
14495 break;
14496 case GEU:
14497 if (mode == CCmode)
14498 suffix = fp ? "nb" : "ae";
14499 else if (mode == CCCmode)
14500 suffix = "nc";
14501 else
14502 gcc_unreachable ();
14503 break;
14504 case LE:
14505 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
14506 suffix = "le";
14507 break;
14508 case LEU:
14509 if (mode == CCmode)
14510 suffix = "be";
14511 else
14512 gcc_unreachable ();
14513 break;
14514 case UNORDERED:
14515 suffix = fp ? "u" : "p";
14516 break;
14517 case ORDERED:
14518 suffix = fp ? "nu" : "np";
14519 break;
14520 default:
14521 gcc_unreachable ();
14522 }
14523 fputs (suffix, file);
14524 }
14525
14526 /* Print the name of register X to FILE based on its machine mode and number.
14527 If CODE is 'w', pretend the mode is HImode.
14528 If CODE is 'b', pretend the mode is QImode.
14529 If CODE is 'k', pretend the mode is SImode.
14530 If CODE is 'q', pretend the mode is DImode.
14531 If CODE is 'x', pretend the mode is V4SFmode.
14532 If CODE is 't', pretend the mode is V8SFmode.
14533 If CODE is 'g', pretend the mode is V16SFmode.
14534 If CODE is 'h', pretend the reg is the 'high' byte register.
14535 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
14536 If CODE is 'd', duplicate the operand for AVX instruction.
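   For example, given the SImode register %eax: CODE 'k' prints "eax",
   'w' prints "ax", 'b' prints "al", 'h' prints "ah", and 'q' prints "rax"
   on 64-bit targets.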
14537 */
14538
14539 void
14540 print_reg (rtx x, int code, FILE *file)
14541 {
14542 const char *reg;
14543 unsigned int regno;
14544 bool duplicated = code == 'd' && TARGET_AVX;
14545
14546 if (ASSEMBLER_DIALECT == ASM_ATT)
14547 putc ('%', file);
14548
14549 if (x == pc_rtx)
14550 {
14551 gcc_assert (TARGET_64BIT);
14552 fputs ("rip", file);
14553 return;
14554 }
14555
14556 regno = true_regnum (x);
14557 gcc_assert (regno != ARG_POINTER_REGNUM
14558 && regno != FRAME_POINTER_REGNUM
14559 && regno != FLAGS_REG
14560 && regno != FPSR_REG
14561 && regno != FPCR_REG);
14562
14563 if (code == 'w' || MMX_REG_P (x))
14564 code = 2;
14565 else if (code == 'b')
14566 code = 1;
14567 else if (code == 'k')
14568 code = 4;
14569 else if (code == 'q')
14570 code = 8;
14571 else if (code == 'y')
14572 code = 3;
14573 else if (code == 'h')
14574 code = 0;
14575 else if (code == 'x')
14576 code = 16;
14577 else if (code == 't')
14578 code = 32;
14579 else if (code == 'g')
14580 code = 64;
14581 else
14582 code = GET_MODE_SIZE (GET_MODE (x));
14583
14584 /* Irritatingly, AMD extended registers use a different naming convention
14585 from the normal registers: "r%d[bwd]" */
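/* For example, extended register 8 is printed as "r8b", "r8w", "r8d"
   or "r8", depending on the operand size.  */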
14586 if (REX_INT_REGNO_P (regno))
14587 {
14588 gcc_assert (TARGET_64BIT);
14589 putc ('r', file);
14590 fprint_ul (file, regno - FIRST_REX_INT_REG + 8);
14591 switch (code)
14592 {
14593 case 0:
14594 error ("extended registers have no high halves");
14595 break;
14596 case 1:
14597 putc ('b', file);
14598 break;
14599 case 2:
14600 putc ('w', file);
14601 break;
14602 case 4:
14603 putc ('d', file);
14604 break;
14605 case 8:
14606 /* no suffix */
14607 break;
14608 default:
14609 error ("unsupported operand size for extended register");
14610 break;
14611 }
14612 return;
14613 }
14614
14615 reg = NULL;
14616 switch (code)
14617 {
14618 case 3:
14619 if (STACK_TOP_P (x))
14620 {
14621 reg = "st(0)";
14622 break;
14623 }
14624 /* FALLTHRU */
14625 case 8:
14626 case 4:
14627 case 12:
14628 if (! ANY_FP_REG_P (x))
14629 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
14630 /* FALLTHRU */
14631 case 16:
14632 case 2:
14633 normal:
14634 reg = hi_reg_name[regno];
14635 break;
14636 case 1:
14637 if (regno >= ARRAY_SIZE (qi_reg_name))
14638 goto normal;
14639 reg = qi_reg_name[regno];
14640 break;
14641 case 0:
14642 if (regno >= ARRAY_SIZE (qi_high_reg_name))
14643 goto normal;
14644 reg = qi_high_reg_name[regno];
14645 break;
14646 case 32:
14647 if (SSE_REG_P (x))
14648 {
14649 gcc_assert (!duplicated);
14650 putc ('y', file);
14651 fputs (hi_reg_name[regno] + 1, file);
14652 return;
14653 }
14654 case 64:
14655 if (SSE_REG_P (x))
14656 {
14657 gcc_assert (!duplicated);
14658 putc ('z', file);
14659 fputs (hi_reg_name[REGNO (x)] + 1, file);
14660 return;
14661 }
14662 break;
14663 default:
14664 gcc_unreachable ();
14665 }
14666
14667 fputs (reg, file);
14668 if (duplicated)
14669 {
14670 if (ASSEMBLER_DIALECT == ASM_ATT)
14671 fprintf (file, ", %%%s", reg);
14672 else
14673 fprintf (file, ", %s", reg);
14674 }
14675 }
14676
14677 /* Locate some local-dynamic symbol still in use by this function
14678 so that we can print its name in some tls_local_dynamic_base
14679 pattern. */
14680
14681 static int
14682 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
14683 {
14684 rtx x = *px;
14685
14686 if (GET_CODE (x) == SYMBOL_REF
14687 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
14688 {
14689 cfun->machine->some_ld_name = XSTR (x, 0);
14690 return 1;
14691 }
14692
14693 return 0;
14694 }
14695
14696 static const char *
14697 get_some_local_dynamic_name (void)
14698 {
14699 rtx insn;
14700
14701 if (cfun->machine->some_ld_name)
14702 return cfun->machine->some_ld_name;
14703
14704 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
14705 if (NONDEBUG_INSN_P (insn)
14706 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
14707 return cfun->machine->some_ld_name;
14708
14709 return NULL;
14710 }
14711
14712 /* Meaning of CODE:
14713 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
14714 C -- print opcode suffix for set/cmov insn.
14715 c -- like C, but print reversed condition
14716 F,f -- likewise, but for floating-point.
14717 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
14718 otherwise nothing
14719 R -- print embedded rounding and sae.
14720 r -- print only sae.
14721 z -- print the opcode suffix for the size of the current operand.
14722 Z -- likewise, with special suffixes for x87 instructions.
14723 * -- print a star (in certain assembler syntax)
14724 A -- print an absolute memory reference.
14725 E -- print address with DImode register names if TARGET_64BIT.
14726 w -- print the operand as if it's a "word" (HImode) even if it isn't.
14727 s -- print a shift double count, followed by the assembler's argument
14728 delimiter.
14729 b -- print the QImode name of the register for the indicated operand.
14730 %b0 would print %al if operands[0] is reg 0.
14731 w -- likewise, print the HImode name of the register.
14732 k -- likewise, print the SImode name of the register.
14733 q -- likewise, print the DImode name of the register.
14734 x -- likewise, print the V4SFmode name of the register.
14735 t -- likewise, print the V8SFmode name of the register.
14736 g -- likewise, print the V16SFmode name of the register.
14737 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
14738 y -- print "st(0)" instead of "st" as a register.
14739 d -- print duplicated register operand for AVX instruction.
14740 D -- print condition for SSE cmp instruction.
14741 P -- if PIC, print an @PLT suffix.
14742 p -- print raw symbol name.
14743 X -- don't print any sort of PIC '@' suffix for a symbol.
14744 & -- print some in-use local-dynamic symbol name.
14745 H -- print a memory address offset by 8; used for sse high-parts
14746 Y -- print condition for XOP pcom* instruction.
14747 + -- print a branch hint as 'cs' or 'ds' prefix
14748 ; -- print a semicolon (after prefixes due to bug in older gas).
14749 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
14750 @ -- print a segment register of thread base pointer load
14751 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
14752 */
14753
14754 void
14755 ix86_print_operand (FILE *file, rtx x, int code)
14756 {
14757 if (code)
14758 {
14759 switch (code)
14760 {
14761 case 'A':
14762 switch (ASSEMBLER_DIALECT)
14763 {
14764 case ASM_ATT:
14765 putc ('*', file);
14766 break;
14767
14768 case ASM_INTEL:
14769 /* Intel syntax. For absolute addresses, registers should not
14770 be surrounded by brackets. */
14771 if (!REG_P (x))
14772 {
14773 putc ('[', file);
14774 ix86_print_operand (file, x, 0);
14775 putc (']', file);
14776 return;
14777 }
14778 break;
14779
14780 default:
14781 gcc_unreachable ();
14782 }
14783
14784 ix86_print_operand (file, x, 0);
14785 return;
14786
14787 case 'E':
14788 /* Wrap address in an UNSPEC to declare special handling. */
14789 if (TARGET_64BIT)
14790 x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
14791
14792 output_address (x);
14793 return;
14794
14795 case 'L':
14796 if (ASSEMBLER_DIALECT == ASM_ATT)
14797 putc ('l', file);
14798 return;
14799
14800 case 'W':
14801 if (ASSEMBLER_DIALECT == ASM_ATT)
14802 putc ('w', file);
14803 return;
14804
14805 case 'B':
14806 if (ASSEMBLER_DIALECT == ASM_ATT)
14807 putc ('b', file);
14808 return;
14809
14810 case 'Q':
14811 if (ASSEMBLER_DIALECT == ASM_ATT)
14812 putc ('l', file);
14813 return;
14814
14815 case 'S':
14816 if (ASSEMBLER_DIALECT == ASM_ATT)
14817 putc ('s', file);
14818 return;
14819
14820 case 'T':
14821 if (ASSEMBLER_DIALECT == ASM_ATT)
14822 putc ('t', file);
14823 return;
14824
14825 case 'O':
14826 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14827 if (ASSEMBLER_DIALECT != ASM_ATT)
14828 return;
14829
14830 switch (GET_MODE_SIZE (GET_MODE (x)))
14831 {
14832 case 2:
14833 putc ('w', file);
14834 break;
14835
14836 case 4:
14837 putc ('l', file);
14838 break;
14839
14840 case 8:
14841 putc ('q', file);
14842 break;
14843
14844 default:
14845 output_operand_lossage
14846 ("invalid operand size for operand code 'O'");
14847 return;
14848 }
14849
14850 putc ('.', file);
14851 #endif
14852 return;
14853
14854 case 'z':
14855 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
14856 {
14857 /* Opcodes don't get size suffixes if using Intel opcodes. */
14858 if (ASSEMBLER_DIALECT == ASM_INTEL)
14859 return;
14860
14861 switch (GET_MODE_SIZE (GET_MODE (x)))
14862 {
14863 case 1:
14864 putc ('b', file);
14865 return;
14866
14867 case 2:
14868 putc ('w', file);
14869 return;
14870
14871 case 4:
14872 putc ('l', file);
14873 return;
14874
14875 case 8:
14876 putc ('q', file);
14877 return;
14878
14879 default:
14880 output_operand_lossage
14881 ("invalid operand size for operand code 'z'");
14882 return;
14883 }
14884 }
14885
14886 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14887 warning
14888 (0, "non-integer operand used with operand code 'z'");
14889 /* FALLTHRU */
14890
14891 case 'Z':
14892 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
14893 if (ASSEMBLER_DIALECT == ASM_INTEL)
14894 return;
14895
14896 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
14897 {
14898 switch (GET_MODE_SIZE (GET_MODE (x)))
14899 {
14900 case 2:
14901 #ifdef HAVE_AS_IX86_FILDS
14902 putc ('s', file);
14903 #endif
14904 return;
14905
14906 case 4:
14907 putc ('l', file);
14908 return;
14909
14910 case 8:
14911 #ifdef HAVE_AS_IX86_FILDQ
14912 putc ('q', file);
14913 #else
14914 fputs ("ll", file);
14915 #endif
14916 return;
14917
14918 default:
14919 break;
14920 }
14921 }
14922 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14923 {
14924 /* 387 opcodes don't get size suffixes
14925 if the operands are registers. */
14926 if (STACK_REG_P (x))
14927 return;
14928
14929 switch (GET_MODE_SIZE (GET_MODE (x)))
14930 {
14931 case 4:
14932 putc ('s', file);
14933 return;
14934
14935 case 8:
14936 putc ('l', file);
14937 return;
14938
14939 case 12:
14940 case 16:
14941 putc ('t', file);
14942 return;
14943
14944 default:
14945 break;
14946 }
14947 }
14948 else
14949 {
14950 output_operand_lossage
14951 ("invalid operand type used with operand code 'Z'");
14952 return;
14953 }
14954
14955 output_operand_lossage
14956 ("invalid operand size for operand code 'Z'");
14957 return;
14958
14959 case 'd':
14960 case 'b':
14961 case 'w':
14962 case 'k':
14963 case 'q':
14964 case 'h':
14965 case 't':
14966 case 'g':
14967 case 'y':
14968 case 'x':
14969 case 'X':
14970 case 'P':
14971 case 'p':
14972 break;
14973
14974 case 's':
14975 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
14976 {
14977 ix86_print_operand (file, x, 0);
14978 fputs (", ", file);
14979 }
14980 return;
14981
14982 case 'Y':
14983 switch (GET_CODE (x))
14984 {
14985 case NE:
14986 fputs ("neq", file);
14987 break;
14988 case EQ:
14989 fputs ("eq", file);
14990 break;
14991 case GE:
14992 case GEU:
14993 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
14994 break;
14995 case GT:
14996 case GTU:
14997 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
14998 break;
14999 case LE:
15000 case LEU:
15001 fputs ("le", file);
15002 break;
15003 case LT:
15004 case LTU:
15005 fputs ("lt", file);
15006 break;
15007 case UNORDERED:
15008 fputs ("unord", file);
15009 break;
15010 case ORDERED:
15011 fputs ("ord", file);
15012 break;
15013 case UNEQ:
15014 fputs ("ueq", file);
15015 break;
15016 case UNGE:
15017 fputs ("nlt", file);
15018 break;
15019 case UNGT:
15020 fputs ("nle", file);
15021 break;
15022 case UNLE:
15023 fputs ("ule", file);
15024 break;
15025 case UNLT:
15026 fputs ("ult", file);
15027 break;
15028 case LTGT:
15029 fputs ("une", file);
15030 break;
15031 default:
15032 output_operand_lossage ("operand is not a condition code, "
15033 "invalid operand code 'Y'");
15034 return;
15035 }
15036 return;
15037
15038 case 'D':
15039 /* Little bit of braindamage here. The SSE compare instructions
15040 use completely different names for the comparisons than the
15041 fp conditional moves do. */
15042 switch (GET_CODE (x))
15043 {
15044 case UNEQ:
15045 if (TARGET_AVX)
15046 {
15047 fputs ("eq_us", file);
15048 break;
15049 }
15050 case EQ:
15051 fputs ("eq", file);
15052 break;
15053 case UNLT:
15054 if (TARGET_AVX)
15055 {
15056 fputs ("nge", file);
15057 break;
15058 }
15059 case LT:
15060 fputs ("lt", file);
15061 break;
15062 case UNLE:
15063 if (TARGET_AVX)
15064 {
15065 fputs ("ngt", file);
15066 break;
15067 }
15068 case LE:
15069 fputs ("le", file);
15070 break;
15071 case UNORDERED:
15072 fputs ("unord", file);
15073 break;
15074 case LTGT:
15075 if (TARGET_AVX)
15076 {
15077 fputs ("neq_oq", file);
15078 break;
15079 }
15080 case NE:
15081 fputs ("neq", file);
15082 break;
15083 case GE:
15084 if (TARGET_AVX)
15085 {
15086 fputs ("ge", file);
15087 break;
15088 }
15089 case UNGE:
15090 fputs ("nlt", file);
15091 break;
15092 case GT:
15093 if (TARGET_AVX)
15094 {
15095 fputs ("gt", file);
15096 break;
15097 }
15098 case UNGT:
15099 fputs ("nle", file);
15100 break;
15101 case ORDERED:
15102 fputs ("ord", file);
15103 break;
15104 default:
15105 output_operand_lossage ("operand is not a condition code, "
15106 "invalid operand code 'D'");
15107 return;
15108 }
15109 return;
15110
15111 case 'F':
15112 case 'f':
15113 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
15114 if (ASSEMBLER_DIALECT == ASM_ATT)
15115 putc ('.', file);
15116 #endif
15117
15118 case 'C':
15119 case 'c':
15120 if (!COMPARISON_P (x))
15121 {
15122 output_operand_lossage ("operand is not a condition code, "
15123 "invalid operand code '%c'", code);
15124 return;
15125 }
15126 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
15127 code == 'c' || code == 'f',
15128 code == 'F' || code == 'f',
15129 file);
15130 return;
15131
15132 case 'H':
15133 if (!offsettable_memref_p (x))
15134 {
15135 output_operand_lossage ("operand is not an offsettable memory "
15136 "reference, invalid operand code 'H'");
15137 return;
15138 }
15139 /* It doesn't actually matter what mode we use here, as we're
15140 only going to use this for printing. */
15141 x = adjust_address_nv (x, DImode, 8);
15142 /* Output 'qword ptr' for intel assembler dialect. */
15143 if (ASSEMBLER_DIALECT == ASM_INTEL)
15144 code = 'q';
15145 break;
15146
15147 case 'K':
15148 gcc_assert (CONST_INT_P (x));
15149
15150 if (INTVAL (x) & IX86_HLE_ACQUIRE)
15151 #ifdef HAVE_AS_IX86_HLE
15152 fputs ("xacquire ", file);
15153 #else
15154 fputs ("\n" ASM_BYTE "0xf2\n\t", file);
15155 #endif
15156 else if (INTVAL (x) & IX86_HLE_RELEASE)
15157 #ifdef HAVE_AS_IX86_HLE
15158 fputs ("xrelease ", file);
15159 #else
15160 fputs ("\n" ASM_BYTE "0xf3\n\t", file);
15161 #endif
15162 /* We do not want to print value of the operand. */
15163 return;
15164
15165 case 'N':
15166 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
15167 fputs ("{z}", file);
15168 return;
15169
15170 case 'r':
15171 gcc_assert (CONST_INT_P (x));
15172 gcc_assert (INTVAL (x) == ROUND_SAE);
15173
15174 if (ASSEMBLER_DIALECT == ASM_INTEL)
15175 fputs (", ", file);
15176
15177 fputs ("{sae}", file);
15178
15179 if (ASSEMBLER_DIALECT == ASM_ATT)
15180 fputs (", ", file);
15181
15182 return;
15183
15184 case 'R':
15185 gcc_assert (CONST_INT_P (x));
15186
15187 if (ASSEMBLER_DIALECT == ASM_INTEL)
15188 fputs (", ", file);
15189
15190 switch (INTVAL (x))
15191 {
15192 case ROUND_NEAREST_INT | ROUND_SAE:
15193 fputs ("{rn-sae}", file);
15194 break;
15195 case ROUND_NEG_INF | ROUND_SAE:
15196 fputs ("{rd-sae}", file);
15197 break;
15198 case ROUND_POS_INF | ROUND_SAE:
15199 fputs ("{ru-sae}", file);
15200 break;
15201 case ROUND_ZERO | ROUND_SAE:
15202 fputs ("{rz-sae}", file);
15203 break;
15204 default:
15205 gcc_unreachable ();
15206 }
15207
15208 if (ASSEMBLER_DIALECT == ASM_ATT)
15209 fputs (", ", file);
15210
15211 return;
15212
15213 case '*':
15214 if (ASSEMBLER_DIALECT == ASM_ATT)
15215 putc ('*', file);
15216 return;
15217
15218 case '&':
15219 {
15220 const char *name = get_some_local_dynamic_name ();
15221 if (name == NULL)
15222 output_operand_lossage ("'%%&' used without any "
15223 "local dynamic TLS references");
15224 else
15225 assemble_name (file, name);
15226 return;
15227 }
15228
15229 case '+':
15230 {
15231 rtx x;
15232
15233 if (!optimize
15234 || optimize_function_for_size_p (cfun)
15235 || !TARGET_BRANCH_PREDICTION_HINTS)
15236 return;
15237
15238 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
15239 if (x)
15240 {
15241 int pred_val = XINT (x, 0);
15242
15243 if (pred_val < REG_BR_PROB_BASE * 45 / 100
15244 || pred_val > REG_BR_PROB_BASE * 55 / 100)
15245 {
15246 bool taken = pred_val > REG_BR_PROB_BASE / 2;
15247 bool cputaken
15248 = final_forward_branch_p (current_output_insn) == 0;
15249
15250 /* Emit hints only in the case default branch prediction
15251 heuristics would fail. */
15252 if (taken != cputaken)
15253 {
15254 /* We use 3e (DS) prefix for taken branches and
15255 2e (CS) prefix for not taken branches. */
15256 if (taken)
15257 fputs ("ds ; ", file);
15258 else
15259 fputs ("cs ; ", file);
15260 }
15261 }
15262 }
15263 return;
15264 }
15265
15266 case ';':
15267 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
15268 putc (';', file);
15269 #endif
15270 return;
15271
15272 case '@':
15273 if (ASSEMBLER_DIALECT == ASM_ATT)
15274 putc ('%', file);
15275
15276 /* The kernel uses a different segment register for performance
15277 reasons; a system call would not have to trash the userspace
15278 segment register, which would be expensive. */
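/* That is, %fs for 64-bit code outside -mcmodel=kernel, %gs otherwise.  */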
15279 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
15280 fputs ("fs", file);
15281 else
15282 fputs ("gs", file);
15283 return;
15284
15285 case '~':
15286 putc (TARGET_AVX2 ? 'i' : 'f', file);
15287 return;
15288
15289 case '^':
15290 if (TARGET_64BIT && Pmode != word_mode)
15291 fputs ("addr32 ", file);
15292 return;
15293
15294 default:
15295 output_operand_lossage ("invalid operand code '%c'", code);
15296 }
15297 }
15298
15299 if (REG_P (x))
15300 print_reg (x, code, file);
15301
15302 else if (MEM_P (x))
15303 {
15304 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
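/* For example, a 4-byte operand gets "DWORD PTR " and a 16-byte SSE
   operand gets "XMMWORD PTR ".  */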
15305 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
15306 && GET_MODE (x) != BLKmode)
15307 {
15308 const char * size;
15309 switch (GET_MODE_SIZE (GET_MODE (x)))
15310 {
15311 case 1: size = "BYTE"; break;
15312 case 2: size = "WORD"; break;
15313 case 4: size = "DWORD"; break;
15314 case 8: size = "QWORD"; break;
15315 case 12: size = "TBYTE"; break;
15316 case 16:
15317 if (GET_MODE (x) == XFmode)
15318 size = "TBYTE";
15319 else
15320 size = "XMMWORD";
15321 break;
15322 case 32: size = "YMMWORD"; break;
15323 case 64: size = "ZMMWORD"; break;
15324 default:
15325 gcc_unreachable ();
15326 }
15327
15328 /* Check for explicit size override (codes 'b', 'w', 'k',
15329 'q' and 'x') */
15330 if (code == 'b')
15331 size = "BYTE";
15332 else if (code == 'w')
15333 size = "WORD";
15334 else if (code == 'k')
15335 size = "DWORD";
15336 else if (code == 'q')
15337 size = "QWORD";
15338 else if (code == 'x')
15339 size = "XMMWORD";
15340
15341 fputs (size, file);
15342 fputs (" PTR ", file);
15343 }
15344
15345 x = XEXP (x, 0);
15346 /* Avoid (%rip) for call operands. */
15347 if (CONSTANT_ADDRESS_P (x) && code == 'P'
15348 && !CONST_INT_P (x))
15349 output_addr_const (file, x);
15350 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
15351 output_operand_lossage ("invalid constraints for operand");
15352 else
15353 output_address (x);
15354 }
15355
15356 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
15357 {
15358 REAL_VALUE_TYPE r;
15359 long l;
15360
15361 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15362 REAL_VALUE_TO_TARGET_SINGLE (r, l);
15363
15364 if (ASSEMBLER_DIALECT == ASM_ATT)
15365 putc ('$', file);
15366 /* Sign extend 32bit SFmode immediate to 8 bytes. */
15367 if (code == 'q')
15368 fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
15369 (unsigned long long) (int) l);
15370 else
15371 fprintf (file, "0x%08x", (unsigned int) l);
15372 }
15373
15374 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
15375 {
15376 REAL_VALUE_TYPE r;
15377 long l[2];
15378
15379 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15380 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
15381
15382 if (ASSEMBLER_DIALECT == ASM_ATT)
15383 putc ('$', file);
15384 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
15385 }
15386
15387 /* These float cases don't actually occur as immediate operands. */
15388 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
15389 {
15390 char dstr[30];
15391
15392 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
15393 fputs (dstr, file);
15394 }
15395
15396 else
15397 {
15398 /* We have patterns that allow zero sets of memory, for instance.
15399 In 64-bit mode, we should probably support all 8-byte vectors,
15400 since we can in fact encode that into an immediate. */
15401 if (GET_CODE (x) == CONST_VECTOR)
15402 {
15403 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
15404 x = const0_rtx;
15405 }
15406
15407 if (code != 'P' && code != 'p')
15408 {
15409 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
15410 {
15411 if (ASSEMBLER_DIALECT == ASM_ATT)
15412 putc ('$', file);
15413 }
15414 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
15415 || GET_CODE (x) == LABEL_REF)
15416 {
15417 if (ASSEMBLER_DIALECT == ASM_ATT)
15418 putc ('$', file);
15419 else
15420 fputs ("OFFSET FLAT:", file);
15421 }
15422 }
15423 if (CONST_INT_P (x))
15424 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
15425 else if (flag_pic || MACHOPIC_INDIRECT)
15426 output_pic_addr_const (file, x, code);
15427 else
15428 output_addr_const (file, x);
15429 }
15430 }
15431
15432 static bool
15433 ix86_print_operand_punct_valid_p (unsigned char code)
15434 {
15435 return (code == '@' || code == '*' || code == '+' || code == '&'
15436 || code == ';' || code == '~' || code == '^');
15437 }
15438 \f
15439 /* Print a memory operand whose address is ADDR. */
15440
15441 static void
15442 ix86_print_operand_address (FILE *file, rtx addr)
15443 {
15444 struct ix86_address parts;
15445 rtx base, index, disp;
15446 int scale;
15447 int ok;
15448 bool vsib = false;
15449 int code = 0;
15450
15451 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
15452 {
15453 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
15454 gcc_assert (parts.index == NULL_RTX);
15455 parts.index = XVECEXP (addr, 0, 1);
15456 parts.scale = INTVAL (XVECEXP (addr, 0, 2));
15457 addr = XVECEXP (addr, 0, 0);
15458 vsib = true;
15459 }
15460 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
15461 {
15462 gcc_assert (TARGET_64BIT);
15463 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
15464 code = 'q';
15465 }
15466 else
15467 ok = ix86_decompose_address (addr, &parts);
15468
15469 gcc_assert (ok);
15470
15471 base = parts.base;
15472 index = parts.index;
15473 disp = parts.disp;
15474 scale = parts.scale;
15475
15476 switch (parts.seg)
15477 {
15478 case SEG_DEFAULT:
15479 break;
15480 case SEG_FS:
15481 case SEG_GS:
15482 if (ASSEMBLER_DIALECT == ASM_ATT)
15483 putc ('%', file);
15484 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
15485 break;
15486 default:
15487 gcc_unreachable ();
15488 }
15489
15490 /* Use one byte shorter RIP relative addressing for 64bit mode. */
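/* For example, a symbol-only address is then printed as "sym(%rip)" in
   AT&T syntax instead of using an absolute 32-bit displacement.  */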
15491 if (TARGET_64BIT && !base && !index)
15492 {
15493 rtx symbol = disp;
15494
15495 if (GET_CODE (disp) == CONST
15496 && GET_CODE (XEXP (disp, 0)) == PLUS
15497 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
15498 symbol = XEXP (XEXP (disp, 0), 0);
15499
15500 if (GET_CODE (symbol) == LABEL_REF
15501 || (GET_CODE (symbol) == SYMBOL_REF
15502 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
15503 base = pc_rtx;
15504 }
15505 if (!base && !index)
15506 {
15507 /* Displacement only requires special attention. */
15508
15509 if (CONST_INT_P (disp))
15510 {
15511 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
15512 fputs ("ds:", file);
15513 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
15514 }
15515 else if (flag_pic)
15516 output_pic_addr_const (file, disp, 0);
15517 else
15518 output_addr_const (file, disp);
15519 }
15520 else
15521 {
15522 /* Print SImode register names to force addr32 prefix. */
15523 if (SImode_address_operand (addr, VOIDmode))
15524 {
15525 #ifdef ENABLE_CHECKING
15526 gcc_assert (TARGET_64BIT);
15527 switch (GET_CODE (addr))
15528 {
15529 case SUBREG:
15530 gcc_assert (GET_MODE (addr) == SImode);
15531 gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
15532 break;
15533 case ZERO_EXTEND:
15534 case AND:
15535 gcc_assert (GET_MODE (addr) == DImode);
15536 break;
15537 default:
15538 gcc_unreachable ();
15539 }
15540 #endif
15541 gcc_assert (!code);
15542 code = 'k';
15543 }
15544 else if (code == 0
15545 && TARGET_X32
15546 && disp
15547 && CONST_INT_P (disp)
15548 && INTVAL (disp) < -16*1024*1024)
15549 {
15550 /* X32 runs in 64-bit mode, where displacement, DISP, in
15551 address DISP(%r64), is encoded as 32-bit immediate sign-
15552 extended from 32-bit to 64-bit. For -0x40000300(%r64),
15553 address is %r64 + 0xffffffffbffffd00. When %r64 <
15554 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
15555 which is invalid for x32. The correct address is %r64
15556 - 0x40000300 == 0xf7ffdd64. To properly encode
15557 -0x40000300(%r64) for x32, we zero-extend negative
15558 displacement by forcing addr32 prefix which truncates
15559 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
15560 zero-extend all negative displacements, including -1(%rsp).
15561 However, for small negative displacements, sign-extension
15562 won't cause overflow. We only zero-extend negative
15563 displacements if they are < -16*1024*1024, which is also used
15564 to check legitimate address displacements for PIC. */
15565 code = 'k';
15566 }
15567
15568 if (ASSEMBLER_DIALECT == ASM_ATT)
15569 {
15570 if (disp)
15571 {
15572 if (flag_pic)
15573 output_pic_addr_const (file, disp, 0);
15574 else if (GET_CODE (disp) == LABEL_REF)
15575 output_asm_label (disp);
15576 else
15577 output_addr_const (file, disp);
15578 }
15579
15580 putc ('(', file);
15581 if (base)
15582 print_reg (base, code, file);
15583 if (index)
15584 {
15585 putc (',', file);
15586 print_reg (index, vsib ? 0 : code, file);
15587 if (scale != 1 || vsib)
15588 fprintf (file, ",%d", scale);
15589 }
15590 putc (')', file);
15591 }
15592 else
15593 {
15594 rtx offset = NULL_RTX;
15595
15596 if (disp)
15597 {
15598 /* Pull out the offset of a symbol; print any symbol itself. */
15599 if (GET_CODE (disp) == CONST
15600 && GET_CODE (XEXP (disp, 0)) == PLUS
15601 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
15602 {
15603 offset = XEXP (XEXP (disp, 0), 1);
15604 disp = gen_rtx_CONST (VOIDmode,
15605 XEXP (XEXP (disp, 0), 0));
15606 }
15607
15608 if (flag_pic)
15609 output_pic_addr_const (file, disp, 0);
15610 else if (GET_CODE (disp) == LABEL_REF)
15611 output_asm_label (disp);
15612 else if (CONST_INT_P (disp))
15613 offset = disp;
15614 else
15615 output_addr_const (file, disp);
15616 }
15617
15618 putc ('[', file);
15619 if (base)
15620 {
15621 print_reg (base, code, file);
15622 if (offset)
15623 {
15624 if (INTVAL (offset) >= 0)
15625 putc ('+', file);
15626 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
15627 }
15628 }
15629 else if (offset)
15630 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
15631 else
15632 putc ('0', file);
15633
15634 if (index)
15635 {
15636 putc ('+', file);
15637 print_reg (index, vsib ? 0 : code, file);
15638 if (scale != 1 || vsib)
15639 fprintf (file, "*%d", scale);
15640 }
15641 putc (']', file);
15642 }
15643 }
15644 }
15645
15646 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
15647
15648 static bool
15649 i386_asm_output_addr_const_extra (FILE *file, rtx x)
15650 {
15651 rtx op;
15652
15653 if (GET_CODE (x) != UNSPEC)
15654 return false;
15655
15656 op = XVECEXP (x, 0, 0);
15657 switch (XINT (x, 1))
15658 {
15659 case UNSPEC_GOTTPOFF:
15660 output_addr_const (file, op);
15661 /* FIXME: This might be @TPOFF in Sun ld. */
15662 fputs ("@gottpoff", file);
15663 break;
15664 case UNSPEC_TPOFF:
15665 output_addr_const (file, op);
15666 fputs ("@tpoff", file);
15667 break;
15668 case UNSPEC_NTPOFF:
15669 output_addr_const (file, op);
15670 if (TARGET_64BIT)
15671 fputs ("@tpoff", file);
15672 else
15673 fputs ("@ntpoff", file);
15674 break;
15675 case UNSPEC_DTPOFF:
15676 output_addr_const (file, op);
15677 fputs ("@dtpoff", file);
15678 break;
15679 case UNSPEC_GOTNTPOFF:
15680 output_addr_const (file, op);
15681 if (TARGET_64BIT)
15682 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
15683 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
15684 else
15685 fputs ("@gotntpoff", file);
15686 break;
15687 case UNSPEC_INDNTPOFF:
15688 output_addr_const (file, op);
15689 fputs ("@indntpoff", file);
15690 break;
15691 #if TARGET_MACHO
15692 case UNSPEC_MACHOPIC_OFFSET:
15693 output_addr_const (file, op);
15694 putc ('-', file);
15695 machopic_output_function_base_name (file);
15696 break;
15697 #endif
15698
15699 case UNSPEC_STACK_CHECK:
15700 {
15701 int offset;
15702
15703 gcc_assert (flag_split_stack);
15704
15705 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
15706 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
15707 #else
15708 gcc_unreachable ();
15709 #endif
15710
15711 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
15712 }
15713 break;
15714
15715 default:
15716 return false;
15717 }
15718
15719 return true;
15720 }
15721 \f
15722 /* Split one or more double-mode RTL references into pairs of half-mode
15723 references. The RTL can be REG, offsettable MEM, integer constant, or
15724 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
15725 split and "num" is its length. lo_half and hi_half are output arrays
15726 that parallel "operands". */
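
/* For example (illustration only, register number hypothetical): before
reload a DImode pseudo (reg:DI 100) is split into
(subreg:SI (reg:DI 100) 0) as its low half and
(subreg:SI (reg:DI 100) 4) as its high half on this little-endian target.  */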
15727
15728 void
15729 split_double_mode (enum machine_mode mode, rtx operands[],
15730 int num, rtx lo_half[], rtx hi_half[])
15731 {
15732 enum machine_mode half_mode;
15733 unsigned int byte;
15734
15735 switch (mode)
15736 {
15737 case TImode:
15738 half_mode = DImode;
15739 break;
15740 case DImode:
15741 half_mode = SImode;
15742 break;
15743 default:
15744 gcc_unreachable ();
15745 }
15746
15747 byte = GET_MODE_SIZE (half_mode);
15748
15749 while (num--)
15750 {
15751 rtx op = operands[num];
15752
15753 /* simplify_subreg refuses to split volatile memory addresses,
15754 but we still have to handle them. */
15755 if (MEM_P (op))
15756 {
15757 lo_half[num] = adjust_address (op, half_mode, 0);
15758 hi_half[num] = adjust_address (op, half_mode, byte);
15759 }
15760 else
15761 {
15762 lo_half[num] = simplify_gen_subreg (half_mode, op,
15763 GET_MODE (op) == VOIDmode
15764 ? mode : GET_MODE (op), 0);
15765 hi_half[num] = simplify_gen_subreg (half_mode, op,
15766 GET_MODE (op) == VOIDmode
15767 ? mode : GET_MODE (op), byte);
15768 }
15769 }
15770 }
15771 \f
15772 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
15773 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
15774 is the expression of the binary operation. The output may either be
15775 emitted here, or returned to the caller, like all output_* functions.
15776
15777 There is no guarantee that the operands are the same mode, as they
15778 might be within FLOAT or FLOAT_EXTEND expressions. */
15779
15780 #ifndef SYSV386_COMPAT
15781 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
15782 wants to fix the assemblers because that causes incompatibility
15783 with gcc. No-one wants to fix gcc because that causes
15784 incompatibility with assemblers... You can use the option of
15785 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
15786 #define SYSV386_COMPAT 1
15787 #endif
15788
15789 const char *
15790 output_387_binary_op (rtx insn, rtx *operands)
15791 {
15792 static char buf[40];
15793 const char *p;
15794 const char *ssep;
15795 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
15796
15797 #ifdef ENABLE_CHECKING
15798 /* Even if we do not want to check the inputs, this documents input
15799 constraints, which helps in understanding the following code. */
15800 if (STACK_REG_P (operands[0])
15801 && ((REG_P (operands[1])
15802 && REGNO (operands[0]) == REGNO (operands[1])
15803 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
15804 || (REG_P (operands[2])
15805 && REGNO (operands[0]) == REGNO (operands[2])
15806 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
15807 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
15808 ; /* ok */
15809 else
15810 gcc_assert (is_sse);
15811 #endif
15812
15813 switch (GET_CODE (operands[3]))
15814 {
15815 case PLUS:
15816 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15817 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15818 p = "fiadd";
15819 else
15820 p = "fadd";
15821 ssep = "vadd";
15822 break;
15823
15824 case MINUS:
15825 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15826 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15827 p = "fisub";
15828 else
15829 p = "fsub";
15830 ssep = "vsub";
15831 break;
15832
15833 case MULT:
15834 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15835 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15836 p = "fimul";
15837 else
15838 p = "fmul";
15839 ssep = "vmul";
15840 break;
15841
15842 case DIV:
15843 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15844 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15845 p = "fidiv";
15846 else
15847 p = "fdiv";
15848 ssep = "vdiv";
15849 break;
15850
15851 default:
15852 gcc_unreachable ();
15853 }
15854
15855 if (is_sse)
15856 {
15857 if (TARGET_AVX)
15858 {
15859 strcpy (buf, ssep);
15860 if (GET_MODE (operands[0]) == SFmode)
15861 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
15862 else
15863 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
15864 }
15865 else
15866 {
15867 strcpy (buf, ssep + 1);
15868 if (GET_MODE (operands[0]) == SFmode)
15869 strcat (buf, "ss\t{%2, %0|%0, %2}");
15870 else
15871 strcat (buf, "sd\t{%2, %0|%0, %2}");
15872 }
15873 return buf;
15874 }
15875 strcpy (buf, p);
15876
15877 switch (GET_CODE (operands[3]))
15878 {
15879 case MULT:
15880 case PLUS:
15881 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
15882 {
15883 rtx temp = operands[2];
15884 operands[2] = operands[1];
15885 operands[1] = temp;
15886 }
15887
15888 /* We know operands[0] == operands[1]. */
15889
15890 if (MEM_P (operands[2]))
15891 {
15892 p = "%Z2\t%2";
15893 break;
15894 }
15895
15896 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
15897 {
15898 if (STACK_TOP_P (operands[0]))
15899 /* How is it that we are storing to a dead operand[2]?
15900 Well, presumably operands[1] is dead too. We can't
15901 store the result to st(0) as st(0) gets popped on this
15902 instruction. Instead store to operands[2] (which I
15903 think has to be st(1)). st(1) will be popped later.
15904 gcc <= 2.8.1 didn't have this check and generated
15905 assembly code that the Unixware assembler rejected. */
15906 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
15907 else
15908 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
15909 break;
15910 }
15911
15912 if (STACK_TOP_P (operands[0]))
15913 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
15914 else
15915 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
15916 break;
15917
15918 case MINUS:
15919 case DIV:
15920 if (MEM_P (operands[1]))
15921 {
15922 p = "r%Z1\t%1";
15923 break;
15924 }
15925
15926 if (MEM_P (operands[2]))
15927 {
15928 p = "%Z2\t%2";
15929 break;
15930 }
15931
15932 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
15933 {
15934 #if SYSV386_COMPAT
15935 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
15936 derived assemblers, confusingly reverse the direction of
15937 the operation for fsub{r} and fdiv{r} when the
15938 destination register is not st(0). The Intel assembler
15939 doesn't have this brain damage. Read !SYSV386_COMPAT to
15940 figure out what the hardware really does. */
15941 if (STACK_TOP_P (operands[0]))
15942 p = "{p\t%0, %2|rp\t%2, %0}";
15943 else
15944 p = "{rp\t%2, %0|p\t%0, %2}";
15945 #else
15946 if (STACK_TOP_P (operands[0]))
15947 /* As above for fmul/fadd, we can't store to st(0). */
15948 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
15949 else
15950 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
15951 #endif
15952 break;
15953 }
15954
15955 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
15956 {
15957 #if SYSV386_COMPAT
15958 if (STACK_TOP_P (operands[0]))
15959 p = "{rp\t%0, %1|p\t%1, %0}";
15960 else
15961 p = "{p\t%1, %0|rp\t%0, %1}";
15962 #else
15963 if (STACK_TOP_P (operands[0]))
15964 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
15965 else
15966 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
15967 #endif
15968 break;
15969 }
15970
15971 if (STACK_TOP_P (operands[0]))
15972 {
15973 if (STACK_TOP_P (operands[1]))
15974 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
15975 else
15976 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
15977 break;
15978 }
15979 else if (STACK_TOP_P (operands[1]))
15980 {
15981 #if SYSV386_COMPAT
15982 p = "{\t%1, %0|r\t%0, %1}";
15983 #else
15984 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
15985 #endif
15986 }
15987 else
15988 {
15989 #if SYSV386_COMPAT
15990 p = "{r\t%2, %0|\t%0, %2}";
15991 #else
15992 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
15993 #endif
15994 }
15995 break;
15996
15997 default:
15998 gcc_unreachable ();
15999 }
16000
16001 strcat (buf, p);
16002 return buf;
16003 }
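
/* Illustration only: for a PLUS with SFmode SSE operands the routine above
returns "vaddss\t{%2, %1, %0|%0, %1, %2}" when TARGET_AVX is enabled and
"addss\t{%2, %0|%0, %2}" otherwise.  */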
16004
16005 /* Check if a 256bit AVX register is referenced inside of EXP. */
16006
16007 static int
16008 ix86_check_avx256_register (rtx *pexp, void *data ATTRIBUTE_UNUSED)
16009 {
16010 rtx exp = *pexp;
16011
16012 if (GET_CODE (exp) == SUBREG)
16013 exp = SUBREG_REG (exp);
16014
16015 if (REG_P (exp)
16016 && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp)))
16017 return 1;
16018
16019 return 0;
16020 }
16021
16022 /* Return needed mode for entity in optimize_mode_switching pass. */
16023
16024 static int
16025 ix86_avx_u128_mode_needed (rtx insn)
16026 {
16027 if (CALL_P (insn))
16028 {
16029 rtx link;
16030
16031 /* Needed mode is set to AVX_U128_CLEAN if there are
16032 no 256bit modes used in function arguments. */
16033 for (link = CALL_INSN_FUNCTION_USAGE (insn);
16034 link;
16035 link = XEXP (link, 1))
16036 {
16037 if (GET_CODE (XEXP (link, 0)) == USE)
16038 {
16039 rtx arg = XEXP (XEXP (link, 0), 0);
16040
16041 if (ix86_check_avx256_register (&arg, NULL))
16042 return AVX_U128_DIRTY;
16043 }
16044 }
16045
16046 return AVX_U128_CLEAN;
16047 }
16048
16049 /* Require DIRTY mode if a 256bit AVX register is referenced. Hardware
16050 changes state only when a 256bit register is written to, but we need
16051 to prevent the compiler from moving the optimal insertion point above
16052 an eventual read from a 256bit register. */
16053 if (for_each_rtx (&PATTERN (insn), ix86_check_avx256_register, NULL))
16054 return AVX_U128_DIRTY;
16055
16056 return AVX_U128_ANY;
16057 }
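
/* Illustration: a call whose argument list uses no 256bit values is
reported as AVX_U128_CLEAN by the function above, which allows the mode
switching pass to place a vzeroupper before the call when the upper
halves are currently dirty (see ix86_emit_mode_set below).  */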
16058
16059 /* Return mode that i387 must be switched into
16060 prior to the execution of insn. */
16061
16062 static int
16063 ix86_i387_mode_needed (int entity, rtx insn)
16064 {
16065 enum attr_i387_cw mode;
16066
16067 /* The mode UNINITIALIZED is used to store the control word after a
16068 function call or ASM pattern. The mode ANY specifies that the function
16069 has no requirements on the control word and makes no changes in the
16070 bits we are interested in. */
16071
16072 if (CALL_P (insn)
16073 || (NONJUMP_INSN_P (insn)
16074 && (asm_noperands (PATTERN (insn)) >= 0
16075 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
16076 return I387_CW_UNINITIALIZED;
16077
16078 if (recog_memoized (insn) < 0)
16079 return I387_CW_ANY;
16080
16081 mode = get_attr_i387_cw (insn);
16082
16083 switch (entity)
16084 {
16085 case I387_TRUNC:
16086 if (mode == I387_CW_TRUNC)
16087 return mode;
16088 break;
16089
16090 case I387_FLOOR:
16091 if (mode == I387_CW_FLOOR)
16092 return mode;
16093 break;
16094
16095 case I387_CEIL:
16096 if (mode == I387_CW_CEIL)
16097 return mode;
16098 break;
16099
16100 case I387_MASK_PM:
16101 if (mode == I387_CW_MASK_PM)
16102 return mode;
16103 break;
16104
16105 default:
16106 gcc_unreachable ();
16107 }
16108
16109 return I387_CW_ANY;
16110 }
16111
16112 /* Return mode that entity must be switched into
16113 prior to the execution of insn. */
16114
16115 int
16116 ix86_mode_needed (int entity, rtx insn)
16117 {
16118 switch (entity)
16119 {
16120 case AVX_U128:
16121 return ix86_avx_u128_mode_needed (insn);
16122 case I387_TRUNC:
16123 case I387_FLOOR:
16124 case I387_CEIL:
16125 case I387_MASK_PM:
16126 return ix86_i387_mode_needed (entity, insn);
16127 default:
16128 gcc_unreachable ();
16129 }
16130 return 0;
16131 }
16132
16133 /* Check if a 256bit AVX register is referenced in stores. */
16134
16135 static void
16136 ix86_check_avx256_stores (rtx dest, const_rtx set ATTRIBUTE_UNUSED, void *data)
16137 {
16138 if (ix86_check_avx256_register (&dest, NULL))
16139 {
16140 bool *used = (bool *) data;
16141 *used = true;
16142 }
16143 }
16144
16145 /* Calculate the mode of the upper 128 bits of AVX registers after the insn. */
16146
16147 static int
16148 ix86_avx_u128_mode_after (int mode, rtx insn)
16149 {
16150 rtx pat = PATTERN (insn);
16151
16152 if (vzeroupper_operation (pat, VOIDmode)
16153 || vzeroall_operation (pat, VOIDmode))
16154 return AVX_U128_CLEAN;
16155
16156 /* We know that the state is clean after a CALL insn if no
16157 256bit register is used for the function return value. */
16158 if (CALL_P (insn))
16159 {
16160 bool avx_reg256_found = false;
16161 note_stores (pat, ix86_check_avx256_stores, &avx_reg256_found);
16162
16163 return avx_reg256_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
16164 }
16165
16166 /* Otherwise, return current mode. Remember that if insn
16167 references AVX 256bit registers, the mode was already changed
16168 to DIRTY from MODE_NEEDED. */
16169 return mode;
16170 }
16171
16172 /* Return the mode that an insn results in. */
16173
16174 int
16175 ix86_mode_after (int entity, int mode, rtx insn)
16176 {
16177 switch (entity)
16178 {
16179 case AVX_U128:
16180 return ix86_avx_u128_mode_after (mode, insn);
16181 case I387_TRUNC:
16182 case I387_FLOOR:
16183 case I387_CEIL:
16184 case I387_MASK_PM:
16185 return mode;
16186 default:
16187 gcc_unreachable ();
16188 }
16189 }
16190
16191 static int
16192 ix86_avx_u128_mode_entry (void)
16193 {
16194 tree arg;
16195
16196 /* Entry mode is set to AVX_U128_DIRTY if there are
16197 256bit modes used in function arguments. */
16198 for (arg = DECL_ARGUMENTS (current_function_decl); arg;
16199 arg = TREE_CHAIN (arg))
16200 {
16201 rtx incoming = DECL_INCOMING_RTL (arg);
16202
16203 if (incoming && ix86_check_avx256_register (&incoming, NULL))
16204 return AVX_U128_DIRTY;
16205 }
16206
16207 return AVX_U128_CLEAN;
16208 }
16209
16210 /* Return a mode that ENTITY is assumed to be
16211 switched to at function entry. */
16212
16213 int
16214 ix86_mode_entry (int entity)
16215 {
16216 switch (entity)
16217 {
16218 case AVX_U128:
16219 return ix86_avx_u128_mode_entry ();
16220 case I387_TRUNC:
16221 case I387_FLOOR:
16222 case I387_CEIL:
16223 case I387_MASK_PM:
16224 return I387_CW_ANY;
16225 default:
16226 gcc_unreachable ();
16227 }
16228 }
16229
16230 static int
16231 ix86_avx_u128_mode_exit (void)
16232 {
16233 rtx reg = crtl->return_rtx;
16234
16235 /* Exit mode is set to AVX_U128_DIRTY if there are
16236 256bit modes used in the function return register. */
16237 if (reg && ix86_check_avx256_register (&reg, NULL))
16238 return AVX_U128_DIRTY;
16239
16240 return AVX_U128_CLEAN;
16241 }
16242
16243 /* Return a mode that ENTITY is assumed to be
16244 switched to at function exit. */
16245
16246 int
16247 ix86_mode_exit (int entity)
16248 {
16249 switch (entity)
16250 {
16251 case AVX_U128:
16252 return ix86_avx_u128_mode_exit ();
16253 case I387_TRUNC:
16254 case I387_FLOOR:
16255 case I387_CEIL:
16256 case I387_MASK_PM:
16257 return I387_CW_ANY;
16258 default:
16259 gcc_unreachable ();
16260 }
16261 }
16262
16263 /* Output code to initialize control word copies used by trunc?f?i and
16264 rounding patterns. CURRENT_MODE is set to current control word,
16265 while NEW_MODE is set to new control word. */
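
/* Background note (x87 architecture fact, not derived from this file):
the rounding-control field occupies bits 10-11 of the control word, so the
0x0c00 mask below selects round-toward-zero, 0x0400 round-down and
0x0800 round-up, while 0x0020 sets the precision exception mask bit.  */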
16266
16267 static void
16268 emit_i387_cw_initialization (int mode)
16269 {
16270 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
16271 rtx new_mode;
16272
16273 enum ix86_stack_slot slot;
16274
16275 rtx reg = gen_reg_rtx (HImode);
16276
16277 emit_insn (gen_x86_fnstcw_1 (stored_mode));
16278 emit_move_insn (reg, copy_rtx (stored_mode));
16279
16280 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
16281 || optimize_insn_for_size_p ())
16282 {
16283 switch (mode)
16284 {
16285 case I387_CW_TRUNC:
16286 /* round toward zero (truncate) */
16287 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
16288 slot = SLOT_CW_TRUNC;
16289 break;
16290
16291 case I387_CW_FLOOR:
16292 /* round down toward -oo */
16293 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16294 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
16295 slot = SLOT_CW_FLOOR;
16296 break;
16297
16298 case I387_CW_CEIL:
16299 /* round up toward +oo */
16300 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16301 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
16302 slot = SLOT_CW_CEIL;
16303 break;
16304
16305 case I387_CW_MASK_PM:
16306 /* mask precision exception for nearbyint() */
16307 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16308 slot = SLOT_CW_MASK_PM;
16309 break;
16310
16311 default:
16312 gcc_unreachable ();
16313 }
16314 }
16315 else
16316 {
16317 switch (mode)
16318 {
16319 case I387_CW_TRUNC:
16320 /* round toward zero (truncate) */
16321 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
16322 slot = SLOT_CW_TRUNC;
16323 break;
16324
16325 case I387_CW_FLOOR:
16326 /* round down toward -oo */
16327 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
16328 slot = SLOT_CW_FLOOR;
16329 break;
16330
16331 case I387_CW_CEIL:
16332 /* round up toward +oo */
16333 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
16334 slot = SLOT_CW_CEIL;
16335 break;
16336
16337 case I387_CW_MASK_PM:
16338 /* mask precision exception for nearbyint() */
16339 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16340 slot = SLOT_CW_MASK_PM;
16341 break;
16342
16343 default:
16344 gcc_unreachable ();
16345 }
16346 }
16347
16348 gcc_assert (slot < MAX_386_STACK_LOCALS);
16349
16350 new_mode = assign_386_stack_local (HImode, slot);
16351 emit_move_insn (new_mode, reg);
16352 }
16353
16354 /* Emit vzeroupper. */
16355
16356 void
16357 ix86_avx_emit_vzeroupper (HARD_REG_SET regs_live)
16358 {
16359 int i;
16360
16361 /* Cancel automatic vzeroupper insertion if there are
16362 live call-saved SSE registers at the insertion point. */
16363
16364 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
16365 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
16366 return;
16367
16368 if (TARGET_64BIT)
16369 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
16370 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
16371 return;
16372
16373 emit_insn (gen_avx_vzeroupper ());
16374 }
16375
16376 /* Generate one or more insns to set ENTITY to MODE. */
16377
16378 void
16379 ix86_emit_mode_set (int entity, int mode, HARD_REG_SET regs_live)
16380 {
16381 switch (entity)
16382 {
16383 case AVX_U128:
16384 if (mode == AVX_U128_CLEAN)
16385 ix86_avx_emit_vzeroupper (regs_live);
16386 break;
16387 case I387_TRUNC:
16388 case I387_FLOOR:
16389 case I387_CEIL:
16390 case I387_MASK_PM:
16391 if (mode != I387_CW_ANY
16392 && mode != I387_CW_UNINITIALIZED)
16393 emit_i387_cw_initialization (mode);
16394 break;
16395 default:
16396 gcc_unreachable ();
16397 }
16398 }
16399
16400 /* Output code for INSN to convert a float to a signed int. OPERANDS
16401 are the insn operands. The output may be [HSD]Imode and the input
16402 operand may be [SDX]Fmode. */
16403
16404 const char *
16405 output_fix_trunc (rtx insn, rtx *operands, bool fisttp)
16406 {
16407 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
16408 int dimode_p = GET_MODE (operands[0]) == DImode;
16409 int round_mode = get_attr_i387_cw (insn);
16410
16411 /* Jump through a hoop or two for DImode, since the hardware has no
16412 non-popping instruction. We used to do this a different way, but
16413 that was somewhat fragile and broke with post-reload splitters. */
16414 if ((dimode_p || fisttp) && !stack_top_dies)
16415 output_asm_insn ("fld\t%y1", operands);
16416
16417 gcc_assert (STACK_TOP_P (operands[1]));
16418 gcc_assert (MEM_P (operands[0]));
16419 gcc_assert (GET_MODE (operands[1]) != TFmode);
16420
16421 if (fisttp)
16422 output_asm_insn ("fisttp%Z0\t%0", operands);
16423 else
16424 {
16425 if (round_mode != I387_CW_ANY)
16426 output_asm_insn ("fldcw\t%3", operands);
16427 if (stack_top_dies || dimode_p)
16428 output_asm_insn ("fistp%Z0\t%0", operands);
16429 else
16430 output_asm_insn ("fist%Z0\t%0", operands);
16431 if (round_mode != I387_CW_ANY)
16432 output_asm_insn ("fldcw\t%2", operands);
16433 }
16434
16435 return "";
16436 }
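
/* Illustration only: for a SImode destination with a non-default rounding
mode and a dying stack top the routine above emits roughly
	fldcw	%3
	fistpl	%0
	fldcw	%2
whereas the fisttp form needs no control word switching at all.  */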
16437
16438 /* Output code for x87 ffreep insn. The OPNO argument, which may only
16439 have the values zero or one, indicates the ffreep insn's operand
16440 from the OPERANDS array. */
16441
16442 static const char *
16443 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
16444 {
16445 if (TARGET_USE_FFREEP)
16446 #ifdef HAVE_AS_IX86_FFREEP
16447 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
16448 #else
16449 {
16450 static char retval[32];
16451 int regno = REGNO (operands[opno]);
16452
16453 gcc_assert (STACK_REGNO_P (regno));
16454
16455 regno -= FIRST_STACK_REG;
16456
16457 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
16458 return retval;
16459 }
16460 #endif
16461
16462 return opno ? "fstp\t%y1" : "fstp\t%y0";
16463 }
16464
16465
16466 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
16467 should be used. UNORDERED_P is true when fucom should be used. */
16468
16469 const char *
16470 output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
16471 {
16472 int stack_top_dies;
16473 rtx cmp_op0, cmp_op1;
16474 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
16475
16476 if (eflags_p)
16477 {
16478 cmp_op0 = operands[0];
16479 cmp_op1 = operands[1];
16480 }
16481 else
16482 {
16483 cmp_op0 = operands[1];
16484 cmp_op1 = operands[2];
16485 }
16486
16487 if (is_sse)
16488 {
16489 if (GET_MODE (operands[0]) == SFmode)
16490 if (unordered_p)
16491 return "%vucomiss\t{%1, %0|%0, %1}";
16492 else
16493 return "%vcomiss\t{%1, %0|%0, %1}";
16494 else
16495 if (unordered_p)
16496 return "%vucomisd\t{%1, %0|%0, %1}";
16497 else
16498 return "%vcomisd\t{%1, %0|%0, %1}";
16499 }
16500
16501 gcc_assert (STACK_TOP_P (cmp_op0));
16502
16503 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
16504
16505 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
16506 {
16507 if (stack_top_dies)
16508 {
16509 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
16510 return output_387_ffreep (operands, 1);
16511 }
16512 else
16513 return "ftst\n\tfnstsw\t%0";
16514 }
16515
16516 if (STACK_REG_P (cmp_op1)
16517 && stack_top_dies
16518 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
16519 && REGNO (cmp_op1) != FIRST_STACK_REG)
16520 {
16521 /* If both the top of the 387 stack and the other operand
16522 (which is also a stack register) die, then this must be a
16523 `fcompp' float compare. */
16524
16525 if (eflags_p)
16526 {
16527 /* There is no double popping fcomi variant. Fortunately,
16528 eflags is immune from the fstp's cc clobbering. */
16529 if (unordered_p)
16530 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
16531 else
16532 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
16533 return output_387_ffreep (operands, 0);
16534 }
16535 else
16536 {
16537 if (unordered_p)
16538 return "fucompp\n\tfnstsw\t%0";
16539 else
16540 return "fcompp\n\tfnstsw\t%0";
16541 }
16542 }
16543 else
16544 {
16545 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
16546
16547 static const char * const alt[16] =
16548 {
16549 "fcom%Z2\t%y2\n\tfnstsw\t%0",
16550 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
16551 "fucom%Z2\t%y2\n\tfnstsw\t%0",
16552 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
16553
16554 "ficom%Z2\t%y2\n\tfnstsw\t%0",
16555 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
16556 NULL,
16557 NULL,
16558
16559 "fcomi\t{%y1, %0|%0, %y1}",
16560 "fcomip\t{%y1, %0|%0, %y1}",
16561 "fucomi\t{%y1, %0|%0, %y1}",
16562 "fucomip\t{%y1, %0|%0, %y1}",
16563
16564 NULL,
16565 NULL,
16566 NULL,
16567 NULL
16568 };
16569
16570 int mask;
16571 const char *ret;
16572
16573 mask = eflags_p << 3;
16574 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
16575 mask |= unordered_p << 1;
16576 mask |= stack_top_dies;
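/* E.g. (illustration) fucomip: eflags_p = 1, a non-integer operand,
unordered_p = 1 and a dying stack top give mask 8 + 2 + 1 = 11,
selecting "fucomip\t{%y1, %0|%0, %y1}" from the table above.  */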
16577
16578 gcc_assert (mask < 16);
16579 ret = alt[mask];
16580 gcc_assert (ret);
16581
16582 return ret;
16583 }
16584 }
16585
16586 void
16587 ix86_output_addr_vec_elt (FILE *file, int value)
16588 {
16589 const char *directive = ASM_LONG;
16590
16591 #ifdef ASM_QUAD
16592 if (TARGET_LP64)
16593 directive = ASM_QUAD;
16594 #else
16595 gcc_assert (!TARGET_64BIT);
16596 #endif
16597
16598 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
16599 }
16600
16601 void
16602 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
16603 {
16604 const char *directive = ASM_LONG;
16605
16606 #ifdef ASM_QUAD
16607 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
16608 directive = ASM_QUAD;
16609 #else
16610 gcc_assert (!TARGET_64BIT);
16611 #endif
16612 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
16613 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
16614 fprintf (file, "%s%s%d-%s%d\n",
16615 directive, LPREFIX, value, LPREFIX, rel);
16616 else if (HAVE_AS_GOTOFF_IN_DATA)
16617 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
16618 #if TARGET_MACHO
16619 else if (TARGET_MACHO)
16620 {
16621 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
16622 machopic_output_function_base_name (file);
16623 putc ('\n', file);
16624 }
16625 #endif
16626 else
16627 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
16628 GOT_SYMBOL_NAME, LPREFIX, value);
16629 }
16630 \f
16631 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
16632 for the target. */
16633
16634 void
16635 ix86_expand_clear (rtx dest)
16636 {
16637 rtx tmp;
16638
16639 /* We play register width games, which are only valid after reload. */
16640 gcc_assert (reload_completed);
16641
16642 /* Avoid HImode and its attendant prefix byte. */
16643 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
16644 dest = gen_rtx_REG (SImode, REGNO (dest));
16645 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
16646
16647 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
16648 if (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ())
16649 {
16650 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
16651 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
16652 }
16653
16654 emit_insn (tmp);
16655 }
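
/* Illustration only: clearing %eax normally becomes "xorl %eax, %eax"
(the parallel with the flags clobber built above); the plain
"movl $0, %eax" form is kept only when TARGET_USE_MOV0 is set and the
insn is not being optimized for speed.  */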
16656
16657 /* X is an unchanging MEM. If it is a constant pool reference, return
16658 the constant pool rtx, else NULL. */
16659
16660 rtx
16661 maybe_get_pool_constant (rtx x)
16662 {
16663 x = ix86_delegitimize_address (XEXP (x, 0));
16664
16665 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
16666 return get_pool_constant (x);
16667
16668 return NULL_RTX;
16669 }
16670
16671 void
16672 ix86_expand_move (enum machine_mode mode, rtx operands[])
16673 {
16674 rtx op0, op1;
16675 enum tls_model model;
16676
16677 op0 = operands[0];
16678 op1 = operands[1];
16679
16680 if (GET_CODE (op1) == SYMBOL_REF)
16681 {
16682 rtx tmp;
16683
16684 model = SYMBOL_REF_TLS_MODEL (op1);
16685 if (model)
16686 {
16687 op1 = legitimize_tls_address (op1, model, true);
16688 op1 = force_operand (op1, op0);
16689 if (op1 == op0)
16690 return;
16691 op1 = convert_to_mode (mode, op1, 1);
16692 }
16693 else if ((tmp = legitimize_pe_coff_symbol (op1, false)) != NULL_RTX)
16694 op1 = tmp;
16695 }
16696 else if (GET_CODE (op1) == CONST
16697 && GET_CODE (XEXP (op1, 0)) == PLUS
16698 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
16699 {
16700 rtx addend = XEXP (XEXP (op1, 0), 1);
16701 rtx symbol = XEXP (XEXP (op1, 0), 0);
16702 rtx tmp;
16703
16704 model = SYMBOL_REF_TLS_MODEL (symbol);
16705 if (model)
16706 tmp = legitimize_tls_address (symbol, model, true);
16707 else
16708 tmp = legitimize_pe_coff_symbol (symbol, true);
16709
16710 if (tmp)
16711 {
16712 tmp = force_operand (tmp, NULL);
16713 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
16714 op0, 1, OPTAB_DIRECT);
16715 if (tmp == op0)
16716 return;
16717 op1 = convert_to_mode (mode, tmp, 1);
16718 }
16719 }
16720
16721 if ((flag_pic || MACHOPIC_INDIRECT)
16722 && symbolic_operand (op1, mode))
16723 {
16724 if (TARGET_MACHO && !TARGET_64BIT)
16725 {
16726 #if TARGET_MACHO
16727 /* dynamic-no-pic */
16728 if (MACHOPIC_INDIRECT)
16729 {
16730 rtx temp = ((reload_in_progress
16731 || ((op0 && REG_P (op0))
16732 && mode == Pmode))
16733 ? op0 : gen_reg_rtx (Pmode));
16734 op1 = machopic_indirect_data_reference (op1, temp);
16735 if (MACHOPIC_PURE)
16736 op1 = machopic_legitimize_pic_address (op1, mode,
16737 temp == op1 ? 0 : temp);
16738 }
16739 if (op0 != op1 && GET_CODE (op0) != MEM)
16740 {
16741 rtx insn = gen_rtx_SET (VOIDmode, op0, op1);
16742 emit_insn (insn);
16743 return;
16744 }
16745 if (GET_CODE (op0) == MEM)
16746 op1 = force_reg (Pmode, op1);
16747 else
16748 {
16749 rtx temp = op0;
16750 if (GET_CODE (temp) != REG)
16751 temp = gen_reg_rtx (Pmode);
16752 temp = legitimize_pic_address (op1, temp);
16753 if (temp == op0)
16754 return;
16755 op1 = temp;
16756 }
16757 /* dynamic-no-pic */
16758 #endif
16759 }
16760 else
16761 {
16762 if (MEM_P (op0))
16763 op1 = force_reg (mode, op1);
16764 else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
16765 {
16766 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
16767 op1 = legitimize_pic_address (op1, reg);
16768 if (op0 == op1)
16769 return;
16770 op1 = convert_to_mode (mode, op1, 1);
16771 }
16772 }
16773 }
16774 else
16775 {
16776 if (MEM_P (op0)
16777 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
16778 || !push_operand (op0, mode))
16779 && MEM_P (op1))
16780 op1 = force_reg (mode, op1);
16781
16782 if (push_operand (op0, mode)
16783 && ! general_no_elim_operand (op1, mode))
16784 op1 = copy_to_mode_reg (mode, op1);
16785
16786 /* Force large constants in 64bit compilation into a register
16787 to get them CSEed. */
16788 if (can_create_pseudo_p ()
16789 && (mode == DImode) && TARGET_64BIT
16790 && immediate_operand (op1, mode)
16791 && !x86_64_zext_immediate_operand (op1, VOIDmode)
16792 && !register_operand (op0, mode)
16793 && optimize)
16794 op1 = copy_to_mode_reg (mode, op1);
16795
16796 if (can_create_pseudo_p ()
16797 && FLOAT_MODE_P (mode)
16798 && GET_CODE (op1) == CONST_DOUBLE)
16799 {
16800 /* If we are loading a floating point constant to a register,
16801 force the value to memory now, since we'll get better code
16802 out the back end. */
16803
16804 op1 = validize_mem (force_const_mem (mode, op1));
16805 if (!register_operand (op0, mode))
16806 {
16807 rtx temp = gen_reg_rtx (mode);
16808 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
16809 emit_move_insn (op0, temp);
16810 return;
16811 }
16812 }
16813 }
16814
16815 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
16816 }
16817
16818 void
16819 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
16820 {
16821 rtx op0 = operands[0], op1 = operands[1];
16822 unsigned int align = GET_MODE_ALIGNMENT (mode);
16823
16824 if (push_operand (op0, VOIDmode))
16825 op0 = emit_move_resolve_push (mode, op0);
16826
16827 /* Force constants other than zero into memory. We do not know how
16828 the instructions used to build constants modify the upper 64 bits
16829 of the register; once we have that information we may be able
16830 to handle some of them more efficiently. */
16831 if (can_create_pseudo_p ()
16832 && register_operand (op0, mode)
16833 && (CONSTANT_P (op1)
16834 || (GET_CODE (op1) == SUBREG
16835 && CONSTANT_P (SUBREG_REG (op1))))
16836 && !standard_sse_constant_p (op1))
16837 op1 = validize_mem (force_const_mem (mode, op1));
16838
16839 /* We need to check memory alignment for SSE mode since attribute
16840 can make operands unaligned. */
16841 if (can_create_pseudo_p ()
16842 && SSE_REG_MODE_P (mode)
16843 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
16844 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
16845 {
16846 rtx tmp[2];
16847
16848 /* ix86_expand_vector_move_misalign() does not like constants ... */
16849 if (CONSTANT_P (op1)
16850 || (GET_CODE (op1) == SUBREG
16851 && CONSTANT_P (SUBREG_REG (op1))))
16852 op1 = validize_mem (force_const_mem (mode, op1));
16853
16854 /* ... nor both arguments in memory. */
16855 if (!register_operand (op0, mode)
16856 && !register_operand (op1, mode))
16857 op1 = force_reg (mode, op1);
16858
16859 tmp[0] = op0; tmp[1] = op1;
16860 ix86_expand_vector_move_misalign (mode, tmp);
16861 return;
16862 }
16863
16864 /* Make operand1 a register if it isn't already. */
16865 if (can_create_pseudo_p ()
16866 && !register_operand (op0, mode)
16867 && !register_operand (op1, mode))
16868 {
16869 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
16870 return;
16871 }
16872
16873 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
16874 }
16875
16876 /* Split 32-byte AVX unaligned load and store if needed. */
16877
16878 static void
16879 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
16880 {
16881 rtx m;
16882 rtx (*extract) (rtx, rtx, rtx);
16883 rtx (*load_unaligned) (rtx, rtx);
16884 rtx (*store_unaligned) (rtx, rtx);
16885 enum machine_mode mode;
16886
16887 switch (GET_MODE (op0))
16888 {
16889 default:
16890 gcc_unreachable ();
16891 case V32QImode:
16892 extract = gen_avx_vextractf128v32qi;
16893 load_unaligned = gen_avx_loaddquv32qi;
16894 store_unaligned = gen_avx_storedquv32qi;
16895 mode = V16QImode;
16896 break;
16897 case V8SFmode:
16898 extract = gen_avx_vextractf128v8sf;
16899 load_unaligned = gen_avx_loadups256;
16900 store_unaligned = gen_avx_storeups256;
16901 mode = V4SFmode;
16902 break;
16903 case V4DFmode:
16904 extract = gen_avx_vextractf128v4df;
16905 load_unaligned = gen_avx_loadupd256;
16906 store_unaligned = gen_avx_storeupd256;
16907 mode = V2DFmode;
16908 break;
16909 }
16910
16911 if (MEM_P (op1))
16912 {
16913 if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
16914 {
16915 rtx r = gen_reg_rtx (mode);
16916 m = adjust_address (op1, mode, 0);
16917 emit_move_insn (r, m);
16918 m = adjust_address (op1, mode, 16);
16919 r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
16920 emit_move_insn (op0, r);
16921 }
16922 /* Normal *mov<mode>_internal pattern will handle
16923 unaligned loads just fine if misaligned_operand
16924 is true, and without the UNSPEC it can be combined
16925 with arithmetic instructions. */
16926 else if (misaligned_operand (op1, GET_MODE (op1)))
16927 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
16928 else
16929 emit_insn (load_unaligned (op0, op1));
16930 }
16931 else if (MEM_P (op0))
16932 {
16933 if (TARGET_AVX256_SPLIT_UNALIGNED_STORE)
16934 {
16935 m = adjust_address (op0, mode, 0);
16936 emit_insn (extract (m, op1, const0_rtx));
16937 m = adjust_address (op0, mode, 16);
16938 emit_insn (extract (m, op1, const1_rtx));
16939 }
16940 else
16941 emit_insn (store_unaligned (op0, op1));
16942 }
16943 else
16944 gcc_unreachable ();
16945 }
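
/* Illustration: with TARGET_AVX256_SPLIT_UNALIGNED_LOAD a misaligned
32-byte load is expanded above as a 16-byte load of the low half plus a
VEC_CONCAT with the high-half memory, which normally matches a
vinsertf128 instruction; stores are split with two vextractf128.  */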
16946
16947 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
16948 straight to ix86_expand_vector_move. */
16949 /* Code generation for scalar reg-reg moves of single and double precision data:
16950 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
16951 movaps reg, reg
16952 else
16953 movss reg, reg
16954 if (x86_sse_partial_reg_dependency == true)
16955 movapd reg, reg
16956 else
16957 movsd reg, reg
16958
16959 Code generation for scalar loads of double precision data:
16960 if (x86_sse_split_regs == true)
16961 movlpd mem, reg (gas syntax)
16962 else
16963 movsd mem, reg
16964
16965 Code generation for unaligned packed loads of single precision data
16966 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
16967 if (x86_sse_unaligned_move_optimal)
16968 movups mem, reg
16969
16970 if (x86_sse_partial_reg_dependency == true)
16971 {
16972 xorps reg, reg
16973 movlps mem, reg
16974 movhps mem+8, reg
16975 }
16976 else
16977 {
16978 movlps mem, reg
16979 movhps mem+8, reg
16980 }
16981
16982 Code generation for unaligned packed loads of double precision data
16983 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
16984 if (x86_sse_unaligned_move_optimal)
16985 movupd mem, reg
16986
16987 if (x86_sse_split_regs == true)
16988 {
16989 movlpd mem, reg
16990 movhpd mem+8, reg
16991 }
16992 else
16993 {
16994 movsd mem, reg
16995 movhpd mem+8, reg
16996 }
16997 */
16998
16999 void
17000 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
17001 {
17002 rtx op0, op1, orig_op0 = NULL_RTX, m;
17003 rtx (*load_unaligned) (rtx, rtx);
17004 rtx (*store_unaligned) (rtx, rtx);
17005
17006 op0 = operands[0];
17007 op1 = operands[1];
17008
17009 if (GET_MODE_SIZE (mode) == 64)
17010 {
17011 switch (GET_MODE_CLASS (mode))
17012 {
17013 case MODE_VECTOR_INT:
17014 case MODE_INT:
17015 if (GET_MODE (op0) != V16SImode)
17016 {
17017 if (!MEM_P (op0))
17018 {
17019 orig_op0 = op0;
17020 op0 = gen_reg_rtx (V16SImode);
17021 }
17022 else
17023 op0 = gen_lowpart (V16SImode, op0);
17024 }
17025 op1 = gen_lowpart (V16SImode, op1);
17026 /* FALLTHRU */
17027
17028 case MODE_VECTOR_FLOAT:
17029 switch (GET_MODE (op0))
17030 {
17031 default:
17032 gcc_unreachable ();
17033 case V16SImode:
17034 load_unaligned = gen_avx512f_loaddquv16si;
17035 store_unaligned = gen_avx512f_storedquv16si;
17036 break;
17037 case V16SFmode:
17038 load_unaligned = gen_avx512f_loadups512;
17039 store_unaligned = gen_avx512f_storeups512;
17040 break;
17041 case V8DFmode:
17042 load_unaligned = gen_avx512f_loadupd512;
17043 store_unaligned = gen_avx512f_storeupd512;
17044 break;
17045 }
17046
17047 if (MEM_P (op1))
17048 emit_insn (load_unaligned (op0, op1));
17049 else if (MEM_P (op0))
17050 emit_insn (store_unaligned (op0, op1));
17051 else
17052 gcc_unreachable ();
17053 if (orig_op0)
17054 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17055 break;
17056
17057 default:
17058 gcc_unreachable ();
17059 }
17060
17061 return;
17062 }
17063
17064 if (TARGET_AVX
17065 && GET_MODE_SIZE (mode) == 32)
17066 {
17067 switch (GET_MODE_CLASS (mode))
17068 {
17069 case MODE_VECTOR_INT:
17070 case MODE_INT:
17071 if (GET_MODE (op0) != V32QImode)
17072 {
17073 if (!MEM_P (op0))
17074 {
17075 orig_op0 = op0;
17076 op0 = gen_reg_rtx (V32QImode);
17077 }
17078 else
17079 op0 = gen_lowpart (V32QImode, op0);
17080 }
17081 op1 = gen_lowpart (V32QImode, op1);
17082 /* FALLTHRU */
17083
17084 case MODE_VECTOR_FLOAT:
17085 ix86_avx256_split_vector_move_misalign (op0, op1);
17086 if (orig_op0)
17087 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17088 break;
17089
17090 default:
17091 gcc_unreachable ();
17092 }
17093
17094 return;
17095 }
17096
17097 if (MEM_P (op1))
17098 {
17099 /* Normal *mov<mode>_internal pattern will handle
17100 unaligned loads just fine if misaligned_operand
17101 is true, and without the UNSPEC it can be combined
17102 with arithmetic instructions. */
17103 if (TARGET_AVX
17104 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
17105 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
17106 && misaligned_operand (op1, GET_MODE (op1)))
17107 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17108 /* ??? If we have typed data, then it would appear that using
17109 movdqu is the only way to get unaligned data loaded with
17110 integer type. */
17111 else if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17112 {
17113 if (GET_MODE (op0) != V16QImode)
17114 {
17115 orig_op0 = op0;
17116 op0 = gen_reg_rtx (V16QImode);
17117 }
17118 op1 = gen_lowpart (V16QImode, op1);
17119 /* We will eventually emit movups based on insn attributes. */
17120 emit_insn (gen_sse2_loaddquv16qi (op0, op1));
17121 if (orig_op0)
17122 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17123 }
17124 else if (TARGET_SSE2 && mode == V2DFmode)
17125 {
17126 rtx zero;
17127
17128 if (TARGET_AVX
17129 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17130 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17131 || optimize_insn_for_size_p ())
17132 {
17133 /* We will eventually emit movups based on insn attributes. */
17134 emit_insn (gen_sse2_loadupd (op0, op1));
17135 return;
17136 }
17137
17138 /* When SSE registers are split into halves, we can avoid
17139 writing to the top half twice. */
17140 if (TARGET_SSE_SPLIT_REGS)
17141 {
17142 emit_clobber (op0);
17143 zero = op0;
17144 }
17145 else
17146 {
17147 /* ??? Not sure about the best option for the Intel chips.
17148 The following would seem to satisfy; the register is
17149 entirely cleared, breaking the dependency chain. We
17150 then store to the upper half, with a dependency depth
17151 of one. A rumor has it that Intel recommends two movsd
17152 followed by an unpacklpd, but this is unconfirmed. And
17153 given that the dependency depth of the unpacklpd would
17154 still be one, I'm not sure why this would be better. */
17155 zero = CONST0_RTX (V2DFmode);
17156 }
17157
17158 m = adjust_address (op1, DFmode, 0);
17159 emit_insn (gen_sse2_loadlpd (op0, zero, m));
17160 m = adjust_address (op1, DFmode, 8);
17161 emit_insn (gen_sse2_loadhpd (op0, op0, m));
17162 }
17163 else
17164 {
17165 rtx t;
17166
17167 if (TARGET_AVX
17168 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17169 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17170 || optimize_insn_for_size_p ())
17171 {
17172 if (GET_MODE (op0) != V4SFmode)
17173 {
17174 orig_op0 = op0;
17175 op0 = gen_reg_rtx (V4SFmode);
17176 }
17177 op1 = gen_lowpart (V4SFmode, op1);
17178 emit_insn (gen_sse_loadups (op0, op1));
17179 if (orig_op0)
17180 emit_move_insn (orig_op0,
17181 gen_lowpart (GET_MODE (orig_op0), op0));
17182 return;
17183 }
17184
17185 if (mode != V4SFmode)
17186 t = gen_reg_rtx (V4SFmode);
17187 else
17188 t = op0;
17189
17190 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
17191 emit_move_insn (t, CONST0_RTX (V4SFmode));
17192 else
17193 emit_clobber (t);
17194
17195 m = adjust_address (op1, V2SFmode, 0);
17196 emit_insn (gen_sse_loadlps (t, t, m));
17197 m = adjust_address (op1, V2SFmode, 8);
17198 emit_insn (gen_sse_loadhps (t, t, m));
17199 if (mode != V4SFmode)
17200 emit_move_insn (op0, gen_lowpart (mode, t));
17201 }
17202 }
17203 else if (MEM_P (op0))
17204 {
17205 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17206 {
17207 op0 = gen_lowpart (V16QImode, op0);
17208 op1 = gen_lowpart (V16QImode, op1);
17209 /* We will eventually emit movups based on insn attributes. */
17210 emit_insn (gen_sse2_storedquv16qi (op0, op1));
17211 }
17212 else if (TARGET_SSE2 && mode == V2DFmode)
17213 {
17214 if (TARGET_AVX
17215 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17216 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17217 || optimize_insn_for_size_p ())
17218 /* We will eventually emit movups based on insn attributes. */
17219 emit_insn (gen_sse2_storeupd (op0, op1));
17220 else
17221 {
17222 m = adjust_address (op0, DFmode, 0);
17223 emit_insn (gen_sse2_storelpd (m, op1));
17224 m = adjust_address (op0, DFmode, 8);
17225 emit_insn (gen_sse2_storehpd (m, op1));
17226 }
17227 }
17228 else
17229 {
17230 if (mode != V4SFmode)
17231 op1 = gen_lowpart (V4SFmode, op1);
17232
17233 if (TARGET_AVX
17234 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17235 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17236 || optimize_insn_for_size_p ())
17237 {
17238 op0 = gen_lowpart (V4SFmode, op0);
17239 emit_insn (gen_sse_storeups (op0, op1));
17240 }
17241 else
17242 {
17243 m = adjust_address (op0, V2SFmode, 0);
17244 emit_insn (gen_sse_storelps (m, op1));
17245 m = adjust_address (op0, V2SFmode, 8);
17246 emit_insn (gen_sse_storehps (m, op1));
17247 }
17248 }
17249 }
17250 else
17251 gcc_unreachable ();
17252 }
17253
17254 /* Helper function of ix86_fixup_binary_operands to canonicalize
17255 operand order. Returns true if the operands should be swapped. */
17256
17257 static bool
17258 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
17259 rtx operands[])
17260 {
17261 rtx dst = operands[0];
17262 rtx src1 = operands[1];
17263 rtx src2 = operands[2];
17264
17265 /* If the operation is not commutative, we can't do anything. */
17266 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
17267 return false;
17268
17269 /* Highest priority is that src1 should match dst. */
17270 if (rtx_equal_p (dst, src1))
17271 return false;
17272 if (rtx_equal_p (dst, src2))
17273 return true;
17274
17275 /* Next highest priority is that immediate constants come second. */
17276 if (immediate_operand (src2, mode))
17277 return false;
17278 if (immediate_operand (src1, mode))
17279 return true;
17280
17281 /* Lowest priority is that memory references should come second. */
17282 if (MEM_P (src2))
17283 return false;
17284 if (MEM_P (src1))
17285 return true;
17286
17287 return false;
17288 }
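
/* Illustration: for a commutative PLUS where dst is the same register as
src2 but not src1, the function above returns true, so swapping lets dst
match the first source operand as the two-address integer insns require.  */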
17289
17290
17291 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
17292 destination to use for the operation. If different from the true
17293 destination in operands[0], a copy operation will be required. */
17294
17295 rtx
17296 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
17297 rtx operands[])
17298 {
17299 rtx dst = operands[0];
17300 rtx src1 = operands[1];
17301 rtx src2 = operands[2];
17302
17303 /* Canonicalize operand order. */
17304 if (ix86_swap_binary_operands_p (code, mode, operands))
17305 {
17306 rtx temp;
17307
17308 /* It is invalid to swap operands of different modes. */
17309 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
17310
17311 temp = src1;
17312 src1 = src2;
17313 src2 = temp;
17314 }
17315
17316 /* Both source operands cannot be in memory. */
17317 if (MEM_P (src1) && MEM_P (src2))
17318 {
17319 /* Optimization: Only read from memory once. */
17320 if (rtx_equal_p (src1, src2))
17321 {
17322 src2 = force_reg (mode, src2);
17323 src1 = src2;
17324 }
17325 else if (rtx_equal_p (dst, src1))
17326 src2 = force_reg (mode, src2);
17327 else
17328 src1 = force_reg (mode, src1);
17329 }
17330
17331 /* If the destination is memory, and we do not have matching source
17332 operands, do things in registers. */
17333 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
17334 dst = gen_reg_rtx (mode);
17335
17336 /* Source 1 cannot be a constant. */
17337 if (CONSTANT_P (src1))
17338 src1 = force_reg (mode, src1);
17339
17340 /* Source 1 cannot be a non-matching memory. */
17341 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
17342 src1 = force_reg (mode, src1);
17343
17344 /* Improve address combine. */
17345 if (code == PLUS
17346 && GET_MODE_CLASS (mode) == MODE_INT
17347 && MEM_P (src2))
17348 src2 = force_reg (mode, src2);
17349
17350 operands[1] = src1;
17351 operands[2] = src2;
17352 return dst;
17353 }
17354
17355 /* Similarly, but assume that the destination has already been
17356 set up properly. */
17357
17358 void
17359 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
17360 enum machine_mode mode, rtx operands[])
17361 {
17362 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
17363 gcc_assert (dst == operands[0]);
17364 }
17365
17366 /* Attempt to expand a binary operator. Make the expansion closer to the
17367 actual machine than just general_operand, which would allow 3 separate
17368 memory references (one output, two input) in a single insn. */
17369
17370 void
17371 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
17372 rtx operands[])
17373 {
17374 rtx src1, src2, dst, op, clob;
17375
17376 dst = ix86_fixup_binary_operands (code, mode, operands);
17377 src1 = operands[1];
17378 src2 = operands[2];
17379
17380 /* Emit the instruction. */
17381
17382 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
17383 if (reload_in_progress)
17384 {
17385 /* Reload doesn't know about the flags register, and doesn't know that
17386 it doesn't want to clobber it. We can only do this with PLUS. */
17387 gcc_assert (code == PLUS);
17388 emit_insn (op);
17389 }
17390 else if (reload_completed
17391 && code == PLUS
17392 && !rtx_equal_p (dst, src1))
17393 {
17394 /* This is going to be an LEA; avoid splitting it later. */
17395 emit_insn (op);
17396 }
17397 else
17398 {
17399 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17400 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
17401 }
17402
17403 /* Fix up the destination if needed. */
17404 if (dst != operands[0])
17405 emit_move_insn (operands[0], dst);
17406 }
17407
17408 /* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with
17409 the given OPERANDS. */
17410
17411 void
17412 ix86_expand_vector_logical_operator (enum rtx_code code, enum machine_mode mode,
17413 rtx operands[])
17414 {
17415 rtx op1 = NULL_RTX, op2 = NULL_RTX;
17416 if (GET_CODE (operands[1]) == SUBREG)
17417 {
17418 op1 = operands[1];
17419 op2 = operands[2];
17420 }
17421 else if (GET_CODE (operands[2]) == SUBREG)
17422 {
17423 op1 = operands[2];
17424 op2 = operands[1];
17425 }
17426 /* Optimize (__m128i) d | (__m128i) e and similar code
17427 when d and e are float vectors into a float vector logical
17428 insn. In C/C++ without using intrinsics there is no other way
17429 to express a vector logical operation on float vectors than
17430 to cast them temporarily to integer vectors. */
17431 if (op1
17432 && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17433 && ((GET_CODE (op2) == SUBREG || GET_CODE (op2) == CONST_VECTOR))
17434 && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1))) == MODE_VECTOR_FLOAT
17435 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1))) == GET_MODE_SIZE (mode)
17436 && SUBREG_BYTE (op1) == 0
17437 && (GET_CODE (op2) == CONST_VECTOR
17438 || (GET_MODE (SUBREG_REG (op1)) == GET_MODE (SUBREG_REG (op2))
17439 && SUBREG_BYTE (op2) == 0))
17440 && can_create_pseudo_p ())
17441 {
17442 rtx dst;
17443 switch (GET_MODE (SUBREG_REG (op1)))
17444 {
17445 case V4SFmode:
17446 case V8SFmode:
17447 case V2DFmode:
17448 case V4DFmode:
17449 dst = gen_reg_rtx (GET_MODE (SUBREG_REG (op1)));
17450 if (GET_CODE (op2) == CONST_VECTOR)
17451 {
17452 op2 = gen_lowpart (GET_MODE (dst), op2);
17453 op2 = force_reg (GET_MODE (dst), op2);
17454 }
17455 else
17456 {
17457 op1 = operands[1];
17458 op2 = SUBREG_REG (operands[2]);
17459 if (!nonimmediate_operand (op2, GET_MODE (dst)))
17460 op2 = force_reg (GET_MODE (dst), op2);
17461 }
17462 op1 = SUBREG_REG (op1);
17463 if (!nonimmediate_operand (op1, GET_MODE (dst)))
17464 op1 = force_reg (GET_MODE (dst), op1);
17465 emit_insn (gen_rtx_SET (VOIDmode, dst,
17466 gen_rtx_fmt_ee (code, GET_MODE (dst),
17467 op1, op2)));
17468 emit_move_insn (operands[0], gen_lowpart (mode, dst));
17469 return;
17470 default:
17471 break;
17472 }
17473 }
17474 if (!nonimmediate_operand (operands[1], mode))
17475 operands[1] = force_reg (mode, operands[1]);
17476 if (!nonimmediate_operand (operands[2], mode))
17477 operands[2] = force_reg (mode, operands[2]);
17478 ix86_fixup_binary_operands_no_copy (code, mode, operands);
17479 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
17480 gen_rtx_fmt_ee (code, mode, operands[1],
17481 operands[2])));
17482 }
17483
17484 /* Return TRUE or FALSE depending on whether the binary operator meets the
17485 appropriate constraints. */
17486
17487 bool
17488 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
17489 rtx operands[3])
17490 {
17491 rtx dst = operands[0];
17492 rtx src1 = operands[1];
17493 rtx src2 = operands[2];
17494
17495 /* Both source operands cannot be in memory. */
17496 if (MEM_P (src1) && MEM_P (src2))
17497 return false;
17498
17499 /* Canonicalize operand order for commutative operators. */
17500 if (ix86_swap_binary_operands_p (code, mode, operands))
17501 {
17502 rtx temp = src1;
17503 src1 = src2;
17504 src2 = temp;
17505 }
17506
17507 /* If the destination is memory, we must have a matching source operand. */
17508 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
17509 return false;
17510
17511 /* Source 1 cannot be a constant. */
17512 if (CONSTANT_P (src1))
17513 return false;
17514
17515 /* Source 1 cannot be a non-matching memory. */
17516 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
17517 /* Support "andhi/andsi/anddi" as a zero-extending move. */
17518 return (code == AND
17519 && (mode == HImode
17520 || mode == SImode
17521 || (TARGET_64BIT && mode == DImode))
17522 && satisfies_constraint_L (src2));
17523
17524 return true;
17525 }
17526
17527 /* Attempt to expand a unary operator. Make the expansion closer to the
17528 actual machine than just general_operand, which would allow 2 separate
17529 memory references (one output, one input) in a single insn. */
17530
17531 void
17532 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
17533 rtx operands[])
17534 {
17535 int matching_memory;
17536 rtx src, dst, op, clob;
17537
17538 dst = operands[0];
17539 src = operands[1];
17540
17541 /* If the destination is memory, and we do not have matching source
17542 operands, do things in registers. */
17543 matching_memory = 0;
17544 if (MEM_P (dst))
17545 {
17546 if (rtx_equal_p (dst, src))
17547 matching_memory = 1;
17548 else
17549 dst = gen_reg_rtx (mode);
17550 }
17551
17552 /* When source operand is memory, destination must match. */
17553 if (MEM_P (src) && !matching_memory)
17554 src = force_reg (mode, src);
17555
17556 /* Emit the instruction. */
17557
17558 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
17559 if (reload_in_progress || code == NOT)
17560 {
17561 /* Reload doesn't know about the flags register, and doesn't know that
17562 it doesn't want to clobber it. */
17563 gcc_assert (code == NOT);
17564 emit_insn (op);
17565 }
17566 else
17567 {
17568 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17569 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
17570 }
17571
17572 /* Fix up the destination if needed. */
17573 if (dst != operands[0])
17574 emit_move_insn (operands[0], dst);
17575 }
17576
17577 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
17578 divisor are within the range [0-255]. */
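
/* Roughly (a sketch, AT&T syntax, register names illustrative), for SImode
   the emitted sequence looks like:

	movl	%dividend, %scratch
	orl	%divisor, %scratch
	testl	$-256, %scratch
	je	.Lqimode
	<full 32-bit divide>
	jmp	.Lend
   .Lqimode:
	<8-bit divide; quotient in %al, remainder in %ah>
   .Lend:
*/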
17579
17580 void
17581 ix86_split_idivmod (enum machine_mode mode, rtx operands[],
17582 bool signed_p)
17583 {
17584 rtx end_label, qimode_label;
17585 rtx insn, div, mod;
17586 rtx scratch, tmp0, tmp1, tmp2;
17587 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
17588 rtx (*gen_zero_extend) (rtx, rtx);
17589 rtx (*gen_test_ccno_1) (rtx, rtx);
17590
17591 switch (mode)
17592 {
17593 case SImode:
17594 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
17595 gen_test_ccno_1 = gen_testsi_ccno_1;
17596 gen_zero_extend = gen_zero_extendqisi2;
17597 break;
17598 case DImode:
17599 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
17600 gen_test_ccno_1 = gen_testdi_ccno_1;
17601 gen_zero_extend = gen_zero_extendqidi2;
17602 break;
17603 default:
17604 gcc_unreachable ();
17605 }
17606
17607 end_label = gen_label_rtx ();
17608 qimode_label = gen_label_rtx ();
17609
17610 scratch = gen_reg_rtx (mode);
17611
17612 /* Use 8bit unsigned divmod if dividend and divisor are within
17613 the range [0-255]. */
17614 emit_move_insn (scratch, operands[2]);
17615 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
17616 scratch, 1, OPTAB_DIRECT);
17617 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
17618 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
17619 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
17620 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
17621 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
17622 pc_rtx);
17623 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
17624 predict_jump (REG_BR_PROB_BASE * 50 / 100);
17625 JUMP_LABEL (insn) = qimode_label;
17626
17627 /* Generate original signed/unsigned divmod. */
17628 div = gen_divmod4_1 (operands[0], operands[1],
17629 operands[2], operands[3]);
17630 emit_insn (div);
17631
17632 /* Branch to the end. */
17633 emit_jump_insn (gen_jump (end_label));
17634 emit_barrier ();
17635
17636 /* Generate 8bit unsigned divide. */
17637 emit_label (qimode_label);
17638 /* Don't use operands[0] for result of 8bit divide since not all
17639 registers support QImode ZERO_EXTRACT. */
17640 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
17641 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
17642 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
17643 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
17644
17645 if (signed_p)
17646 {
17647 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
17648 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
17649 }
17650 else
17651 {
17652 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
17653 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
17654 }
17655
17656 /* Extract remainder from AH. */
17657 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
17658 if (REG_P (operands[1]))
17659 insn = emit_move_insn (operands[1], tmp1);
17660 else
17661 {
17662 /* Need a new scratch register since the old one has result
17663 of 8bit divide. */
17664 scratch = gen_reg_rtx (mode);
17665 emit_move_insn (scratch, tmp1);
17666 insn = emit_move_insn (operands[1], scratch);
17667 }
17668 set_unique_reg_note (insn, REG_EQUAL, mod);
17669
17670 /* Zero extend quotient from AL. */
17671 tmp1 = gen_lowpart (QImode, tmp0);
17672 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
17673 set_unique_reg_note (insn, REG_EQUAL, div);
17674
17675 emit_label (end_label);
17676 }
17677
17678 /* Whether it is OK to emit CFI directives when emitting asm code. */
17679
17680 bool
17681 ix86_emit_cfi ()
17682 {
17683 return dwarf2out_do_cfi_asm ();
17684 }
17685
17686 #define LEA_MAX_STALL (3)
17687 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
17688
17689 /* Increase given DISTANCE in half-cycles according to
17690 dependencies between PREV and NEXT instructions.
17691 Add 1 half-cycle if there is no dependency and
17692 go to the next cycle if there is some dependency. */
17693
17694 static unsigned int
17695 increase_distance (rtx prev, rtx next, unsigned int distance)
17696 {
17697 df_ref *use_rec;
17698 df_ref *def_rec;
17699
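/* distance + (distance & 1) rounds an odd (mid-cycle) distance up to the
start of the next full cycle before the extra cycle is added. */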
17700 if (!prev || !next)
17701 return distance + (distance & 1) + 2;
17702
17703 if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
17704 return distance + 1;
17705
17706 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
17707 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
17708 if (!DF_REF_IS_ARTIFICIAL (*def_rec)
17709 && DF_REF_REGNO (*use_rec) == DF_REF_REGNO (*def_rec))
17710 return distance + (distance & 1) + 2;
17711
17712 return distance + 1;
17713 }
17714
17715 /* Return true if instruction INSN defines register number REGNO1
17716 or REGNO2. */
17717
17718 static bool
17719 insn_defines_reg (unsigned int regno1, unsigned int regno2,
17720 rtx insn)
17721 {
17722 df_ref *def_rec;
17723
17724 for (def_rec = DF_INSN_DEFS (insn); *def_rec; def_rec++)
17725 if (DF_REF_REG_DEF_P (*def_rec)
17726 && !DF_REF_IS_ARTIFICIAL (*def_rec)
17727 && (regno1 == DF_REF_REGNO (*def_rec)
17728 || regno2 == DF_REF_REGNO (*def_rec)))
17729 {
17730 return true;
17731 }
17732
17733 return false;
17734 }
17735
17736 /* Return true if instruction INSN uses register number REGNO
17737 as part of an address expression. */
17738
17739 static bool
17740 insn_uses_reg_mem (unsigned int regno, rtx insn)
17741 {
17742 df_ref *use_rec;
17743
17744 for (use_rec = DF_INSN_USES (insn); *use_rec; use_rec++)
17745 if (DF_REF_REG_MEM_P (*use_rec) && regno == DF_REF_REGNO (*use_rec))
17746 return true;
17747
17748 return false;
17749 }
17750
17751 /* Search backward for non-agu definition of register number REGNO1
17752 or register number REGNO2 in basic block starting from instruction
17753 START up to head of basic block or instruction INSN.
17754
17755 Set *FOUND to true if a definition was found and to false
17756 otherwise.
17757
17758 The distance in half-cycles between START and the found instruction
17759 or the head of the BB is added to DISTANCE and returned. */
17760
17761 static int
17762 distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
17763 rtx insn, int distance,
17764 rtx start, bool *found)
17765 {
17766 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
17767 rtx prev = start;
17768 rtx next = NULL;
17769
17770 *found = false;
17771
17772 while (prev
17773 && prev != insn
17774 && distance < LEA_SEARCH_THRESHOLD)
17775 {
17776 if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
17777 {
17778 distance = increase_distance (prev, next, distance);
17779 if (insn_defines_reg (regno1, regno2, prev))
17780 {
17781 if (recog_memoized (prev) < 0
17782 || get_attr_type (prev) != TYPE_LEA)
17783 {
17784 *found = true;
17785 return distance;
17786 }
17787 }
17788
17789 next = prev;
17790 }
17791 if (prev == BB_HEAD (bb))
17792 break;
17793
17794 prev = PREV_INSN (prev);
17795 }
17796
17797 return distance;
17798 }
17799
17800 /* Search backward for non-agu definition of register number REGNO1
17801 or register number REGNO2 in INSN's basic block until
17802 1. Pass LEA_SEARCH_THRESHOLD instructions, or
17803 2. Reach a neighbouring BB's boundary, or
17804 3. Reach agu definition.
17805 Returns the distance between the non-agu definition point and INSN.
17806 If no definition point, returns -1. */
17807
17808 static int
17809 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
17810 rtx insn)
17811 {
17812 basic_block bb = BLOCK_FOR_INSN (insn);
17813 int distance = 0;
17814 bool found = false;
17815
17816 if (insn != BB_HEAD (bb))
17817 distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
17818 distance, PREV_INSN (insn),
17819 &found);
17820
17821 if (!found && distance < LEA_SEARCH_THRESHOLD)
17822 {
17823 edge e;
17824 edge_iterator ei;
17825 bool simple_loop = false;
17826
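/* A predecessor edge from the block to itself means INSN sits in a
single-block loop, so the backward search can continue from the end
of the same block. */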
17827 FOR_EACH_EDGE (e, ei, bb->preds)
17828 if (e->src == bb)
17829 {
17830 simple_loop = true;
17831 break;
17832 }
17833
17834 if (simple_loop)
17835 distance = distance_non_agu_define_in_bb (regno1, regno2,
17836 insn, distance,
17837 BB_END (bb), &found);
17838 else
17839 {
17840 int shortest_dist = -1;
17841 bool found_in_bb = false;
17842
17843 FOR_EACH_EDGE (e, ei, bb->preds)
17844 {
17845 int bb_dist
17846 = distance_non_agu_define_in_bb (regno1, regno2,
17847 insn, distance,
17848 BB_END (e->src),
17849 &found_in_bb);
17850 if (found_in_bb)
17851 {
17852 if (shortest_dist < 0)
17853 shortest_dist = bb_dist;
17854 else if (bb_dist > 0)
17855 shortest_dist = MIN (bb_dist, shortest_dist);
17856
17857 found = true;
17858 }
17859 }
17860
17861 distance = shortest_dist;
17862 }
17863 }
17864
17865 /* get_attr_type may modify recog data. We want to make sure
17866 that recog data is valid for instruction INSN, on which
17867 distance_non_agu_define is called. INSN is unchanged here. */
17868 extract_insn_cached (insn);
17869
17870 if (!found)
17871 return -1;
17872
17873 return distance >> 1;
17874 }
17875
17876 /* Return the distance in half-cycles between INSN and the next
17877 insn that uses register number REGNO in a memory address, added
17878 to DISTANCE. Return -1 if REGNO is set.
17879
17880 Set *FOUND to true if a register use was found and to false
17881 otherwise.
17882 Set *REDEFINED to true if a register redefinition was found
17883 and to false otherwise. */
17884
17885 static int
17886 distance_agu_use_in_bb (unsigned int regno,
17887 rtx insn, int distance, rtx start,
17888 bool *found, bool *redefined)
17889 {
17890 basic_block bb = NULL;
17891 rtx next = start;
17892 rtx prev = NULL;
17893
17894 *found = false;
17895 *redefined = false;
17896
17897 if (start != NULL_RTX)
17898 {
17899 bb = BLOCK_FOR_INSN (start);
17900 if (start != BB_HEAD (bb))
17901 /* If insn and start belong to the same bb, set prev to insn,
17902 so the call to increase_distance will increase the distance
17903 between insns by 1. */
17904 prev = insn;
17905 }
17906
17907 while (next
17908 && next != insn
17909 && distance < LEA_SEARCH_THRESHOLD)
17910 {
17911 if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
17912 {
17913 distance = increase_distance(prev, next, distance);
17914 if (insn_uses_reg_mem (regno, next))
17915 {
17916 /* Return DISTANCE if OP0 is used in memory
17917 address in NEXT. */
17918 *found = true;
17919 return distance;
17920 }
17921
17922 if (insn_defines_reg (regno, INVALID_REGNUM, next))
17923 {
17924 /* Return -1 if OP0 is set in NEXT. */
17925 *redefined = true;
17926 return -1;
17927 }
17928
17929 prev = next;
17930 }
17931
17932 if (next == BB_END (bb))
17933 break;
17934
17935 next = NEXT_INSN (next);
17936 }
17937
17938 return distance;
17939 }
17940
17941 /* Return the distance between INSN and the next insn that uses
17942 register number REGNO0 in a memory address. Return -1 if no such
17943 use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
17944
17945 static int
17946 distance_agu_use (unsigned int regno0, rtx insn)
17947 {
17948 basic_block bb = BLOCK_FOR_INSN (insn);
17949 int distance = 0;
17950 bool found = false;
17951 bool redefined = false;
17952
17953 if (insn != BB_END (bb))
17954 distance = distance_agu_use_in_bb (regno0, insn, distance,
17955 NEXT_INSN (insn),
17956 &found, &redefined);
17957
17958 if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
17959 {
17960 edge e;
17961 edge_iterator ei;
17962 bool simple_loop = false;
17963
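/* Likewise, a successor edge back to the same block means a
single-block loop; continue the forward search from the head of
the block. */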
17964 FOR_EACH_EDGE (e, ei, bb->succs)
17965 if (e->dest == bb)
17966 {
17967 simple_loop = true;
17968 break;
17969 }
17970
17971 if (simple_loop)
17972 distance = distance_agu_use_in_bb (regno0, insn,
17973 distance, BB_HEAD (bb),
17974 &found, &redefined);
17975 else
17976 {
17977 int shortest_dist = -1;
17978 bool found_in_bb = false;
17979 bool redefined_in_bb = false;
17980
17981 FOR_EACH_EDGE (e, ei, bb->succs)
17982 {
17983 int bb_dist
17984 = distance_agu_use_in_bb (regno0, insn,
17985 distance, BB_HEAD (e->dest),
17986 &found_in_bb, &redefined_in_bb);
17987 if (found_in_bb)
17988 {
17989 if (shortest_dist < 0)
17990 shortest_dist = bb_dist;
17991 else if (bb_dist > 0)
17992 shortest_dist = MIN (bb_dist, shortest_dist);
17993
17994 found = true;
17995 }
17996 }
17997
17998 distance = shortest_dist;
17999 }
18000 }
18001
18002 if (!found || redefined)
18003 return -1;
18004
18005 return distance >> 1;
18006 }
18007
18008 /* Define this macro to tune LEA priority vs ADD; it takes effect when
18009 there is a choice between LEA and ADD:
18010 Negative value: ADD is preferred over LEA
18011 Zero: Neutral
18012 Positive value: LEA is preferred over ADD. */
18013 #define IX86_LEA_PRIORITY 0
18014
18015 /* Return true if using lea INSN has a performance advantage over
18016 the equivalent sequence of instructions. That sequence has
18017 SPLIT_COST cycles higher latency than the lea itself. */
18018
18019 static bool
18020 ix86_lea_outperforms (rtx insn, unsigned int regno0, unsigned int regno1,
18021 unsigned int regno2, int split_cost, bool has_scale)
18022 {
18023 int dist_define, dist_use;
18024
18025 /* For Silvermont, if a 2-source or 3-source LEA is used for a
18026 non-destructive destination, or for the ability to use SCALE,
18027 the use of LEA is justified. */
18028 if (TARGET_SILVERMONT || TARGET_INTEL)
18029 {
18030 if (has_scale)
18031 return true;
18032 if (split_cost < 1)
18033 return false;
18034 if (regno0 == regno1 || regno0 == regno2)
18035 return false;
18036 return true;
18037 }
18038
18039 dist_define = distance_non_agu_define (regno1, regno2, insn);
18040 dist_use = distance_agu_use (regno0, insn);
18041
18042 if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
18043 {
18044 /* If there is no non-AGU operand definition, no AGU
18045 operand usage and the split cost is 0, then both the lea
18046 and non-lea variants have the same priority. Currently
18047 we prefer lea for 64-bit code and non-lea for 32-bit
18048 code. */
18049 if (dist_use < 0 && split_cost == 0)
18050 return TARGET_64BIT || IX86_LEA_PRIORITY;
18051 else
18052 return true;
18053 }
18054
18055 /* The longer the definition distance, the more preferable lea is.
18056 Here we adjust it to take into account the splitting cost and
18057 the lea priority. */
18058 dist_define += split_cost + IX86_LEA_PRIORITY;
18059
18060 /* If there is no use in a memory address then we just check
18061 that the split cost exceeds the AGU stall. */
18062 if (dist_use < 0)
18063 return dist_define > LEA_MAX_STALL;
18064
18065 /* If this insn has both a backward non-agu dependence and a forward
18066 agu dependence, the one with the shorter distance takes effect. */
18067 return dist_define >= dist_use;
18068 }
18069
18070 /* Return true if it is legal to clobber flags by INSN and
18071 false otherwise. */
18072
18073 static bool
18074 ix86_ok_to_clobber_flags (rtx insn)
18075 {
18076 basic_block bb = BLOCK_FOR_INSN (insn);
18077 df_ref *use;
18078 bitmap live;
18079
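/* Scan forward to the end of the basic block: a use of the flags
register before any new definition means the flags cannot be
clobbered here, while a fresh definition makes the old value dead. */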
18080 while (insn)
18081 {
18082 if (NONDEBUG_INSN_P (insn))
18083 {
18084 for (use = DF_INSN_USES (insn); *use; use++)
18085 if (DF_REF_REG_USE_P (*use) && DF_REF_REGNO (*use) == FLAGS_REG)
18086 return false;
18087
18088 if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
18089 return true;
18090 }
18091
18092 if (insn == BB_END (bb))
18093 break;
18094
18095 insn = NEXT_INSN (insn);
18096 }
18097
18098 live = df_get_live_out(bb);
18099 return !REGNO_REG_SET_P (live, FLAGS_REG);
18100 }
18101
18102 /* Return true if we need to split op0 = op1 + op2 into a sequence of
18103 move and add to avoid AGU stalls. */
18104
18105 bool
18106 ix86_avoid_lea_for_add (rtx insn, rtx operands[])
18107 {
18108 unsigned int regno0, regno1, regno2;
18109
18110 /* Check if we need to optimize. */
18111 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18112 return false;
18113
18114 /* Check that it is correct to split here. */
18115 if (!ix86_ok_to_clobber_flags(insn))
18116 return false;
18117
18118 regno0 = true_regnum (operands[0]);
18119 regno1 = true_regnum (operands[1]);
18120 regno2 = true_regnum (operands[2]);
18121
18122 /* We only need to split adds with a non-destructive
18123 destination operand. */
18124 if (regno0 == regno1 || regno0 == regno2)
18125 return false;
18126 else
18127 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
18128 }
18129
18130 /* Return true if we should emit lea instruction instead of mov
18131 instruction. */
18132
18133 bool
18134 ix86_use_lea_for_mov (rtx insn, rtx operands[])
18135 {
18136 unsigned int regno0, regno1;
18137
18138 /* Check if we need to optimize. */
18139 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18140 return false;
18141
18142 /* Use lea for reg to reg moves only. */
18143 if (!REG_P (operands[0]) || !REG_P (operands[1]))
18144 return false;
18145
18146 regno0 = true_regnum (operands[0]);
18147 regno1 = true_regnum (operands[1]);
18148
18149 return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
18150 }
18151
18152 /* Return true if we need to split lea into a sequence of
18153 instructions to avoid AGU stalls. */
18154
18155 bool
18156 ix86_avoid_lea_for_addr (rtx insn, rtx operands[])
18157 {
18158 unsigned int regno0, regno1, regno2;
18159 int split_cost;
18160 struct ix86_address parts;
18161 int ok;
18162
18163 /* Check if we need to optimize. */
18164 if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
18165 return false;
18166
18167 /* The "at least two components" test below might not catch simple
18168 move or zero extension insns if parts.base is non-NULL and parts.disp
18169 is const0_rtx as the only components in the address, e.g. if the
18170 register is %rbp or %r13. As this test is much cheaper and moves or
18171 zero extensions are the common case, do this check first. */
18172 if (REG_P (operands[1])
18173 || (SImode_address_operand (operands[1], VOIDmode)
18174 && REG_P (XEXP (operands[1], 0))))
18175 return false;
18176
18177 /* Check if it is OK to split here. */
18178 if (!ix86_ok_to_clobber_flags (insn))
18179 return false;
18180
18181 ok = ix86_decompose_address (operands[1], &parts);
18182 gcc_assert (ok);
18183
18184 /* There should be at least two components in the address. */
18185 if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
18186 + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
18187 return false;
18188
18189 /* We should not split into add if a non-legitimate PIC
18190 operand is used as the displacement. */
18191 if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
18192 return false;
18193
18194 regno0 = true_regnum (operands[0]) ;
18195 regno1 = INVALID_REGNUM;
18196 regno2 = INVALID_REGNUM;
18197
18198 if (parts.base)
18199 regno1 = true_regnum (parts.base);
18200 if (parts.index)
18201 regno2 = true_regnum (parts.index);
18202
18203 split_cost = 0;
18204
18205 /* Compute how many cycles we will add to the execution time
18206 if we split the lea into a sequence of instructions. */
18207 if (parts.base || parts.index)
18208 {
18209 /* Have to use a mov instruction if the non-destructive
18210 destination form is used. */
18211 if (regno1 != regno0 && regno2 != regno0)
18212 split_cost += 1;
18213
18214 /* Have to add index to base if both exist. */
18215 if (parts.base && parts.index)
18216 split_cost += 1;
18217
18218 /* Have to use shift and adds if scale is 2 or greater. */
18219 if (parts.scale > 1)
18220 {
18221 if (regno0 != regno1)
18222 split_cost += 1;
18223 else if (regno2 == regno0)
18224 split_cost += 4;
18225 else
18226 split_cost += parts.scale;
18227 }
18228
18229 /* Have to use an add instruction with an immediate if
18230 disp is nonzero. */
18231 if (parts.disp && parts.disp != const0_rtx)
18232 split_cost += 1;
18233
18234 /* Subtract the price of lea. */
18235 split_cost -= 1;
18236 }
18237
18238 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
18239 parts.scale > 1);
18240 }
18241
18242 /* Emit x86 binary operator CODE in mode MODE, where the first operand
18243 matches the destination. The emitted RTX includes a clobber of FLAGS_REG. */
18244
18245 static void
18246 ix86_emit_binop (enum rtx_code code, enum machine_mode mode,
18247 rtx dst, rtx src)
18248 {
18249 rtx op, clob;
18250
18251 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, dst, src));
18252 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18253
18254 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
18255 }
18256
18257 /* Return true if the definition of regno1 is nearest to the insn. */
18258
18259 static bool
18260 find_nearest_reg_def (rtx insn, int regno1, int regno2)
18261 {
18262 rtx prev = insn;
18263 rtx start = BB_HEAD (BLOCK_FOR_INSN (insn));
18264
18265 if (insn == start)
18266 return false;
18267 while (prev && prev != start)
18268 {
18269 if (!INSN_P (prev) || !NONDEBUG_INSN_P (prev))
18270 {
18271 prev = PREV_INSN (prev);
18272 continue;
18273 }
18274 if (insn_defines_reg (regno1, INVALID_REGNUM, prev))
18275 return true;
18276 else if (insn_defines_reg (regno2, INVALID_REGNUM, prev))
18277 return false;
18278 prev = PREV_INSN (prev);
18279 }
18280
18281 /* None of the regs is defined in the bb. */
18282 return false;
18283 }
18284
18285 /* Split lea instructions into a sequence of instructions
18286 which are executed on the ALU to avoid AGU stalls.
18287 It is assumed that it is allowed to clobber the flags register
18288 at the lea position. */
18289
18290 void
18291 ix86_split_lea_for_addr (rtx insn, rtx operands[], enum machine_mode mode)
18292 {
18293 unsigned int regno0, regno1, regno2;
18294 struct ix86_address parts;
18295 rtx target, tmp;
18296 int ok, adds;
18297
18298 ok = ix86_decompose_address (operands[1], &parts);
18299 gcc_assert (ok);
18300
18301 target = gen_lowpart (mode, operands[0]);
18302
18303 regno0 = true_regnum (target);
18304 regno1 = INVALID_REGNUM;
18305 regno2 = INVALID_REGNUM;
18306
18307 if (parts.base)
18308 {
18309 parts.base = gen_lowpart (mode, parts.base);
18310 regno1 = true_regnum (parts.base);
18311 }
18312
18313 if (parts.index)
18314 {
18315 parts.index = gen_lowpart (mode, parts.index);
18316 regno2 = true_regnum (parts.index);
18317 }
18318
18319 if (parts.disp)
18320 parts.disp = gen_lowpart (mode, parts.disp);
18321
18322 if (parts.scale > 1)
18323 {
18324 /* Case r1 = r1 + ... */
18325 if (regno1 == regno0)
18326 {
18327 /* If we have the case r1 = r1 + C * r2 then we
18328 would have to use multiplication, which is very
18329 expensive. Assume the cost model is wrong if we
18330 get such a case here. */
18331 gcc_assert (regno2 != regno0);
18332
18333 for (adds = parts.scale; adds > 0; adds--)
18334 ix86_emit_binop (PLUS, mode, target, parts.index);
18335 }
18336 else
18337 {
18338 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
18339 if (regno0 != regno2)
18340 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
18341
18342 /* Use shift for scaling. */
18343 ix86_emit_binop (ASHIFT, mode, target,
18344 GEN_INT (exact_log2 (parts.scale)));
18345
18346 if (parts.base)
18347 ix86_emit_binop (PLUS, mode, target, parts.base);
18348
18349 if (parts.disp && parts.disp != const0_rtx)
18350 ix86_emit_binop (PLUS, mode, target, parts.disp);
18351 }
18352 }
18353 else if (!parts.base && !parts.index)
18354 {
18355 gcc_assert(parts.disp);
18356 emit_insn (gen_rtx_SET (VOIDmode, target, parts.disp));
18357 }
18358 else
18359 {
18360 if (!parts.base)
18361 {
18362 if (regno0 != regno2)
18363 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
18364 }
18365 else if (!parts.index)
18366 {
18367 if (regno0 != regno1)
18368 emit_insn (gen_rtx_SET (VOIDmode, target, parts.base));
18369 }
18370 else
18371 {
18372 if (regno0 == regno1)
18373 tmp = parts.index;
18374 else if (regno0 == regno2)
18375 tmp = parts.base;
18376 else
18377 {
18378 rtx tmp1;
18379
18380 /* Find better operand for SET instruction, depending
18381 on which definition is farther from the insn. */
18382 if (find_nearest_reg_def (insn, regno1, regno2))
18383 tmp = parts.index, tmp1 = parts.base;
18384 else
18385 tmp = parts.base, tmp1 = parts.index;
18386
18387 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18388
18389 if (parts.disp && parts.disp != const0_rtx)
18390 ix86_emit_binop (PLUS, mode, target, parts.disp);
18391
18392 ix86_emit_binop (PLUS, mode, target, tmp1);
18393 return;
18394 }
18395
18396 ix86_emit_binop (PLUS, mode, target, tmp);
18397 }
18398
18399 if (parts.disp && parts.disp != const0_rtx)
18400 ix86_emit_binop (PLUS, mode, target, parts.disp);
18401 }
18402 }
18403
18404 /* Return true if it is ok to optimize an ADD operation to an LEA
18405 operation to avoid flag register consumption. For most processors,
18406 ADD is faster than LEA. For processors like BONNELL, if the
18407 destination register of the LEA holds an actual address which will be
18408 used soon, LEA is better; otherwise ADD is better. */
18409
18410 bool
18411 ix86_lea_for_add_ok (rtx insn, rtx operands[])
18412 {
18413 unsigned int regno0 = true_regnum (operands[0]);
18414 unsigned int regno1 = true_regnum (operands[1]);
18415 unsigned int regno2 = true_regnum (operands[2]);
18416
18417 /* If a = b + c, (a!=b && a!=c), must use lea form. */
18418 if (regno0 != regno1 && regno0 != regno2)
18419 return true;
18420
18421 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18422 return false;
18423
18424 return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
18425 }
18426
18427 /* Return true if destination reg of SET_BODY is shift count of
18428 USE_BODY. */
18429
18430 static bool
18431 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
18432 {
18433 rtx set_dest;
18434 rtx shift_rtx;
18435 int i;
18436
18437 /* Retrieve destination of SET_BODY. */
18438 switch (GET_CODE (set_body))
18439 {
18440 case SET:
18441 set_dest = SET_DEST (set_body);
18442 if (!set_dest || !REG_P (set_dest))
18443 return false;
18444 break;
18445 case PARALLEL:
18446 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
18447 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
18448 use_body))
18449 return true;
18450 default:
18451 return false;
18452 break;
18453 }
18454
18455 /* Retrieve shift count of USE_BODY. */
18456 switch (GET_CODE (use_body))
18457 {
18458 case SET:
18459 shift_rtx = XEXP (use_body, 1);
18460 break;
18461 case PARALLEL:
18462 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
18463 if (ix86_dep_by_shift_count_body (set_body,
18464 XVECEXP (use_body, 0, i)))
18465 return true;
18466 default:
18467 return false;
18468 break;
18469 }
18470
18471 if (shift_rtx
18472 && (GET_CODE (shift_rtx) == ASHIFT
18473 || GET_CODE (shift_rtx) == LSHIFTRT
18474 || GET_CODE (shift_rtx) == ASHIFTRT
18475 || GET_CODE (shift_rtx) == ROTATE
18476 || GET_CODE (shift_rtx) == ROTATERT))
18477 {
18478 rtx shift_count = XEXP (shift_rtx, 1);
18479
18480 /* Return true if shift count is dest of SET_BODY. */
18481 if (REG_P (shift_count))
18482 {
18483 /* Add check since it can be invoked before register
18484 allocation in pre-reload schedule. */
18485 if (reload_completed
18486 && true_regnum (set_dest) == true_regnum (shift_count))
18487 return true;
18488 else if (REGNO(set_dest) == REGNO(shift_count))
18489 return true;
18490 }
18491 }
18492
18493 return false;
18494 }
18495
18496 /* Return true if destination reg of SET_INSN is shift count of
18497 USE_INSN. */
18498
18499 bool
18500 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
18501 {
18502 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
18503 PATTERN (use_insn));
18504 }
18505
18506 /* Return TRUE or FALSE depending on whether the unary operator meets the
18507 appropriate constraints. */
18508
18509 bool
18510 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
18511 enum machine_mode mode ATTRIBUTE_UNUSED,
18512 rtx operands[2])
18513 {
18514 /* If one of operands is memory, source and destination must match. */
18515 if ((MEM_P (operands[0])
18516 || MEM_P (operands[1]))
18517 && ! rtx_equal_p (operands[0], operands[1]))
18518 return false;
18519 return true;
18520 }
18521
18522 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
18523 are ok, keeping in mind the possible movddup alternative. */
18524
18525 bool
18526 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
18527 {
18528 if (MEM_P (operands[0]))
18529 return rtx_equal_p (operands[0], operands[1 + high]);
18530 if (MEM_P (operands[1]) && MEM_P (operands[2]))
18531 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
18532 return true;
18533 }
18534
18535 /* Post-reload splitter for converting an SF or DFmode value in an
18536 SSE register into an unsigned SImode. */
18537
18538 void
18539 ix86_split_convert_uns_si_sse (rtx operands[])
18540 {
18541 enum machine_mode vecmode;
18542 rtx value, large, zero_or_two31, input, two31, x;
18543
18544 large = operands[1];
18545 zero_or_two31 = operands[2];
18546 input = operands[3];
18547 two31 = operands[4];
18548 vecmode = GET_MODE (large);
18549 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
18550
18551 /* Load up the value into the low element. We must ensure that the other
18552 elements are valid floats -- zero is the easiest such value. */
18553 if (MEM_P (input))
18554 {
18555 if (vecmode == V4SFmode)
18556 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
18557 else
18558 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
18559 }
18560 else
18561 {
18562 input = gen_rtx_REG (vecmode, REGNO (input));
18563 emit_move_insn (value, CONST0_RTX (vecmode));
18564 if (vecmode == V4SFmode)
18565 emit_insn (gen_sse_movss (value, value, input));
18566 else
18567 emit_insn (gen_sse2_movsd (value, value, input));
18568 }
18569
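/* Values >= 2**31 have 2**31 subtracted before the signed conversion
below; the shifted mask is XORed in afterwards to restore the high
bit. Smaller values convert directly. */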
18570 emit_move_insn (large, two31);
18571 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
18572
18573 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
18574 emit_insn (gen_rtx_SET (VOIDmode, large, x));
18575
18576 x = gen_rtx_AND (vecmode, zero_or_two31, large);
18577 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
18578
18579 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
18580 emit_insn (gen_rtx_SET (VOIDmode, value, x));
18581
18582 large = gen_rtx_REG (V4SImode, REGNO (large));
18583 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
18584
18585 x = gen_rtx_REG (V4SImode, REGNO (value));
18586 if (vecmode == V4SFmode)
18587 emit_insn (gen_fix_truncv4sfv4si2 (x, value));
18588 else
18589 emit_insn (gen_sse2_cvttpd2dq (x, value));
18590 value = x;
18591
18592 emit_insn (gen_xorv4si3 (value, value, large));
18593 }
18594
18595 /* Convert an unsigned DImode value into a DFmode, using only SSE.
18596 Expects the 64-bit DImode to be supplied in a pair of integral
18597 registers. Requires SSE2; will use SSE3 if available. For x86_32,
18598 -mfpmath=sse, !optimize_size only. */
18599
18600 void
18601 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
18602 {
18603 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
18604 rtx int_xmm, fp_xmm;
18605 rtx biases, exponents;
18606 rtx x;
18607
18608 int_xmm = gen_reg_rtx (V4SImode);
18609 if (TARGET_INTER_UNIT_MOVES_TO_VEC)
18610 emit_insn (gen_movdi_to_sse (int_xmm, input));
18611 else if (TARGET_SSE_SPLIT_REGS)
18612 {
18613 emit_clobber (int_xmm);
18614 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
18615 }
18616 else
18617 {
18618 x = gen_reg_rtx (V2DImode);
18619 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
18620 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
18621 }
18622
18623 x = gen_rtx_CONST_VECTOR (V4SImode,
18624 gen_rtvec (4, GEN_INT (0x43300000UL),
18625 GEN_INT (0x45300000UL),
18626 const0_rtx, const0_rtx));
18627 exponents = validize_mem (force_const_mem (V4SImode, x));
18628
18629 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
18630 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
18631
18632 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
18633 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
18634 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
18635 (0x1.0p84 + double(fp_value_hi_xmm)).
18636 Note these exponents differ by 32. */
18637
18638 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
18639
18640 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
18641 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
18642 real_ldexp (&bias_lo_rvt, &dconst1, 52);
18643 real_ldexp (&bias_hi_rvt, &dconst1, 84);
18644 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
18645 x = const_double_from_real_value (bias_hi_rvt, DFmode);
18646 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
18647 biases = validize_mem (force_const_mem (V2DFmode, biases));
18648 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
18649
18650 /* Add the upper and lower DFmode values together. */
18651 if (TARGET_SSE3)
18652 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
18653 else
18654 {
18655 x = copy_to_mode_reg (V2DFmode, fp_xmm);
18656 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
18657 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
18658 }
18659
18660 ix86_expand_vector_extract (false, target, fp_xmm, 0);
18661 }
18662
18663 /* Not used, but eases macroization of patterns. */
18664 void
18665 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
18666 rtx input ATTRIBUTE_UNUSED)
18667 {
18668 gcc_unreachable ();
18669 }
18670
18671 /* Convert an unsigned SImode value into a DFmode. Only currently used
18672 for SSE, but applicable anywhere. */
18673
18674 void
18675 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
18676 {
18677 REAL_VALUE_TYPE TWO31r;
18678 rtx x, fp;
18679
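/* Add INT_MIN to wrap the unsigned value into the signed SImode range
(subtracting 2**31 mod 2**32), convert to DFmode, then add 2**31
back as a double. */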
18680 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
18681 NULL, 1, OPTAB_DIRECT);
18682
18683 fp = gen_reg_rtx (DFmode);
18684 emit_insn (gen_floatsidf2 (fp, x));
18685
18686 real_ldexp (&TWO31r, &dconst1, 31);
18687 x = const_double_from_real_value (TWO31r, DFmode);
18688
18689 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
18690 if (x != target)
18691 emit_move_insn (target, x);
18692 }
18693
18694 /* Convert a signed DImode value into a DFmode. Only used for SSE in
18695 32-bit mode; otherwise we have a direct convert instruction. */
18696
18697 void
18698 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
18699 {
18700 REAL_VALUE_TYPE TWO32r;
18701 rtx fp_lo, fp_hi, x;
18702
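/* Convert the two SImode halves separately and combine them as
fp_hi * 2**32 + fp_lo; the low half is converted as unsigned. */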
18703 fp_lo = gen_reg_rtx (DFmode);
18704 fp_hi = gen_reg_rtx (DFmode);
18705
18706 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
18707
18708 real_ldexp (&TWO32r, &dconst1, 32);
18709 x = const_double_from_real_value (TWO32r, DFmode);
18710 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
18711
18712 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
18713
18714 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
18715 0, OPTAB_DIRECT);
18716 if (x != target)
18717 emit_move_insn (target, x);
18718 }
18719
18720 /* Convert an unsigned SImode value into a SFmode, using only SSE.
18721 For x86_32, -mfpmath=sse, !optimize_size only. */
18722 void
18723 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
18724 {
18725 REAL_VALUE_TYPE ONE16r;
18726 rtx fp_hi, fp_lo, int_hi, int_lo, x;
18727
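/* Split the input into two 16-bit halves, convert each exactly,
and recombine as fp_hi * 2**16 + fp_lo. */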
18728 real_ldexp (&ONE16r, &dconst1, 16);
18729 x = const_double_from_real_value (ONE16r, SFmode);
18730 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
18731 NULL, 0, OPTAB_DIRECT);
18732 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
18733 NULL, 0, OPTAB_DIRECT);
18734 fp_hi = gen_reg_rtx (SFmode);
18735 fp_lo = gen_reg_rtx (SFmode);
18736 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
18737 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
18738 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
18739 0, OPTAB_DIRECT);
18740 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
18741 0, OPTAB_DIRECT);
18742 if (!rtx_equal_p (target, fp_hi))
18743 emit_move_insn (target, fp_hi);
18744 }
18745
18746 /* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
18747 a vector of unsigned ints VAL to vector of floats TARGET. */
18748
18749 void
18750 ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
18751 {
18752 rtx tmp[8];
18753 REAL_VALUE_TYPE TWO16r;
18754 enum machine_mode intmode = GET_MODE (val);
18755 enum machine_mode fltmode = GET_MODE (target);
18756 rtx (*cvt) (rtx, rtx);
18757
18758 if (intmode == V4SImode)
18759 cvt = gen_floatv4siv4sf2;
18760 else
18761 cvt = gen_floatv8siv8sf2;
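/* As in the scalar case above, convert the low and high 16-bit halves
separately and recombine as high * 2**16 + low. */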
18762 tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
18763 tmp[0] = force_reg (intmode, tmp[0]);
18764 tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
18765 OPTAB_DIRECT);
18766 tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
18767 NULL_RTX, 1, OPTAB_DIRECT);
18768 tmp[3] = gen_reg_rtx (fltmode);
18769 emit_insn (cvt (tmp[3], tmp[1]));
18770 tmp[4] = gen_reg_rtx (fltmode);
18771 emit_insn (cvt (tmp[4], tmp[2]));
18772 real_ldexp (&TWO16r, &dconst1, 16);
18773 tmp[5] = const_double_from_real_value (TWO16r, SFmode);
18774 tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
18775 tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
18776 OPTAB_DIRECT);
18777 tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
18778 OPTAB_DIRECT);
18779 if (tmp[7] != target)
18780 emit_move_insn (target, tmp[7]);
18781 }
18782
18783 /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
18784 pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
18785 This is done by doing just signed conversion if < 0x1p31, and otherwise by
18786 subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */
18787
18788 rtx
18789 ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
18790 {
18791 REAL_VALUE_TYPE TWO31r;
18792 rtx two31r, tmp[4];
18793 enum machine_mode mode = GET_MODE (val);
18794 enum machine_mode scalarmode = GET_MODE_INNER (mode);
18795 enum machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
18796 rtx (*cmp) (rtx, rtx, rtx, rtx);
18797 int i;
18798
18799 for (i = 0; i < 3; i++)
18800 tmp[i] = gen_reg_rtx (mode);
18801 real_ldexp (&TWO31r, &dconst1, 31);
18802 two31r = const_double_from_real_value (TWO31r, scalarmode);
18803 two31r = ix86_build_const_vector (mode, 1, two31r);
18804 two31r = force_reg (mode, two31r);
18805 switch (mode)
18806 {
18807 case V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
18808 case V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
18809 case V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
18810 case V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
18811 default: gcc_unreachable ();
18812 }
18813 tmp[3] = gen_rtx_LE (mode, two31r, val);
18814 emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
18815 tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
18816 0, OPTAB_DIRECT);
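/* tmp[0] is an all-ones mask where VAL >= 2**31; shifting it left by 31
(or ANDing it with 2**31) yields the 0x80000000 pattern the caller
XORs into the truncated result. */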
18817 if (intmode == V4SImode || TARGET_AVX2)
18818 *xorp = expand_simple_binop (intmode, ASHIFT,
18819 gen_lowpart (intmode, tmp[0]),
18820 GEN_INT (31), NULL_RTX, 0,
18821 OPTAB_DIRECT);
18822 else
18823 {
18824 rtx two31 = GEN_INT ((unsigned HOST_WIDE_INT) 1 << 31);
18825 two31 = ix86_build_const_vector (intmode, 1, two31);
18826 *xorp = expand_simple_binop (intmode, AND,
18827 gen_lowpart (intmode, tmp[0]),
18828 two31, NULL_RTX, 0,
18829 OPTAB_DIRECT);
18830 }
18831 return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
18832 0, OPTAB_DIRECT);
18833 }
18834
18835 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
18836 then replicate the value for all elements of the vector
18837 register. */
18838
18839 rtx
18840 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
18841 {
18842 int i, n_elt;
18843 rtvec v;
18844 enum machine_mode scalar_mode;
18845
18846 switch (mode)
18847 {
18848 case V64QImode:
18849 case V32QImode:
18850 case V16QImode:
18851 case V32HImode:
18852 case V16HImode:
18853 case V8HImode:
18854 case V16SImode:
18855 case V8SImode:
18856 case V4SImode:
18857 case V8DImode:
18858 case V4DImode:
18859 case V2DImode:
18860 gcc_assert (vect);
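/* FALLTHRU */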
18861 case V16SFmode:
18862 case V8SFmode:
18863 case V4SFmode:
18864 case V8DFmode:
18865 case V4DFmode:
18866 case V2DFmode:
18867 n_elt = GET_MODE_NUNITS (mode);
18868 v = rtvec_alloc (n_elt);
18869 scalar_mode = GET_MODE_INNER (mode);
18870
18871 RTVEC_ELT (v, 0) = value;
18872
18873 for (i = 1; i < n_elt; ++i)
18874 RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
18875
18876 return gen_rtx_CONST_VECTOR (mode, v);
18877
18878 default:
18879 gcc_unreachable ();
18880 }
18881 }
18882
18883 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
18884 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
18885 for an SSE register. If VECT is true, then replicate the mask for
18886 all elements of the vector register. If INVERT is true, then create
18887 a mask excluding the sign bit. */
18888
18889 rtx
18890 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
18891 {
18892 enum machine_mode vec_mode, imode;
18893 HOST_WIDE_INT hi, lo;
18894 int shift = 63;
18895 rtx v;
18896 rtx mask;
18897
18898 /* Find the sign bit, sign extended to 2*HWI. */
18899 switch (mode)
18900 {
18901 case V16SImode:
18902 case V16SFmode:
18903 case V8SImode:
18904 case V4SImode:
18905 case V8SFmode:
18906 case V4SFmode:
18907 vec_mode = mode;
18908 mode = GET_MODE_INNER (mode);
18909 imode = SImode;
18910 lo = 0x80000000, hi = lo < 0;
18911 break;
18912
18913 case V8DImode:
18914 case V4DImode:
18915 case V2DImode:
18916 case V8DFmode:
18917 case V4DFmode:
18918 case V2DFmode:
18919 vec_mode = mode;
18920 mode = GET_MODE_INNER (mode);
18921 imode = DImode;
18922 if (HOST_BITS_PER_WIDE_INT >= 64)
18923 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
18924 else
18925 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
18926 break;
18927
18928 case TImode:
18929 case TFmode:
18930 vec_mode = VOIDmode;
18931 if (HOST_BITS_PER_WIDE_INT >= 64)
18932 {
18933 imode = TImode;
18934 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
18935 }
18936 else
18937 {
18938 rtvec vec;
18939
18940 imode = DImode;
18941 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
18942
18943 if (invert)
18944 {
18945 lo = ~lo, hi = ~hi;
18946 v = constm1_rtx;
18947 }
18948 else
18949 v = const0_rtx;
18950
18951 mask = immed_double_const (lo, hi, imode);
18952
18953 vec = gen_rtvec (2, v, mask);
18954 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
18955 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
18956
18957 return v;
18958 }
18959 break;
18960
18961 default:
18962 gcc_unreachable ();
18963 }
18964
18965 if (invert)
18966 lo = ~lo, hi = ~hi;
18967
18968 /* Force this value into the low part of a fp vector constant. */
18969 mask = immed_double_const (lo, hi, imode);
18970 mask = gen_lowpart (mode, mask);
18971
18972 if (vec_mode == VOIDmode)
18973 return force_reg (mode, mask);
18974
18975 v = ix86_build_const_vector (vec_mode, vect, mask);
18976 return force_reg (vec_mode, v);
18977 }
18978
18979 /* Generate code for floating point ABS or NEG. */
18980
18981 void
18982 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
18983 rtx operands[])
18984 {
18985 rtx mask, set, dst, src;
18986 bool use_sse = false;
18987 bool vector_mode = VECTOR_MODE_P (mode);
18988 enum machine_mode vmode = mode;
18989
18990 if (vector_mode)
18991 use_sse = true;
18992 else if (mode == TFmode)
18993 use_sse = true;
18994 else if (TARGET_SSE_MATH)
18995 {
18996 use_sse = SSE_FLOAT_MODE_P (mode);
18997 if (mode == SFmode)
18998 vmode = V4SFmode;
18999 else if (mode == DFmode)
19000 vmode = V2DFmode;
19001 }
19002
19003 /* NEG and ABS performed with SSE use bitwise mask operations.
19004 Create the appropriate mask now. */
19005 if (use_sse)
19006 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
19007 else
19008 mask = NULL_RTX;
19009
19010 dst = operands[0];
19011 src = operands[1];
19012
19013 set = gen_rtx_fmt_e (code, mode, src);
19014 set = gen_rtx_SET (VOIDmode, dst, set);
19015
19016 if (mask)
19017 {
19018 rtx use, clob;
19019 rtvec par;
19020
19021 use = gen_rtx_USE (VOIDmode, mask);
19022 if (vector_mode)
19023 par = gen_rtvec (2, set, use);
19024 else
19025 {
19026 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
19027 par = gen_rtvec (3, set, use, clob);
19028 }
19029 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
19030 }
19031 else
19032 emit_insn (set);
19033 }
19034
19035 /* Expand a copysign operation. Special case operand 0 being a constant. */
19036
19037 void
19038 ix86_expand_copysign (rtx operands[])
19039 {
19040 enum machine_mode mode, vmode;
19041 rtx dest, op0, op1, mask, nmask;
19042
19043 dest = operands[0];
19044 op0 = operands[1];
19045 op1 = operands[2];
19046
19047 mode = GET_MODE (dest);
19048
19049 if (mode == SFmode)
19050 vmode = V4SFmode;
19051 else if (mode == DFmode)
19052 vmode = V2DFmode;
19053 else
19054 vmode = mode;
19055
19056 if (GET_CODE (op0) == CONST_DOUBLE)
19057 {
19058 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
19059
19060 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
19061 op0 = simplify_unary_operation (ABS, mode, op0, mode);
19062
19063 if (mode == SFmode || mode == DFmode)
19064 {
19065 if (op0 == CONST0_RTX (mode))
19066 op0 = CONST0_RTX (vmode);
19067 else
19068 {
19069 rtx v = ix86_build_const_vector (vmode, false, op0);
19070
19071 op0 = force_reg (vmode, v);
19072 }
19073 }
19074 else if (op0 != CONST0_RTX (mode))
19075 op0 = force_reg (mode, op0);
19076
19077 mask = ix86_build_signbit_mask (vmode, 0, 0);
19078
19079 if (mode == SFmode)
19080 copysign_insn = gen_copysignsf3_const;
19081 else if (mode == DFmode)
19082 copysign_insn = gen_copysigndf3_const;
19083 else
19084 copysign_insn = gen_copysigntf3_const;
19085
19086 emit_insn (copysign_insn (dest, op0, op1, mask));
19087 }
19088 else
19089 {
19090 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
19091
19092 nmask = ix86_build_signbit_mask (vmode, 0, 1);
19093 mask = ix86_build_signbit_mask (vmode, 0, 0);
19094
19095 if (mode == SFmode)
19096 copysign_insn = gen_copysignsf3_var;
19097 else if (mode == DFmode)
19098 copysign_insn = gen_copysigndf3_var;
19099 else
19100 copysign_insn = gen_copysigntf3_var;
19101
19102 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
19103 }
19104 }
19105
19106 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
19107 be a constant, and so has already been expanded into a vector constant. */
19108
19109 void
19110 ix86_split_copysign_const (rtx operands[])
19111 {
19112 enum machine_mode mode, vmode;
19113 rtx dest, op0, mask, x;
19114
19115 dest = operands[0];
19116 op0 = operands[1];
19117 mask = operands[3];
19118
19119 mode = GET_MODE (dest);
19120 vmode = GET_MODE (mask);
19121
19122 dest = simplify_gen_subreg (vmode, dest, mode, 0);
19123 x = gen_rtx_AND (vmode, dest, mask);
19124 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19125
19126 if (op0 != CONST0_RTX (vmode))
19127 {
19128 x = gen_rtx_IOR (vmode, dest, op0);
19129 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19130 }
19131 }
19132
19133 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
19134 so we have to do two masks. */
19135
19136 void
19137 ix86_split_copysign_var (rtx operands[])
19138 {
19139 enum machine_mode mode, vmode;
19140 rtx dest, scratch, op0, op1, mask, nmask, x;
19141
19142 dest = operands[0];
19143 scratch = operands[1];
19144 op0 = operands[2];
19145 op1 = operands[3];
19146 nmask = operands[4];
19147 mask = operands[5];
19148
19149 mode = GET_MODE (dest);
19150 vmode = GET_MODE (mask);
19151
19152 if (rtx_equal_p (op0, op1))
19153 {
19154 /* Shouldn't happen often (it's useless, obviously), but when it does
19155 we'd generate incorrect code if we continue below. */
19156 emit_move_insn (dest, op0);
19157 return;
19158 }
19159
19160 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
19161 {
19162 gcc_assert (REGNO (op1) == REGNO (scratch));
19163
19164 x = gen_rtx_AND (vmode, scratch, mask);
19165 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
19166
19167 dest = mask;
19168 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19169 x = gen_rtx_NOT (vmode, dest);
19170 x = gen_rtx_AND (vmode, x, op0);
19171 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19172 }
19173 else
19174 {
19175 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
19176 {
19177 x = gen_rtx_AND (vmode, scratch, mask);
19178 }
19179 else /* alternative 2,4 */
19180 {
19181 gcc_assert (REGNO (mask) == REGNO (scratch));
19182 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
19183 x = gen_rtx_AND (vmode, scratch, op1);
19184 }
19185 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
19186
19187 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
19188 {
19189 dest = simplify_gen_subreg (vmode, op0, mode, 0);
19190 x = gen_rtx_AND (vmode, dest, nmask);
19191 }
19192 else /* alternative 3,4 */
19193 {
19194 gcc_assert (REGNO (nmask) == REGNO (dest));
19195 dest = nmask;
19196 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19197 x = gen_rtx_AND (vmode, dest, op0);
19198 }
19199 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19200 }
19201
19202 x = gen_rtx_IOR (vmode, dest, scratch);
19203 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19204 }
19205
19206 /* Return TRUE or FALSE depending on whether the first SET in INSN
19207 has source and destination with matching CC modes, and that the
19208 CC mode is at least as constrained as REQ_MODE. */
19209
19210 bool
19211 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
19212 {
19213 rtx set;
19214 enum machine_mode set_mode;
19215
19216 set = PATTERN (insn);
19217 if (GET_CODE (set) == PARALLEL)
19218 set = XVECEXP (set, 0, 0);
19219 gcc_assert (GET_CODE (set) == SET);
19220 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
19221
19222 set_mode = GET_MODE (SET_DEST (set));
19223 switch (set_mode)
19224 {
19225 case CCNOmode:
19226 if (req_mode != CCNOmode
19227 && (req_mode != CCmode
19228 || XEXP (SET_SRC (set), 1) != const0_rtx))
19229 return false;
19230 break;
19231 case CCmode:
19232 if (req_mode == CCGCmode)
19233 return false;
19234 /* FALLTHRU */
19235 case CCGCmode:
19236 if (req_mode == CCGOCmode || req_mode == CCNOmode)
19237 return false;
19238 /* FALLTHRU */
19239 case CCGOCmode:
19240 if (req_mode == CCZmode)
19241 return false;
19242 /* FALLTHRU */
19243 case CCZmode:
19244 break;
19245
19246 case CCAmode:
19247 case CCCmode:
19248 case CCOmode:
19249 case CCSmode:
19250 if (set_mode != req_mode)
19251 return false;
19252 break;
19253
19254 default:
19255 gcc_unreachable ();
19256 }
19257
19258 return GET_MODE (SET_SRC (set)) == set_mode;
19259 }
19260
19261 /* Generate insn patterns to do an integer compare of OPERANDS. */
19262
19263 static rtx
19264 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
19265 {
19266 enum machine_mode cmpmode;
19267 rtx tmp, flags;
19268
19269 cmpmode = SELECT_CC_MODE (code, op0, op1);
19270 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
19271
19272 /* This is very simple, but making the interface the same as in the
19273 FP case makes the rest of the code easier. */
19274 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
19275 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
19276
19277 /* Return the test that should be put into the flags user, i.e.
19278 the bcc, scc, or cmov instruction. */
19279 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
19280 }
19281
19282 /* Figure out whether to use ordered or unordered fp comparisons.
19283 Return the appropriate mode to use. */
19284
19285 enum machine_mode
19286 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
19287 {
19288 /* ??? In order to make all comparisons reversible, we do all comparisons
19289 non-trapping when compiling for IEEE. Once gcc is able to distinguish
19290 all forms of trapping and nontrapping comparisons, we can make inequality
19291 comparisons trapping again, since it results in better code when using
19292 FCOM based compares. */
19293 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
19294 }
19295
19296 enum machine_mode
19297 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
19298 {
19299 enum machine_mode mode = GET_MODE (op0);
19300
19301 if (SCALAR_FLOAT_MODE_P (mode))
19302 {
19303 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
19304 return ix86_fp_compare_mode (code);
19305 }
19306
19307 switch (code)
19308 {
19309 /* Only zero flag is needed. */
19310 case EQ: /* ZF=0 */
19311 case NE: /* ZF!=0 */
19312 return CCZmode;
19313 /* Codes needing carry flag. */
19314 case GEU: /* CF=0 */
19315 case LTU: /* CF=1 */
19316 /* Detect overflow checks. They need just the carry flag. */
19317 if (GET_CODE (op0) == PLUS
19318 && rtx_equal_p (op1, XEXP (op0, 0)))
19319 return CCCmode;
19320 else
19321 return CCmode;
19322 case GTU: /* CF=0 & ZF=0 */
19323 case LEU: /* CF=1 | ZF=1 */
19324 return CCmode;
19325 /* Codes possibly doable only with sign flag when
19326 comparing against zero. */
19327 case GE: /* SF=OF or SF=0 */
19328 case LT: /* SF<>OF or SF=1 */
19329 if (op1 == const0_rtx)
19330 return CCGOCmode;
19331 else
19332 /* For other cases Carry flag is not required. */
19333 return CCGCmode;
19334 /* Codes doable only with the sign flag when comparing
19335 against zero, but we miss the jump instruction for it,
19336 so we need to use relational tests against overflow,
19337 which thus needs to be zero. */
19338 case GT: /* ZF=0 & SF=OF */
19339 case LE: /* ZF=1 | SF<>OF */
19340 if (op1 == const0_rtx)
19341 return CCNOmode;
19342 else
19343 return CCGCmode;
19344 /* The strcmp pattern does (use flags) and combine may ask us for the
19345 proper mode. */
19346 case USE:
19347 return CCmode;
19348 default:
19349 gcc_unreachable ();
19350 }
19351 }
19352
19353 /* Return the fixed registers used for condition codes. */
19354
19355 static bool
19356 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
19357 {
19358 *p1 = FLAGS_REG;
19359 *p2 = FPSR_REG;
19360 return true;
19361 }
19362
19363 /* If two condition code modes are compatible, return a condition code
19364 mode which is compatible with both. Otherwise, return
19365 VOIDmode. */
19366
19367 static enum machine_mode
19368 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
19369 {
19370 if (m1 == m2)
19371 return m1;
19372
19373 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
19374 return VOIDmode;
19375
19376 if ((m1 == CCGCmode && m2 == CCGOCmode)
19377 || (m1 == CCGOCmode && m2 == CCGCmode))
19378 return CCGCmode;
19379
19380 if (m1 == CCZmode && (m2 == CCGCmode || m2 == CCGOCmode))
19381 return m2;
19382 else if (m2 == CCZmode && (m1 == CCGCmode || m1 == CCGOCmode))
19383 return m1;
19384
19385 switch (m1)
19386 {
19387 default:
19388 gcc_unreachable ();
19389
19390 case CCmode:
19391 case CCGCmode:
19392 case CCGOCmode:
19393 case CCNOmode:
19394 case CCAmode:
19395 case CCCmode:
19396 case CCOmode:
19397 case CCSmode:
19398 case CCZmode:
19399 switch (m2)
19400 {
19401 default:
19402 return VOIDmode;
19403
19404 case CCmode:
19405 case CCGCmode:
19406 case CCGOCmode:
19407 case CCNOmode:
19408 case CCAmode:
19409 case CCCmode:
19410 case CCOmode:
19411 case CCSmode:
19412 case CCZmode:
19413 return CCmode;
19414 }
19415
19416 case CCFPmode:
19417 case CCFPUmode:
19418 /* These are only compatible with themselves, which we already
19419 checked above. */
19420 return VOIDmode;
19421 }
19422 }
19423
19424
19425 /* Return a comparison we can do that is equivalent to
19426 swap_condition (code), apart possibly from orderedness.
19427 But never change orderedness if TARGET_IEEE_FP, returning
19428 UNKNOWN in that case if necessary. */
19429
19430 static enum rtx_code
19431 ix86_fp_swap_condition (enum rtx_code code)
19432 {
19433 switch (code)
19434 {
19435 case GT: /* GTU - CF=0 & ZF=0 */
19436 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
19437 case GE: /* GEU - CF=0 */
19438 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
19439 case UNLT: /* LTU - CF=1 */
19440 return TARGET_IEEE_FP ? UNKNOWN : GT;
19441 case UNLE: /* LEU - CF=1 | ZF=1 */
19442 return TARGET_IEEE_FP ? UNKNOWN : GE;
19443 default:
19444 return swap_condition (code);
19445 }
19446 }
19447
19448 /* Return the cost of comparison CODE using the best strategy for performance.
19449 All of the following functions use the number of instructions as the cost metric.
19450 In the future this should be tweaked to compute bytes for optimize_size and
19451 take into account the performance of various instructions on various CPUs. */
19452
19453 static int
19454 ix86_fp_comparison_cost (enum rtx_code code)
19455 {
19456 int arith_cost;
19457
19458 /* The cost of code using bit-twiddling on %ah. */
19459 switch (code)
19460 {
19461 case UNLE:
19462 case UNLT:
19463 case LTGT:
19464 case GT:
19465 case GE:
19466 case UNORDERED:
19467 case ORDERED:
19468 case UNEQ:
19469 arith_cost = 4;
19470 break;
19471 case LT:
19472 case NE:
19473 case EQ:
19474 case UNGE:
19475 arith_cost = TARGET_IEEE_FP ? 5 : 4;
19476 break;
19477 case LE:
19478 case UNGT:
19479 arith_cost = TARGET_IEEE_FP ? 6 : 4;
19480 break;
19481 default:
19482 gcc_unreachable ();
19483 }
19484
19485 switch (ix86_fp_comparison_strategy (code))
19486 {
19487 case IX86_FPCMP_COMI:
19488 return arith_cost > 4 ? 3 : 2;
19489 case IX86_FPCMP_SAHF:
19490 return arith_cost > 4 ? 4 : 3;
19491 default:
19492 return arith_cost;
19493 }
19494 }
19495
19496 /* Return the strategy to use for floating-point comparisons. We assume that
19497 fcomi is always preferable where available, since that is also true when looking
19498 at size (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
19499
19500 enum ix86_fpcmp_strategy
19501 ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED)
19502 {
19503 /* Do fcomi/sahf based test when profitable. */
19504
19505 if (TARGET_CMOVE)
19506 return IX86_FPCMP_COMI;
19507
19508 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
19509 return IX86_FPCMP_SAHF;
19510
19511 return IX86_FPCMP_ARITH;
19512 }
19513
19514 /* Swap, force into registers, or otherwise massage the two operands
19515 to a fp comparison. The operands are updated in place; the new
19516 comparison code is returned. */
19517
19518 static enum rtx_code
19519 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
19520 {
19521 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
19522 rtx op0 = *pop0, op1 = *pop1;
19523 enum machine_mode op_mode = GET_MODE (op0);
19524 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
19525
19526 /* All of the unordered compare instructions only work on registers.
19527 The same is true of the fcomi compare instructions. The XFmode
19528 compare instructions require registers except when comparing
19529 against zero or when converting operand 1 from fixed point to
19530 floating point. */
19531
19532 if (!is_sse
19533 && (fpcmp_mode == CCFPUmode
19534 || (op_mode == XFmode
19535 && ! (standard_80387_constant_p (op0) == 1
19536 || standard_80387_constant_p (op1) == 1)
19537 && GET_CODE (op1) != FLOAT)
19538 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
19539 {
19540 op0 = force_reg (op_mode, op0);
19541 op1 = force_reg (op_mode, op1);
19542 }
19543 else
19544 {
19545 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
19546 things around if they appear profitable, otherwise force op0
19547 into a register. */
19548
19549 if (standard_80387_constant_p (op0) == 0
19550 || (MEM_P (op0)
19551 && ! (standard_80387_constant_p (op1) == 0
19552 || MEM_P (op1))))
19553 {
19554 enum rtx_code new_code = ix86_fp_swap_condition (code);
19555 if (new_code != UNKNOWN)
19556 {
19557 rtx tmp;
19558 tmp = op0, op0 = op1, op1 = tmp;
19559 code = new_code;
19560 }
19561 }
19562
19563 if (!REG_P (op0))
19564 op0 = force_reg (op_mode, op0);
19565
19566 if (CONSTANT_P (op1))
19567 {
19568 int tmp = standard_80387_constant_p (op1);
19569 if (tmp == 0)
19570 op1 = validize_mem (force_const_mem (op_mode, op1));
19571 else if (tmp == 1)
19572 {
19573 if (TARGET_CMOVE)
19574 op1 = force_reg (op_mode, op1);
19575 }
19576 else
19577 op1 = force_reg (op_mode, op1);
19578 }
19579 }
19580
19581 /* Try to rearrange the comparison to make it cheaper. */
19582 if (ix86_fp_comparison_cost (code)
19583 > ix86_fp_comparison_cost (swap_condition (code))
19584 && (REG_P (op1) || can_create_pseudo_p ()))
19585 {
19586 rtx tmp;
19587 tmp = op0, op0 = op1, op1 = tmp;
19588 code = swap_condition (code);
19589 if (!REG_P (op0))
19590 op0 = force_reg (op_mode, op0);
19591 }
19592
19593 *pop0 = op0;
19594 *pop1 = op1;
19595 return code;
19596 }
19597
19598 /* Convert a comparison code we use to represent an FP comparison into the
19599    integer code that will result in a proper branch.  Return UNKNOWN if no
19600    such code is available.  */
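/* As a reminder, fcomi/fucomi and comiss/ucomiss set ZF, PF and CF much
   like an unsigned integer compare (greater: ZF=CF=0; less: CF=1;
   equal: ZF=1; unordered: ZF=PF=CF=1), which is why GT maps to GTU,
   GE to GEU, UNLT to LTU and so on below.  */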
19601
19602 enum rtx_code
19603 ix86_fp_compare_code_to_integer (enum rtx_code code)
19604 {
19605 switch (code)
19606 {
19607 case GT:
19608 return GTU;
19609 case GE:
19610 return GEU;
19611 case ORDERED:
19612 case UNORDERED:
19613 return code;
19614 break;
19615 case UNEQ:
19616 return EQ;
19617 break;
19618 case UNLT:
19619 return LTU;
19620 break;
19621 case UNLE:
19622 return LEU;
19623 break;
19624 case LTGT:
19625 return NE;
19626 break;
19627 default:
19628 return UNKNOWN;
19629 }
19630 }
19631
19632 /* Generate insn patterns to do a floating point compare of OPERANDS. */
19633
19634 static rtx
19635 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
19636 {
19637 enum machine_mode fpcmp_mode, intcmp_mode;
19638 rtx tmp, tmp2;
19639
19640 fpcmp_mode = ix86_fp_compare_mode (code);
19641 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
19642
19643 /* Do fcomi/sahf based test when profitable. */
19644 switch (ix86_fp_comparison_strategy (code))
19645 {
19646 case IX86_FPCMP_COMI:
19647 intcmp_mode = fpcmp_mode;
19648 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
19649 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
19650 tmp);
19651 emit_insn (tmp);
19652 break;
19653
19654 case IX86_FPCMP_SAHF:
19655 intcmp_mode = fpcmp_mode;
19656 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
19657 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
19658 tmp);
19659
19660 if (!scratch)
19661 scratch = gen_reg_rtx (HImode);
19662 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
19663 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
19664 break;
19665
19666 case IX86_FPCMP_ARITH:
19667 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
19668 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
19669 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
19670 if (!scratch)
19671 scratch = gen_reg_rtx (HImode);
19672 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
19673
19674 /* In the unordered case, we have to check C2 for NaN's, which
19675 doesn't happen to work out to anything nice combination-wise.
19676 So do some bit twiddling on the value we've got in AH to come
19677 up with an appropriate set of condition codes. */
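      /* After fnstsw, AH holds the x87 condition bits: C0 in bit 0 (0x01),
	 C2 in bit 2 (0x04) and C3 in bit 6 (0x40).  The masks used below
	 therefore select 0x45 = C3|C2|C0, 0x44 = C3|C2, 0x40 = C3,
	 0x05 = C2|C0 and 0x04 = C2.  */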
19678
19679 intcmp_mode = CCNOmode;
19680 switch (code)
19681 {
19682 case GT:
19683 case UNGT:
19684 if (code == GT || !TARGET_IEEE_FP)
19685 {
19686 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
19687 code = EQ;
19688 }
19689 else
19690 {
19691 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19692 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
19693 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
19694 intcmp_mode = CCmode;
19695 code = GEU;
19696 }
19697 break;
19698 case LT:
19699 case UNLT:
19700 if (code == LT && TARGET_IEEE_FP)
19701 {
19702 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19703 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
19704 intcmp_mode = CCmode;
19705 code = EQ;
19706 }
19707 else
19708 {
19709 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
19710 code = NE;
19711 }
19712 break;
19713 case GE:
19714 case UNGE:
19715 if (code == GE || !TARGET_IEEE_FP)
19716 {
19717 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
19718 code = EQ;
19719 }
19720 else
19721 {
19722 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19723 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
19724 code = NE;
19725 }
19726 break;
19727 case LE:
19728 case UNLE:
19729 if (code == LE && TARGET_IEEE_FP)
19730 {
19731 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19732 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
19733 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
19734 intcmp_mode = CCmode;
19735 code = LTU;
19736 }
19737 else
19738 {
19739 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
19740 code = NE;
19741 }
19742 break;
19743 case EQ:
19744 case UNEQ:
19745 if (code == EQ && TARGET_IEEE_FP)
19746 {
19747 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19748 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
19749 intcmp_mode = CCmode;
19750 code = EQ;
19751 }
19752 else
19753 {
19754 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
19755 code = NE;
19756 }
19757 break;
19758 case NE:
19759 case LTGT:
19760 if (code == NE && TARGET_IEEE_FP)
19761 {
19762 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19763 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
19764 GEN_INT (0x40)));
19765 code = NE;
19766 }
19767 else
19768 {
19769 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
19770 code = EQ;
19771 }
19772 break;
19773
19774 case UNORDERED:
19775 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
19776 code = NE;
19777 break;
19778 case ORDERED:
19779 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
19780 code = EQ;
19781 break;
19782
19783 default:
19784 gcc_unreachable ();
19785 }
19786 break;
19787
19788 default:
19789       gcc_unreachable ();
19790 }
19791
19792 /* Return the test that should be put into the flags user, i.e.
19793 the bcc, scc, or cmov instruction. */
19794 return gen_rtx_fmt_ee (code, VOIDmode,
19795 gen_rtx_REG (intcmp_mode, FLAGS_REG),
19796 const0_rtx);
19797 }
19798
19799 static rtx
19800 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
19801 {
19802 rtx ret;
19803
19804 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
19805 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
19806
19807 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
19808 {
19809 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
19810 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
19811 }
19812 else
19813 ret = ix86_expand_int_compare (code, op0, op1);
19814
19815 return ret;
19816 }
19817
19818 void
19819 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
19820 {
19821 enum machine_mode mode = GET_MODE (op0);
19822 rtx tmp;
19823
19824 switch (mode)
19825 {
19826 case SFmode:
19827 case DFmode:
19828 case XFmode:
19829 case QImode:
19830 case HImode:
19831 case SImode:
19832 simple:
19833 tmp = ix86_expand_compare (code, op0, op1);
19834 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
19835 gen_rtx_LABEL_REF (VOIDmode, label),
19836 pc_rtx);
19837 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
19838 return;
19839
19840 case DImode:
19841 if (TARGET_64BIT)
19842 goto simple;
19843 case TImode:
19844       /* Expand the double-word (DImode or TImode) branch into multiple compare+branch.  */
19845 {
19846 rtx lo[2], hi[2], label2;
19847 enum rtx_code code1, code2, code3;
19848 enum machine_mode submode;
19849
19850 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
19851 {
19852 tmp = op0, op0 = op1, op1 = tmp;
19853 code = swap_condition (code);
19854 }
19855
19856 split_double_mode (mode, &op0, 1, lo+0, hi+0);
19857 split_double_mode (mode, &op1, 1, lo+1, hi+1);
19858
19859 submode = mode == DImode ? SImode : DImode;
19860
19861 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
19862 avoid two branches. This costs one extra insn, so disable when
19863 optimizing for size. */
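	/* For example, on a 32-bit target a DImode "a == b" becomes roughly
	   xor; xor; or on the SImode halves followed by a single branch on
	   the result being zero, instead of two compare+branch pairs.  */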
19864
19865 if ((code == EQ || code == NE)
19866 && (!optimize_insn_for_size_p ()
19867 || hi[1] == const0_rtx || lo[1] == const0_rtx))
19868 {
19869 rtx xor0, xor1;
19870
19871 xor1 = hi[0];
19872 if (hi[1] != const0_rtx)
19873 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
19874 NULL_RTX, 0, OPTAB_WIDEN);
19875
19876 xor0 = lo[0];
19877 if (lo[1] != const0_rtx)
19878 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
19879 NULL_RTX, 0, OPTAB_WIDEN);
19880
19881 tmp = expand_binop (submode, ior_optab, xor1, xor0,
19882 NULL_RTX, 0, OPTAB_WIDEN);
19883
19884 ix86_expand_branch (code, tmp, const0_rtx, label);
19885 return;
19886 }
19887
19888 	/* Otherwise, if we are doing a less-than or greater-than-or-equal
19889 	   comparison, op1 is a constant and its low word is zero, then we
19890 	   can just examine the high word.  Similarly for a low word of -1
19891 	   and a less-than-or-equal or greater-than comparison.  */
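	/* E.g. for an unsigned DImode "a < 0x500000000" on a 32-bit target
	   the low word of the constant is zero, so the result depends only
	   on the high-word comparison "hi(a) < 5".  */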
19892
19893 if (CONST_INT_P (hi[1]))
19894 switch (code)
19895 {
19896 case LT: case LTU: case GE: case GEU:
19897 if (lo[1] == const0_rtx)
19898 {
19899 ix86_expand_branch (code, hi[0], hi[1], label);
19900 return;
19901 }
19902 break;
19903 case LE: case LEU: case GT: case GTU:
19904 if (lo[1] == constm1_rtx)
19905 {
19906 ix86_expand_branch (code, hi[0], hi[1], label);
19907 return;
19908 }
19909 break;
19910 default:
19911 break;
19912 }
19913
19914 /* Otherwise, we need two or three jumps. */
19915
19916 label2 = gen_label_rtx ();
19917
19918 code1 = code;
19919 code2 = swap_condition (code);
19920 code3 = unsigned_condition (code);
19921
19922 switch (code)
19923 {
19924 case LT: case GT: case LTU: case GTU:
19925 break;
19926
19927 case LE: code1 = LT; code2 = GT; break;
19928 case GE: code1 = GT; code2 = LT; break;
19929 case LEU: code1 = LTU; code2 = GTU; break;
19930 case GEU: code1 = GTU; code2 = LTU; break;
19931
19932 case EQ: code1 = UNKNOWN; code2 = NE; break;
19933 case NE: code2 = UNKNOWN; break;
19934
19935 default:
19936 gcc_unreachable ();
19937 }
19938
19939 /*
19940 * a < b =>
19941 * if (hi(a) < hi(b)) goto true;
19942 * if (hi(a) > hi(b)) goto false;
19943 * if (lo(a) < lo(b)) goto true;
19944 * false:
19945 */
19946
19947 if (code1 != UNKNOWN)
19948 ix86_expand_branch (code1, hi[0], hi[1], label);
19949 if (code2 != UNKNOWN)
19950 ix86_expand_branch (code2, hi[0], hi[1], label2);
19951
19952 ix86_expand_branch (code3, lo[0], lo[1], label);
19953
19954 if (code2 != UNKNOWN)
19955 emit_label (label2);
19956 return;
19957 }
19958
19959 default:
19960 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
19961 goto simple;
19962 }
19963 }
19964
19965 /* Split branch based on floating point condition. */
19966 void
19967 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
19968 rtx target1, rtx target2, rtx tmp, rtx pushed)
19969 {
19970 rtx condition;
19971 rtx i;
19972
19973 if (target2 != pc_rtx)
19974 {
19975 rtx tmp = target2;
19976 code = reverse_condition_maybe_unordered (code);
19977 target2 = target1;
19978 target1 = tmp;
19979 }
19980
19981 condition = ix86_expand_fp_compare (code, op1, op2,
19982 tmp);
19983
19984 /* Remove pushed operand from stack. */
19985 if (pushed)
19986 ix86_free_from_memory (GET_MODE (pushed));
19987
19988 i = emit_jump_insn (gen_rtx_SET
19989 (VOIDmode, pc_rtx,
19990 gen_rtx_IF_THEN_ELSE (VOIDmode,
19991 condition, target1, target2)));
19992 if (split_branch_probability >= 0)
19993 add_int_reg_note (i, REG_BR_PROB, split_branch_probability);
19994 }
19995
19996 void
19997 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
19998 {
19999 rtx ret;
20000
20001 gcc_assert (GET_MODE (dest) == QImode);
20002
20003 ret = ix86_expand_compare (code, op0, op1);
20004 PUT_MODE (ret, QImode);
20005 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
20006 }
20007
20008 /* Expand a comparison that sets or clears the carry flag.  Return true when
20009    successful and set *POP to the comparison operation.  */
20010 static bool
20011 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
20012 {
20013 enum machine_mode mode =
20014 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
20015
20016   /* Do not handle double-word compares, which go through a special path.  */
20017 if (mode == (TARGET_64BIT ? TImode : DImode))
20018 return false;
20019
20020 if (SCALAR_FLOAT_MODE_P (mode))
20021 {
20022 rtx compare_op, compare_seq;
20023
20024 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
20025
20026       /* Shortcut:  the following common codes never translate
20027 	 into carry flag compares.  */
20028 if (code == EQ || code == NE || code == UNEQ || code == LTGT
20029 || code == ORDERED || code == UNORDERED)
20030 return false;
20031
20032 /* These comparisons require zero flag; swap operands so they won't. */
20033 if ((code == GT || code == UNLE || code == LE || code == UNGT)
20034 && !TARGET_IEEE_FP)
20035 {
20036 rtx tmp = op0;
20037 op0 = op1;
20038 op1 = tmp;
20039 code = swap_condition (code);
20040 }
20041
20042       /* Try to expand the comparison and verify that we end up with a
20043 	 carry flag based comparison.  This fails only when we decide to
20044 	 expand the comparison using arithmetic, which is not a very
20045 	 common scenario.  */
20046 start_sequence ();
20047 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
20048 compare_seq = get_insns ();
20049 end_sequence ();
20050
20051 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
20052 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
20053 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
20054 else
20055 code = GET_CODE (compare_op);
20056
20057 if (code != LTU && code != GEU)
20058 return false;
20059
20060 emit_insn (compare_seq);
20061 *pop = compare_op;
20062 return true;
20063 }
20064
20065 if (!INTEGRAL_MODE_P (mode))
20066 return false;
20067
20068 switch (code)
20069 {
20070 case LTU:
20071 case GEU:
20072 break;
20073
20074 /* Convert a==0 into (unsigned)a<1. */
20075 case EQ:
20076 case NE:
20077 if (op1 != const0_rtx)
20078 return false;
20079 op1 = const1_rtx;
20080 code = (code == EQ ? LTU : GEU);
20081 break;
20082
20083     /* Convert a>b into b<a or a>=b+1.  */
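    /* For instance, with a constant operand an unsigned "a > 41" is
       rewritten below as "a >= 42", which is a plain carry-flag (GEU)
       test.  */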
20084 case GTU:
20085 case LEU:
20086 if (CONST_INT_P (op1))
20087 {
20088 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
20089 	  /* Bail out on overflow.  We can still swap the operands, but that
20090 	     would force loading the constant into a register.  */
20091 if (op1 == const0_rtx
20092 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
20093 return false;
20094 code = (code == GTU ? GEU : LTU);
20095 }
20096 else
20097 {
20098 rtx tmp = op1;
20099 op1 = op0;
20100 op0 = tmp;
20101 code = (code == GTU ? LTU : GEU);
20102 }
20103 break;
20104
20105 /* Convert a>=0 into (unsigned)a<0x80000000. */
20106 case LT:
20107 case GE:
20108 if (mode == DImode || op1 != const0_rtx)
20109 return false;
20110 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20111 code = (code == LT ? GEU : LTU);
20112 break;
20113 case LE:
20114 case GT:
20115 if (mode == DImode || op1 != constm1_rtx)
20116 return false;
20117 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20118 code = (code == LE ? GEU : LTU);
20119 break;
20120
20121 default:
20122 return false;
20123 }
20124   /* Swapping the operands may cause a constant to appear as the first operand.  */
20125 if (!nonimmediate_operand (op0, VOIDmode))
20126 {
20127 if (!can_create_pseudo_p ())
20128 return false;
20129 op0 = force_reg (mode, op0);
20130 }
20131 *pop = ix86_expand_compare (code, op0, op1);
20132 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
20133 return true;
20134 }
20135
20136 bool
20137 ix86_expand_int_movcc (rtx operands[])
20138 {
20139 enum rtx_code code = GET_CODE (operands[1]), compare_code;
20140 rtx compare_seq, compare_op;
20141 enum machine_mode mode = GET_MODE (operands[0]);
20142 bool sign_bit_compare_p = false;
20143 rtx op0 = XEXP (operands[1], 0);
20144 rtx op1 = XEXP (operands[1], 1);
20145
20146 if (GET_MODE (op0) == TImode
20147 || (GET_MODE (op0) == DImode
20148 && !TARGET_64BIT))
20149 return false;
20150
20151 start_sequence ();
20152 compare_op = ix86_expand_compare (code, op0, op1);
20153 compare_seq = get_insns ();
20154 end_sequence ();
20155
20156 compare_code = GET_CODE (compare_op);
20157
20158 if ((op1 == const0_rtx && (code == GE || code == LT))
20159 || (op1 == constm1_rtx && (code == GT || code == LE)))
20160 sign_bit_compare_p = true;
20161
20162 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
20163 HImode insns, we'd be swallowed in word prefix ops. */
20164
20165 if ((mode != HImode || TARGET_FAST_PREFIX)
20166 && (mode != (TARGET_64BIT ? TImode : DImode))
20167 && CONST_INT_P (operands[2])
20168 && CONST_INT_P (operands[3]))
20169 {
20170 rtx out = operands[0];
20171 HOST_WIDE_INT ct = INTVAL (operands[2]);
20172 HOST_WIDE_INT cf = INTVAL (operands[3]);
20173 HOST_WIDE_INT diff;
20174
20175 diff = ct - cf;
20176       /* Sign-bit compares are better done using shifts than using
20177 	 sbb.  */
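      /* Recall that after "cmp op0, op1" the carry flag is set exactly when
	 op0 is below op1 (unsigned), and "sbb reg, reg" computes
	 reg - reg - CF, i.e. -1 when the carry is set and 0 otherwise.  */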
20178 if (sign_bit_compare_p
20179 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
20180 {
20181 /* Detect overlap between destination and compare sources. */
20182 rtx tmp = out;
20183
20184 if (!sign_bit_compare_p)
20185 {
20186 rtx flags;
20187 bool fpcmp = false;
20188
20189 compare_code = GET_CODE (compare_op);
20190
20191 flags = XEXP (compare_op, 0);
20192
20193 if (GET_MODE (flags) == CCFPmode
20194 || GET_MODE (flags) == CCFPUmode)
20195 {
20196 fpcmp = true;
20197 compare_code
20198 = ix86_fp_compare_code_to_integer (compare_code);
20199 }
20200
20201 	      /* To simplify the rest of the code, restrict to the GEU case.  */
20202 if (compare_code == LTU)
20203 {
20204 HOST_WIDE_INT tmp = ct;
20205 ct = cf;
20206 cf = tmp;
20207 compare_code = reverse_condition (compare_code);
20208 code = reverse_condition (code);
20209 }
20210 else
20211 {
20212 if (fpcmp)
20213 PUT_CODE (compare_op,
20214 reverse_condition_maybe_unordered
20215 (GET_CODE (compare_op)));
20216 else
20217 PUT_CODE (compare_op,
20218 reverse_condition (GET_CODE (compare_op)));
20219 }
20220 diff = ct - cf;
20221
20222 if (reg_overlap_mentioned_p (out, op0)
20223 || reg_overlap_mentioned_p (out, op1))
20224 tmp = gen_reg_rtx (mode);
20225
20226 if (mode == DImode)
20227 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
20228 else
20229 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
20230 flags, compare_op));
20231 }
20232 else
20233 {
20234 if (code == GT || code == GE)
20235 code = reverse_condition (code);
20236 else
20237 {
20238 HOST_WIDE_INT tmp = ct;
20239 ct = cf;
20240 cf = tmp;
20241 diff = ct - cf;
20242 }
20243 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
20244 }
20245
20246 if (diff == 1)
20247 {
20248 /*
20249 * cmpl op0,op1
20250 * sbbl dest,dest
20251 * [addl dest, ct]
20252 *
20253 * Size 5 - 8.
20254 */
20255 if (ct)
20256 tmp = expand_simple_binop (mode, PLUS,
20257 tmp, GEN_INT (ct),
20258 copy_rtx (tmp), 1, OPTAB_DIRECT);
20259 }
20260 else if (cf == -1)
20261 {
20262 /*
20263 * cmpl op0,op1
20264 * sbbl dest,dest
20265 * orl $ct, dest
20266 *
20267 * Size 8.
20268 */
20269 tmp = expand_simple_binop (mode, IOR,
20270 tmp, GEN_INT (ct),
20271 copy_rtx (tmp), 1, OPTAB_DIRECT);
20272 }
20273 else if (diff == -1 && ct)
20274 {
20275 /*
20276 * cmpl op0,op1
20277 * sbbl dest,dest
20278 * notl dest
20279 * [addl dest, cf]
20280 *
20281 * Size 8 - 11.
20282 */
20283 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20284 if (cf)
20285 tmp = expand_simple_binop (mode, PLUS,
20286 copy_rtx (tmp), GEN_INT (cf),
20287 copy_rtx (tmp), 1, OPTAB_DIRECT);
20288 }
20289 else
20290 {
20291 /*
20292 * cmpl op0,op1
20293 * sbbl dest,dest
20294 * [notl dest]
20295 * andl cf - ct, dest
20296 * [addl dest, ct]
20297 *
20298 * Size 8 - 11.
20299 */
20300
20301 if (cf == 0)
20302 {
20303 cf = ct;
20304 ct = 0;
20305 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20306 }
20307
20308 tmp = expand_simple_binop (mode, AND,
20309 copy_rtx (tmp),
20310 gen_int_mode (cf - ct, mode),
20311 copy_rtx (tmp), 1, OPTAB_DIRECT);
20312 if (ct)
20313 tmp = expand_simple_binop (mode, PLUS,
20314 copy_rtx (tmp), GEN_INT (ct),
20315 copy_rtx (tmp), 1, OPTAB_DIRECT);
20316 }
20317
20318 if (!rtx_equal_p (tmp, out))
20319 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
20320
20321 return true;
20322 }
20323
20324 if (diff < 0)
20325 {
20326 enum machine_mode cmp_mode = GET_MODE (op0);
20327
20328 HOST_WIDE_INT tmp;
20329 tmp = ct, ct = cf, cf = tmp;
20330 diff = -diff;
20331
20332 if (SCALAR_FLOAT_MODE_P (cmp_mode))
20333 {
20334 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
20335
20336 		/* We may be reversing an unordered compare to a normal compare,
20337 		   which is not valid in general (we may convert a non-trapping
20338 		   condition into a trapping one); however, on i386 we currently
20339 		   emit all comparisons unordered.  */
20340 compare_code = reverse_condition_maybe_unordered (compare_code);
20341 code = reverse_condition_maybe_unordered (code);
20342 }
20343 else
20344 {
20345 compare_code = reverse_condition (compare_code);
20346 code = reverse_condition (code);
20347 }
20348 }
20349
20350 compare_code = UNKNOWN;
20351 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
20352 && CONST_INT_P (op1))
20353 {
20354 if (op1 == const0_rtx
20355 && (code == LT || code == GE))
20356 compare_code = code;
20357 else if (op1 == constm1_rtx)
20358 {
20359 if (code == LE)
20360 compare_code = LT;
20361 else if (code == GT)
20362 compare_code = GE;
20363 }
20364 }
20365
20366 /* Optimize dest = (op0 < 0) ? -1 : cf. */
20367 if (compare_code != UNKNOWN
20368 && GET_MODE (op0) == GET_MODE (out)
20369 && (cf == -1 || ct == -1))
20370 {
20371 	    /* If the lea code below could be used, only optimize
20372 	       if it results in a 2-insn sequence.  */
20373
20374 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
20375 || diff == 3 || diff == 5 || diff == 9)
20376 || (compare_code == LT && ct == -1)
20377 || (compare_code == GE && cf == -1))
20378 {
20379 /*
20380 * notl op1 (if necessary)
20381 * sarl $31, op1
20382 * orl cf, op1
20383 */
20384 if (ct != -1)
20385 {
20386 cf = ct;
20387 ct = -1;
20388 code = reverse_condition (code);
20389 }
20390
20391 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
20392
20393 out = expand_simple_binop (mode, IOR,
20394 out, GEN_INT (cf),
20395 out, 1, OPTAB_DIRECT);
20396 if (out != operands[0])
20397 emit_move_insn (operands[0], out);
20398
20399 return true;
20400 }
20401 }
20402
20403
20404 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
20405 || diff == 3 || diff == 5 || diff == 9)
20406 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
20407 && (mode != DImode
20408 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
20409 {
20410 /*
20411 * xorl dest,dest
20412 * cmpl op1,op2
20413 * setcc dest
20414 * lea cf(dest*(ct-cf)),dest
20415 *
20416 * Size 14.
20417 *
20418 * This also catches the degenerate setcc-only case.
20419 */
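	  /* E.g. for ct - cf == 5 the setcc result r is combined below as
	     r*4 + r + cf, which a single lea can encode.  */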
20420
20421 rtx tmp;
20422 int nops;
20423
20424 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
20425
20426 nops = 0;
20427 	  /* On x86_64 the lea instruction operates on Pmode, so we need
20428 	     to get the arithmetic done in the proper mode to match.  */
20429 if (diff == 1)
20430 tmp = copy_rtx (out);
20431 else
20432 {
20433 rtx out1;
20434 out1 = copy_rtx (out);
20435 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
20436 nops++;
20437 if (diff & 1)
20438 {
20439 tmp = gen_rtx_PLUS (mode, tmp, out1);
20440 nops++;
20441 }
20442 }
20443 if (cf != 0)
20444 {
20445 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
20446 nops++;
20447 }
20448 if (!rtx_equal_p (tmp, out))
20449 {
20450 if (nops == 1)
20451 out = force_operand (tmp, copy_rtx (out));
20452 else
20453 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
20454 }
20455 if (!rtx_equal_p (out, operands[0]))
20456 emit_move_insn (operands[0], copy_rtx (out));
20457
20458 return true;
20459 }
20460
20461 /*
20462 * General case: Jumpful:
20463 * xorl dest,dest cmpl op1, op2
20464 * cmpl op1, op2 movl ct, dest
20465 * setcc dest jcc 1f
20466 * decl dest movl cf, dest
20467 * andl (cf-ct),dest 1:
20468 * addl ct,dest
20469 *
20470 * Size 20. Size 14.
20471 *
20472 * This is reasonably steep, but branch mispredict costs are
20473 * high on modern cpus, so consider failing only if optimizing
20474 * for space.
20475 */
20476
20477 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
20478 && BRANCH_COST (optimize_insn_for_speed_p (),
20479 false) >= 2)
20480 {
20481 if (cf == 0)
20482 {
20483 enum machine_mode cmp_mode = GET_MODE (op0);
20484
20485 cf = ct;
20486 ct = 0;
20487
20488 if (SCALAR_FLOAT_MODE_P (cmp_mode))
20489 {
20490 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
20491
20492 		  /* We may be reversing an unordered compare to a normal
20493 		     compare, which is not valid in general (we may convert a
20494 		     non-trapping condition into a trapping one); however, on
20495 		     i386 we currently emit all comparisons unordered.  */
20496 code = reverse_condition_maybe_unordered (code);
20497 }
20498 else
20499 {
20500 code = reverse_condition (code);
20501 if (compare_code != UNKNOWN)
20502 compare_code = reverse_condition (compare_code);
20503 }
20504 }
20505
20506 if (compare_code != UNKNOWN)
20507 {
20508 /* notl op1 (if needed)
20509 sarl $31, op1
20510 andl (cf-ct), op1
20511 addl ct, op1
20512
20513 For x < 0 (resp. x <= -1) there will be no notl,
20514 so if possible swap the constants to get rid of the
20515 complement.
20516 True/false will be -1/0 while code below (store flag
20517 followed by decrement) is 0/-1, so the constants need
20518 to be exchanged once more. */
20519
20520 if (compare_code == GE || !cf)
20521 {
20522 code = reverse_condition (code);
20523 compare_code = LT;
20524 }
20525 else
20526 {
20527 HOST_WIDE_INT tmp = cf;
20528 cf = ct;
20529 ct = tmp;
20530 }
20531
20532 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
20533 }
20534 else
20535 {
20536 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
20537
20538 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
20539 constm1_rtx,
20540 copy_rtx (out), 1, OPTAB_DIRECT);
20541 }
20542
20543 out = expand_simple_binop (mode, AND, copy_rtx (out),
20544 gen_int_mode (cf - ct, mode),
20545 copy_rtx (out), 1, OPTAB_DIRECT);
20546 if (ct)
20547 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
20548 copy_rtx (out), 1, OPTAB_DIRECT);
20549 if (!rtx_equal_p (out, operands[0]))
20550 emit_move_insn (operands[0], copy_rtx (out));
20551
20552 return true;
20553 }
20554 }
20555
20556 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
20557 {
20558 /* Try a few things more with specific constants and a variable. */
20559
20560 optab op;
20561 rtx var, orig_out, out, tmp;
20562
20563 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
20564 return false;
20565
20566 /* If one of the two operands is an interesting constant, load a
20567 constant with the above and mask it in with a logical operation. */
20568
20569 if (CONST_INT_P (operands[2]))
20570 {
20571 var = operands[3];
20572 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
20573 operands[3] = constm1_rtx, op = and_optab;
20574 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
20575 operands[3] = const0_rtx, op = ior_optab;
20576 else
20577 return false;
20578 }
20579 else if (CONST_INT_P (operands[3]))
20580 {
20581 var = operands[2];
20582 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
20583 operands[2] = constm1_rtx, op = and_optab;
20584 	  else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
20585 operands[2] = const0_rtx, op = ior_optab;
20586 else
20587 return false;
20588 }
20589 else
20590 return false;
20591
20592 orig_out = operands[0];
20593 tmp = gen_reg_rtx (mode);
20594 operands[0] = tmp;
20595
20596 /* Recurse to get the constant loaded. */
20597 if (ix86_expand_int_movcc (operands) == 0)
20598 return false;
20599
20600 /* Mask in the interesting variable. */
20601 out = expand_binop (mode, op, var, tmp, orig_out, 0,
20602 OPTAB_WIDEN);
20603 if (!rtx_equal_p (out, orig_out))
20604 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
20605
20606 return true;
20607 }
20608
20609 /*
20610 * For comparison with above,
20611 *
20612 * movl cf,dest
20613 * movl ct,tmp
20614 * cmpl op1,op2
20615 * cmovcc tmp,dest
20616 *
20617 * Size 15.
20618 */
20619
20620 if (! nonimmediate_operand (operands[2], mode))
20621 operands[2] = force_reg (mode, operands[2]);
20622 if (! nonimmediate_operand (operands[3], mode))
20623 operands[3] = force_reg (mode, operands[3]);
20624
20625 if (! register_operand (operands[2], VOIDmode)
20626 && (mode == QImode
20627 || ! register_operand (operands[3], VOIDmode)))
20628 operands[2] = force_reg (mode, operands[2]);
20629
20630 if (mode == QImode
20631 && ! register_operand (operands[3], VOIDmode))
20632 operands[3] = force_reg (mode, operands[3]);
20633
20634 emit_insn (compare_seq);
20635 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
20636 gen_rtx_IF_THEN_ELSE (mode,
20637 compare_op, operands[2],
20638 operands[3])));
20639 return true;
20640 }
20641
20642 /* Swap, force into registers, or otherwise massage the two operands
20643 to an sse comparison with a mask result. Thus we differ a bit from
20644 ix86_prepare_fp_compare_args which expects to produce a flags result.
20645
20646 The DEST operand exists to help determine whether to commute commutative
20647 operators. The POP0/POP1 operands are updated in place. The new
20648 comparison code is returned, or UNKNOWN if not implementable. */
20649
20650 static enum rtx_code
20651 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
20652 rtx *pop0, rtx *pop1)
20653 {
20654 rtx tmp;
20655
20656 switch (code)
20657 {
20658 case LTGT:
20659 case UNEQ:
20660 /* AVX supports all the needed comparisons. */
20661 if (TARGET_AVX)
20662 break;
20663 /* We have no LTGT as an operator. We could implement it with
20664 NE & ORDERED, but this requires an extra temporary. It's
20665 not clear that it's worth it. */
20666 return UNKNOWN;
20667
20668 case LT:
20669 case LE:
20670 case UNGT:
20671 case UNGE:
20672 /* These are supported directly. */
20673 break;
20674
20675 case EQ:
20676 case NE:
20677 case UNORDERED:
20678 case ORDERED:
20679 	/* AVX has 3-operand comparisons; no need to swap anything.  */
20680 if (TARGET_AVX)
20681 break;
20682 /* For commutative operators, try to canonicalize the destination
20683 operand to be first in the comparison - this helps reload to
20684 avoid extra moves. */
20685 if (!dest || !rtx_equal_p (dest, *pop1))
20686 break;
20687 /* FALLTHRU */
20688
20689 case GE:
20690 case GT:
20691 case UNLE:
20692 case UNLT:
20693 /* These are not supported directly before AVX, and furthermore
20694 ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
20695 comparison operands to transform into something that is
20696 supported. */
20697 tmp = *pop0;
20698 *pop0 = *pop1;
20699 *pop1 = tmp;
20700 code = swap_condition (code);
20701 break;
20702
20703 default:
20704 gcc_unreachable ();
20705 }
20706
20707 return code;
20708 }
20709
20710 /* Detect conditional moves that exactly match min/max operational
20711 semantics. Note that this is IEEE safe, as long as we don't
20712 interchange the operands.
20713
20714 Returns FALSE if this conditional move doesn't match a MIN/MAX,
20715 and TRUE if the operation is successful and instructions are emitted. */
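/* For example, "a < b ? a : b" matches the semantics of the SSE MIN
   instructions, which return their second operand when the comparison is
   false or unordered; swapping the operands would therefore change the
   result for NaNs and for -0.0 vs. +0.0.  */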
20716
20717 static bool
20718 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
20719 rtx cmp_op1, rtx if_true, rtx if_false)
20720 {
20721 enum machine_mode mode;
20722 bool is_min;
20723 rtx tmp;
20724
20725 if (code == LT)
20726 ;
20727 else if (code == UNGE)
20728 {
20729 tmp = if_true;
20730 if_true = if_false;
20731 if_false = tmp;
20732 }
20733 else
20734 return false;
20735
20736 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
20737 is_min = true;
20738 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
20739 is_min = false;
20740 else
20741 return false;
20742
20743 mode = GET_MODE (dest);
20744
20745 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
20746 but MODE may be a vector mode and thus not appropriate. */
20747 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
20748 {
20749 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
20750 rtvec v;
20751
20752 if_true = force_reg (mode, if_true);
20753 v = gen_rtvec (2, if_true, if_false);
20754 tmp = gen_rtx_UNSPEC (mode, v, u);
20755 }
20756 else
20757 {
20758 code = is_min ? SMIN : SMAX;
20759 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
20760 }
20761
20762 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
20763 return true;
20764 }
20765
20766 /* Expand an sse vector comparison. Return the register with the result. */
20767
20768 static rtx
20769 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
20770 rtx op_true, rtx op_false)
20771 {
20772 enum machine_mode mode = GET_MODE (dest);
20773 enum machine_mode cmp_ops_mode = GET_MODE (cmp_op0);
20774
20775   /* In the general case the result of the comparison can differ from the operands' mode.  */
20776 enum machine_mode cmp_mode;
20777
20778 /* In AVX512F the result of comparison is an integer mask. */
20779 bool maskcmp = false;
20780 rtx x;
20781
20782 if (GET_MODE_SIZE (cmp_ops_mode) == 64)
20783 {
20784 cmp_mode = mode_for_size (GET_MODE_NUNITS (cmp_ops_mode), MODE_INT, 0);
20785 gcc_assert (cmp_mode != BLKmode);
20786
20787 maskcmp = true;
20788 }
20789 else
20790 cmp_mode = cmp_ops_mode;
20791
20792
20793 cmp_op0 = force_reg (cmp_ops_mode, cmp_op0);
20794 if (!nonimmediate_operand (cmp_op1, cmp_ops_mode))
20795 cmp_op1 = force_reg (cmp_ops_mode, cmp_op1);
20796
20797 if (optimize
20798 || reg_overlap_mentioned_p (dest, op_true)
20799 || reg_overlap_mentioned_p (dest, op_false))
20800 dest = gen_reg_rtx (maskcmp ? cmp_mode : mode);
20801
20802   /* For AVX512F only, the compare patterns for integer modes are UNSPECs.  */
20803 if (maskcmp && (code == GT || code == EQ))
20804 {
20805 rtx (*gen)(rtx, rtx, rtx);
20806
20807 switch (cmp_ops_mode)
20808 {
20809 case V16SImode:
20810 gen = code == GT ? gen_avx512f_gtv16si3 : gen_avx512f_eqv16si3_1;
20811 break;
20812 case V8DImode:
20813 gen = code == GT ? gen_avx512f_gtv8di3 : gen_avx512f_eqv8di3_1;
20814 break;
20815 default:
20816 gen = NULL;
20817 }
20818
20819 if (gen)
20820 {
20821 emit_insn (gen (dest, cmp_op0, cmp_op1));
20822 return dest;
20823 }
20824 }
20825 x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
20826
20827 if (cmp_mode != mode && !maskcmp)
20828 {
20829 x = force_reg (cmp_ops_mode, x);
20830 convert_move (dest, x, false);
20831 }
20832 else
20833 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20834
20835 return dest;
20836 }
20837
20838 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
20839 operations. This is used for both scalar and vector conditional moves. */
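/* When no blend instruction is available, the general fallback below
   computes dest = (cmp & op_true) | (~cmp & op_false), relying on the
   comparison result being all-ones or all-zeros in each element.  */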
20840
20841 static void
20842 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
20843 {
20844 enum machine_mode mode = GET_MODE (dest);
20845 enum machine_mode cmpmode = GET_MODE (cmp);
20846
20847 /* In AVX512F the result of comparison is an integer mask. */
20848 bool maskcmp = (mode != cmpmode && TARGET_AVX512F);
20849
20850 rtx t2, t3, x;
20851
20852 if (vector_all_ones_operand (op_true, mode)
20853 && rtx_equal_p (op_false, CONST0_RTX (mode))
20854 && !maskcmp)
20855 {
20856 emit_insn (gen_rtx_SET (VOIDmode, dest, cmp));
20857 }
20858 else if (op_false == CONST0_RTX (mode)
20859 && !maskcmp)
20860 {
20861 op_true = force_reg (mode, op_true);
20862 x = gen_rtx_AND (mode, cmp, op_true);
20863 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20864 }
20865 else if (op_true == CONST0_RTX (mode)
20866 && !maskcmp)
20867 {
20868 op_false = force_reg (mode, op_false);
20869 x = gen_rtx_NOT (mode, cmp);
20870 x = gen_rtx_AND (mode, x, op_false);
20871 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20872 }
20873 else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode)
20874 && !maskcmp)
20875 {
20876 op_false = force_reg (mode, op_false);
20877 x = gen_rtx_IOR (mode, cmp, op_false);
20878 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20879 }
20880 else if (TARGET_XOP
20881 && !maskcmp)
20882 {
20883 op_true = force_reg (mode, op_true);
20884
20885 if (!nonimmediate_operand (op_false, mode))
20886 op_false = force_reg (mode, op_false);
20887
20888 emit_insn (gen_rtx_SET (mode, dest,
20889 gen_rtx_IF_THEN_ELSE (mode, cmp,
20890 op_true,
20891 op_false)));
20892 }
20893 else
20894 {
20895 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
20896 rtx d = dest;
20897
20898 if (!nonimmediate_operand (op_true, mode))
20899 op_true = force_reg (mode, op_true);
20900
20901 op_false = force_reg (mode, op_false);
20902
20903 switch (mode)
20904 {
20905 case V4SFmode:
20906 if (TARGET_SSE4_1)
20907 gen = gen_sse4_1_blendvps;
20908 break;
20909 case V2DFmode:
20910 if (TARGET_SSE4_1)
20911 gen = gen_sse4_1_blendvpd;
20912 break;
20913 case V16QImode:
20914 case V8HImode:
20915 case V4SImode:
20916 case V2DImode:
20917 if (TARGET_SSE4_1)
20918 {
20919 gen = gen_sse4_1_pblendvb;
20920 if (mode != V16QImode)
20921 d = gen_reg_rtx (V16QImode);
20922 op_false = gen_lowpart (V16QImode, op_false);
20923 op_true = gen_lowpart (V16QImode, op_true);
20924 cmp = gen_lowpart (V16QImode, cmp);
20925 }
20926 break;
20927 case V8SFmode:
20928 if (TARGET_AVX)
20929 gen = gen_avx_blendvps256;
20930 break;
20931 case V4DFmode:
20932 if (TARGET_AVX)
20933 gen = gen_avx_blendvpd256;
20934 break;
20935 case V32QImode:
20936 case V16HImode:
20937 case V8SImode:
20938 case V4DImode:
20939 if (TARGET_AVX2)
20940 {
20941 gen = gen_avx2_pblendvb;
20942 if (mode != V32QImode)
20943 d = gen_reg_rtx (V32QImode);
20944 op_false = gen_lowpart (V32QImode, op_false);
20945 op_true = gen_lowpart (V32QImode, op_true);
20946 cmp = gen_lowpart (V32QImode, cmp);
20947 }
20948 break;
20949
20950 case V16SImode:
20951 gen = gen_avx512f_blendmv16si;
20952 break;
20953 case V8DImode:
20954 gen = gen_avx512f_blendmv8di;
20955 break;
20956 case V8DFmode:
20957 gen = gen_avx512f_blendmv8df;
20958 break;
20959 case V16SFmode:
20960 gen = gen_avx512f_blendmv16sf;
20961 break;
20962
20963 default:
20964 break;
20965 }
20966
20967 if (gen != NULL)
20968 {
20969 emit_insn (gen (d, op_false, op_true, cmp));
20970 if (d != dest)
20971 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
20972 }
20973 else
20974 {
20975 op_true = force_reg (mode, op_true);
20976
20977 t2 = gen_reg_rtx (mode);
20978 if (optimize)
20979 t3 = gen_reg_rtx (mode);
20980 else
20981 t3 = dest;
20982
20983 x = gen_rtx_AND (mode, op_true, cmp);
20984 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
20985
20986 x = gen_rtx_NOT (mode, cmp);
20987 x = gen_rtx_AND (mode, x, op_false);
20988 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
20989
20990 x = gen_rtx_IOR (mode, t3, t2);
20991 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20992 }
20993 }
20994 }
20995
20996 /* Expand a floating-point conditional move. Return true if successful. */
20997
20998 bool
20999 ix86_expand_fp_movcc (rtx operands[])
21000 {
21001 enum machine_mode mode = GET_MODE (operands[0]);
21002 enum rtx_code code = GET_CODE (operands[1]);
21003 rtx tmp, compare_op;
21004 rtx op0 = XEXP (operands[1], 0);
21005 rtx op1 = XEXP (operands[1], 1);
21006
21007 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
21008 {
21009 enum machine_mode cmode;
21010
21011       /* Since we have no cmove for SSE registers, don't force bad register
21012 	 allocation just to gain access to it.  Deny movcc when the
21013 	 comparison mode doesn't match the move mode.  */
21014 cmode = GET_MODE (op0);
21015 if (cmode == VOIDmode)
21016 cmode = GET_MODE (op1);
21017 if (cmode != mode)
21018 return false;
21019
21020 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
21021 if (code == UNKNOWN)
21022 return false;
21023
21024 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
21025 operands[2], operands[3]))
21026 return true;
21027
21028 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
21029 operands[2], operands[3]);
21030 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
21031 return true;
21032 }
21033
21034 if (GET_MODE (op0) == TImode
21035 || (GET_MODE (op0) == DImode
21036 && !TARGET_64BIT))
21037 return false;
21038
21039 /* The floating point conditional move instructions don't directly
21040 support conditions resulting from a signed integer comparison. */
21041
21042 compare_op = ix86_expand_compare (code, op0, op1);
21043 if (!fcmov_comparison_operator (compare_op, VOIDmode))
21044 {
21045 tmp = gen_reg_rtx (QImode);
21046 ix86_expand_setcc (tmp, code, op0, op1);
21047
21048 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
21049 }
21050
21051 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
21052 gen_rtx_IF_THEN_ELSE (mode, compare_op,
21053 operands[2], operands[3])));
21054
21055 return true;
21056 }
21057
21058 /* Expand a floating-point vector conditional move; a vcond operation
21059 rather than a movcc operation. */
21060
21061 bool
21062 ix86_expand_fp_vcond (rtx operands[])
21063 {
21064 enum rtx_code code = GET_CODE (operands[3]);
21065 rtx cmp;
21066
21067 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
21068 &operands[4], &operands[5]);
21069 if (code == UNKNOWN)
21070 {
21071 rtx temp;
21072 switch (GET_CODE (operands[3]))
21073 {
21074 case LTGT:
21075 temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
21076 operands[5], operands[0], operands[0]);
21077 cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
21078 operands[5], operands[1], operands[2]);
21079 code = AND;
21080 break;
21081 case UNEQ:
21082 temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
21083 operands[5], operands[0], operands[0]);
21084 cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
21085 operands[5], operands[1], operands[2]);
21086 code = IOR;
21087 break;
21088 default:
21089 gcc_unreachable ();
21090 }
21091 cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
21092 OPTAB_DIRECT);
21093 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21094 return true;
21095 }
21096
21097 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
21098 operands[5], operands[1], operands[2]))
21099 return true;
21100
21101 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
21102 operands[1], operands[2]);
21103 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21104 return true;
21105 }
21106
21107 /* Expand a signed/unsigned integral vector conditional move. */
21108
21109 bool
21110 ix86_expand_int_vcond (rtx operands[])
21111 {
21112 enum machine_mode data_mode = GET_MODE (operands[0]);
21113 enum machine_mode mode = GET_MODE (operands[4]);
21114 enum rtx_code code = GET_CODE (operands[3]);
21115 bool negate = false;
21116 rtx x, cop0, cop1;
21117
21118 cop0 = operands[4];
21119 cop1 = operands[5];
21120
21121 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
21122 and x < 0 ? 1 : 0 into (unsigned) x >> 31. */
21123 if ((code == LT || code == GE)
21124 && data_mode == mode
21125 && cop1 == CONST0_RTX (mode)
21126 && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
21127 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) > 1
21128 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) <= 8
21129 && (GET_MODE_SIZE (data_mode) == 16
21130 || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
21131 {
21132 rtx negop = operands[2 - (code == LT)];
21133 int shift = GET_MODE_BITSIZE (GET_MODE_INNER (data_mode)) - 1;
21134 if (negop == CONST1_RTX (data_mode))
21135 {
21136 rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
21137 operands[0], 1, OPTAB_DIRECT);
21138 if (res != operands[0])
21139 emit_move_insn (operands[0], res);
21140 return true;
21141 }
21142 else if (GET_MODE_INNER (data_mode) != DImode
21143 && vector_all_ones_operand (negop, data_mode))
21144 {
21145 rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
21146 operands[0], 0, OPTAB_DIRECT);
21147 if (res != operands[0])
21148 emit_move_insn (operands[0], res);
21149 return true;
21150 }
21151 }
21152
21153 if (!nonimmediate_operand (cop1, mode))
21154 cop1 = force_reg (mode, cop1);
21155 if (!general_operand (operands[1], data_mode))
21156 operands[1] = force_reg (data_mode, operands[1]);
21157 if (!general_operand (operands[2], data_mode))
21158 operands[2] = force_reg (data_mode, operands[2]);
21159
21160 /* XOP supports all of the comparisons on all 128-bit vector int types. */
21161 if (TARGET_XOP
21162 && (mode == V16QImode || mode == V8HImode
21163 || mode == V4SImode || mode == V2DImode))
21164 ;
21165 else
21166 {
21167 /* Canonicalize the comparison to EQ, GT, GTU. */
21168 switch (code)
21169 {
21170 case EQ:
21171 case GT:
21172 case GTU:
21173 break;
21174
21175 case NE:
21176 case LE:
21177 case LEU:
21178 code = reverse_condition (code);
21179 negate = true;
21180 break;
21181
21182 case GE:
21183 case GEU:
21184 code = reverse_condition (code);
21185 negate = true;
21186 /* FALLTHRU */
21187
21188 case LT:
21189 case LTU:
21190 code = swap_condition (code);
21191 x = cop0, cop0 = cop1, cop1 = x;
21192 break;
21193
21194 default:
21195 gcc_unreachable ();
21196 }
21197
21198 /* Only SSE4.1/SSE4.2 supports V2DImode. */
21199 if (mode == V2DImode)
21200 {
21201 switch (code)
21202 {
21203 case EQ:
21204 /* SSE4.1 supports EQ. */
21205 if (!TARGET_SSE4_1)
21206 return false;
21207 break;
21208
21209 case GT:
21210 case GTU:
21211 /* SSE4.2 supports GT/GTU. */
21212 if (!TARGET_SSE4_2)
21213 return false;
21214 break;
21215
21216 default:
21217 gcc_unreachable ();
21218 }
21219 }
21220
21221 /* Unsigned parallel compare is not supported by the hardware.
21222 Play some tricks to turn this into a signed comparison
21223 against 0. */
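      /* For dword and qword elements this is done by subtracting the
	 sign-bit mask from both operands, since
	 a >u b  <==>  (a - 0x80..0) >s (b - 0x80..0);
	 for byte and word elements a saturating subtraction reduces the
	 test to an (in)equality check against zero instead.  */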
21224 if (code == GTU)
21225 {
21226 cop0 = force_reg (mode, cop0);
21227
21228 switch (mode)
21229 {
21230 case V16SImode:
21231 case V8DImode:
21232 case V8SImode:
21233 case V4DImode:
21234 case V4SImode:
21235 case V2DImode:
21236 {
21237 rtx t1, t2, mask;
21238 rtx (*gen_sub3) (rtx, rtx, rtx);
21239
21240 switch (mode)
21241 {
21242 case V16SImode: gen_sub3 = gen_subv16si3; break;
21243 case V8DImode: gen_sub3 = gen_subv8di3; break;
21244 case V8SImode: gen_sub3 = gen_subv8si3; break;
21245 case V4DImode: gen_sub3 = gen_subv4di3; break;
21246 case V4SImode: gen_sub3 = gen_subv4si3; break;
21247 case V2DImode: gen_sub3 = gen_subv2di3; break;
21248 default:
21249 gcc_unreachable ();
21250 }
21251 /* Subtract (-(INT MAX) - 1) from both operands to make
21252 them signed. */
21253 mask = ix86_build_signbit_mask (mode, true, false);
21254 t1 = gen_reg_rtx (mode);
21255 emit_insn (gen_sub3 (t1, cop0, mask));
21256
21257 t2 = gen_reg_rtx (mode);
21258 emit_insn (gen_sub3 (t2, cop1, mask));
21259
21260 cop0 = t1;
21261 cop1 = t2;
21262 code = GT;
21263 }
21264 break;
21265
21266 case V32QImode:
21267 case V16HImode:
21268 case V16QImode:
21269 case V8HImode:
21270 /* Perform a parallel unsigned saturating subtraction. */
21271 x = gen_reg_rtx (mode);
21272 emit_insn (gen_rtx_SET (VOIDmode, x,
21273 gen_rtx_US_MINUS (mode, cop0, cop1)));
21274
21275 cop0 = x;
21276 cop1 = CONST0_RTX (mode);
21277 code = EQ;
21278 negate = !negate;
21279 break;
21280
21281 default:
21282 gcc_unreachable ();
21283 }
21284 }
21285 }
21286
21287 /* Allow the comparison to be done in one mode, but the movcc to
21288 happen in another mode. */
21289 if (data_mode == mode)
21290 {
21291 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
21292 operands[1+negate], operands[2-negate]);
21293 }
21294 else
21295 {
21296 gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
21297 x = ix86_expand_sse_cmp (gen_reg_rtx (mode), code, cop0, cop1,
21298 operands[1+negate], operands[2-negate]);
21299 if (GET_MODE (x) == mode)
21300 x = gen_lowpart (data_mode, x);
21301 }
21302
21303 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
21304 operands[2-negate]);
21305 return true;
21306 }
21307
21308 static bool
21309 ix86_expand_vec_perm_vpermi2 (rtx target, rtx op0, rtx mask, rtx op1)
21310 {
21311 enum machine_mode mode = GET_MODE (op0);
21312 switch (mode)
21313 {
21314 case V16SImode:
21315 emit_insn (gen_avx512f_vpermi2varv16si3 (target, op0,
21316 force_reg (V16SImode, mask),
21317 op1));
21318 return true;
21319 case V16SFmode:
21320 emit_insn (gen_avx512f_vpermi2varv16sf3 (target, op0,
21321 force_reg (V16SImode, mask),
21322 op1));
21323 return true;
21324 case V8DImode:
21325 emit_insn (gen_avx512f_vpermi2varv8di3 (target, op0,
21326 force_reg (V8DImode, mask), op1));
21327 return true;
21328 case V8DFmode:
21329 emit_insn (gen_avx512f_vpermi2varv8df3 (target, op0,
21330 force_reg (V8DImode, mask), op1));
21331 return true;
21332 default:
21333 return false;
21334 }
21335 }
21336
21337 /* Expand a variable vector permutation. */
21338
21339 void
21340 ix86_expand_vec_perm (rtx operands[])
21341 {
21342 rtx target = operands[0];
21343 rtx op0 = operands[1];
21344 rtx op1 = operands[2];
21345 rtx mask = operands[3];
21346 rtx t1, t2, t3, t4, t5, t6, t7, t8, vt, vt2, vec[32];
21347 enum machine_mode mode = GET_MODE (op0);
21348 enum machine_mode maskmode = GET_MODE (mask);
21349 int w, e, i;
21350 bool one_operand_shuffle = rtx_equal_p (op0, op1);
21351
21352 /* Number of elements in the vector. */
21353 w = GET_MODE_NUNITS (mode);
21354 e = GET_MODE_UNIT_SIZE (mode);
21355 gcc_assert (w <= 64);
21356
21357 if (ix86_expand_vec_perm_vpermi2 (target, op0, mask, op1))
21358 return;
21359
21360 if (TARGET_AVX2)
21361 {
21362 if (mode == V4DImode || mode == V4DFmode || mode == V16HImode)
21363 {
21364 	  /* Unfortunately, the VPERMQ and VPERMPD instructions only support
21365 	     a constant shuffle operand.  With a tiny bit of effort we can
21366 	     use VPERMD instead.  A re-interpretation stall for V4DFmode is
21367 	     unfortunate but there's no avoiding it.
21368 	     Similarly, for V16HImode we don't have instructions for variable
21369 	     shuffling, while for V32QImode we can, after preparing suitable
21370 	     masks, use vpshufb; vpshufb; vpermq; vpor.  */
21371
21372 if (mode == V16HImode)
21373 {
21374 maskmode = mode = V32QImode;
21375 w = 32;
21376 e = 1;
21377 }
21378 else
21379 {
21380 maskmode = mode = V8SImode;
21381 w = 8;
21382 e = 4;
21383 }
21384 t1 = gen_reg_rtx (maskmode);
21385
21386 /* Replicate the low bits of the V4DImode mask into V8SImode:
21387 mask = { A B C D }
21388 t1 = { A A B B C C D D }. */
21389 for (i = 0; i < w / 2; ++i)
21390 vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
21391 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21392 vt = force_reg (maskmode, vt);
21393 mask = gen_lowpart (maskmode, mask);
21394 if (maskmode == V8SImode)
21395 emit_insn (gen_avx2_permvarv8si (t1, mask, vt));
21396 else
21397 emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));
21398
21399 	  /* Multiply the shuffle indices by two.  */
21400 t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
21401 OPTAB_DIRECT);
21402
21403 	  /* Add one to the odd shuffle indices:
21404 t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
21405 for (i = 0; i < w / 2; ++i)
21406 {
21407 vec[i * 2] = const0_rtx;
21408 vec[i * 2 + 1] = const1_rtx;
21409 }
21410 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21411 vt = validize_mem (force_const_mem (maskmode, vt));
21412 t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
21413 OPTAB_DIRECT);
21414
21415 /* Continue as if V8SImode (resp. V32QImode) was used initially. */
21416 operands[3] = mask = t1;
21417 target = gen_reg_rtx (mode);
21418 op0 = gen_lowpart (mode, op0);
21419 op1 = gen_lowpart (mode, op1);
21420 }
21421
21422 switch (mode)
21423 {
21424 case V8SImode:
21425 /* The VPERMD and VPERMPS instructions already properly ignore
21426 the high bits of the shuffle elements. No need for us to
21427 perform an AND ourselves. */
21428 if (one_operand_shuffle)
21429 {
21430 emit_insn (gen_avx2_permvarv8si (target, op0, mask));
21431 if (target != operands[0])
21432 emit_move_insn (operands[0],
21433 gen_lowpart (GET_MODE (operands[0]), target));
21434 }
21435 else
21436 {
21437 t1 = gen_reg_rtx (V8SImode);
21438 t2 = gen_reg_rtx (V8SImode);
21439 emit_insn (gen_avx2_permvarv8si (t1, op0, mask));
21440 emit_insn (gen_avx2_permvarv8si (t2, op1, mask));
21441 goto merge_two;
21442 }
21443 return;
21444
21445 case V8SFmode:
21446 mask = gen_lowpart (V8SFmode, mask);
21447 if (one_operand_shuffle)
21448 emit_insn (gen_avx2_permvarv8sf (target, op0, mask));
21449 else
21450 {
21451 t1 = gen_reg_rtx (V8SFmode);
21452 t2 = gen_reg_rtx (V8SFmode);
21453 emit_insn (gen_avx2_permvarv8sf (t1, op0, mask));
21454 emit_insn (gen_avx2_permvarv8sf (t2, op1, mask));
21455 goto merge_two;
21456 }
21457 return;
21458
21459 case V4SImode:
21460 /* By combining the two 128-bit input vectors into one 256-bit
21461 input vector, we can use VPERMD and VPERMPS for the full
21462 two-operand shuffle. */
21463 t1 = gen_reg_rtx (V8SImode);
21464 t2 = gen_reg_rtx (V8SImode);
21465 emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
21466 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
21467 emit_insn (gen_avx2_permvarv8si (t1, t1, t2));
21468 emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
21469 return;
21470
21471 case V4SFmode:
21472 t1 = gen_reg_rtx (V8SFmode);
21473 t2 = gen_reg_rtx (V8SImode);
21474 mask = gen_lowpart (V4SImode, mask);
21475 emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
21476 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
21477 emit_insn (gen_avx2_permvarv8sf (t1, t1, t2));
21478 emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
21479 return;
21480
21481 case V32QImode:
21482 t1 = gen_reg_rtx (V32QImode);
21483 t2 = gen_reg_rtx (V32QImode);
21484 t3 = gen_reg_rtx (V32QImode);
21485 vt2 = GEN_INT (128);
21486 for (i = 0; i < 32; i++)
21487 vec[i] = vt2;
21488 vt = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
21489 vt = force_reg (V32QImode, vt);
21490 for (i = 0; i < 32; i++)
21491 vec[i] = i < 16 ? vt2 : const0_rtx;
21492 vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
21493 vt2 = force_reg (V32QImode, vt2);
21494 /* From mask create two adjusted masks, which contain the same
21495 bits as mask in the low 7 bits of each vector element.
21496 The first mask will have the most significant bit clear
21497 if it requests element from the same 128-bit lane
21498 and MSB set if it requests element from the other 128-bit lane.
21499 The second mask will have the opposite values of the MSB,
21500 and additionally will have its 128-bit lanes swapped.
21501 E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
21502 t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
21503 t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
21504 	     stands for the other 12 bytes.  */
21505 	  /* The bit that says whether an element comes from the same lane or
21506 	     the other lane is bit 4, so shift it up by 3 to the MSB position.  */
21507 t5 = gen_reg_rtx (V4DImode);
21508 emit_insn (gen_ashlv4di3 (t5, gen_lowpart (V4DImode, mask),
21509 GEN_INT (3)));
21510 /* Clear MSB bits from the mask just in case it had them set. */
21511 emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
21512 /* After this t1 will have MSB set for elements from other lane. */
21513 emit_insn (gen_xorv32qi3 (t1, gen_lowpart (V32QImode, t5), vt2));
21514 /* Clear bits other than MSB. */
21515 emit_insn (gen_andv32qi3 (t1, t1, vt));
21516 /* Or in the lower bits from mask into t3. */
21517 emit_insn (gen_iorv32qi3 (t3, t1, t2));
21518 /* And invert MSB bits in t1, so MSB is set for elements from the same
21519 lane. */
21520 emit_insn (gen_xorv32qi3 (t1, t1, vt));
21521 /* Swap 128-bit lanes in t3. */
21522 t6 = gen_reg_rtx (V4DImode);
21523 emit_insn (gen_avx2_permv4di_1 (t6, gen_lowpart (V4DImode, t3),
21524 const2_rtx, GEN_INT (3),
21525 const0_rtx, const1_rtx));
21526 /* And or in the lower bits from mask into t1. */
21527 emit_insn (gen_iorv32qi3 (t1, t1, t2));
21528 if (one_operand_shuffle)
21529 {
21530 	      /* Each of these shuffles will put 0s in places where an
21531 		 element from the other 128-bit lane is needed; otherwise
21532 		 it will shuffle in the requested value.  */
21533 emit_insn (gen_avx2_pshufbv32qi3 (t3, op0,
21534 gen_lowpart (V32QImode, t6)));
21535 emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
21536 /* For t3 the 128-bit lanes are swapped again. */
21537 t7 = gen_reg_rtx (V4DImode);
21538 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t3),
21539 const2_rtx, GEN_INT (3),
21540 const0_rtx, const1_rtx));
21541 /* ORing both together gives the result. */
21542 emit_insn (gen_iorv32qi3 (target, t1,
21543 gen_lowpart (V32QImode, t7)));
21544 if (target != operands[0])
21545 emit_move_insn (operands[0],
21546 gen_lowpart (GET_MODE (operands[0]), target));
21547 return;
21548 }
21549
21550 t4 = gen_reg_rtx (V32QImode);
21551 /* Similar to the one_operand_shuffle code above, just repeated
21552 twice, once for each operand. The merge_two: code below will
21553 merge the two results together. */
21554 emit_insn (gen_avx2_pshufbv32qi3 (t4, op0,
21555 gen_lowpart (V32QImode, t6)));
21556 emit_insn (gen_avx2_pshufbv32qi3 (t3, op1,
21557 gen_lowpart (V32QImode, t6)));
21558 emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
21559 emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
21560 t7 = gen_reg_rtx (V4DImode);
21561 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t4),
21562 const2_rtx, GEN_INT (3),
21563 const0_rtx, const1_rtx));
21564 t8 = gen_reg_rtx (V4DImode);
21565 emit_insn (gen_avx2_permv4di_1 (t8, gen_lowpart (V4DImode, t3),
21566 const2_rtx, GEN_INT (3),
21567 const0_rtx, const1_rtx));
21568 emit_insn (gen_iorv32qi3 (t4, t2, gen_lowpart (V32QImode, t7)));
21569 emit_insn (gen_iorv32qi3 (t3, t1, gen_lowpart (V32QImode, t8)));
21570 t1 = t4;
21571 t2 = t3;
21572 goto merge_two;
21573
21574 default:
21575 gcc_assert (GET_MODE_SIZE (mode) <= 16);
21576 break;
21577 }
21578 }
21579
21580 if (TARGET_XOP)
21581 {
21582 /* The XOP VPPERM insn supports three inputs. By ignoring the
21583 one_operand_shuffle special case, we avoid creating another
21584 set of constant vectors in memory. */
21585 one_operand_shuffle = false;
21586
21587 /* mask = mask & {2*w-1, ...} */
21588 vt = GEN_INT (2*w - 1);
21589 }
21590 else
21591 {
21592 /* mask = mask & {w-1, ...} */
21593 vt = GEN_INT (w - 1);
21594 }
21595
21596 for (i = 0; i < w; i++)
21597 vec[i] = vt;
21598 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21599 mask = expand_simple_binop (maskmode, AND, mask, vt,
21600 NULL_RTX, 0, OPTAB_DIRECT);
21601
21602 /* For non-QImode operations, convert the word permutation control
21603 into a byte permutation control. */
21604 if (mode != V16QImode)
21605 {
21606 mask = expand_simple_binop (maskmode, ASHIFT, mask,
21607 GEN_INT (exact_log2 (e)),
21608 NULL_RTX, 0, OPTAB_DIRECT);
21609
21610 /* Convert mask to vector of chars. */
21611 mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));
21612
21613 /* Replicate each of the input bytes into byte positions:
21614 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
21615 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
21616 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
21617 for (i = 0; i < 16; ++i)
21618 vec[i] = GEN_INT (i/e * e);
21619 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
21620 vt = validize_mem (force_const_mem (V16QImode, vt));
21621 if (TARGET_XOP)
21622 emit_insn (gen_xop_pperm (mask, mask, mask, vt));
21623 else
21624 emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));
21625
21626 /* Convert it into the byte positions by doing
21627 mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...} */
21628 for (i = 0; i < 16; ++i)
21629 vec[i] = GEN_INT (i % e);
21630 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
21631 vt = validize_mem (force_const_mem (V16QImode, vt));
21632 emit_insn (gen_addv16qi3 (mask, mask, vt));
21633 }
21634
21635 /* The actual shuffle operations all operate on V16QImode. */
21636 op0 = gen_lowpart (V16QImode, op0);
21637 op1 = gen_lowpart (V16QImode, op1);
21638
21639 if (TARGET_XOP)
21640 {
21641 if (GET_MODE (target) != V16QImode)
21642 target = gen_reg_rtx (V16QImode);
21643 emit_insn (gen_xop_pperm (target, op0, op1, mask));
21644 if (target != operands[0])
21645 emit_move_insn (operands[0],
21646 gen_lowpart (GET_MODE (operands[0]), target));
21647 }
21648 else if (one_operand_shuffle)
21649 {
21650 if (GET_MODE (target) != V16QImode)
21651 target = gen_reg_rtx (V16QImode);
21652 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
21653 if (target != operands[0])
21654 emit_move_insn (operands[0],
21655 gen_lowpart (GET_MODE (operands[0]), target));
21656 }
21657 else
21658 {
21659 rtx xops[6];
21660 bool ok;
21661
21662 /* Shuffle the two input vectors independently. */
21663 t1 = gen_reg_rtx (V16QImode);
21664 t2 = gen_reg_rtx (V16QImode);
21665 emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
21666 emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));
21667
21668 merge_two:
21669 /* Then merge them together. The key is whether any given control
21670 element contained a bit set that indicates the second word. */
21671 mask = operands[3];
21672 vt = GEN_INT (w);
21673 if (maskmode == V2DImode && !TARGET_SSE4_1)
21674 {
21675 /* Without SSE4.1, we don't have V2DImode EQ. Perform one
21676 more shuffle to convert the V2DI input mask into a V4SI
21677 input mask, at which point the masking that expand_int_vcond
21678 does will work as desired. */
21679 rtx t3 = gen_reg_rtx (V4SImode);
21680 emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
21681 const0_rtx, const0_rtx,
21682 const2_rtx, const2_rtx));
21683 mask = t3;
21684 maskmode = V4SImode;
21685 e = w = 4;
21686 }
21687
21688 for (i = 0; i < w; i++)
21689 vec[i] = vt;
21690 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21691 vt = force_reg (maskmode, vt);
21692 mask = expand_simple_binop (maskmode, AND, mask, vt,
21693 NULL_RTX, 0, OPTAB_DIRECT);
21694
21695 if (GET_MODE (target) != mode)
21696 target = gen_reg_rtx (mode);
21697 xops[0] = target;
21698 xops[1] = gen_lowpart (mode, t2);
21699 xops[2] = gen_lowpart (mode, t1);
21700 xops[3] = gen_rtx_EQ (maskmode, mask, vt);
21701 xops[4] = mask;
21702 xops[5] = vt;
21703 ok = ix86_expand_int_vcond (xops);
21704 gcc_assert (ok);
21705 if (target != operands[0])
21706 emit_move_insn (operands[0],
21707 gen_lowpart (GET_MODE (operands[0]), target));
21708 }
21709 }
21710
21711 /* Unpack SRC into DEST, the next wider integer vector type. UNSIGNED_P is
21712 true if we should do zero extension, else sign extension. HIGH_P is
21713 true if we want the N/2 high elements, else the low elements. */
21714
21715 void
21716 ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
21717 {
21718 enum machine_mode imode = GET_MODE (src);
21719 rtx tmp;
21720
21721 if (TARGET_SSE4_1)
21722 {
21723 rtx (*unpack)(rtx, rtx);
21724 rtx (*extract)(rtx, rtx) = NULL;
21725 enum machine_mode halfmode = BLKmode;
21726
21727 switch (imode)
21728 {
21729 case V32QImode:
21730 if (unsigned_p)
21731 unpack = gen_avx2_zero_extendv16qiv16hi2;
21732 else
21733 unpack = gen_avx2_sign_extendv16qiv16hi2;
21734 halfmode = V16QImode;
21735 extract
21736 = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
21737 break;
21738 case V32HImode:
21739 if (unsigned_p)
21740 unpack = gen_avx512f_zero_extendv16hiv16si2;
21741 else
21742 unpack = gen_avx512f_sign_extendv16hiv16si2;
21743 halfmode = V16HImode;
21744 extract
21745 = high_p ? gen_vec_extract_hi_v32hi : gen_vec_extract_lo_v32hi;
21746 break;
21747 case V16HImode:
21748 if (unsigned_p)
21749 unpack = gen_avx2_zero_extendv8hiv8si2;
21750 else
21751 unpack = gen_avx2_sign_extendv8hiv8si2;
21752 halfmode = V8HImode;
21753 extract
21754 = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
21755 break;
21756 case V16SImode:
21757 if (unsigned_p)
21758 unpack = gen_avx512f_zero_extendv8siv8di2;
21759 else
21760 unpack = gen_avx512f_sign_extendv8siv8di2;
21761 halfmode = V8SImode;
21762 extract
21763 = high_p ? gen_vec_extract_hi_v16si : gen_vec_extract_lo_v16si;
21764 break;
21765 case V8SImode:
21766 if (unsigned_p)
21767 unpack = gen_avx2_zero_extendv4siv4di2;
21768 else
21769 unpack = gen_avx2_sign_extendv4siv4di2;
21770 halfmode = V4SImode;
21771 extract
21772 = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
21773 break;
21774 case V16QImode:
21775 if (unsigned_p)
21776 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
21777 else
21778 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
21779 break;
21780 case V8HImode:
21781 if (unsigned_p)
21782 unpack = gen_sse4_1_zero_extendv4hiv4si2;
21783 else
21784 unpack = gen_sse4_1_sign_extendv4hiv4si2;
21785 break;
21786 case V4SImode:
21787 if (unsigned_p)
21788 unpack = gen_sse4_1_zero_extendv2siv2di2;
21789 else
21790 unpack = gen_sse4_1_sign_extendv2siv2di2;
21791 break;
21792 default:
21793 gcc_unreachable ();
21794 }
21795
21796 if (GET_MODE_SIZE (imode) >= 32)
21797 {
21798 tmp = gen_reg_rtx (halfmode);
21799 emit_insn (extract (tmp, src));
21800 }
21801 else if (high_p)
21802 {
21803 /* Shift higher 8 bytes to lower 8 bytes. */
21804 tmp = gen_reg_rtx (V1TImode);
21805 emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, src),
21806 GEN_INT (64)));
21807 tmp = gen_lowpart (imode, tmp);
21808 }
21809 else
21810 tmp = src;
21811
21812 emit_insn (unpack (dest, tmp));
21813 }
21814 else
21815 {
21816 rtx (*unpack)(rtx, rtx, rtx);
21817
21818 switch (imode)
21819 {
21820 case V16QImode:
21821 if (high_p)
21822 unpack = gen_vec_interleave_highv16qi;
21823 else
21824 unpack = gen_vec_interleave_lowv16qi;
21825 break;
21826 case V8HImode:
21827 if (high_p)
21828 unpack = gen_vec_interleave_highv8hi;
21829 else
21830 unpack = gen_vec_interleave_lowv8hi;
21831 break;
21832 case V4SImode:
21833 if (high_p)
21834 unpack = gen_vec_interleave_highv4si;
21835 else
21836 unpack = gen_vec_interleave_lowv4si;
21837 break;
21838 default:
21839 gcc_unreachable ();
21840 }
21841
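/* Without SSE4.1 the extension is done by interleaving: pair SRC with
   zeros for zero extension, or with its sign mask (computed below as
   0 > SRC) for sign extension. */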
21842 if (unsigned_p)
21843 tmp = force_reg (imode, CONST0_RTX (imode));
21844 else
21845 tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
21846 src, pc_rtx, pc_rtx);
21847
21848 rtx tmp2 = gen_reg_rtx (imode);
21849 emit_insn (unpack (tmp2, src, tmp));
21850 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), tmp2));
21851 }
21852 }
21853
21854 /* Expand conditional increment or decrement using adc/sbb instructions.
21855 The default case, using setcc followed by a conditional move, can be
21856 done by generic code. */
21857 bool
21858 ix86_expand_int_addcc (rtx operands[])
21859 {
21860 enum rtx_code code = GET_CODE (operands[1]);
21861 rtx flags;
21862 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
21863 rtx compare_op;
21864 rtx val = const0_rtx;
21865 bool fpcmp = false;
21866 enum machine_mode mode;
21867 rtx op0 = XEXP (operands[1], 0);
21868 rtx op1 = XEXP (operands[1], 1);
21869
21870 if (operands[3] != const1_rtx
21871 && operands[3] != constm1_rtx)
21872 return false;
21873 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
21874 return false;
21875 code = GET_CODE (compare_op);
21876
21877 flags = XEXP (compare_op, 0);
21878
21879 if (GET_MODE (flags) == CCFPmode
21880 || GET_MODE (flags) == CCFPUmode)
21881 {
21882 fpcmp = true;
21883 code = ix86_fp_compare_code_to_integer (code);
21884 }
21885
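/* adc/sbb can only consume the carry (LTU) condition; for anything else,
   reverse the comparison and use -1 as the constant operand instead. */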
21886 if (code != LTU)
21887 {
21888 val = constm1_rtx;
21889 if (fpcmp)
21890 PUT_CODE (compare_op,
21891 reverse_condition_maybe_unordered
21892 (GET_CODE (compare_op)));
21893 else
21894 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
21895 }
21896
21897 mode = GET_MODE (operands[0]);
21898
21899 /* Construct either adc or sbb insn. */
21900 if ((code == LTU) == (operands[3] == constm1_rtx))
21901 {
21902 switch (mode)
21903 {
21904 case QImode:
21905 insn = gen_subqi3_carry;
21906 break;
21907 case HImode:
21908 insn = gen_subhi3_carry;
21909 break;
21910 case SImode:
21911 insn = gen_subsi3_carry;
21912 break;
21913 case DImode:
21914 insn = gen_subdi3_carry;
21915 break;
21916 default:
21917 gcc_unreachable ();
21918 }
21919 }
21920 else
21921 {
21922 switch (mode)
21923 {
21924 case QImode:
21925 insn = gen_addqi3_carry;
21926 break;
21927 case HImode:
21928 insn = gen_addhi3_carry;
21929 break;
21930 case SImode:
21931 insn = gen_addsi3_carry;
21932 break;
21933 case DImode:
21934 insn = gen_adddi3_carry;
21935 break;
21936 default:
21937 gcc_unreachable ();
21938 }
21939 }
21940 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
21941
21942 return true;
21943 }
21944
21945
21946 /* Split OPERAND into half-mode PARTS. Similar to split_double_mode,
21947 but works for floating point parameters and non-offsettable memories.
21948 For pushes, it returns just stack offsets; the values will be saved
21949 in the right order. At most four parts are generated. */
21950
21951 static int
21952 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
21953 {
21954 int size;
21955
21956 if (!TARGET_64BIT)
21957 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
21958 else
21959 size = (GET_MODE_SIZE (mode) + 4) / 8;
21960
21961 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
21962 gcc_assert (size >= 2 && size <= 4);
21963
21964 /* Optimize constant pool references to immediates. This is used by fp
21965 moves, which force all constants to memory to allow combining. */
21966 if (MEM_P (operand) && MEM_READONLY_P (operand))
21967 {
21968 rtx tmp = maybe_get_pool_constant (operand);
21969 if (tmp)
21970 operand = tmp;
21971 }
21972
21973 if (MEM_P (operand) && !offsettable_memref_p (operand))
21974 {
21975 /* The only non-offsettable memories we handle are pushes. */
21976 int ok = push_operand (operand, VOIDmode);
21977
21978 gcc_assert (ok);
21979
21980 operand = copy_rtx (operand);
21981 PUT_MODE (operand, word_mode);
21982 parts[0] = parts[1] = parts[2] = parts[3] = operand;
21983 return size;
21984 }
21985
21986 if (GET_CODE (operand) == CONST_VECTOR)
21987 {
21988 enum machine_mode imode = int_mode_for_mode (mode);
21989 /* Caution: if we looked through a constant pool memory above,
21990 the operand may actually have a different mode now. That's
21991 ok, since we want to pun this all the way back to an integer. */
21992 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
21993 gcc_assert (operand != NULL);
21994 mode = imode;
21995 }
21996
21997 if (!TARGET_64BIT)
21998 {
21999 if (mode == DImode)
22000 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22001 else
22002 {
22003 int i;
22004
22005 if (REG_P (operand))
22006 {
22007 gcc_assert (reload_completed);
22008 for (i = 0; i < size; i++)
22009 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
22010 }
22011 else if (offsettable_memref_p (operand))
22012 {
22013 operand = adjust_address (operand, SImode, 0);
22014 parts[0] = operand;
22015 for (i = 1; i < size; i++)
22016 parts[i] = adjust_address (operand, SImode, 4 * i);
22017 }
22018 else if (GET_CODE (operand) == CONST_DOUBLE)
22019 {
22020 REAL_VALUE_TYPE r;
22021 long l[4];
22022
22023 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22024 switch (mode)
22025 {
22026 case TFmode:
22027 real_to_target (l, &r, mode);
22028 parts[3] = gen_int_mode (l[3], SImode);
22029 parts[2] = gen_int_mode (l[2], SImode);
22030 break;
22031 case XFmode:
22032 /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since
22033 long double may not be 80-bit. */
22034 real_to_target (l, &r, mode);
22035 parts[2] = gen_int_mode (l[2], SImode);
22036 break;
22037 case DFmode:
22038 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
22039 break;
22040 default:
22041 gcc_unreachable ();
22042 }
22043 parts[1] = gen_int_mode (l[1], SImode);
22044 parts[0] = gen_int_mode (l[0], SImode);
22045 }
22046 else
22047 gcc_unreachable ();
22048 }
22049 }
22050 else
22051 {
22052 if (mode == TImode)
22053 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22054 if (mode == XFmode || mode == TFmode)
22055 {
22056 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
22057 if (REG_P (operand))
22058 {
22059 gcc_assert (reload_completed);
22060 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
22061 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
22062 }
22063 else if (offsettable_memref_p (operand))
22064 {
22065 operand = adjust_address (operand, DImode, 0);
22066 parts[0] = operand;
22067 parts[1] = adjust_address (operand, upper_mode, 8);
22068 }
22069 else if (GET_CODE (operand) == CONST_DOUBLE)
22070 {
22071 REAL_VALUE_TYPE r;
22072 long l[4];
22073
22074 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22075 real_to_target (l, &r, mode);
22076
22077 /* Do not use shift by 32 to avoid warning on 32bit systems. */
22078 if (HOST_BITS_PER_WIDE_INT >= 64)
22079 parts[0]
22080 = gen_int_mode
22081 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
22082 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
22083 DImode);
22084 else
22085 parts[0] = immed_double_const (l[0], l[1], DImode);
22086
22087 if (upper_mode == SImode)
22088 parts[1] = gen_int_mode (l[2], SImode);
22089 else if (HOST_BITS_PER_WIDE_INT >= 64)
22090 parts[1]
22091 = gen_int_mode
22092 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
22093 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
22094 DImode);
22095 else
22096 parts[1] = immed_double_const (l[2], l[3], DImode);
22097 }
22098 else
22099 gcc_unreachable ();
22100 }
22101 }
22102
22103 return size;
22104 }
22105
22106 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
22107 The value is split into parts that are placed into operands 2-5
22108 (the destinations) and operands 6-9 (the sources), in the correct
22109 order, and the individual moves are emitted here. */
22110
22111 void
22112 ix86_split_long_move (rtx operands[])
22113 {
22114 rtx part[2][4];
22115 int nparts, i, j;
22116 int push = 0;
22117 int collisions = 0;
22118 enum machine_mode mode = GET_MODE (operands[0]);
22119 bool collisionparts[4];
22120
22121 /* The DFmode expanders may ask us to move a double.
22122 For a 64-bit target this is a single move. By hiding that fact
22123 here we simplify the i386.md splitters. */
22124 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
22125 {
22126 /* Optimize constant pool references to immediates. This is used by
22127 fp moves, which force all constants to memory to allow combining. */
22128
22129 if (MEM_P (operands[1])
22130 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
22131 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
22132 operands[1] = get_pool_constant (XEXP (operands[1], 0));
22133 if (push_operand (operands[0], VOIDmode))
22134 {
22135 operands[0] = copy_rtx (operands[0]);
22136 PUT_MODE (operands[0], word_mode);
22137 }
22138 else
22139 operands[0] = gen_lowpart (DImode, operands[0]);
22140 operands[1] = gen_lowpart (DImode, operands[1]);
22141 emit_move_insn (operands[0], operands[1]);
22142 return;
22143 }
22144
22145 /* The only non-offsettable memory we handle is push. */
22146 if (push_operand (operands[0], VOIDmode))
22147 push = 1;
22148 else
22149 gcc_assert (!MEM_P (operands[0])
22150 || offsettable_memref_p (operands[0]));
22151
22152 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
22153 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
22154
22155 /* When emitting a push, take care of source operands on the stack. */
22156 if (push && MEM_P (operands[1])
22157 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
22158 {
22159 rtx src_base = XEXP (part[1][nparts - 1], 0);
22160
22161 /* Compensate for the stack decrement by 4. */
22162 if (!TARGET_64BIT && nparts == 3
22163 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
22164 src_base = plus_constant (Pmode, src_base, 4);
22165
22166 /* src_base refers to the stack pointer and is
22167 automatically decreased by the emitted pushes. */
22168 for (i = 0; i < nparts; i++)
22169 part[1][i] = change_address (part[1][i],
22170 GET_MODE (part[1][i]), src_base);
22171 }
22172
22173 /* We need to do the copy in the right order in case an address register
22174 of the source overlaps the destination. */
22175 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
22176 {
22177 rtx tmp;
22178
22179 for (i = 0; i < nparts; i++)
22180 {
22181 collisionparts[i]
22182 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
22183 if (collisionparts[i])
22184 collisions++;
22185 }
22186
22187 /* Collision in the middle part can be handled by reordering. */
22188 if (collisions == 1 && nparts == 3 && collisionparts [1])
22189 {
22190 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
22191 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
22192 }
22193 else if (collisions == 1
22194 && nparts == 4
22195 && (collisionparts [1] || collisionparts [2]))
22196 {
22197 if (collisionparts [1])
22198 {
22199 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
22200 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
22201 }
22202 else
22203 {
22204 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
22205 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
22206 }
22207 }
22208
22209 /* If there are more collisions, we can't handle them by reordering.
22210 Do an lea into the last part and use only one colliding move. */
22211 else if (collisions > 1)
22212 {
22213 rtx base;
22214
22215 collisions = 1;
22216
22217 base = part[0][nparts - 1];
22218
22219 /* Handle the case when the last part isn't valid for lea.
22220 Happens in 64-bit mode storing the 12-byte XFmode. */
22221 if (GET_MODE (base) != Pmode)
22222 base = gen_rtx_REG (Pmode, REGNO (base));
22223
22224 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
22225 part[1][0] = replace_equiv_address (part[1][0], base);
22226 for (i = 1; i < nparts; i++)
22227 {
22228 tmp = plus_constant (Pmode, base, UNITS_PER_WORD * i);
22229 part[1][i] = replace_equiv_address (part[1][i], tmp);
22230 }
22231 }
22232 }
22233
22234 if (push)
22235 {
22236 if (!TARGET_64BIT)
22237 {
22238 if (nparts == 3)
22239 {
22240 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
22241 emit_insn (ix86_gen_add3 (stack_pointer_rtx,
22242 stack_pointer_rtx, GEN_INT (-4)));
22243 emit_move_insn (part[0][2], part[1][2]);
22244 }
22245 else if (nparts == 4)
22246 {
22247 emit_move_insn (part[0][3], part[1][3]);
22248 emit_move_insn (part[0][2], part[1][2]);
22249 }
22250 }
22251 else
22252 {
22253 /* In 64-bit mode we don't have a 32-bit push available. If this is a
22254 register, that is OK - we will just use the larger counterpart. We also
22255 retype the memory - this comes from an attempt to avoid a REX prefix
22256 when moving the second half of a TFmode value. */
22257 if (GET_MODE (part[1][1]) == SImode)
22258 {
22259 switch (GET_CODE (part[1][1]))
22260 {
22261 case MEM:
22262 part[1][1] = adjust_address (part[1][1], DImode, 0);
22263 break;
22264
22265 case REG:
22266 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
22267 break;
22268
22269 default:
22270 gcc_unreachable ();
22271 }
22272
22273 if (GET_MODE (part[1][0]) == SImode)
22274 part[1][0] = part[1][1];
22275 }
22276 }
22277 emit_move_insn (part[0][1], part[1][1]);
22278 emit_move_insn (part[0][0], part[1][0]);
22279 return;
22280 }
22281
22282 /* Choose the correct order so the source is not overwritten before it is copied. */
22283 if ((REG_P (part[0][0])
22284 && REG_P (part[1][1])
22285 && (REGNO (part[0][0]) == REGNO (part[1][1])
22286 || (nparts == 3
22287 && REGNO (part[0][0]) == REGNO (part[1][2]))
22288 || (nparts == 4
22289 && REGNO (part[0][0]) == REGNO (part[1][3]))))
22290 || (collisions > 0
22291 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
22292 {
22293 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
22294 {
22295 operands[2 + i] = part[0][j];
22296 operands[6 + i] = part[1][j];
22297 }
22298 }
22299 else
22300 {
22301 for (i = 0; i < nparts; i++)
22302 {
22303 operands[2 + i] = part[0][i];
22304 operands[6 + i] = part[1][i];
22305 }
22306 }
22307
22308 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
22309 if (optimize_insn_for_size_p ())
22310 {
22311 for (j = 0; j < nparts - 1; j++)
22312 if (CONST_INT_P (operands[6 + j])
22313 && operands[6 + j] != const0_rtx
22314 && REG_P (operands[2 + j]))
22315 for (i = j; i < nparts - 1; i++)
22316 if (CONST_INT_P (operands[7 + i])
22317 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
22318 operands[7 + i] = operands[2 + j];
22319 }
22320
22321 for (i = 0; i < nparts; i++)
22322 emit_move_insn (operands[2 + i], operands[6 + i]);
22323
22324 return;
22325 }
22326
22327 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
22328 left shift by a constant, either using a single shift or
22329 a sequence of add instructions. */
22330
22331 static void
22332 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
22333 {
22334 rtx (*insn)(rtx, rtx, rtx);
22335
22336 if (count == 1
22337 || (count * ix86_cost->add <= ix86_cost->shift_const
22338 && !optimize_insn_for_size_p ()))
22339 {
22340 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
22341 while (count-- > 0)
22342 emit_insn (insn (operand, operand, operand));
22343 }
22344 else
22345 {
22346 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
22347 emit_insn (insn (operand, operand, GEN_INT (count)));
22348 }
22349 }
22350
22351 void
22352 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
22353 {
22354 rtx (*gen_ashl3)(rtx, rtx, rtx);
22355 rtx (*gen_shld)(rtx, rtx, rtx);
22356 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22357
22358 rtx low[2], high[2];
22359 int count;
22360
22361 if (CONST_INT_P (operands[2]))
22362 {
22363 split_double_mode (mode, operands, 2, low, high);
22364 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22365
22366 if (count >= half_width)
22367 {
22368 emit_move_insn (high[0], low[1]);
22369 emit_move_insn (low[0], const0_rtx);
22370
22371 if (count > half_width)
22372 ix86_expand_ashl_const (high[0], count - half_width, mode);
22373 }
22374 else
22375 {
22376 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
22377
22378 if (!rtx_equal_p (operands[0], operands[1]))
22379 emit_move_insn (operands[0], operands[1]);
22380
22381 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
22382 ix86_expand_ashl_const (low[0], count, mode);
22383 }
22384 return;
22385 }
22386
22387 split_double_mode (mode, operands, 1, low, high);
22388
22389 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
22390
22391 if (operands[1] == const1_rtx)
22392 {
22393 /* Assuming we've chosen QImode-capable registers, 1 << N
22394 can be done with two 32/64-bit shifts, no branches, no cmoves. */
22395 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
22396 {
22397 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
22398
22399 ix86_expand_clear (low[0]);
22400 ix86_expand_clear (high[0]);
22401 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
22402
22403 d = gen_lowpart (QImode, low[0]);
22404 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
22405 s = gen_rtx_EQ (QImode, flags, const0_rtx);
22406 emit_insn (gen_rtx_SET (VOIDmode, d, s));
22407
22408 d = gen_lowpart (QImode, high[0]);
22409 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
22410 s = gen_rtx_NE (QImode, flags, const0_rtx);
22411 emit_insn (gen_rtx_SET (VOIDmode, d, s));
22412 }
22413
22414 /* Otherwise, we can get the same results by manually performing
22415 a bit extract operation on bit 5/6, and then performing the two
22416 shifts. The two methods of getting 0/1 into low/high are exactly
22417 the same size. Avoiding the shift in the bit extract case helps
22418 pentium4 a bit; no one else seems to care much either way. */
22419 else
22420 {
22421 enum machine_mode half_mode;
22422 rtx (*gen_lshr3)(rtx, rtx, rtx);
22423 rtx (*gen_and3)(rtx, rtx, rtx);
22424 rtx (*gen_xor3)(rtx, rtx, rtx);
22425 HOST_WIDE_INT bits;
22426 rtx x;
22427
22428 if (mode == DImode)
22429 {
22430 half_mode = SImode;
22431 gen_lshr3 = gen_lshrsi3;
22432 gen_and3 = gen_andsi3;
22433 gen_xor3 = gen_xorsi3;
22434 bits = 5;
22435 }
22436 else
22437 {
22438 half_mode = DImode;
22439 gen_lshr3 = gen_lshrdi3;
22440 gen_and3 = gen_anddi3;
22441 gen_xor3 = gen_xordi3;
22442 bits = 6;
22443 }
22444
22445 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
22446 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
22447 else
22448 x = gen_lowpart (half_mode, operands[2]);
22449 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
22450
22451 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
22452 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
22453 emit_move_insn (low[0], high[0]);
22454 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
22455 }
22456
22457 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
22458 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
22459 return;
22460 }
22461
22462 if (operands[1] == constm1_rtx)
22463 {
22464 /* For -1 << N, we can avoid the shld instruction, because we
22465 know that we're shifting 0...31/63 ones into a -1. */
22466 emit_move_insn (low[0], constm1_rtx);
22467 if (optimize_insn_for_size_p ())
22468 emit_move_insn (high[0], low[0]);
22469 else
22470 emit_move_insn (high[0], constm1_rtx);
22471 }
22472 else
22473 {
22474 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
22475
22476 if (!rtx_equal_p (operands[0], operands[1]))
22477 emit_move_insn (operands[0], operands[1]);
22478
22479 split_double_mode (mode, operands, 1, low, high);
22480 emit_insn (gen_shld (high[0], low[0], operands[2]));
22481 }
22482
22483 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
22484
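/* The shifts emitted above only use the count modulo the half width; emit a
   fixup that, when the count's half-width bit is set, moves LOW into HIGH and
   clears LOW (via cmov with the zeroed SCRATCH when available, otherwise via
   a branch). */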
22485 if (TARGET_CMOVE && scratch)
22486 {
22487 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
22488 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
22489
22490 ix86_expand_clear (scratch);
22491 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
22492 }
22493 else
22494 {
22495 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
22496 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
22497
22498 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
22499 }
22500 }
22501
22502 void
22503 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
22504 {
22505 rtx (*gen_ashr3)(rtx, rtx, rtx)
22506 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
22507 rtx (*gen_shrd)(rtx, rtx, rtx);
22508 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22509
22510 rtx low[2], high[2];
22511 int count;
22512
22513 if (CONST_INT_P (operands[2]))
22514 {
22515 split_double_mode (mode, operands, 2, low, high);
22516 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22517
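/* A shift by all bits but one just replicates the sign bit: compute it once
   in the high half and copy it to the low half. */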
22518 if (count == GET_MODE_BITSIZE (mode) - 1)
22519 {
22520 emit_move_insn (high[0], high[1]);
22521 emit_insn (gen_ashr3 (high[0], high[0],
22522 GEN_INT (half_width - 1)));
22523 emit_move_insn (low[0], high[0]);
22524
22525 }
22526 else if (count >= half_width)
22527 {
22528 emit_move_insn (low[0], high[1]);
22529 emit_move_insn (high[0], low[0]);
22530 emit_insn (gen_ashr3 (high[0], high[0],
22531 GEN_INT (half_width - 1)));
22532
22533 if (count > half_width)
22534 emit_insn (gen_ashr3 (low[0], low[0],
22535 GEN_INT (count - half_width)));
22536 }
22537 else
22538 {
22539 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22540
22541 if (!rtx_equal_p (operands[0], operands[1]))
22542 emit_move_insn (operands[0], operands[1]);
22543
22544 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
22545 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
22546 }
22547 }
22548 else
22549 {
22550 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22551
22552 if (!rtx_equal_p (operands[0], operands[1]))
22553 emit_move_insn (operands[0], operands[1]);
22554
22555 split_double_mode (mode, operands, 1, low, high);
22556
22557 emit_insn (gen_shrd (low[0], high[0], operands[2]));
22558 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
22559
22560 if (TARGET_CMOVE && scratch)
22561 {
22562 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
22563 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
22564
22565 emit_move_insn (scratch, high[0]);
22566 emit_insn (gen_ashr3 (scratch, scratch,
22567 GEN_INT (half_width - 1)));
22568 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
22569 scratch));
22570 }
22571 else
22572 {
22573 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
22574 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
22575
22576 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
22577 }
22578 }
22579 }
22580
22581 void
22582 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
22583 {
22584 rtx (*gen_lshr3)(rtx, rtx, rtx)
22585 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
22586 rtx (*gen_shrd)(rtx, rtx, rtx);
22587 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22588
22589 rtx low[2], high[2];
22590 int count;
22591
22592 if (CONST_INT_P (operands[2]))
22593 {
22594 split_double_mode (mode, operands, 2, low, high);
22595 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22596
22597 if (count >= half_width)
22598 {
22599 emit_move_insn (low[0], high[1]);
22600 ix86_expand_clear (high[0]);
22601
22602 if (count > half_width)
22603 emit_insn (gen_lshr3 (low[0], low[0],
22604 GEN_INT (count - half_width)));
22605 }
22606 else
22607 {
22608 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22609
22610 if (!rtx_equal_p (operands[0], operands[1]))
22611 emit_move_insn (operands[0], operands[1]);
22612
22613 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
22614 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
22615 }
22616 }
22617 else
22618 {
22619 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22620
22621 if (!rtx_equal_p (operands[0], operands[1]))
22622 emit_move_insn (operands[0], operands[1]);
22623
22624 split_double_mode (mode, operands, 1, low, high);
22625
22626 emit_insn (gen_shrd (low[0], high[0], operands[2]));
22627 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
22628
22629 if (TARGET_CMOVE && scratch)
22630 {
22631 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
22632 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
22633
22634 ix86_expand_clear (scratch);
22635 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
22636 scratch));
22637 }
22638 else
22639 {
22640 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
22641 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
22642
22643 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
22644 }
22645 }
22646 }
22647
22648 /* Predict just emitted jump instruction to be taken with probability PROB. */
22649 static void
22650 predict_jump (int prob)
22651 {
22652 rtx insn = get_last_insn ();
22653 gcc_assert (JUMP_P (insn));
22654 add_int_reg_note (insn, REG_BR_PROB, prob);
22655 }
22656
22657 /* Helper function for the string operations below. Test whether VARIABLE
22658 is aligned to VALUE bytes; if it is, jump to the returned label. */
22659 static rtx
22660 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
22661 {
22662 rtx label = gen_label_rtx ();
22663 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
22664 if (GET_MODE (variable) == DImode)
22665 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
22666 else
22667 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
22668 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
22669 1, label);
22670 if (epilogue)
22671 predict_jump (REG_BR_PROB_BASE * 50 / 100);
22672 else
22673 predict_jump (REG_BR_PROB_BASE * 90 / 100);
22674 return label;
22675 }
22676
22677 /* Decrease COUNTREG by VALUE. */
22678 static void
22679 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
22680 {
22681 rtx (*gen_add)(rtx, rtx, rtx)
22682 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
22683
22684 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
22685 }
22686
22687 /* Zero-extend the possibly SImode EXP to a Pmode register. */
22688 rtx
22689 ix86_zero_extend_to_Pmode (rtx exp)
22690 {
22691 return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
22692 }
22693
22694 /* Divide COUNTREG by SCALE. */
22695 static rtx
22696 scale_counter (rtx countreg, int scale)
22697 {
22698 rtx sc;
22699
22700 if (scale == 1)
22701 return countreg;
22702 if (CONST_INT_P (countreg))
22703 return GEN_INT (INTVAL (countreg) / scale);
22704 gcc_assert (REG_P (countreg));
22705
22706 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
22707 GEN_INT (exact_log2 (scale)),
22708 NULL, 1, OPTAB_DIRECT);
22709 return sc;
22710 }
22711
22712 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
22713 DImode for constant loop counts. */
22714
22715 static enum machine_mode
22716 counter_mode (rtx count_exp)
22717 {
22718 if (GET_MODE (count_exp) != VOIDmode)
22719 return GET_MODE (count_exp);
22720 if (!CONST_INT_P (count_exp))
22721 return Pmode;
22722 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
22723 return DImode;
22724 return SImode;
22725 }
22726
22727 /* Copy the address to a Pmode register. This is used for x32 to
22728 truncate DImode TLS address to a SImode register. */
22729
22730 static rtx
22731 ix86_copy_addr_to_reg (rtx addr)
22732 {
22733 if (GET_MODE (addr) == Pmode)
22734 return copy_addr_to_reg (addr);
22735 else
22736 {
22737 gcc_assert (GET_MODE (addr) == DImode && Pmode == SImode);
22738 return gen_rtx_SUBREG (SImode, copy_to_mode_reg (DImode, addr), 0);
22739 }
22740 }
22741
22742 /* When ISSETMEM is FALSE, output a simple loop to move memory from SRCPTR
22743 to DESTPTR in chunks of MODE, unrolled UNROLL times; the overall size is
22744 COUNT, specified in bytes. When ISSETMEM is TRUE, output the equivalent
22745 loop to set the memory to VALUE (assumed to be in MODE).
22746
22747 The size is rounded down to a whole number of chunks moved at once.
22748 SRCMEM and DESTMEM provide MEM rtxes to supply proper aliasing info. */
22749
22750
22751 static void
22752 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
22753 rtx destptr, rtx srcptr, rtx value,
22754 rtx count, enum machine_mode mode, int unroll,
22755 int expected_size, bool issetmem)
22756 {
22757 rtx out_label, top_label, iter, tmp;
22758 enum machine_mode iter_mode = counter_mode (count);
22759 int piece_size_n = GET_MODE_SIZE (mode) * unroll;
22760 rtx piece_size = GEN_INT (piece_size_n);
22761 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
22762 rtx size;
22763 int i;
22764
22765 top_label = gen_label_rtx ();
22766 out_label = gen_label_rtx ();
22767 iter = gen_reg_rtx (iter_mode);
22768
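/* SIZE is COUNT rounded down to a whole number of bytes moved per iteration
   (GET_MODE_SIZE (MODE) * UNROLL); the loop below runs while ITER < SIZE. */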
22769 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
22770 NULL, 1, OPTAB_DIRECT);
22771 /* Those two should combine. */
22772 if (piece_size == const1_rtx)
22773 {
22774 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
22775 true, out_label);
22776 predict_jump (REG_BR_PROB_BASE * 10 / 100);
22777 }
22778 emit_move_insn (iter, const0_rtx);
22779
22780 emit_label (top_label);
22781
22782 tmp = convert_modes (Pmode, iter_mode, iter, true);
22783
22784 /* This assert could be relaxed - in that case we'd need to compute the
22785 smallest power of two containing PIECE_SIZE_N and pass it to
22786 offset_address. */
22787 gcc_assert ((piece_size_n & (piece_size_n - 1)) == 0);
22788 destmem = offset_address (destmem, tmp, piece_size_n);
22789 destmem = adjust_address (destmem, mode, 0);
22790
22791 if (!issetmem)
22792 {
22793 srcmem = offset_address (srcmem, copy_rtx (tmp), piece_size_n);
22794 srcmem = adjust_address (srcmem, mode, 0);
22795
22796 /* When unrolling for chips that reorder memory reads and writes,
22797 we can save registers by using a single temporary.
22798 Using 4 temporaries is also overkill in 32-bit mode. */
22799 if (!TARGET_64BIT && 0)
22800 {
22801 for (i = 0; i < unroll; i++)
22802 {
22803 if (i)
22804 {
22805 destmem =
22806 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
22807 srcmem =
22808 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
22809 }
22810 emit_move_insn (destmem, srcmem);
22811 }
22812 }
22813 else
22814 {
22815 rtx tmpreg[4];
22816 gcc_assert (unroll <= 4);
22817 for (i = 0; i < unroll; i++)
22818 {
22819 tmpreg[i] = gen_reg_rtx (mode);
22820 if (i)
22821 {
22822 srcmem =
22823 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
22824 }
22825 emit_move_insn (tmpreg[i], srcmem);
22826 }
22827 for (i = 0; i < unroll; i++)
22828 {
22829 if (i)
22830 {
22831 destmem =
22832 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
22833 }
22834 emit_move_insn (destmem, tmpreg[i]);
22835 }
22836 }
22837 }
22838 else
22839 for (i = 0; i < unroll; i++)
22840 {
22841 if (i)
22842 destmem =
22843 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
22844 emit_move_insn (destmem, value);
22845 }
22846
22847 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
22848 true, OPTAB_LIB_WIDEN);
22849 if (tmp != iter)
22850 emit_move_insn (iter, tmp);
22851
22852 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
22853 true, top_label);
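/* Predict the back edge: if the expected size is known, take it with
   probability roughly 1 - 1/iterations; otherwise assume it is taken
   80% of the time. */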
22854 if (expected_size != -1)
22855 {
22856 expected_size /= GET_MODE_SIZE (mode) * unroll;
22857 if (expected_size == 0)
22858 predict_jump (0);
22859 else if (expected_size > REG_BR_PROB_BASE)
22860 predict_jump (REG_BR_PROB_BASE - 1);
22861 else
22862 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
22863 }
22864 else
22865 predict_jump (REG_BR_PROB_BASE * 80 / 100);
22866 iter = ix86_zero_extend_to_Pmode (iter);
22867 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
22868 true, OPTAB_LIB_WIDEN);
22869 if (tmp != destptr)
22870 emit_move_insn (destptr, tmp);
22871 if (!issetmem)
22872 {
22873 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
22874 true, OPTAB_LIB_WIDEN);
22875 if (tmp != srcptr)
22876 emit_move_insn (srcptr, tmp);
22877 }
22878 emit_label (out_label);
22879 }
22880
22881 /* Output a "rep; mov" or "rep; stos" instruction depending on the ISSETMEM
22882 argument. When ISSETMEM is true, arguments SRCMEM and SRCPTR are ignored.
22883 When ISSETMEM is false, arguments VALUE and ORIG_VALUE are ignored.
22884 For the setmem case, VALUE is ORIG_VALUE promoted to a wider size.
22885 ORIG_VALUE is the original value passed to memset to fill the memory with.
22886 Other arguments have the same meaning as for the previous function. */
22887
22888 static void
22889 expand_set_or_movmem_via_rep (rtx destmem, rtx srcmem,
22890 rtx destptr, rtx srcptr, rtx value, rtx orig_value,
22891 rtx count,
22892 enum machine_mode mode, bool issetmem)
22893 {
22894 rtx destexp;
22895 rtx srcexp;
22896 rtx countreg;
22897 HOST_WIDE_INT rounded_count;
22898
22899 /* If possible, it is shorter to use rep movs.
22900 TODO: Maybe it is better to move this logic to decide_alg. */
22901 if (mode == QImode && CONST_INT_P (count) && !(INTVAL (count) & 3)
22902 && (!issetmem || orig_value == const0_rtx))
22903 mode = SImode;
22904
22905 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
22906 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
22907
22908 countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
22909 GET_MODE_SIZE (mode)));
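/* Compute DESTPTR plus COUNTREG scaled by the chunk size; the
   rep_stos/rep_mov patterns below use this expression for the final
   destination pointer. */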
22910 if (mode != QImode)
22911 {
22912 destexp = gen_rtx_ASHIFT (Pmode, countreg,
22913 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
22914 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
22915 }
22916 else
22917 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
22918 if ((!issetmem || orig_value == const0_rtx) && CONST_INT_P (count))
22919 {
22920 rounded_count = (INTVAL (count)
22921 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
22922 destmem = shallow_copy_rtx (destmem);
22923 set_mem_size (destmem, rounded_count);
22924 }
22925 else if (MEM_SIZE_KNOWN_P (destmem))
22926 clear_mem_size (destmem);
22927
22928 if (issetmem)
22929 {
22930 value = force_reg (mode, gen_lowpart (mode, value));
22931 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
22932 }
22933 else
22934 {
22935 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
22936 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
22937 if (mode != QImode)
22938 {
22939 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
22940 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
22941 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
22942 }
22943 else
22944 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
22945 if (CONST_INT_P (count))
22946 {
22947 rounded_count = (INTVAL (count)
22948 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
22949 srcmem = shallow_copy_rtx (srcmem);
22950 set_mem_size (srcmem, rounded_count);
22951 }
22952 else
22953 {
22954 if (MEM_SIZE_KNOWN_P (srcmem))
22955 clear_mem_size (srcmem);
22956 }
22957 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
22958 destexp, srcexp));
22959 }
22960 }
22961
22962 /* This function emits moves to copy SIZE_TO_MOVE bytes from SRCMEM to
22963 DESTMEM.
22964 SRCMEM is passed by pointer so it can be updated on return.
22965 The return value is the updated DESTMEM. */
22966 static rtx
22967 emit_memmov (rtx destmem, rtx *srcmem, rtx destptr, rtx srcptr,
22968 HOST_WIDE_INT size_to_move)
22969 {
22970 rtx dst = destmem, src = *srcmem, adjust, tempreg;
22971 enum insn_code code;
22972 enum machine_mode move_mode;
22973 int piece_size, i;
22974
22975 /* Find the widest mode in which we can perform the moves.
22976 Start with the biggest power of 2 not exceeding SIZE_TO_MOVE and halve
22977 it until a move of that size is supported. */
22978 piece_size = 1 << floor_log2 (size_to_move);
22979 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
22980 code = optab_handler (mov_optab, move_mode);
22981 while (code == CODE_FOR_nothing && piece_size > 1)
22982 {
22983 piece_size >>= 1;
22984 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
22985 code = optab_handler (mov_optab, move_mode);
22986 }
22987
22988 /* Find the corresponding vector mode with the same size as MOVE_MODE.
22989 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
22990 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
22991 {
22992 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
22993 move_mode = mode_for_vector (word_mode, nunits);
22994 code = optab_handler (mov_optab, move_mode);
22995 if (code == CODE_FOR_nothing)
22996 {
22997 move_mode = word_mode;
22998 piece_size = GET_MODE_SIZE (move_mode);
22999 code = optab_handler (mov_optab, move_mode);
23000 }
23001 }
23002 gcc_assert (code != CODE_FOR_nothing);
23003
23004 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23005 src = adjust_automodify_address_nv (src, move_mode, srcptr, 0);
23006
23007 /* Emit the moves. We'll need SIZE_TO_MOVE/PIECE_SIZE of them. */
23008 gcc_assert (size_to_move % piece_size == 0);
23009 adjust = GEN_INT (piece_size);
23010 for (i = 0; i < size_to_move; i += piece_size)
23011 {
23012 /* We move from memory to memory, so we'll need to do it via
23013 a temporary register. */
23014 tempreg = gen_reg_rtx (move_mode);
23015 emit_insn (GEN_FCN (code) (tempreg, src));
23016 emit_insn (GEN_FCN (code) (dst, tempreg));
23017
23018 emit_move_insn (destptr,
23019 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23020 emit_move_insn (srcptr,
23021 gen_rtx_PLUS (Pmode, copy_rtx (srcptr), adjust));
23022
23023 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23024 piece_size);
23025 src = adjust_automodify_address_nv (src, move_mode, srcptr,
23026 piece_size);
23027 }
23028
23029 /* Update DST and SRC rtx. */
23030 *srcmem = src;
23031 return dst;
23032 }
23033
23034 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
23035 static void
23036 expand_movmem_epilogue (rtx destmem, rtx srcmem,
23037 rtx destptr, rtx srcptr, rtx count, int max_size)
23038 {
23039 rtx src, dest;
23040 if (CONST_INT_P (count))
23041 {
23042 HOST_WIDE_INT countval = INTVAL (count);
23043 HOST_WIDE_INT epilogue_size = countval % max_size;
23044 int i;
23045
23046 /* For now MAX_SIZE should be a power of 2. This assert could be
23047 relaxed, but that would require a somewhat more complicated epilogue
23048 expansion. */
23049 gcc_assert ((max_size & (max_size - 1)) == 0);
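/* EPILOGUE_SIZE is less than MAX_SIZE, so emitting one move per set bit of
   it, from the largest piece down, copies exactly the remaining bytes. */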
23050 for (i = max_size; i >= 1; i >>= 1)
23051 {
23052 if (epilogue_size & i)
23053 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
23054 }
23055 return;
23056 }
23057 if (max_size > 8)
23058 {
23059 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
23060 count, 1, OPTAB_DIRECT);
23061 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
23062 count, QImode, 1, 4, false);
23063 return;
23064 }
23065
23066 /* When single stringops are available, we can cheaply advance the dest and
23067 src pointers. Otherwise we save code size by maintaining an offset (zero
23068 is readily available from the preceding rep operation) and using x86
23069 addressing modes. */
23070 if (TARGET_SINGLE_STRINGOP)
23071 {
23072 if (max_size > 4)
23073 {
23074 rtx label = ix86_expand_aligntest (count, 4, true);
23075 src = change_address (srcmem, SImode, srcptr);
23076 dest = change_address (destmem, SImode, destptr);
23077 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23078 emit_label (label);
23079 LABEL_NUSES (label) = 1;
23080 }
23081 if (max_size > 2)
23082 {
23083 rtx label = ix86_expand_aligntest (count, 2, true);
23084 src = change_address (srcmem, HImode, srcptr);
23085 dest = change_address (destmem, HImode, destptr);
23086 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23087 emit_label (label);
23088 LABEL_NUSES (label) = 1;
23089 }
23090 if (max_size > 1)
23091 {
23092 rtx label = ix86_expand_aligntest (count, 1, true);
23093 src = change_address (srcmem, QImode, srcptr);
23094 dest = change_address (destmem, QImode, destptr);
23095 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23096 emit_label (label);
23097 LABEL_NUSES (label) = 1;
23098 }
23099 }
23100 else
23101 {
23102 rtx offset = force_reg (Pmode, const0_rtx);
23103 rtx tmp;
23104
23105 if (max_size > 4)
23106 {
23107 rtx label = ix86_expand_aligntest (count, 4, true);
23108 src = change_address (srcmem, SImode, srcptr);
23109 dest = change_address (destmem, SImode, destptr);
23110 emit_move_insn (dest, src);
23111 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
23112 true, OPTAB_LIB_WIDEN);
23113 if (tmp != offset)
23114 emit_move_insn (offset, tmp);
23115 emit_label (label);
23116 LABEL_NUSES (label) = 1;
23117 }
23118 if (max_size > 2)
23119 {
23120 rtx label = ix86_expand_aligntest (count, 2, true);
23121 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23122 src = change_address (srcmem, HImode, tmp);
23123 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23124 dest = change_address (destmem, HImode, tmp);
23125 emit_move_insn (dest, src);
23126 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
23127 true, OPTAB_LIB_WIDEN);
23128 if (tmp != offset)
23129 emit_move_insn (offset, tmp);
23130 emit_label (label);
23131 LABEL_NUSES (label) = 1;
23132 }
23133 if (max_size > 1)
23134 {
23135 rtx label = ix86_expand_aligntest (count, 1, true);
23136 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23137 src = change_address (srcmem, QImode, tmp);
23138 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23139 dest = change_address (destmem, QImode, tmp);
23140 emit_move_insn (dest, src);
23141 emit_label (label);
23142 LABEL_NUSES (label) = 1;
23143 }
23144 }
23145 }
23146
23147 /* This function emits stores to fill SIZE_TO_MOVE bytes starting at DESTMEM
23148 with the value PROMOTED_VAL.
23149 DESTPTR is advanced as the stores are emitted.
23150 The return value is the updated DESTMEM. */
23151 static rtx
23152 emit_memset (rtx destmem, rtx destptr, rtx promoted_val,
23153 HOST_WIDE_INT size_to_move)
23154 {
23155 rtx dst = destmem, adjust;
23156 enum insn_code code;
23157 enum machine_mode move_mode;
23158 int piece_size, i;
23159
23160 /* Pick the widest mode for the stores: start from the mode of PROMOTED_VAL
23161 and narrow it if SIZE_TO_MOVE is smaller than that mode. */
23163 move_mode = GET_MODE (promoted_val);
23164 if (move_mode == VOIDmode)
23165 move_mode = QImode;
23166 if (size_to_move < GET_MODE_SIZE (move_mode))
23167 {
23168 move_mode = mode_for_size (size_to_move * BITS_PER_UNIT, MODE_INT, 0);
23169 promoted_val = gen_lowpart (move_mode, promoted_val);
23170 }
23171 piece_size = GET_MODE_SIZE (move_mode);
23172 code = optab_handler (mov_optab, move_mode);
23173 gcc_assert (code != CODE_FOR_nothing && promoted_val != NULL_RTX);
23174
23175 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23176
23177 /* Emit the stores. We'll need SIZE_TO_MOVE/PIECE_SIZE of them. */
23178 gcc_assert (size_to_move % piece_size == 0);
23179 adjust = GEN_INT (piece_size);
23180 for (i = 0; i < size_to_move; i += piece_size)
23181 {
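/* Pieces that fit in a word go through the strset pattern, which also
   advances DESTPTR; wider (vector) pieces are stored directly and DESTPTR
   is advanced by an explicit move below. */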
23182 if (piece_size <= GET_MODE_SIZE (word_mode))
23183 {
23184 emit_insn (gen_strset (destptr, dst, promoted_val));
23185 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23186 piece_size);
23187 continue;
23188 }
23189
23190 emit_insn (GEN_FCN (code) (dst, promoted_val));
23191
23192 emit_move_insn (destptr,
23193 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23194
23195 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23196 piece_size);
23197 }
23198
23199 /* Update DST rtx. */
23200 return dst;
23201 }
23202 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
23203 static void
23204 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
23205 rtx count, int max_size)
23206 {
23207 count =
23208 expand_simple_binop (counter_mode (count), AND, count,
23209 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
23210 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
23211 gen_lowpart (QImode, value), count, QImode,
23212 1, max_size / 2, true);
23213 }
23214
23215 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
23216 static void
23217 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx vec_value,
23218 rtx count, int max_size)
23219 {
23220 rtx dest;
23221
23222 if (CONST_INT_P (count))
23223 {
23224 HOST_WIDE_INT countval = INTVAL (count);
23225 HOST_WIDE_INT epilogue_size = countval % max_size;
23226 int i;
23227
23228 /* For now MAX_SIZE should be a power of 2. This assert could be
23229 relaxed, but that would require a somewhat more complicated epilogue
23230 expansion. */
23231 gcc_assert ((max_size & (max_size - 1)) == 0);
23232 for (i = max_size; i >= 1; i >>= 1)
23233 {
23234 if (epilogue_size & i)
23235 {
23236 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
23237 destmem = emit_memset (destmem, destptr, vec_value, i);
23238 else
23239 destmem = emit_memset (destmem, destptr, value, i);
23240 }
23241 }
23242 return;
23243 }
23244 if (max_size > 32)
23245 {
23246 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
23247 return;
23248 }
23249 if (max_size > 16)
23250 {
23251 rtx label = ix86_expand_aligntest (count, 16, true);
23252 if (TARGET_64BIT)
23253 {
23254 dest = change_address (destmem, DImode, destptr);
23255 emit_insn (gen_strset (destptr, dest, value));
23256 dest = adjust_automodify_address_nv (dest, DImode, destptr, 8);
23257 emit_insn (gen_strset (destptr, dest, value));
23258 }
23259 else
23260 {
23261 dest = change_address (destmem, SImode, destptr);
23262 emit_insn (gen_strset (destptr, dest, value));
23263 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
23264 emit_insn (gen_strset (destptr, dest, value));
23265 dest = adjust_automodify_address_nv (dest, SImode, destptr, 8);
23266 emit_insn (gen_strset (destptr, dest, value));
23267 dest = adjust_automodify_address_nv (dest, SImode, destptr, 12);
23268 emit_insn (gen_strset (destptr, dest, value));
23269 }
23270 emit_label (label);
23271 LABEL_NUSES (label) = 1;
23272 }
23273 if (max_size > 8)
23274 {
23275 rtx label = ix86_expand_aligntest (count, 8, true);
23276 if (TARGET_64BIT)
23277 {
23278 dest = change_address (destmem, DImode, destptr);
23279 emit_insn (gen_strset (destptr, dest, value));
23280 }
23281 else
23282 {
23283 dest = change_address (destmem, SImode, destptr);
23284 emit_insn (gen_strset (destptr, dest, value));
23285 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
23286 emit_insn (gen_strset (destptr, dest, value));
23287 }
23288 emit_label (label);
23289 LABEL_NUSES (label) = 1;
23290 }
23291 if (max_size > 4)
23292 {
23293 rtx label = ix86_expand_aligntest (count, 4, true);
23294 dest = change_address (destmem, SImode, destptr);
23295 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
23296 emit_label (label);
23297 LABEL_NUSES (label) = 1;
23298 }
23299 if (max_size > 2)
23300 {
23301 rtx label = ix86_expand_aligntest (count, 2, true);
23302 dest = change_address (destmem, HImode, destptr);
23303 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
23304 emit_label (label);
23305 LABEL_NUSES (label) = 1;
23306 }
23307 if (max_size > 1)
23308 {
23309 rtx label = ix86_expand_aligntest (count, 1, true);
23310 dest = change_address (destmem, QImode, destptr);
23311 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
23312 emit_label (label);
23313 LABEL_NUSES (label) = 1;
23314 }
23315 }
23316
23317 /* Depending on ISSETMEM, copy enough from SRCMEM to DESTMEM or set enough to
23318 DESTMEM to align it to DESIRED_ALIGNMENT. Original alignment is ALIGN.
23319 Depending on ISSETMEM, either arguments SRCMEM/SRCPTR or VALUE/VEC_VALUE are
23320 ignored.
23321 Return value is updated DESTMEM. */
23322 static rtx
23323 expand_set_or_movmem_prologue (rtx destmem, rtx srcmem,
23324 rtx destptr, rtx srcptr, rtx value,
23325 rtx vec_value, rtx count, int align,
23326 int desired_alignment, bool issetmem)
23327 {
23328 int i;
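/* For each power-of-two size I below DESIRED_ALIGNMENT that the known
   alignment does not already guarantee, test bit I of DESTPTR at run time
   and copy or set I bytes when it is set, recording the doubled alignment
   on DESTMEM.  */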
23329 for (i = 1; i < desired_alignment; i <<= 1)
23330 {
23331 if (align <= i)
23332 {
23333 rtx label = ix86_expand_aligntest (destptr, i, false);
23334 if (issetmem)
23335 {
23336 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
23337 destmem = emit_memset (destmem, destptr, vec_value, i);
23338 else
23339 destmem = emit_memset (destmem, destptr, value, i);
23340 }
23341 else
23342 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
23343 ix86_adjust_counter (count, i);
23344 emit_label (label);
23345 LABEL_NUSES (label) = 1;
23346 set_mem_align (destmem, i * 2 * BITS_PER_UNIT);
23347 }
23348 }
23349 return destmem;
23350 }
23351
23352 /* Test if COUNT & SIZE is nonzero and if so, expand a movmem
23353 or setmem sequence that is valid for SIZE..2*SIZE-1 bytes
23354 and jump to DONE_LABEL.  */
23355 static void
23356 expand_small_movmem_or_setmem (rtx destmem, rtx srcmem,
23357 rtx destptr, rtx srcptr,
23358 rtx value, rtx vec_value,
23359 rtx count, int size,
23360 rtx done_label, bool issetmem)
23361 {
23362 rtx label = ix86_expand_aligntest (count, size, false);
23363 enum machine_mode mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 1);
23364 rtx modesize;
23365 int n;
23366
23367 /* If we do not have a vector value to copy, we must reduce the size.  */
23368 if (issetmem)
23369 {
23370 if (!vec_value)
23371 {
23372 if (GET_MODE (value) == VOIDmode && size > 8)
23373 mode = Pmode;
23374 else if (GET_MODE_SIZE (mode) > GET_MODE_SIZE (GET_MODE (value)))
23375 mode = GET_MODE (value);
23376 }
23377 else
23378 mode = GET_MODE (vec_value), value = vec_value;
23379 }
23380 else
23381 {
23382 /* Choose appropriate vector mode. */
23383 if (size >= 32)
23384 mode = TARGET_AVX ? V32QImode : TARGET_SSE ? V16QImode : DImode;
23385 else if (size >= 16)
23386 mode = TARGET_SSE ? V16QImode : DImode;
23387 srcmem = change_address (srcmem, mode, srcptr);
23388 }
23389 destmem = change_address (destmem, mode, destptr);
23390 modesize = GEN_INT (GET_MODE_SIZE (mode));
23391 gcc_assert (GET_MODE_SIZE (mode) <= size);
23392 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
23393 {
23394 if (issetmem)
23395 emit_move_insn (destmem, gen_lowpart (mode, value));
23396 else
23397 {
23398 emit_move_insn (destmem, srcmem);
23399 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
23400 }
23401 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
23402 }
23403
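/* Now handle the last SIZE bytes of the block, addressed relative to
   DESTPTR + COUNT; for COUNT in SIZE..2*SIZE-1 the two SIZE-byte pieces
   overlap but together cover the whole block.  */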
23404 destmem = offset_address (destmem, count, 1);
23405 destmem = offset_address (destmem, GEN_INT (-2 * size),
23406 GET_MODE_SIZE (mode));
23407 if (!issetmem)
23408 {
23409 srcmem = offset_address (srcmem, count, 1);
23410 srcmem = offset_address (srcmem, GEN_INT (-2 * size),
23411 GET_MODE_SIZE (mode));
23412 }
23413 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
23414 {
23415 if (issetmem)
23416 emit_move_insn (destmem, gen_lowpart (mode, value));
23417 else
23418 {
23419 emit_move_insn (destmem, srcmem);
23420 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
23421 }
23422 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
23423 }
23424 emit_jump_insn (gen_jump (done_label));
23425 emit_barrier ();
23426
23427 emit_label (label);
23428 LABEL_NUSES (label) = 1;
23429 }
23430
23431 /* Handle a small memcpy or memset (up to SIZE, which is supposed to be a small
23432 power of 2) and get ready for the main copy loop by copying the initial
23433 DESIRED_ALIGN-ALIGN bytes and the last SIZE bytes, adjusting DESTPTR/SRCPTR/
23434 COUNT so that we can proceed with a loop copying SIZE bytes at once.
23435 Do moves in MODE.  DONE_LABEL is a label after the whole copying sequence.
23436 The label is created on demand if *DONE_LABEL is NULL.
23437 MIN_SIZE is the minimal size of the copied block.  This value gets adjusted
23438 for the new bounds after the initial copies.
23439
23440 DESTMEM/SRCMEM are memory expressions pointing to the copied block,
23441 DESTPTR/SRCPTR are pointers to the block.  DYNAMIC_CHECK indicates whether
23442 we will dispatch to a library call for large blocks.
23443
23444 In pseudocode we do:
23445
23446 if (COUNT < SIZE)
23447 {
23448 Assume that SIZE is 4. Bigger sizes are handled analogously
23449 if (COUNT & 4)
23450 {
23451 copy 4 bytes from SRCPTR to DESTPTR
23452 copy 4 bytes from SRCPTR + COUNT - 4 to DESTPTR + COUNT - 4
23453 goto done_label
23454 }
23455 if (!COUNT)
23456 goto done_label;
23457 copy 1 byte from SRCPTR to DESTPTR
23458 if (COUNT & 2)
23459 {
23460 copy 2 bytes from SRCPTR to DESTPTR
23461 copy 2 bytes from SRCPTR + COUNT - 2 to DESTPTR + COUNT - 2
23462 }
23463 }
23464 else
23465 {
23466 copy at least DESIRED_ALIGN-ALIGN bytes from SRCPTR to DESTPTR
23467 copy SIZE bytes from SRCPTR + COUNT - SIZE to DESTPTR + COUNT -SIZE
23468
23469 OLD_DESTPTR = DESTPTR;
23470 Align DESTPTR up to DESIRED_ALIGN
23471 SRCPTR += DESTPTR - OLD_DESTPTR
23472 COUNT -= DESTPTR - OLD_DESTPTR
23473 if (DYNAMIC_CHECK)
23474 Round COUNT down to multiple of SIZE
23475 << optional caller supplied zero size guard is here >>
23476 << optional caller supplied dynamic check is here >>
23477 << caller supplied main copy loop is here >>
23478 }
23479 done_label:
23480 */
23481 static void
23482 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves (rtx destmem, rtx srcmem,
23483 rtx *destptr, rtx *srcptr,
23484 enum machine_mode mode,
23485 rtx value, rtx vec_value,
23486 rtx *count,
23487 rtx *done_label,
23488 int size,
23489 int desired_align,
23490 int align,
23491 unsigned HOST_WIDE_INT *min_size,
23492 bool dynamic_check,
23493 bool issetmem)
23494 {
23495 rtx loop_label = NULL, label;
23496 int n;
23497 rtx modesize;
23498 int prolog_size = 0;
23499 rtx mode_value;
23500
23501 /* Choose the proper value to copy.  */
23502 if (issetmem && VECTOR_MODE_P (mode))
23503 mode_value = vec_value;
23504 else
23505 mode_value = value;
23506 gcc_assert (GET_MODE_SIZE (mode) <= size);
23507
23508 /* See if block is big or small, handle small blocks. */
23509 if (!CONST_INT_P (*count) && *min_size < (unsigned HOST_WIDE_INT)size)
23510 {
23511 int size2 = size;
23512 loop_label = gen_label_rtx ();
23513
23514 if (!*done_label)
23515 *done_label = gen_label_rtx ();
23516
23517 emit_cmp_and_jump_insns (*count, GEN_INT (size2), GE, 0, GET_MODE (*count),
23518 1, loop_label);
23519 size2 >>= 1;
23520
23521 /* Handle sizes > 3. */
23522 for (;size2 > 2; size2 >>= 1)
23523 expand_small_movmem_or_setmem (destmem, srcmem,
23524 *destptr, *srcptr,
23525 value, vec_value,
23526 *count,
23527 size2, *done_label, issetmem);
23528 /* Nothing to copy? Jump to DONE_LABEL if so */
23529 emit_cmp_and_jump_insns (*count, const0_rtx, EQ, 0, GET_MODE (*count),
23530 1, *done_label);
23531
23532 /* Do a byte copy. */
23533 destmem = change_address (destmem, QImode, *destptr);
23534 if (issetmem)
23535 emit_move_insn (destmem, gen_lowpart (QImode, value));
23536 else
23537 {
23538 srcmem = change_address (srcmem, QImode, *srcptr);
23539 emit_move_insn (destmem, srcmem);
23540 }
23541
23542 /* Handle sizes 2 and 3. */
23543 label = ix86_expand_aligntest (*count, 2, false);
23544 destmem = change_address (destmem, HImode, *destptr);
23545 destmem = offset_address (destmem, *count, 1);
23546 destmem = offset_address (destmem, GEN_INT (-2), 2);
23547 if (issetmem)
23548 emit_move_insn (destmem, gen_lowpart (HImode, value));
23549 else
23550 {
23551 srcmem = change_address (srcmem, HImode, *srcptr);
23552 srcmem = offset_address (srcmem, *count, 1);
23553 srcmem = offset_address (srcmem, GEN_INT (-2), 2);
23554 emit_move_insn (destmem, srcmem);
23555 }
23556
23557 emit_label (label);
23558 LABEL_NUSES (label) = 1;
23559 emit_jump_insn (gen_jump (*done_label));
23560 emit_barrier ();
23561 }
23562 else
23563 gcc_assert (*min_size >= (unsigned HOST_WIDE_INT)size
23564 || UINTVAL (*count) >= (unsigned HOST_WIDE_INT)size);
23565
23566 /* Start memcpy for COUNT >= SIZE. */
23567 if (loop_label)
23568 {
23569 emit_label (loop_label);
23570 LABEL_NUSES (loop_label) = 1;
23571 }
23572
23573 /* Copy at least DESIRED_ALIGN - ALIGN bytes, one mode-sized piece at a time.  */
23574 if (!issetmem)
23575 srcmem = change_address (srcmem, mode, *srcptr);
23576 destmem = change_address (destmem, mode, *destptr);
23577 modesize = GEN_INT (GET_MODE_SIZE (mode));
23578 for (n = 0; prolog_size < desired_align - align; n++)
23579 {
23580 if (issetmem)
23581 emit_move_insn (destmem, mode_value);
23582 else
23583 {
23584 emit_move_insn (destmem, srcmem);
23585 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
23586 }
23587 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
23588 prolog_size += GET_MODE_SIZE (mode);
23589 }
23590
23591
23592 /* Copy last SIZE bytes. */
23593 destmem = offset_address (destmem, *count, 1);
23594 destmem = offset_address (destmem,
23595 GEN_INT (-size - prolog_size),
23596 1);
23597 if (issetmem)
23598 emit_move_insn (destmem, mode_value);
23599 else
23600 {
23601 srcmem = offset_address (srcmem, *count, 1);
23602 srcmem = offset_address (srcmem,
23603 GEN_INT (-size - prolog_size),
23604 1);
23605 emit_move_insn (destmem, srcmem);
23606 }
23607 for (n = 1; n * GET_MODE_SIZE (mode) < size; n++)
23608 {
23609 destmem = offset_address (destmem, modesize, 1);
23610 if (issetmem)
23611 emit_move_insn (destmem, mode_value);
23612 else
23613 {
23614 srcmem = offset_address (srcmem, modesize, 1);
23615 emit_move_insn (destmem, srcmem);
23616 }
23617 }
23618
23619 /* Align destination. */
23620 if (desired_align > 1 && desired_align > align)
23621 {
23622 rtx saveddest = *destptr;
23623
23624 gcc_assert (desired_align <= size);
23625 /* Align DESTPTR up, placing the result in a new register.  */
23626 *destptr = expand_simple_binop (GET_MODE (*destptr), PLUS, *destptr,
23627 GEN_INT (prolog_size),
23628 NULL_RTX, 1, OPTAB_DIRECT);
23629 *destptr = expand_simple_binop (GET_MODE (*destptr), AND, *destptr,
23630 GEN_INT (-desired_align),
23631 *destptr, 1, OPTAB_DIRECT);
23632 /* See how many bytes we skipped. */
23633 saveddest = expand_simple_binop (GET_MODE (*destptr), MINUS, saveddest,
23634 *destptr,
23635 saveddest, 1, OPTAB_DIRECT);
23636 /* Adjust srcptr and count. */
23637 if (!issetmem)
23638 *srcptr = expand_simple_binop (GET_MODE (*srcptr), MINUS, *srcptr, saveddest,
23639 *srcptr, 1, OPTAB_DIRECT);
23640 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
23641 saveddest, *count, 1, OPTAB_DIRECT);
23642 /* We copied at most size + prolog_size. */
23643 if (*min_size > (unsigned HOST_WIDE_INT)(size + prolog_size))
23644 *min_size = (*min_size - size) & ~(unsigned HOST_WIDE_INT)(size - 1);
23645 else
23646 *min_size = 0;
23647
23648 /* Our loops always round down the block size, but for dispatch to a
23649 library call we need the precise value.  */
23650 if (dynamic_check)
23651 *count = expand_simple_binop (GET_MODE (*count), AND, *count,
23652 GEN_INT (-size), *count, 1, OPTAB_DIRECT);
23653 }
23654 else
23655 {
23656 gcc_assert (prolog_size == 0);
23657 /* Decrease COUNT so we won't end up copying the last word twice.  */
23658 if (!CONST_INT_P (*count))
23659 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
23660 constm1_rtx, *count, 1, OPTAB_DIRECT);
23661 else
23662 *count = GEN_INT ((UINTVAL (*count) - 1) & ~(unsigned HOST_WIDE_INT)(size - 1));
23663 if (*min_size)
23664 *min_size = (*min_size - 1) & ~(unsigned HOST_WIDE_INT)(size - 1);
23665 }
23666 }
23667
23668
23669 /* This function is like the previous one, except here we know how many bytes
23670 need to be copied. That allows us to update alignment not only of DST, which
23671 is returned, but also of SRC, which is passed as a pointer for that
23672 reason. */
23673 static rtx
23674 expand_set_or_movmem_constant_prologue (rtx dst, rtx *srcp, rtx destreg,
23675 rtx srcreg, rtx value, rtx vec_value,
23676 int desired_align, int align_bytes,
23677 bool issetmem)
23678 {
23679 rtx src = NULL;
23680 rtx orig_dst = dst;
23681 rtx orig_src = NULL;
23682 int piece_size = 1;
23683 int copied_bytes = 0;
23684
23685 if (!issetmem)
23686 {
23687 gcc_assert (srcp != NULL);
23688 src = *srcp;
23689 orig_src = src;
23690 }
23691
23692 for (piece_size = 1;
23693 piece_size <= desired_align && copied_bytes < align_bytes;
23694 piece_size <<= 1)
23695 {
23696 if (align_bytes & piece_size)
23697 {
23698 if (issetmem)
23699 {
23700 if (vec_value && piece_size > GET_MODE_SIZE (GET_MODE (value)))
23701 dst = emit_memset (dst, destreg, vec_value, piece_size);
23702 else
23703 dst = emit_memset (dst, destreg, value, piece_size);
23704 }
23705 else
23706 dst = emit_memmov (dst, &src, destreg, srcreg, piece_size);
23707 copied_bytes += piece_size;
23708 }
23709 }
23710 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
23711 set_mem_align (dst, desired_align * BITS_PER_UNIT);
23712 if (MEM_SIZE_KNOWN_P (orig_dst))
23713 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
23714
23715 if (!issetmem)
23716 {
23717 int src_align_bytes = get_mem_align_offset (src, desired_align
23718 * BITS_PER_UNIT);
23719 if (src_align_bytes >= 0)
23720 src_align_bytes = desired_align - src_align_bytes;
23721 if (src_align_bytes >= 0)
23722 {
23723 unsigned int src_align;
23724 for (src_align = desired_align; src_align >= 2; src_align >>= 1)
23725 {
23726 if ((src_align_bytes & (src_align - 1))
23727 == (align_bytes & (src_align - 1)))
23728 break;
23729 }
23730 if (src_align > (unsigned int) desired_align)
23731 src_align = desired_align;
23732 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
23733 set_mem_align (src, src_align * BITS_PER_UNIT);
23734 }
23735 if (MEM_SIZE_KNOWN_P (orig_src))
23736 set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
23737 *srcp = src;
23738 }
23739
23740 return dst;
23741 }
23742
23743 /* Return true if ALG can be used in the current context.
23744 Assume we expand memset if MEMSET is true.  */
23745 static bool
23746 alg_usable_p (enum stringop_alg alg, bool memset)
23747 {
23748 if (alg == no_stringop)
23749 return false;
23750 if (alg == vector_loop)
23751 return TARGET_SSE || TARGET_AVX;
23752 /* Algorithms using the rep prefix want at least edi and ecx;
23753 additionally, memset wants eax and memcpy wants esi. Don't
23754 consider such algorithms if the user has appropriated those
23755 registers for their own purposes. */
23756 if (alg == rep_prefix_1_byte
23757 || alg == rep_prefix_4_byte
23758 || alg == rep_prefix_8_byte)
23759 return !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
23760 || (memset ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
23761 return true;
23762 }
23763
23764 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
23765 static enum stringop_alg
23766 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
23767 unsigned HOST_WIDE_INT min_size, unsigned HOST_WIDE_INT max_size,
23768 bool memset, bool zero_memset, int *dynamic_check, bool *noalign)
23769 {
23770 const struct stringop_algs * algs;
23771 bool optimize_for_speed;
23772 int max = -1;
23773 const struct processor_costs *cost;
23774 int i;
23775 bool any_alg_usable_p = false;
23776
23777 *noalign = false;
23778 *dynamic_check = -1;
23779
23780 /* Even if the string operation call is cold, we still might spend a lot
23781 of time processing large blocks. */
23782 if (optimize_function_for_size_p (cfun)
23783 || (optimize_insn_for_size_p ()
23784 && (max_size < 256
23785 || (expected_size != -1 && expected_size < 256))))
23786 optimize_for_speed = false;
23787 else
23788 optimize_for_speed = true;
23789
23790 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
23791 if (memset)
23792 algs = &cost->memset[TARGET_64BIT != 0];
23793 else
23794 algs = &cost->memcpy[TARGET_64BIT != 0];
23795
23796 /* Find the maximal size handled by some usable non-libcall algorithm in the cost table.  */
23797 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
23798 {
23799 enum stringop_alg candidate = algs->size[i].alg;
23800 bool usable = alg_usable_p (candidate, memset);
23801 any_alg_usable_p |= usable;
23802
23803 if (candidate != libcall && candidate && usable)
23804 max = algs->size[i].max;
23805 }
23806
23807 /* If the expected size is not known but the max size is small enough
23808 so that the inline version is a win, set the expected size into
23809 the range.  */
23810 if (max > 1 && (unsigned HOST_WIDE_INT) max >= max_size
23811 && expected_size == -1)
23812 expected_size = min_size / 2 + max_size / 2;
23813
23814 /* If the user specified the algorithm, honor it if possible.  */
23815 if (ix86_stringop_alg != no_stringop
23816 && alg_usable_p (ix86_stringop_alg, memset))
23817 return ix86_stringop_alg;
23818 /* rep; movq or rep; movl is the smallest variant. */
23819 else if (!optimize_for_speed)
23820 {
23821 *noalign = true;
23822 if (!count || (count & 3) || (memset && !zero_memset))
23823 return alg_usable_p (rep_prefix_1_byte, memset)
23824 ? rep_prefix_1_byte : loop_1_byte;
23825 else
23826 return alg_usable_p (rep_prefix_4_byte, memset)
23827 ? rep_prefix_4_byte : loop;
23828 }
23829 /* Very tiny blocks are best handled via the loop; REP is expensive to
23830 set up.  */
23831 else if (expected_size != -1 && expected_size < 4)
23832 return loop_1_byte;
23833 else if (expected_size != -1)
23834 {
23835 enum stringop_alg alg = libcall;
23836 bool alg_noalign = false;
23837 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
23838 {
23839 /* We get here if the algorithms that were not libcall-based
23840 were rep-prefix based and we are unable to use rep prefixes
23841 based on global register usage. Break out of the loop and
23842 use the heuristic below. */
23843 if (algs->size[i].max == 0)
23844 break;
23845 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
23846 {
23847 enum stringop_alg candidate = algs->size[i].alg;
23848
23849 if (candidate != libcall && alg_usable_p (candidate, memset))
23850 {
23851 alg = candidate;
23852 alg_noalign = algs->size[i].noalign;
23853 }
23854 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
23855 last non-libcall inline algorithm. */
23856 if (TARGET_INLINE_ALL_STRINGOPS)
23857 {
23858 /* When the current size is best copied by a libcall, but we are
23859 still forced to inline, run the heuristic below that will pick
23860 code for medium-sized blocks.  */
23861 if (alg != libcall)
23862 {
23863 *noalign = alg_noalign;
23864 return alg;
23865 }
23866 break;
23867 }
23868 else if (alg_usable_p (candidate, memset))
23869 {
23870 *noalign = algs->size[i].noalign;
23871 return candidate;
23872 }
23873 }
23874 }
23875 }
23876 /* When asked to inline the call anyway, try to pick a meaningful choice.
23877 We look for the maximal size of block that is faster to copy by hand and
23878 take blocks of at most that size, guessing that the average size will
23879 be roughly half of the block.
23880
23881 If this turns out to be bad, we might simply specify the preferred
23882 choice in ix86_costs.  */
23883 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
23884 && (algs->unknown_size == libcall
23885 || !alg_usable_p (algs->unknown_size, memset)))
23886 {
23887 enum stringop_alg alg;
23888
23889 /* If there aren't any usable algorithms, then recursing on
23890 smaller sizes isn't going to find anything. Just return the
23891 simple byte-at-a-time copy loop. */
23892 if (!any_alg_usable_p)
23893 {
23894 /* Pick something reasonable. */
23895 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
23896 *dynamic_check = 128;
23897 return loop_1_byte;
23898 }
23899 if (max == -1)
23900 max = 4096;
23901 alg = decide_alg (count, max / 2, min_size, max_size, memset,
23902 zero_memset, dynamic_check, noalign);
23903 gcc_assert (*dynamic_check == -1);
23904 gcc_assert (alg != libcall);
23905 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
23906 *dynamic_check = max;
23907 return alg;
23908 }
23909 return (alg_usable_p (algs->unknown_size, memset)
23910 ? algs->unknown_size : libcall);
23911 }
23912
23913 /* Decide on alignment. We know that the operand is already aligned to ALIGN
23914 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
23915 static int
23916 decide_alignment (int align,
23917 enum stringop_alg alg,
23918 int expected_size,
23919 enum machine_mode move_mode)
23920 {
23921 int desired_align = 0;
23922
23923 gcc_assert (alg != no_stringop);
23924
23925 if (alg == libcall)
23926 return 0;
23927 if (move_mode == VOIDmode)
23928 return 0;
23929
23930 desired_align = GET_MODE_SIZE (move_mode);
23931 /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
23932 copying a whole cache line at once.  */
23933 if (TARGET_PENTIUMPRO
23934 && (alg == rep_prefix_4_byte || alg == rep_prefix_1_byte))
23935 desired_align = 8;
23936
23937 if (optimize_size)
23938 desired_align = 1;
23939 if (desired_align < align)
23940 desired_align = align;
23941 if (expected_size != -1 && expected_size < 4)
23942 desired_align = align;
23943
23944 return desired_align;
23945 }
23946
23947
23948 /* Helper function for memset.  For a QImode value 0xXY produce
23949 0xXYXYXYXY of the width specified by MODE.  This is essentially
23950 a multiplication by 0x01010101, but we can do slightly better than
23951 synth_mult by unwinding the sequence by hand on CPUs with
23952 a slow multiply.  */
23953 static rtx
23954 promote_duplicated_reg (enum machine_mode mode, rtx val)
23955 {
23956 enum machine_mode valmode = GET_MODE (val);
23957 rtx tmp;
23958 int nops = mode == DImode ? 3 : 2;
23959
23960 gcc_assert (mode == SImode || mode == DImode || val == const0_rtx);
23961 if (val == const0_rtx)
23962 return copy_to_mode_reg (mode, CONST0_RTX (mode));
23963 if (CONST_INT_P (val))
23964 {
23965 HOST_WIDE_INT v = INTVAL (val) & 255;
23966
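/* Broadcast the low byte into every byte of the constant, e.g.
   0x2a becomes 0x2a2a2a2a (0x2a2a2a2a2a2a2a2a for DImode).  */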
23967 v |= v << 8;
23968 v |= v << 16;
23969 if (mode == DImode)
23970 v |= (v << 16) << 16;
23971 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
23972 }
23973
23974 if (valmode == VOIDmode)
23975 valmode = QImode;
23976 if (valmode != QImode)
23977 val = gen_lowpart (QImode, val);
23978 if (mode == QImode)
23979 return val;
23980 if (!TARGET_PARTIAL_REG_STALL)
23981 nops--;
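/* Use a multiply by the broadcast constant 0x01...01 when it is cheaper
   than the shift-and-or sequence emitted in the else branch below.  */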
23982 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
23983 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
23984 <= (ix86_cost->shift_const + ix86_cost->add) * nops
23985 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
23986 {
23987 rtx reg = convert_modes (mode, QImode, val, true);
23988 tmp = promote_duplicated_reg (mode, const1_rtx);
23989 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
23990 OPTAB_DIRECT);
23991 }
23992 else
23993 {
23994 rtx reg = convert_modes (mode, QImode, val, true);
23995
23996 if (!TARGET_PARTIAL_REG_STALL)
23997 if (mode == SImode)
23998 emit_insn (gen_movsi_insv_1 (reg, reg));
23999 else
24000 emit_insn (gen_movdi_insv_1 (reg, reg));
24001 else
24002 {
24003 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
24004 NULL, 1, OPTAB_DIRECT);
24005 reg =
24006 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24007 }
24008 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
24009 NULL, 1, OPTAB_DIRECT);
24010 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24011 if (mode == SImode)
24012 return reg;
24013 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
24014 NULL, 1, OPTAB_DIRECT);
24015 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24016 return reg;
24017 }
24018 }
24019
24020 /* Duplicate value VAL using promote_duplicated_reg into the maximal size that
24021 will be needed by the main loop copying SIZE_NEEDED chunks and the prologue
24022 getting alignment from ALIGN to DESIRED_ALIGN.  */
24023 static rtx
24024 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align,
24025 int align)
24026 {
24027 rtx promoted_val;
24028
24029 if (TARGET_64BIT
24030 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
24031 promoted_val = promote_duplicated_reg (DImode, val);
24032 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
24033 promoted_val = promote_duplicated_reg (SImode, val);
24034 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
24035 promoted_val = promote_duplicated_reg (HImode, val);
24036 else
24037 promoted_val = val;
24038
24039 return promoted_val;
24040 }
24041
24042 /* Expand string move (memcpy) or store (memset) operation.  Use i386 string
24043 operations when profitable. The code depends upon architecture, block size
24044 and alignment, but always has one of the following overall structures:
24045
24046 Aligned move sequence:
24047
24048 1) Prologue guard: Conditional that jumps up to the epilogues for small
24049 blocks that can be handled by the epilogue alone.  This is faster
24050 but also needed for correctness, since the prologue assumes the block
24051 is larger than the desired alignment.
24052
24053 Optional dynamic check for size and libcall for large
24054 blocks is emitted here too, with -minline-stringops-dynamically.
24055
24056 2) Prologue: copy first few bytes in order to get destination
24057 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
24058 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
24059 copied. We emit either a jump tree on power of two sized
24060 blocks, or a byte loop.
24061
24062 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24063 with specified algorithm.
24064
24065 4) Epilogue: code copying tail of the block that is too small to be
24066 handled by main body (or up to size guarded by prologue guard).
24067
24068 Misaligned move sequence
24069
24070 1) Misaligned move prologue/epilogue containing:
24071 a) Prologue handling small memory blocks and jumping to done_label
24072 (skipped if blocks are known to be large enough)
24073 b) Single move copying the first DESIRED_ALIGN-ALIGN bytes if alignment is
24074 needed by a single possibly misaligned move
24075 (skipped if alignment is not needed)
24076 c) Copy of the last SIZE_NEEDED bytes by possibly misaligned moves
24077
24078 2) Zero size guard dispatching to done_label, if needed
24079
24080 3) Dispatch to a library call, if needed
24081
24082 4) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24083 with specified algorithm. */
24084 bool
24085 ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
24086 rtx align_exp, rtx expected_align_exp,
24087 rtx expected_size_exp, rtx min_size_exp,
24088 rtx max_size_exp, rtx probable_max_size_exp,
24089 bool issetmem)
24090 {
24091 rtx destreg;
24092 rtx srcreg = NULL;
24093 rtx label = NULL;
24094 rtx tmp;
24095 rtx jump_around_label = NULL;
24096 HOST_WIDE_INT align = 1;
24097 unsigned HOST_WIDE_INT count = 0;
24098 HOST_WIDE_INT expected_size = -1;
24099 int size_needed = 0, epilogue_size_needed;
24100 int desired_align = 0, align_bytes = 0;
24101 enum stringop_alg alg;
24102 rtx promoted_val = NULL;
24103 rtx vec_promoted_val = NULL;
24104 bool force_loopy_epilogue = false;
24105 int dynamic_check;
24106 bool need_zero_guard = false;
24107 bool noalign;
24108 enum machine_mode move_mode = VOIDmode;
24109 int unroll_factor = 1;
24110 /* TODO: Once value ranges are available, fill in proper data. */
24111 unsigned HOST_WIDE_INT min_size = 0;
24112 unsigned HOST_WIDE_INT max_size = -1;
24113 unsigned HOST_WIDE_INT probable_max_size = -1;
24114 bool misaligned_prologue_used = false;
24115
24116 if (CONST_INT_P (align_exp))
24117 align = INTVAL (align_exp);
24118 /* i386 can do misaligned accesses at a reasonably increased cost.  */
24119 if (CONST_INT_P (expected_align_exp)
24120 && INTVAL (expected_align_exp) > align)
24121 align = INTVAL (expected_align_exp);
24122 /* ALIGN is the minimum of destination and source alignment, but we care here
24123 just about destination alignment. */
24124 else if (!issetmem
24125 && MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
24126 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
24127
24128 if (CONST_INT_P (count_exp))
24129 min_size = max_size = probable_max_size = count = expected_size
24130 = INTVAL (count_exp);
24131 else
24132 {
24133 if (min_size_exp)
24134 min_size = INTVAL (min_size_exp);
24135 if (max_size_exp)
24136 max_size = INTVAL (max_size_exp);
24137 if (probable_max_size_exp)
24138 probable_max_size = INTVAL (probable_max_size_exp);
24139 if (CONST_INT_P (expected_size_exp) && count == 0)
24140 expected_size = INTVAL (expected_size_exp);
24141 }
24142
24143 /* Make sure we don't need to care about overflow later on. */
24144 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
24145 return false;
24146
24147 /* Step 0: Decide on preferred algorithm, desired alignment and
24148 size of chunks to be copied by main loop. */
24149 alg = decide_alg (count, expected_size, min_size, probable_max_size,
24150 issetmem,
24151 issetmem && val_exp == const0_rtx,
24152 &dynamic_check, &noalign);
24153 if (alg == libcall)
24154 return false;
24155 gcc_assert (alg != no_stringop);
24156
24157 /* For now the vector version of memset is generated only for memory zeroing, as
24158 creating the promoted vector value is very cheap in this case.  */
24159 if (issetmem && alg == vector_loop && val_exp != const0_rtx)
24160 alg = unrolled_loop;
24161
24162 if (!count)
24163 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
24164 destreg = ix86_copy_addr_to_reg (XEXP (dst, 0));
24165 if (!issetmem)
24166 srcreg = ix86_copy_addr_to_reg (XEXP (src, 0));
24167
24168 unroll_factor = 1;
24169 move_mode = word_mode;
24170 switch (alg)
24171 {
24172 case libcall:
24173 case no_stringop:
24174 case last_alg:
24175 gcc_unreachable ();
24176 case loop_1_byte:
24177 need_zero_guard = true;
24178 move_mode = QImode;
24179 break;
24180 case loop:
24181 need_zero_guard = true;
24182 break;
24183 case unrolled_loop:
24184 need_zero_guard = true;
24185 unroll_factor = (TARGET_64BIT ? 4 : 2);
24186 break;
24187 case vector_loop:
24188 need_zero_guard = true;
24189 unroll_factor = 4;
24190 /* Find the widest supported mode. */
24191 move_mode = word_mode;
24192 while (optab_handler (mov_optab, GET_MODE_WIDER_MODE (move_mode))
24193 != CODE_FOR_nothing)
24194 move_mode = GET_MODE_WIDER_MODE (move_mode);
24195
24196 /* Find the corresponding vector mode with the same size as MOVE_MODE.
24197 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
24198 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
24199 {
24200 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
24201 move_mode = mode_for_vector (word_mode, nunits);
24202 if (optab_handler (mov_optab, move_mode) == CODE_FOR_nothing)
24203 move_mode = word_mode;
24204 }
24205 gcc_assert (optab_handler (mov_optab, move_mode) != CODE_FOR_nothing);
24206 break;
24207 case rep_prefix_8_byte:
24208 move_mode = DImode;
24209 break;
24210 case rep_prefix_4_byte:
24211 move_mode = SImode;
24212 break;
24213 case rep_prefix_1_byte:
24214 move_mode = QImode;
24215 break;
24216 }
24217 size_needed = GET_MODE_SIZE (move_mode) * unroll_factor;
24218 epilogue_size_needed = size_needed;
24219
24220 desired_align = decide_alignment (align, alg, expected_size, move_mode);
24221 if (!TARGET_ALIGN_STRINGOPS || noalign)
24222 align = desired_align;
24223
24224 /* Step 1: Prologue guard. */
24225
24226 /* Alignment code needs count to be in register. */
24227 if (CONST_INT_P (count_exp) && desired_align > align)
24228 {
24229 if (INTVAL (count_exp) > desired_align
24230 && INTVAL (count_exp) > size_needed)
24231 {
24232 align_bytes
24233 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
24234 if (align_bytes <= 0)
24235 align_bytes = 0;
24236 else
24237 align_bytes = desired_align - align_bytes;
24238 }
24239 if (align_bytes == 0)
24240 count_exp = force_reg (counter_mode (count_exp), count_exp);
24241 }
24242 gcc_assert (desired_align >= 1 && align >= 1);
24243
24244 /* Misaligned move sequences handle both prologue and epilogue at once.
24245 Default code generation results in smaller code for large alignments
24246 and also avoids redundant work when sizes are known precisely.  */
24247 misaligned_prologue_used
24248 = (TARGET_MISALIGNED_MOVE_STRING_PRO_EPILOGUES
24249 && MAX (desired_align, epilogue_size_needed) <= 32
24250 && desired_align <= epilogue_size_needed
24251 && ((desired_align > align && !align_bytes)
24252 || (!count && epilogue_size_needed > 1)));
24253
24254 /* Do the cheap promotion to allow better CSE across the
24255 main loop and epilogue (i.e. one load of the big constant in
24256 front of all code).
24257 For now the misaligned move sequences do not have a fast path
24258 without broadcasting.  */
24259 if (issetmem && ((CONST_INT_P (val_exp) || misaligned_prologue_used)))
24260 {
24261 if (alg == vector_loop)
24262 {
24263 gcc_assert (val_exp == const0_rtx);
24264 vec_promoted_val = promote_duplicated_reg (move_mode, val_exp);
24265 promoted_val = promote_duplicated_reg_to_size (val_exp,
24266 GET_MODE_SIZE (word_mode),
24267 desired_align, align);
24268 }
24269 else
24270 {
24271 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
24272 desired_align, align);
24273 }
24274 }
24275 /* Misaligned move sequences handle both prologues and epilogues at once.
24276 Default code generation results in smaller code for large alignments and
24277 also avoids redundant work when sizes are known precisely.  */
24278 if (misaligned_prologue_used)
24279 {
24280 /* The misaligned move prologue handles small blocks by itself.  */
24281 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves
24282 (dst, src, &destreg, &srcreg,
24283 move_mode, promoted_val, vec_promoted_val,
24284 &count_exp,
24285 &jump_around_label,
24286 desired_align < align
24287 ? MAX (desired_align, epilogue_size_needed) : epilogue_size_needed,
24288 desired_align, align, &min_size, dynamic_check, issetmem);
24289 if (!issetmem)
24290 src = change_address (src, BLKmode, srcreg);
24291 dst = change_address (dst, BLKmode, destreg);
24292 set_mem_align (dst, desired_align * BITS_PER_UNIT);
24293 epilogue_size_needed = 0;
24294 if (need_zero_guard && !min_size)
24295 {
24296 /* It is possible that we copied enough so the main loop will not
24297 execute. */
24298 gcc_assert (size_needed > 1);
24299 if (jump_around_label == NULL_RTX)
24300 jump_around_label = gen_label_rtx ();
24301 emit_cmp_and_jump_insns (count_exp,
24302 GEN_INT (size_needed),
24303 LTU, 0, counter_mode (count_exp), 1, jump_around_label);
24304 if (expected_size == -1
24305 || expected_size < (desired_align - align) / 2 + size_needed)
24306 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24307 else
24308 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24309 }
24310 }
24311 /* Ensure that alignment prologue won't copy past end of block. */
24312 else if (size_needed > 1 || (desired_align > 1 && desired_align > align))
24313 {
24314 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
24315 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
24316 Make sure it is power of 2. */
24317 epilogue_size_needed = 1 << (floor_log2 (epilogue_size_needed) + 1);
24318
24319 /* To improve performance of small blocks, we jump around the VAL
24320 promoting code.  This means that if the promoted VAL is not constant,
24321 we might not use it in the epilogue and have to use the byte
24322 loop variant.  */
24323 if (issetmem && epilogue_size_needed > 2 && !promoted_val)
24324 force_loopy_epilogue = true;
24325 if ((count && count < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24326 || max_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24327 {
24328 /* If main algorithm works on QImode, no epilogue is needed.
24329 For small sizes just don't align anything. */
24330 if (size_needed == 1)
24331 desired_align = align;
24332 else
24333 goto epilogue;
24334 }
24335 else if (!count
24336 && min_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24337 {
24338 label = gen_label_rtx ();
24339 emit_cmp_and_jump_insns (count_exp,
24340 GEN_INT (epilogue_size_needed),
24341 LTU, 0, counter_mode (count_exp), 1, label);
24342 if (expected_size == -1 || expected_size < epilogue_size_needed)
24343 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24344 else
24345 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24346 }
24347 }
24348
24349 /* Emit code to decide on runtime whether library call or inline should be
24350 used. */
24351 if (dynamic_check != -1)
24352 {
24353 if (!issetmem && CONST_INT_P (count_exp))
24354 {
24355 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
24356 {
24357 emit_block_move_via_libcall (dst, src, count_exp, false);
24358 count_exp = const0_rtx;
24359 goto epilogue;
24360 }
24361 }
24362 else
24363 {
24364 rtx hot_label = gen_label_rtx ();
24365 if (jump_around_label == NULL_RTX)
24366 jump_around_label = gen_label_rtx ();
24367 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
24368 LEU, 0, GET_MODE (count_exp), 1, hot_label);
24369 predict_jump (REG_BR_PROB_BASE * 90 / 100);
24370 if (issetmem)
24371 set_storage_via_libcall (dst, count_exp, val_exp, false);
24372 else
24373 emit_block_move_via_libcall (dst, src, count_exp, false);
24374 emit_jump (jump_around_label);
24375 emit_label (hot_label);
24376 }
24377 }
24378
24379 /* Step 2: Alignment prologue. */
24380 /* Do the expensive promotion once we branched off the small blocks. */
24381 if (issetmem && !promoted_val)
24382 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
24383 desired_align, align);
24384
24385 if (desired_align > align && !misaligned_prologue_used)
24386 {
24387 if (align_bytes == 0)
24388 {
24389 /* Except for the first move in the prologue, we no longer know
24390 the constant offset in aliasing info.  It doesn't seem worth
24391 the pain to maintain it for the first move, so throw away
24392 the info early.  */
24393 dst = change_address (dst, BLKmode, destreg);
24394 if (!issetmem)
24395 src = change_address (src, BLKmode, srcreg);
24396 dst = expand_set_or_movmem_prologue (dst, src, destreg, srcreg,
24397 promoted_val, vec_promoted_val,
24398 count_exp, align, desired_align,
24399 issetmem);
24400 /* At most desired_align - align bytes are copied. */
24401 if (min_size < (unsigned)(desired_align - align))
24402 min_size = 0;
24403 else
24404 min_size -= desired_align - align;
24405 }
24406 else
24407 {
24408 /* If we know how many bytes need to be stored before dst is
24409 sufficiently aligned, maintain aliasing info accurately. */
24410 dst = expand_set_or_movmem_constant_prologue (dst, &src, destreg,
24411 srcreg,
24412 promoted_val,
24413 vec_promoted_val,
24414 desired_align,
24415 align_bytes,
24416 issetmem);
24417
24418 count_exp = plus_constant (counter_mode (count_exp),
24419 count_exp, -align_bytes);
24420 count -= align_bytes;
24421 min_size -= align_bytes;
24422 max_size -= align_bytes;
24423 }
24424 if (need_zero_guard
24425 && !min_size
24426 && (count < (unsigned HOST_WIDE_INT) size_needed
24427 || (align_bytes == 0
24428 && count < ((unsigned HOST_WIDE_INT) size_needed
24429 + desired_align - align))))
24430 {
24431 /* It is possible that we copied enough so the main loop will not
24432 execute. */
24433 gcc_assert (size_needed > 1);
24434 if (label == NULL_RTX)
24435 label = gen_label_rtx ();
24436 emit_cmp_and_jump_insns (count_exp,
24437 GEN_INT (size_needed),
24438 LTU, 0, counter_mode (count_exp), 1, label);
24439 if (expected_size == -1
24440 || expected_size < (desired_align - align) / 2 + size_needed)
24441 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24442 else
24443 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24444 }
24445 }
24446 if (label && size_needed == 1)
24447 {
24448 emit_label (label);
24449 LABEL_NUSES (label) = 1;
24450 label = NULL;
24451 epilogue_size_needed = 1;
24452 if (issetmem)
24453 promoted_val = val_exp;
24454 }
24455 else if (label == NULL_RTX && !misaligned_prologue_used)
24456 epilogue_size_needed = size_needed;
24457
24458 /* Step 3: Main loop. */
24459
24460 switch (alg)
24461 {
24462 case libcall:
24463 case no_stringop:
24464 case last_alg:
24465 gcc_unreachable ();
24466 case loop_1_byte:
24467 case loop:
24468 case unrolled_loop:
24469 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, promoted_val,
24470 count_exp, move_mode, unroll_factor,
24471 expected_size, issetmem);
24472 break;
24473 case vector_loop:
24474 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg,
24475 vec_promoted_val, count_exp, move_mode,
24476 unroll_factor, expected_size, issetmem);
24477 break;
24478 case rep_prefix_8_byte:
24479 case rep_prefix_4_byte:
24480 case rep_prefix_1_byte:
24481 expand_set_or_movmem_via_rep (dst, src, destreg, srcreg, promoted_val,
24482 val_exp, count_exp, move_mode, issetmem);
24483 break;
24484 }
24485 /* Properly adjust the offset of the src and dest memory for aliasing.  */
24486 if (CONST_INT_P (count_exp))
24487 {
24488 if (!issetmem)
24489 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
24490 (count / size_needed) * size_needed);
24491 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
24492 (count / size_needed) * size_needed);
24493 }
24494 else
24495 {
24496 if (!issetmem)
24497 src = change_address (src, BLKmode, srcreg);
24498 dst = change_address (dst, BLKmode, destreg);
24499 }
24500
24501 /* Step 4: Epilogue to copy the remaining bytes. */
24502 epilogue:
24503 if (label)
24504 {
24505 /* When the main loop is done, COUNT_EXP might hold the original count,
24506 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
24507 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
24508 bytes.  Compensate if needed.  */
24509
24510 if (size_needed < epilogue_size_needed)
24511 {
24512 tmp =
24513 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
24514 GEN_INT (size_needed - 1), count_exp, 1,
24515 OPTAB_DIRECT);
24516 if (tmp != count_exp)
24517 emit_move_insn (count_exp, tmp);
24518 }
24519 emit_label (label);
24520 LABEL_NUSES (label) = 1;
24521 }
24522
24523 if (count_exp != const0_rtx && epilogue_size_needed > 1)
24524 {
24525 if (force_loopy_epilogue)
24526 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
24527 epilogue_size_needed);
24528 else
24529 {
24530 if (issetmem)
24531 expand_setmem_epilogue (dst, destreg, promoted_val,
24532 vec_promoted_val, count_exp,
24533 epilogue_size_needed);
24534 else
24535 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
24536 epilogue_size_needed);
24537 }
24538 }
24539 if (jump_around_label)
24540 emit_label (jump_around_label);
24541 return true;
24542 }
24543
24544
24545 /* Expand the appropriate insns for doing strlen if not just doing
24546 repnz; scasb
24547
24548 out = result, initialized with the start address
24549 align_rtx = alignment of the address.
24550 scratch = scratch register, initialized with the start address when
24551 not aligned, otherwise undefined
24552
24553 This is just the body. It needs the initializations mentioned above and
24554 some address computing at the end. These things are done in i386.md. */
24555
24556 static void
24557 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
24558 {
24559 int align;
24560 rtx tmp;
24561 rtx align_2_label = NULL_RTX;
24562 rtx align_3_label = NULL_RTX;
24563 rtx align_4_label = gen_label_rtx ();
24564 rtx end_0_label = gen_label_rtx ();
24565 rtx mem;
24566 rtx tmpreg = gen_reg_rtx (SImode);
24567 rtx scratch = gen_reg_rtx (SImode);
24568 rtx cmp;
24569
24570 align = 0;
24571 if (CONST_INT_P (align_rtx))
24572 align = INTVAL (align_rtx);
24573
24574 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
24575
24576 /* Is there a known alignment and is it less than 4? */
24577 if (align < 4)
24578 {
24579 rtx scratch1 = gen_reg_rtx (Pmode);
24580 emit_move_insn (scratch1, out);
24581 /* Is there a known alignment and is it not 2? */
24582 if (align != 2)
24583 {
24584 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
24585 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
24586
24587 /* Leave just the 3 lower bits. */
24588 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
24589 NULL_RTX, 0, OPTAB_WIDEN);
24590
24591 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
24592 Pmode, 1, align_4_label);
24593 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
24594 Pmode, 1, align_2_label);
24595 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
24596 Pmode, 1, align_3_label);
24597 }
24598 else
24599 {
24600 /* Since the alignment is 2, we have to check 2 or 0 bytes;
24601 check whether it is aligned to a 4-byte boundary.  */
24602
24603 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
24604 NULL_RTX, 0, OPTAB_WIDEN);
24605
24606 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
24607 Pmode, 1, align_4_label);
24608 }
24609
24610 mem = change_address (src, QImode, out);
24611
24612 /* Now compare the bytes. */
24613
24614 /* Compare the first n unaligned bytes on a byte-by-byte basis.  */
24615 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
24616 QImode, 1, end_0_label);
24617
24618 /* Increment the address. */
24619 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
24620
24621 /* Not needed with an alignment of 2 */
24622 if (align != 2)
24623 {
24624 emit_label (align_2_label);
24625
24626 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
24627 end_0_label);
24628
24629 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
24630
24631 emit_label (align_3_label);
24632 }
24633
24634 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
24635 end_0_label);
24636
24637 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
24638 }
24639
24640 /* Generate a loop to check 4 bytes at a time.  It is not a good idea to
24641 align this loop; it only makes the program bigger and does not help to
24642 speed it up.  */
24643 emit_label (align_4_label);
24644
24645 mem = change_address (src, SImode, out);
24646 emit_move_insn (scratch, mem);
24647 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
24648
24649 /* This formula yields a nonzero result iff one of the bytes is zero.
24650 This saves three branches inside the loop and many cycles.  */
24651
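/* The computation below is tmpreg = (scratch - 0x01010101) & ~scratch
   & 0x80808080; the result is nonzero exactly when SCRATCH contains a
   zero byte, and the least significant zero byte yields 0x80 in TMPREG.  */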
24652 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
24653 emit_insn (gen_one_cmplsi2 (scratch, scratch));
24654 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
24655 emit_insn (gen_andsi3 (tmpreg, tmpreg,
24656 gen_int_mode (0x80808080, SImode)));
24657 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
24658 align_4_label);
24659
24660 if (TARGET_CMOVE)
24661 {
24662 rtx reg = gen_reg_rtx (SImode);
24663 rtx reg2 = gen_reg_rtx (Pmode);
24664 emit_move_insn (reg, tmpreg);
24665 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
24666
24667 /* If zero is not in the first two bytes, move two bytes forward. */
24668 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
24669 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24670 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
24671 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
24672 gen_rtx_IF_THEN_ELSE (SImode, tmp,
24673 reg,
24674 tmpreg)));
24675 /* Emit lea manually to avoid clobbering of flags. */
24676 emit_insn (gen_rtx_SET (SImode, reg2,
24677 gen_rtx_PLUS (Pmode, out, const2_rtx)));
24678
24679 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24680 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
24681 emit_insn (gen_rtx_SET (VOIDmode, out,
24682 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
24683 reg2,
24684 out)));
24685 }
24686 else
24687 {
24688 rtx end_2_label = gen_label_rtx ();
24689 /* Is zero in the first two bytes? */
24690
24691 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
24692 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24693 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
24694 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
24695 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
24696 pc_rtx);
24697 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
24698 JUMP_LABEL (tmp) = end_2_label;
24699
24700 /* Not in the first two. Move two bytes forward. */
24701 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
24702 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
24703
24704 emit_label (end_2_label);
24705
24706 }
24707
24708 /* Avoid branch in fixing the byte. */
24709 tmpreg = gen_lowpart (QImode, tmpreg);
24710 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
24711 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
24712 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
24713 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
24714
24715 emit_label (end_0_label);
24716 }
24717
24718 /* Expand strlen. */
24719
24720 bool
24721 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
24722 {
24723 rtx addr, scratch1, scratch2, scratch3, scratch4;
24724
24725 /* The generic case of the strlen expander is long.  Avoid expanding
24726 it unless TARGET_INLINE_ALL_STRINGOPS.  */
24727
24728 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
24729 && !TARGET_INLINE_ALL_STRINGOPS
24730 && !optimize_insn_for_size_p ()
24731 && (!CONST_INT_P (align) || INTVAL (align) < 4))
24732 return false;
24733
24734 addr = force_reg (Pmode, XEXP (src, 0));
24735 scratch1 = gen_reg_rtx (Pmode);
24736
24737 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
24738 && !optimize_insn_for_size_p ())
24739 {
24740 /* Well it seems that some optimizer does not combine a call like
24741 foo(strlen(bar), strlen(bar));
24742 when the move and the subtraction are done here.  It does calculate
24743 the length just once when these instructions are done inside of
24744 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
24745 often used and I use one fewer register for the lifetime of
24746 output_strlen_unroll() this is better. */
24747
24748 emit_move_insn (out, addr);
24749
24750 ix86_expand_strlensi_unroll_1 (out, src, align);
24751
24752 /* strlensi_unroll_1 returns the address of the zero at the end of
24753 the string, like memchr(), so compute the length by subtracting
24754 the start address. */
24755 emit_insn (ix86_gen_sub3 (out, out, addr));
24756 }
24757 else
24758 {
24759 rtx unspec;
24760
24761 /* Can't use this if the user has appropriated eax, ecx, or edi. */
24762 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
24763 return false;
24764
24765 scratch2 = gen_reg_rtx (Pmode);
24766 scratch3 = gen_reg_rtx (Pmode);
24767 scratch4 = force_reg (Pmode, constm1_rtx);
24768
24769 emit_move_insn (scratch3, addr);
24770 eoschar = force_reg (QImode, eoschar);
24771
24772 src = replace_equiv_address_nv (src, scratch3);
24773
24774 /* If .md starts supporting :P, this can be done in .md. */
24775 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
24776 scratch4), UNSPEC_SCAS);
24777 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
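/* The scan leaves -(length + 2) in SCRATCH1: the repnz scasb count
   register starts at -1 and is decremented once per byte scanned,
   including the terminating zero.  Hence length = ~SCRATCH1 - 1,
   computed below.  */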
24778 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
24779 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
24780 }
24781 return true;
24782 }
24783
24784 /* For a given symbol (function) construct code to compute the address of its
24785 PLT entry in the large x86-64 PIC model.  */
24786 static rtx
24787 construct_plt_address (rtx symbol)
24788 {
24789 rtx tmp, unspec;
24790
24791 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
24792 gcc_assert (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF);
24793 gcc_assert (Pmode == DImode);
24794
24795 tmp = gen_reg_rtx (Pmode);
24796 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
24797
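/* TMP = symbol@PLTOFF; the address of the PLT entry is then TMP plus the
   PIC base kept in pic_offset_table_rtx.  */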
24798 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
24799 emit_insn (ix86_gen_add3 (tmp, tmp, pic_offset_table_rtx));
24800 return tmp;
24801 }
24802
24803 rtx
24804 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
24805 rtx callarg2,
24806 rtx pop, bool sibcall)
24807 {
24808 unsigned int const cregs_size
24809 = ARRAY_SIZE (x86_64_ms_sysv_extra_clobbered_registers);
24810 rtx vec[3 + cregs_size];
24811 rtx use = NULL, call;
24812 unsigned int vec_len = 0;
24813
24814 if (pop == const0_rtx)
24815 pop = NULL;
24816 gcc_assert (!TARGET_64BIT || !pop);
24817
24818 if (TARGET_MACHO && !TARGET_64BIT)
24819 {
24820 #if TARGET_MACHO
24821 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
24822 fnaddr = machopic_indirect_call_target (fnaddr);
24823 #endif
24824 }
24825 else
24826 {
24827 /* Static functions and indirect calls don't need the pic register. */
24828 if (flag_pic
24829 && (!TARGET_64BIT
24830 || (ix86_cmodel == CM_LARGE_PIC
24831 && DEFAULT_ABI != MS_ABI))
24832 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
24833 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
24834 use_reg (&use, pic_offset_table_rtx);
24835 }
24836
24837 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
24838 {
24839 rtx al = gen_rtx_REG (QImode, AX_REG);
24840 emit_move_insn (al, callarg2);
24841 use_reg (&use, al);
24842 }
24843
24844 if (ix86_cmodel == CM_LARGE_PIC
24845 && !TARGET_PECOFF
24846 && MEM_P (fnaddr)
24847 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
24848 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
24849 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
24850 else if (sibcall
24851 ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
24852 : !call_insn_operand (XEXP (fnaddr, 0), word_mode))
24853 {
24854 fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1);
24855 fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr));
24856 }
24857
24858 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
24859 if (retval)
24860 call = gen_rtx_SET (VOIDmode, retval, call);
24861 vec[vec_len++] = call;
24862
24863 if (pop)
24864 {
24865 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
24866 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
24867 vec[vec_len++] = pop;
24868 }
24869
24870 if (TARGET_64BIT_MS_ABI
24871 && (!callarg2 || INTVAL (callarg2) != -2))
24872 {
24873 unsigned i;
24874
24875 vec[vec_len++] = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
24876 UNSPEC_MS_TO_SYSV_CALL);
24877
24878 for (i = 0; i < cregs_size; i++)
24879 {
24880 int regno = x86_64_ms_sysv_extra_clobbered_registers[i];
24881 enum machine_mode mode = SSE_REGNO_P (regno) ? TImode : DImode;
24882
24883 vec[vec_len++]
24884 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (mode, regno));
24885 }
24886 }
24887
24888 if (vec_len > 1)
24889 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
24890 call = emit_call_insn (call);
24891 if (use)
24892 CALL_INSN_FUNCTION_USAGE (call) = use;
24893
24894 return call;
24895 }
24896
24897 /* Output the assembly for a call instruction. */
24898
24899 const char *
24900 ix86_output_call_insn (rtx insn, rtx call_op)
24901 {
24902 bool direct_p = constant_call_address_operand (call_op, VOIDmode);
24903 bool seh_nop_p = false;
24904 const char *xasm;
24905
24906 if (SIBLING_CALL_P (insn))
24907 {
24908 if (direct_p)
24909 xasm = "jmp\t%P0";
24910 /* SEH epilogue detection requires the indirect branch case
24911 to include REX.W. */
24912 else if (TARGET_SEH)
24913 xasm = "rex.W jmp %A0";
24914 else
24915 xasm = "jmp\t%A0";
24916
24917 output_asm_insn (xasm, &call_op);
24918 return "";
24919 }
24920
24921 /* SEH unwinding can require an extra nop to be emitted in several
24922 circumstances. Determine if we have one of those. */
24923 if (TARGET_SEH)
24924 {
24925 rtx i;
24926
24927 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
24928 {
24929 /* If we get to another real insn, we don't need the nop. */
24930 if (INSN_P (i))
24931 break;
24932
24933 /* If we get to the epilogue note, prevent a catch region from
24934 being adjacent to the standard epilogue sequence.  If non-call
24935 exceptions are enabled, we'll have done this during epilogue emission.  */
24936 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
24937 && !flag_non_call_exceptions
24938 && !can_throw_internal (insn))
24939 {
24940 seh_nop_p = true;
24941 break;
24942 }
24943 }
24944
24945 /* If we didn't find a real insn following the call, prevent the
24946 unwinder from looking into the next function. */
24947 if (i == NULL)
24948 seh_nop_p = true;
24949 }
24950
24951 if (direct_p)
24952 xasm = "call\t%P0";
24953 else
24954 xasm = "call\t%A0";
24955
24956 output_asm_insn (xasm, &call_op);
24957
24958 if (seh_nop_p)
24959 return "nop";
24960
24961 return "";
24962 }
24963 \f
24964 /* Clear stack slot assignments remembered from previous functions.
24965 This is called from INIT_EXPANDERS once before RTL is emitted for each
24966 function. */
24967
24968 static struct machine_function *
24969 ix86_init_machine_status (void)
24970 {
24971 struct machine_function *f;
24972
24973 f = ggc_alloc_cleared_machine_function ();
24974 f->use_fast_prologue_epilogue_nregs = -1;
24975 f->call_abi = ix86_abi;
24976
24977 return f;
24978 }
24979
24980 /* Return a MEM corresponding to a stack slot with mode MODE.
24981 Allocate a new slot if necessary.
24982
24983 The RTL for a function can have several slots available: N is
24984 which slot to use. */
24985
24986 rtx
24987 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
24988 {
24989 struct stack_local_entry *s;
24990
24991 gcc_assert (n < MAX_386_STACK_LOCALS);
24992
24993 for (s = ix86_stack_locals; s; s = s->next)
24994 if (s->mode == mode && s->n == n)
24995 return validize_mem (copy_rtx (s->rtl));
24996
24997 s = ggc_alloc_stack_local_entry ();
24998 s->n = n;
24999 s->mode = mode;
25000 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
25001
25002 s->next = ix86_stack_locals;
25003 ix86_stack_locals = s;
25004 return validize_mem (s->rtl);
25005 }
25006
25007 static void
25008 ix86_instantiate_decls (void)
25009 {
25010 struct stack_local_entry *s;
25011
25012 for (s = ix86_stack_locals; s; s = s->next)
25013 if (s->rtl != NULL_RTX)
25014 instantiate_decl_rtl (s->rtl);
25015 }
25016 \f
25017 /* Check whether x86 address PARTS is a pc-relative address. */
25018
25019 static bool
25020 rip_relative_addr_p (struct ix86_address *parts)
25021 {
25022 rtx base, index, disp;
25023
25024 base = parts->base;
25025 index = parts->index;
25026 disp = parts->disp;
25027
25028 if (disp && !base && !index)
25029 {
25030 if (TARGET_64BIT)
25031 {
25032 rtx symbol = disp;
25033
25034 if (GET_CODE (disp) == CONST)
25035 symbol = XEXP (disp, 0);
25036 if (GET_CODE (symbol) == PLUS
25037 && CONST_INT_P (XEXP (symbol, 1)))
25038 symbol = XEXP (symbol, 0);
25039
25040 if (GET_CODE (symbol) == LABEL_REF
25041 || (GET_CODE (symbol) == SYMBOL_REF
25042 && SYMBOL_REF_TLS_MODEL (symbol) == 0)
25043 || (GET_CODE (symbol) == UNSPEC
25044 && (XINT (symbol, 1) == UNSPEC_GOTPCREL
25045 || XINT (symbol, 1) == UNSPEC_PCREL
25046 || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
25047 return true;
25048 }
25049 }
25050 return false;
25051 }
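
/* For example, "foo(%rip)" or "foo@GOTPCREL(%rip)" decomposes into a
   lone displacement and is pc-relative; an address that uses a base or
   index register, or any address in 32-bit code, is not.  */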
25052
25053 /* Calculate the length of the memory address in the instruction encoding.
25054 Includes addr32 prefix, does not include the one-byte modrm, opcode,
25055 or other prefixes. We never generate addr32 prefix for LEA insn. */
25056
25057 int
25058 memory_address_length (rtx addr, bool lea)
25059 {
25060 struct ix86_address parts;
25061 rtx base, index, disp;
25062 int len;
25063 int ok;
25064
25065 if (GET_CODE (addr) == PRE_DEC
25066 || GET_CODE (addr) == POST_INC
25067 || GET_CODE (addr) == PRE_MODIFY
25068 || GET_CODE (addr) == POST_MODIFY)
25069 return 0;
25070
25071 ok = ix86_decompose_address (addr, &parts);
25072 gcc_assert (ok);
25073
25074 len = (parts.seg == SEG_DEFAULT) ? 0 : 1;
25075
25076 /* If this is not a LEA instruction, add the length of the addr32 prefix. */
25077 if (TARGET_64BIT && !lea
25078 && (SImode_address_operand (addr, VOIDmode)
25079 || (parts.base && GET_MODE (parts.base) == SImode)
25080 || (parts.index && GET_MODE (parts.index) == SImode)))
25081 len++;
25082
25083 base = parts.base;
25084 index = parts.index;
25085 disp = parts.disp;
25086
25087 if (base && GET_CODE (base) == SUBREG)
25088 base = SUBREG_REG (base);
25089 if (index && GET_CODE (index) == SUBREG)
25090 index = SUBREG_REG (index);
25091
25092 gcc_assert (base == NULL_RTX || REG_P (base));
25093 gcc_assert (index == NULL_RTX || REG_P (index));
25094
25095 /* Rule of thumb:
25096 - esp as the base always wants an index,
25097 - ebp as the base always wants a displacement,
25098 - r12 as the base always wants an index,
25099 - r13 as the base always wants a displacement. */
25100
25101 /* Register Indirect. */
25102 if (base && !index && !disp)
25103 {
25104 /* esp (for its index) and ebp (for its displacement) need
25105 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
25106 code. */
25107 if (base == arg_pointer_rtx
25108 || base == frame_pointer_rtx
25109 || REGNO (base) == SP_REG
25110 || REGNO (base) == BP_REG
25111 || REGNO (base) == R12_REG
25112 || REGNO (base) == R13_REG)
25113 len++;
25114 }
25115
25116 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
25117 is not disp32, but disp32(%rip), so for disp32
25118 SIB byte is needed, unless print_operand_address
25119 optimizes it into disp32(%rip) or (%rip) is implied
25120 by UNSPEC. */
25121 else if (disp && !base && !index)
25122 {
25123 len += 4;
25124 if (rip_relative_addr_p (&parts))
25125 len++;
25126 }
25127 else
25128 {
25129 /* Find the length of the displacement constant. */
25130 if (disp)
25131 {
25132 if (base && satisfies_constraint_K (disp))
25133 len += 1;
25134 else
25135 len += 4;
25136 }
25137 /* ebp always wants a displacement. Similarly r13. */
25138 else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
25139 len++;
25140
25141 /* An index requires the two-byte modrm form.... */
25142 if (index
25143 /* ...like esp (or r12), which always wants an index. */
25144 || base == arg_pointer_rtx
25145 || base == frame_pointer_rtx
25146 || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
25147 len++;
25148 }
25149
25150 return len;
25151 }
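
/* As a rough worked example of the accounting above (the modrm byte,
   opcode and non-address prefixes are not counted):
     movl (%eax), %edx        -> 0  (plain register indirect)
     movl (%esp), %edx        -> 1  (SIB byte forced by esp as base)
     movl 8(%ebp), %edx       -> 1  (disp8 satisfies constraint K)
     movl foo(,%eax,4), %edx  -> 5  (SIB byte plus disp32)
   A non-default segment adds one byte, and in 64-bit code a 32-bit
   base or index register adds one byte for the addr32 prefix unless
   the insn is a LEA.  */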
25152
25153 /* Compute default value for "length_immediate" attribute. When SHORTFORM
25154 is set, expect that the insn has an 8-bit immediate alternative. */
25155 int
25156 ix86_attr_length_immediate_default (rtx insn, bool shortform)
25157 {
25158 int len = 0;
25159 int i;
25160 extract_insn_cached (insn);
25161 for (i = recog_data.n_operands - 1; i >= 0; --i)
25162 if (CONSTANT_P (recog_data.operand[i]))
25163 {
25164 enum attr_mode mode = get_attr_mode (insn);
25165
25166 gcc_assert (!len);
25167 if (shortform && CONST_INT_P (recog_data.operand[i]))
25168 {
25169 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
25170 switch (mode)
25171 {
25172 case MODE_QI:
25173 len = 1;
25174 continue;
25175 case MODE_HI:
25176 ival = trunc_int_for_mode (ival, HImode);
25177 break;
25178 case MODE_SI:
25179 ival = trunc_int_for_mode (ival, SImode);
25180 break;
25181 default:
25182 break;
25183 }
25184 if (IN_RANGE (ival, -128, 127))
25185 {
25186 len = 1;
25187 continue;
25188 }
25189 }
25190 switch (mode)
25191 {
25192 case MODE_QI:
25193 len = 1;
25194 break;
25195 case MODE_HI:
25196 len = 2;
25197 break;
25198 case MODE_SI:
25199 len = 4;
25200 break;
25201 /* Immediates for DImode instructions are encoded
25202 as 32bit sign extended values. */
25203 case MODE_DI:
25204 len = 4;
25205 break;
25206 default:
25207 fatal_insn ("unknown insn mode", insn);
25208 }
25209 }
25210 return len;
25211 }
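
/* For instance, "addl $3, %eax" can use the sign-extended 8-bit
   immediate form and gets length 1, while "addl $300, %eax" needs the
   full 32-bit immediate and gets length 4; DImode immediates are
   likewise encoded as 32-bit sign-extended values.  */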
25212
25213 /* Compute default value for "length_address" attribute. */
25214 int
25215 ix86_attr_length_address_default (rtx insn)
25216 {
25217 int i;
25218
25219 if (get_attr_type (insn) == TYPE_LEA)
25220 {
25221 rtx set = PATTERN (insn), addr;
25222
25223 if (GET_CODE (set) == PARALLEL)
25224 set = XVECEXP (set, 0, 0);
25225
25226 gcc_assert (GET_CODE (set) == SET);
25227
25228 addr = SET_SRC (set);
25229
25230 return memory_address_length (addr, true);
25231 }
25232
25233 extract_insn_cached (insn);
25234 for (i = recog_data.n_operands - 1; i >= 0; --i)
25235 if (MEM_P (recog_data.operand[i]))
25236 {
25237 constrain_operands_cached (reload_completed);
25238 if (which_alternative != -1)
25239 {
25240 const char *constraints = recog_data.constraints[i];
25241 int alt = which_alternative;
25242
25243 while (*constraints == '=' || *constraints == '+')
25244 constraints++;
25245 while (alt-- > 0)
25246 while (*constraints++ != ',')
25247 ;
25248 /* Skip ignored operands. */
25249 if (*constraints == 'X')
25250 continue;
25251 }
25252 return memory_address_length (XEXP (recog_data.operand[i], 0), false);
25253 }
25254 return 0;
25255 }
25256
25257 /* Compute default value for "length_vex" attribute. It includes
25258 2 or 3 byte VEX prefix and 1 opcode byte. */
25259
25260 int
25261 ix86_attr_length_vex_default (rtx insn, bool has_0f_opcode, bool has_vex_w)
25262 {
25263 int i;
25264
25265 /* Only 0f opcode can use 2 byte VEX prefix and VEX W bit uses 3
25266 byte VEX prefix. */
25267 if (!has_0f_opcode || has_vex_w)
25268 return 3 + 1;
25269
25270 /* We can always use 2 byte VEX prefix in 32bit. */
25271 if (!TARGET_64BIT)
25272 return 2 + 1;
25273
25274 extract_insn_cached (insn);
25275
25276 for (i = recog_data.n_operands - 1; i >= 0; --i)
25277 if (REG_P (recog_data.operand[i]))
25278 {
25279 /* REX.W bit uses 3 byte VEX prefix. */
25280 if (GET_MODE (recog_data.operand[i]) == DImode
25281 && GENERAL_REG_P (recog_data.operand[i]))
25282 return 3 + 1;
25283 }
25284 else
25285 {
25286 /* REX.X or REX.B bits use 3 byte VEX prefix. */
25287 if (MEM_P (recog_data.operand[i])
25288 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
25289 return 3 + 1;
25290 }
25291
25292 return 2 + 1;
25293 }
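
/* For example, "vaddps %xmm1, %xmm2, %xmm3" fits the 2-byte VEX prefix
   and gets 2 + 1 = 3, while a DImode operation on a general register
   (VEX.W) or a memory operand whose address uses %r8..%r15
   (REX.X/REX.B) needs the 3-byte prefix and gets 3 + 1 = 4.  */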
25294 \f
25295 /* Return the maximum number of instructions a cpu can issue. */
25296
25297 static int
25298 ix86_issue_rate (void)
25299 {
25300 switch (ix86_tune)
25301 {
25302 case PROCESSOR_PENTIUM:
25303 case PROCESSOR_BONNELL:
25304 case PROCESSOR_SILVERMONT:
25305 case PROCESSOR_INTEL:
25306 case PROCESSOR_K6:
25307 case PROCESSOR_BTVER2:
25308 case PROCESSOR_PENTIUM4:
25309 case PROCESSOR_NOCONA:
25310 return 2;
25311
25312 case PROCESSOR_PENTIUMPRO:
25313 case PROCESSOR_ATHLON:
25314 case PROCESSOR_K8:
25315 case PROCESSOR_AMDFAM10:
25316 case PROCESSOR_GENERIC:
25317 case PROCESSOR_BTVER1:
25318 return 3;
25319
25320 case PROCESSOR_BDVER1:
25321 case PROCESSOR_BDVER2:
25322 case PROCESSOR_BDVER3:
25323 case PROCESSOR_BDVER4:
25324 case PROCESSOR_CORE2:
25325 case PROCESSOR_NEHALEM:
25326 case PROCESSOR_SANDYBRIDGE:
25327 case PROCESSOR_HASWELL:
25328 return 4;
25329
25330 default:
25331 return 1;
25332 }
25333 }
25334
25335 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads the flags
25336 set by DEP_INSN and nothing else that DEP_INSN sets. */
25337
25338 static bool
25339 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
25340 {
25341 rtx set, set2;
25342
25343 /* Simplify the test for uninteresting insns. */
25344 if (insn_type != TYPE_SETCC
25345 && insn_type != TYPE_ICMOV
25346 && insn_type != TYPE_FCMOV
25347 && insn_type != TYPE_IBR)
25348 return false;
25349
25350 if ((set = single_set (dep_insn)) != 0)
25351 {
25352 set = SET_DEST (set);
25353 set2 = NULL_RTX;
25354 }
25355 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
25356 && XVECLEN (PATTERN (dep_insn), 0) == 2
25357 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
25358 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
25359 {
25360 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
25361 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
25362 }
25363 else
25364 return false;
25365
25366 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
25367 return false;
25368
25369 /* This test is true if the dependent insn reads the flags but
25370 not any other potentially set register. */
25371 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
25372 return false;
25373
25374 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
25375 return false;
25376
25377 return true;
25378 }
25379
25380 /* Return true iff USE_INSN has a memory address with operands set by
25381 SET_INSN. */
25382
25383 bool
25384 ix86_agi_dependent (rtx set_insn, rtx use_insn)
25385 {
25386 int i;
25387 extract_insn_cached (use_insn);
25388 for (i = recog_data.n_operands - 1; i >= 0; --i)
25389 if (MEM_P (recog_data.operand[i]))
25390 {
25391 rtx addr = XEXP (recog_data.operand[i], 0);
25392 return modified_in_p (addr, set_insn) != 0;
25393 }
25394 return false;
25395 }
25396
25397 /* Helper function for exact_store_load_dependency.
25398 Return true if addr is found in insn. */
25399 static bool
25400 exact_dependency_1 (rtx addr, rtx insn)
25401 {
25402 enum rtx_code code;
25403 const char *format_ptr;
25404 int i, j;
25405
25406 code = GET_CODE (insn);
25407 switch (code)
25408 {
25409 case MEM:
25410 if (rtx_equal_p (addr, insn))
25411 return true;
25412 break;
25413 case REG:
25414 CASE_CONST_ANY:
25415 case SYMBOL_REF:
25416 case CODE_LABEL:
25417 case PC:
25418 case CC0:
25419 case EXPR_LIST:
25420 return false;
25421 default:
25422 break;
25423 }
25424
25425 format_ptr = GET_RTX_FORMAT (code);
25426 for (i = 0; i < GET_RTX_LENGTH (code); i++)
25427 {
25428 switch (*format_ptr++)
25429 {
25430 case 'e':
25431 if (exact_dependency_1 (addr, XEXP (insn, i)))
25432 return true;
25433 break;
25434 case 'E':
25435 for (j = 0; j < XVECLEN (insn, i); j++)
25436 if (exact_dependency_1 (addr, XVECEXP (insn, i, j)))
25437 return true;
25438 break;
25439 }
25440 }
25441 return false;
25442 }
25443
25444 /* Return true if there is an exact dependency between the store and the load,
25445 i.e. the same memory address is used in both. */
25446 static bool
25447 exact_store_load_dependency (rtx store, rtx load)
25448 {
25449 rtx set1, set2;
25450
25451 set1 = single_set (store);
25452 if (!set1)
25453 return false;
25454 if (!MEM_P (SET_DEST (set1)))
25455 return false;
25456 set2 = single_set (load);
25457 if (!set2)
25458 return false;
25459 if (exact_dependency_1 (SET_DEST (set1), SET_SRC (set2)))
25460 return true;
25461 return false;
25462 }
25463
25464 static int
25465 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
25466 {
25467 enum attr_type insn_type, dep_insn_type;
25468 enum attr_memory memory;
25469 rtx set, set2;
25470 int dep_insn_code_number;
25471
25472 /* Anti and output dependencies have zero cost on all CPUs. */
25473 if (REG_NOTE_KIND (link) != 0)
25474 return 0;
25475
25476 dep_insn_code_number = recog_memoized (dep_insn);
25477
25478 /* If we can't recognize the insns, we can't really do anything. */
25479 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
25480 return cost;
25481
25482 insn_type = get_attr_type (insn);
25483 dep_insn_type = get_attr_type (dep_insn);
25484
25485 switch (ix86_tune)
25486 {
25487 case PROCESSOR_PENTIUM:
25488 /* Address Generation Interlock adds a cycle of latency. */
25489 if (insn_type == TYPE_LEA)
25490 {
25491 rtx addr = PATTERN (insn);
25492
25493 if (GET_CODE (addr) == PARALLEL)
25494 addr = XVECEXP (addr, 0, 0);
25495
25496 gcc_assert (GET_CODE (addr) == SET);
25497
25498 addr = SET_SRC (addr);
25499 if (modified_in_p (addr, dep_insn))
25500 cost += 1;
25501 }
25502 else if (ix86_agi_dependent (dep_insn, insn))
25503 cost += 1;
25504
25505 /* ??? Compares pair with jump/setcc. */
25506 if (ix86_flags_dependent (insn, dep_insn, insn_type))
25507 cost = 0;
25508
25509 /* Floating point stores require value to be ready one cycle earlier. */
25510 if (insn_type == TYPE_FMOV
25511 && get_attr_memory (insn) == MEMORY_STORE
25512 && !ix86_agi_dependent (dep_insn, insn))
25513 cost += 1;
25514 break;
25515
25516 case PROCESSOR_PENTIUMPRO:
25517 /* INT->FP conversion is expensive. */
25518 if (get_attr_fp_int_src (dep_insn))
25519 cost += 5;
25520
25521 /* There is one cycle extra latency between an FP op and a store. */
25522 if (insn_type == TYPE_FMOV
25523 && (set = single_set (dep_insn)) != NULL_RTX
25524 && (set2 = single_set (insn)) != NULL_RTX
25525 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
25526 && MEM_P (SET_DEST (set2)))
25527 cost += 1;
25528
25529 memory = get_attr_memory (insn);
25530
25531 /* Model the ability of the reorder buffer to hide the latency of a load
25532 by executing it in parallel with the previous instruction, provided the
25533 previous instruction is not needed to compute the address. */
25534 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25535 && !ix86_agi_dependent (dep_insn, insn))
25536 {
25537 /* Claim that moves take one cycle, as the core can issue one load
25538 at a time and the next load can start a cycle later. */
25539 if (dep_insn_type == TYPE_IMOV
25540 || dep_insn_type == TYPE_FMOV)
25541 cost = 1;
25542 else if (cost > 1)
25543 cost--;
25544 }
25545 break;
25546
25547 case PROCESSOR_K6:
25548 /* The esp dependency is resolved before
25549 the instruction is really finished. */
25550 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
25551 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
25552 return 1;
25553
25554 /* INT->FP conversion is expensive. */
25555 if (get_attr_fp_int_src (dep_insn))
25556 cost += 5;
25557
25558 memory = get_attr_memory (insn);
25559
25560 /* Model the ability of the reorder buffer to hide the latency of a load
25561 by executing it in parallel with the previous instruction, provided the
25562 previous instruction is not needed to compute the address. */
25563 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25564 && !ix86_agi_dependent (dep_insn, insn))
25565 {
25566 /* Claim that moves take one cycle, as the core can issue one load
25567 at a time and the next load can start a cycle later. */
25568 if (dep_insn_type == TYPE_IMOV
25569 || dep_insn_type == TYPE_FMOV)
25570 cost = 1;
25571 else if (cost > 2)
25572 cost -= 2;
25573 else
25574 cost = 1;
25575 }
25576 break;
25577
25578 case PROCESSOR_AMDFAM10:
25579 case PROCESSOR_BDVER1:
25580 case PROCESSOR_BDVER2:
25581 case PROCESSOR_BDVER3:
25582 case PROCESSOR_BDVER4:
25583 case PROCESSOR_BTVER1:
25584 case PROCESSOR_BTVER2:
25585 case PROCESSOR_GENERIC:
25586 /* The stack engine allows push and pop instructions to execute in parallel. */
25587 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
25588 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
25589 return 0;
25590 /* FALLTHRU */
25591
25592 case PROCESSOR_ATHLON:
25593 case PROCESSOR_K8:
25594 memory = get_attr_memory (insn);
25595
25596 /* Show ability of reorder buffer to hide latency of load by executing
25597 in parallel with previous instruction in case
25598 previous instruction is not needed to compute the address. */
25599 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25600 && !ix86_agi_dependent (dep_insn, insn))
25601 {
25602 enum attr_unit unit = get_attr_unit (insn);
25603 int loadcost = 3;
25604
25605 /* Because of the difference between the length of integer and
25606 floating unit pipeline preparation stages, the memory operands
25607 for floating point are cheaper.
25608
25609 ??? For Athlon the difference is most probably 2. */
25610 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
25611 loadcost = 3;
25612 else
25613 loadcost = TARGET_ATHLON ? 2 : 0;
25614
25615 if (cost >= loadcost)
25616 cost -= loadcost;
25617 else
25618 cost = 0;
25619 }
25620 break;
25621
25622 case PROCESSOR_CORE2:
25623 case PROCESSOR_NEHALEM:
25624 case PROCESSOR_SANDYBRIDGE:
25625 case PROCESSOR_HASWELL:
25626 /* The stack engine allows push and pop instructions to execute in parallel. */
25627 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
25628 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
25629 return 0;
25630
25631 memory = get_attr_memory (insn);
25632
25633 /* Model the ability of the reorder buffer to hide the latency of a load
25634 by executing it in parallel with the previous instruction, provided the
25635 previous instruction is not needed to compute the address. */
25636 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25637 && !ix86_agi_dependent (dep_insn, insn))
25638 {
25639 if (cost >= 4)
25640 cost -= 4;
25641 else
25642 cost = 0;
25643 }
25644 break;
25645
25646 case PROCESSOR_SILVERMONT:
25647 case PROCESSOR_INTEL:
25648 if (!reload_completed)
25649 return cost;
25650
25651 /* Increase cost of integer loads. */
25652 memory = get_attr_memory (dep_insn);
25653 if (memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25654 {
25655 enum attr_unit unit = get_attr_unit (dep_insn);
25656 if (unit == UNIT_INTEGER && cost == 1)
25657 {
25658 if (memory == MEMORY_LOAD)
25659 cost = 3;
25660 else
25661 {
25662 /* Increase cost of ld/st for short int types only
25663 because of store forwarding issue. */
25664 rtx set = single_set (dep_insn);
25665 if (set && (GET_MODE (SET_DEST (set)) == QImode
25666 || GET_MODE (SET_DEST (set)) == HImode))
25667 {
25668 /* Increase cost of store/load insn if exact
25669 dependence exists and it is load insn. */
25670 enum attr_memory insn_memory = get_attr_memory (insn);
25671 if (insn_memory == MEMORY_LOAD
25672 && exact_store_load_dependency (dep_insn, insn))
25673 cost = 3;
25674 }
25675 }
25676 }
25677 }
25678
25679 default:
25680 break;
25681 }
25682
25683 return cost;
25684 }
25685
25686 /* How many alternative schedules to try. This should be as wide as the
25687 scheduling freedom in the DFA, but no wider. Making this value too
25688 large results in extra work for the scheduler. */
25689
25690 static int
25691 ia32_multipass_dfa_lookahead (void)
25692 {
25693 switch (ix86_tune)
25694 {
25695 case PROCESSOR_PENTIUM:
25696 return 2;
25697
25698 case PROCESSOR_PENTIUMPRO:
25699 case PROCESSOR_K6:
25700 return 1;
25701
25702 case PROCESSOR_BDVER1:
25703 case PROCESSOR_BDVER2:
25704 case PROCESSOR_BDVER3:
25705 case PROCESSOR_BDVER4:
25706 /* We use lookahead value 4 for BD both before and after reload
25707 schedules. Plan is to have value 8 included for O3. */
25708 return 4;
25709
25710 case PROCESSOR_CORE2:
25711 case PROCESSOR_NEHALEM:
25712 case PROCESSOR_SANDYBRIDGE:
25713 case PROCESSOR_HASWELL:
25714 case PROCESSOR_BONNELL:
25715 case PROCESSOR_SILVERMONT:
25716 case PROCESSOR_INTEL:
25717 /* Generally, we want haifa-sched:max_issue() to look ahead as far as
25718 the number of instructions that can be executed in one cycle, i.e.,
25719 issue_rate. I wonder why tuning for many CPUs does not do this. */
25720 if (reload_completed)
25721 return ix86_issue_rate ();
25722 /* Don't use lookahead for pre-reload schedule to save compile time. */
25723 return 0;
25724
25725 default:
25726 return 0;
25727 }
25728 }
25729
25730 /* Return true if target platform supports macro-fusion. */
25731
25732 static bool
25733 ix86_macro_fusion_p ()
25734 {
25735 return TARGET_FUSE_CMP_AND_BRANCH;
25736 }
25737
25738 /* Check whether the current microarchitecture supports macro fusion
25739 for insn pair "CONDGEN + CONDJMP". Refer to
25740 "Intel Architectures Optimization Reference Manual". */
25741
25742 static bool
25743 ix86_macro_fusion_pair_p (rtx condgen, rtx condjmp)
25744 {
25745 rtx src, dest;
25746 rtx single_set = single_set (condgen);
25747 enum rtx_code ccode;
25748 rtx compare_set = NULL_RTX, test_if, cond;
25749 rtx alu_set = NULL_RTX, addr = NULL_RTX;
25750
25751 if (get_attr_type (condgen) != TYPE_TEST
25752 && get_attr_type (condgen) != TYPE_ICMP
25753 && get_attr_type (condgen) != TYPE_INCDEC
25754 && get_attr_type (condgen) != TYPE_ALU)
25755 return false;
25756
25757 if (single_set == NULL_RTX
25758 && !TARGET_FUSE_ALU_AND_BRANCH)
25759 return false;
25760
25761 if (single_set != NULL_RTX)
25762 compare_set = single_set;
25763 else
25764 {
25765 int i;
25766 rtx pat = PATTERN (condgen);
25767 for (i = 0; i < XVECLEN (pat, 0); i++)
25768 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
25769 {
25770 rtx set_src = SET_SRC (XVECEXP (pat, 0, i));
25771 if (GET_CODE (set_src) == COMPARE)
25772 compare_set = XVECEXP (pat, 0, i);
25773 else
25774 alu_set = XVECEXP (pat, 0, i);
25775 }
25776 }
25777 if (compare_set == NULL_RTX)
25778 return false;
25779 src = SET_SRC (compare_set);
25780 if (GET_CODE (src) != COMPARE)
25781 return false;
25782
25783 /* Macro-fusion for cmp/test MEM-IMM + conditional jmp is not
25784 supported. */
25785 if ((MEM_P (XEXP (src, 0))
25786 && CONST_INT_P (XEXP (src, 1)))
25787 || (MEM_P (XEXP (src, 1))
25788 && CONST_INT_P (XEXP (src, 0))))
25789 return false;
25790
25791 /* No fusion for RIP-relative address. */
25792 if (MEM_P (XEXP (src, 0)))
25793 addr = XEXP (XEXP (src, 0), 0);
25794 else if (MEM_P (XEXP (src, 1)))
25795 addr = XEXP (XEXP (src, 1), 0);
25796
25797 if (addr) {
25798 ix86_address parts;
25799 int ok = ix86_decompose_address (addr, &parts);
25800 gcc_assert (ok);
25801
25802 if (rip_relative_addr_p (&parts))
25803 return false;
25804 }
25805
25806 test_if = SET_SRC (pc_set (condjmp));
25807 cond = XEXP (test_if, 0);
25808 ccode = GET_CODE (cond);
25809 /* Check whether the conditional jump uses the Sign or Overflow flags. */
25810 if (!TARGET_FUSE_CMP_AND_BRANCH_SOFLAGS
25811 && (ccode == GE
25812 || ccode == GT
25813 || ccode == LE
25814 || ccode == LT))
25815 return false;
25816
25817 /* Return true for TYPE_TEST and TYPE_ICMP. */
25818 if (get_attr_type (condgen) == TYPE_TEST
25819 || get_attr_type (condgen) == TYPE_ICMP)
25820 return true;
25821
25822 /* The following handles the macro-fusion case for alu + jmp. */
25823 if (!TARGET_FUSE_ALU_AND_BRANCH || !alu_set)
25824 return false;
25825
25826 /* No fusion for alu op with memory destination operand. */
25827 dest = SET_DEST (alu_set);
25828 if (MEM_P (dest))
25829 return false;
25830
25831 /* Macro-fusion for inc/dec + unsigned conditional jump is not
25832 supported. */
25833 if (get_attr_type (condgen) == TYPE_INCDEC
25834 && (ccode == GEU
25835 || ccode == GTU
25836 || ccode == LEU
25837 || ccode == LTU))
25838 return false;
25839
25840 return true;
25841 }
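
/* Typical pairs that pass the checks above are "cmpl $1, %eax; je .L2"
   and "testl %edx, %edx; jne .L3".  "decl %ecx; jae .L4" is rejected
   because inc/dec do not update the carry flag that an unsigned
   conditional jump needs.  */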
25842
25843 /* Try to reorder the ready list to take advantage of Atom's pipelined IMUL
25844 execution. It is applied if
25845 (1) an IMUL instruction is on the top of the list;
25846 (2) there is exactly one producer of an independent IMUL instruction in the
25847 ready list.
25848 Return the index of the IMUL producer if it was found, and -1 otherwise. */
25849 static int
25850 do_reorder_for_imul (rtx *ready, int n_ready)
25851 {
25852 rtx insn, set, insn1, insn2;
25853 sd_iterator_def sd_it;
25854 dep_t dep;
25855 int index = -1;
25856 int i;
25857
25858 if (!TARGET_BONNELL)
25859 return index;
25860
25861 /* Check that IMUL instruction is on the top of ready list. */
25862 insn = ready[n_ready - 1];
25863 set = single_set (insn);
25864 if (!set)
25865 return index;
25866 if (!(GET_CODE (SET_SRC (set)) == MULT
25867 && GET_MODE (SET_SRC (set)) == SImode))
25868 return index;
25869
25870 /* Search for producer of independent IMUL instruction. */
25871 for (i = n_ready - 2; i >= 0; i--)
25872 {
25873 insn = ready[i];
25874 if (!NONDEBUG_INSN_P (insn))
25875 continue;
25876 /* Skip IMUL instruction. */
25877 insn2 = PATTERN (insn);
25878 if (GET_CODE (insn2) == PARALLEL)
25879 insn2 = XVECEXP (insn2, 0, 0);
25880 if (GET_CODE (insn2) == SET
25881 && GET_CODE (SET_SRC (insn2)) == MULT
25882 && GET_MODE (SET_SRC (insn2)) == SImode)
25883 continue;
25884
25885 FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
25886 {
25887 rtx con;
25888 con = DEP_CON (dep);
25889 if (!NONDEBUG_INSN_P (con))
25890 continue;
25891 insn1 = PATTERN (con);
25892 if (GET_CODE (insn1) == PARALLEL)
25893 insn1 = XVECEXP (insn1, 0, 0);
25894
25895 if (GET_CODE (insn1) == SET
25896 && GET_CODE (SET_SRC (insn1)) == MULT
25897 && GET_MODE (SET_SRC (insn1)) == SImode)
25898 {
25899 sd_iterator_def sd_it1;
25900 dep_t dep1;
25901 /* Check if there is no other dependee for IMUL. */
25902 index = i;
25903 FOR_EACH_DEP (con, SD_LIST_BACK, sd_it1, dep1)
25904 {
25905 rtx pro;
25906 pro = DEP_PRO (dep1);
25907 if (!NONDEBUG_INSN_P (pro))
25908 continue;
25909 if (pro != insn)
25910 index = -1;
25911 }
25912 if (index >= 0)
25913 break;
25914 }
25915 }
25916 if (index >= 0)
25917 break;
25918 }
25919 return index;
25920 }
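
/* Roughly, if the ready list ends in { ..., producer, imul1 } and
   "producer" is the only remaining input of another, independent
   SImode imul that is not ready yet, moving the producer to the top
   lets the second multiply start sooner and overlap with imul1 in the
   pipelined IMUL unit.  */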
25921
25922 /* Try to find the best candidate for the top of the ready list when two insns
25923 have the same priority - the candidate is best if the insns it depends on
25924 were scheduled earlier. Applied to Silvermont only.
25925 Return true if the top 2 insns must be interchanged. */
25926 static bool
25927 swap_top_of_ready_list (rtx *ready, int n_ready)
25928 {
25929 rtx top = ready[n_ready - 1];
25930 rtx next = ready[n_ready - 2];
25931 rtx set;
25932 sd_iterator_def sd_it;
25933 dep_t dep;
25934 int clock1 = -1;
25935 int clock2 = -1;
25936 #define INSN_TICK(INSN) (HID (INSN)->tick)
25937
25938 if (!TARGET_SILVERMONT && !TARGET_INTEL)
25939 return false;
25940
25941 if (!NONDEBUG_INSN_P (top))
25942 return false;
25943 if (!NONJUMP_INSN_P (top))
25944 return false;
25945 if (!NONDEBUG_INSN_P (next))
25946 return false;
25947 if (!NONJUMP_INSN_P (next))
25948 return false;
25949 set = single_set (top);
25950 if (!set)
25951 return false;
25952 set = single_set (next);
25953 if (!set)
25954 return false;
25955
25956 if (INSN_PRIORITY_KNOWN (top) && INSN_PRIORITY_KNOWN (next))
25957 {
25958 if (INSN_PRIORITY (top) != INSN_PRIORITY (next))
25959 return false;
25960 /* Determine the winner more precisely. */
25961 FOR_EACH_DEP (top, SD_LIST_RES_BACK, sd_it, dep)
25962 {
25963 rtx pro;
25964 pro = DEP_PRO (dep);
25965 if (!NONDEBUG_INSN_P (pro))
25966 continue;
25967 if (INSN_TICK (pro) > clock1)
25968 clock1 = INSN_TICK (pro);
25969 }
25970 FOR_EACH_DEP (next, SD_LIST_RES_BACK, sd_it, dep)
25971 {
25972 rtx pro;
25973 pro = DEP_PRO (dep);
25974 if (!NONDEBUG_INSN_P (pro))
25975 continue;
25976 if (INSN_TICK (pro) > clock2)
25977 clock2 = INSN_TICK (pro);
25978 }
25979
25980 if (clock1 == clock2)
25981 {
25982 /* Determine winner - load must win. */
25983 enum attr_memory memory1, memory2;
25984 memory1 = get_attr_memory (top);
25985 memory2 = get_attr_memory (next);
25986 if (memory2 == MEMORY_LOAD && memory1 != MEMORY_LOAD)
25987 return true;
25988 }
25989 return (bool) (clock2 < clock1);
25990 }
25991 return false;
25992 #undef INSN_TICK
25993 }
25994
25995 /* Perform possible reordering of the ready list for Atom/Silvermont only.
25996 Return the issue rate. */
25997 static int
25998 ix86_sched_reorder (FILE *dump, int sched_verbose, rtx *ready, int *pn_ready,
25999 int clock_var)
26000 {
26001 int issue_rate = -1;
26002 int n_ready = *pn_ready;
26003 int i;
26004 rtx insn;
26005 int index = -1;
26006
26007 /* Set up issue rate. */
26008 issue_rate = ix86_issue_rate ();
26009
26010 /* Do reordering for BONNELL/SILVERMONT only. */
26011 if (!TARGET_BONNELL && !TARGET_SILVERMONT && !TARGET_INTEL)
26012 return issue_rate;
26013
26014 /* Nothing to do if ready list contains only 1 instruction. */
26015 if (n_ready <= 1)
26016 return issue_rate;
26017
26018 /* Do reordering for the post-reload scheduler only. */
26019 if (!reload_completed)
26020 return issue_rate;
26021
26022 if ((index = do_reorder_for_imul (ready, n_ready)) >= 0)
26023 {
26024 if (sched_verbose > 1)
26025 fprintf (dump, ";;\tatom sched_reorder: put %d insn on top\n",
26026 INSN_UID (ready[index]));
26027
26028 /* Put IMUL producer (ready[index]) at the top of ready list. */
26029 insn = ready[index];
26030 for (i = index; i < n_ready - 1; i++)
26031 ready[i] = ready[i + 1];
26032 ready[n_ready - 1] = insn;
26033 return issue_rate;
26034 }
26035 if (clock_var != 0 && swap_top_of_ready_list (ready, n_ready))
26036 {
26037 if (sched_verbose > 1)
26038 fprintf (dump, ";;\tslm sched_reorder: swap %d and %d insns\n",
26039 INSN_UID (ready[n_ready - 1]), INSN_UID (ready[n_ready - 2]));
26040 /* Swap 2 top elements of ready list. */
26041 insn = ready[n_ready - 1];
26042 ready[n_ready - 1] = ready[n_ready - 2];
26043 ready[n_ready - 2] = insn;
26044 }
26045 return issue_rate;
26046 }
26047
26048 static bool
26049 ix86_class_likely_spilled_p (reg_class_t);
26050
26051 /* Return true if the lhs of INSN is a HW function argument register and set
26052 IS_SPILLED to true if it is a likely-spilled HW register. */
26053 static bool
26054 insn_is_function_arg (rtx insn, bool* is_spilled)
26055 {
26056 rtx dst;
26057
26058 if (!NONDEBUG_INSN_P (insn))
26059 return false;
26060 /* Call instructions are not movable, ignore them. */
26061 if (CALL_P (insn))
26062 return false;
26063 insn = PATTERN (insn);
26064 if (GET_CODE (insn) == PARALLEL)
26065 insn = XVECEXP (insn, 0, 0);
26066 if (GET_CODE (insn) != SET)
26067 return false;
26068 dst = SET_DEST (insn);
26069 if (REG_P (dst) && HARD_REGISTER_P (dst)
26070 && ix86_function_arg_regno_p (REGNO (dst)))
26071 {
26072 /* Is it likely spilled HW register? */
26073 if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
26074 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
26075 *is_spilled = true;
26076 return true;
26077 }
26078 return false;
26079 }
26080
26081 /* Add output dependencies for a chain of adjacent function arguments, but only
26082 if there is a move to a likely-spilled HW register. Return the first argument
26083 if at least one dependence was added, or NULL otherwise. */
26084 static rtx
26085 add_parameter_dependencies (rtx call, rtx head)
26086 {
26087 rtx insn;
26088 rtx last = call;
26089 rtx first_arg = NULL;
26090 bool is_spilled = false;
26091
26092 head = PREV_INSN (head);
26093
26094 /* Find the argument-passing instruction nearest to the call. */
26095 while (true)
26096 {
26097 last = PREV_INSN (last);
26098 if (last == head)
26099 return NULL;
26100 if (!NONDEBUG_INSN_P (last))
26101 continue;
26102 if (insn_is_function_arg (last, &is_spilled))
26103 break;
26104 return NULL;
26105 }
26106
26107 first_arg = last;
26108 while (true)
26109 {
26110 insn = PREV_INSN (last);
26111 if (!INSN_P (insn))
26112 break;
26113 if (insn == head)
26114 break;
26115 if (!NONDEBUG_INSN_P (insn))
26116 {
26117 last = insn;
26118 continue;
26119 }
26120 if (insn_is_function_arg (insn, &is_spilled))
26121 {
26122 /* Add an output dependence between two function arguments if the chain
26123 of output arguments contains likely-spilled HW registers. */
26124 if (is_spilled)
26125 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26126 first_arg = last = insn;
26127 }
26128 else
26129 break;
26130 }
26131 if (!is_spilled)
26132 return NULL;
26133 return first_arg;
26134 }
26135
26136 /* Add output or anti dependency from insn to first_arg to restrict its code
26137 motion. */
26138 static void
26139 avoid_func_arg_motion (rtx first_arg, rtx insn)
26140 {
26141 rtx set;
26142 rtx tmp;
26143
26144 set = single_set (insn);
26145 if (!set)
26146 return;
26147 tmp = SET_DEST (set);
26148 if (REG_P (tmp))
26149 {
26150 /* Add output dependency to the first function argument. */
26151 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26152 return;
26153 }
26154 /* Add anti dependency. */
26155 add_dependence (first_arg, insn, REG_DEP_ANTI);
26156 }
26157
26158 /* Avoid cross-block motion of a function argument by adding a dependency
26159 from the first non-jump instruction in BB. */
26160 static void
26161 add_dependee_for_func_arg (rtx arg, basic_block bb)
26162 {
26163 rtx insn = BB_END (bb);
26164
26165 while (insn)
26166 {
26167 if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
26168 {
26169 rtx set = single_set (insn);
26170 if (set)
26171 {
26172 avoid_func_arg_motion (arg, insn);
26173 return;
26174 }
26175 }
26176 if (insn == BB_HEAD (bb))
26177 return;
26178 insn = PREV_INSN (insn);
26179 }
26180 }
26181
26182 /* Hook for pre-reload schedule - avoid motion of function arguments
26183 passed in likely spilled HW registers. */
26184 static void
26185 ix86_dependencies_evaluation_hook (rtx head, rtx tail)
26186 {
26187 rtx insn;
26188 rtx first_arg = NULL;
26189 if (reload_completed)
26190 return;
26191 while (head != tail && DEBUG_INSN_P (head))
26192 head = NEXT_INSN (head);
26193 for (insn = tail; insn != head; insn = PREV_INSN (insn))
26194 if (INSN_P (insn) && CALL_P (insn))
26195 {
26196 first_arg = add_parameter_dependencies (insn, head);
26197 if (first_arg)
26198 {
26199 /* Add a dependee for the first argument to predecessors, but only if the
26200 region contains more than one block. */
26201 basic_block bb = BLOCK_FOR_INSN (insn);
26202 int rgn = CONTAINING_RGN (bb->index);
26203 int nr_blks = RGN_NR_BLOCKS (rgn);
26204 /* Skip trivial regions and region head blocks that can have
26205 predecessors outside of region. */
26206 if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
26207 {
26208 edge e;
26209 edge_iterator ei;
26210 /* Assume that region is SCC, i.e. all immediate predecessors
26211 of non-head block are in the same region. */
26212 FOR_EACH_EDGE (e, ei, bb->preds)
26213 {
26214 /* Avoid creating loop-carried dependencies by using the
26215 topological ordering of the region. */
26216 if (BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
26217 add_dependee_for_func_arg (first_arg, e->src);
26218 }
26219 }
26220 insn = first_arg;
26221 if (insn == head)
26222 break;
26223 }
26224 }
26225 else if (first_arg)
26226 avoid_func_arg_motion (first_arg, insn);
26227 }
26228
26229 /* Hook for pre-reload schedule - set priority of moves from likely spilled
26230 HW registers to the maximum, to schedule them as soon as possible. These are
26231 moves from function argument registers at the top of the function entry
26232 and moves from function return value registers after call. */
26233 static int
26234 ix86_adjust_priority (rtx insn, int priority)
26235 {
26236 rtx set;
26237
26238 if (reload_completed)
26239 return priority;
26240
26241 if (!NONDEBUG_INSN_P (insn))
26242 return priority;
26243
26244 set = single_set (insn);
26245 if (set)
26246 {
26247 rtx tmp = SET_SRC (set);
26248 if (REG_P (tmp)
26249 && HARD_REGISTER_P (tmp)
26250 && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
26251 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
26252 return current_sched_info->sched_max_insns_priority;
26253 }
26254
26255 return priority;
26256 }
26257
26258 /* Model decoder of Core 2/i7.
26259 Below hooks for multipass scheduling (see haifa-sched.c:max_issue)
26260 track the instruction fetch block boundaries and make sure that long
26261 (9+ bytes) instructions are assigned to D0. */
26262
26263 /* Maximum length of an insn that can be handled by
26264 a secondary decoder unit. '8' for Core 2/i7. */
26265 static int core2i7_secondary_decoder_max_insn_size;
26266
26267 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
26268 '16' for Core 2/i7. */
26269 static int core2i7_ifetch_block_size;
26270
26271 /* Maximum number of instructions decoder can handle per cycle.
26272 '6' for Core 2/i7. */
26273 static int core2i7_ifetch_block_max_insns;
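
/* For example, with the 16-byte fetch block used below, three 5-byte
   insns leave room for only one more byte, so a following 4-byte insn
   must wait for the next cycle; likewise an insn longer than 8 bytes
   can only be handled by the first decoder.  */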
26274
26275 typedef struct ix86_first_cycle_multipass_data_ *
26276 ix86_first_cycle_multipass_data_t;
26277 typedef const struct ix86_first_cycle_multipass_data_ *
26278 const_ix86_first_cycle_multipass_data_t;
26279
26280 /* A variable to store target state across calls to max_issue within
26281 one cycle. */
26282 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
26283 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
26284
26285 /* Initialize DATA. */
26286 static void
26287 core2i7_first_cycle_multipass_init (void *_data)
26288 {
26289 ix86_first_cycle_multipass_data_t data
26290 = (ix86_first_cycle_multipass_data_t) _data;
26291
26292 data->ifetch_block_len = 0;
26293 data->ifetch_block_n_insns = 0;
26294 data->ready_try_change = NULL;
26295 data->ready_try_change_size = 0;
26296 }
26297
26298 /* Advancing the cycle; reset ifetch block counts. */
26299 static void
26300 core2i7_dfa_post_advance_cycle (void)
26301 {
26302 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
26303
26304 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
26305
26306 data->ifetch_block_len = 0;
26307 data->ifetch_block_n_insns = 0;
26308 }
26309
26310 static int min_insn_size (rtx);
26311
26312 /* Filter out insns from ready_try that the core will not be able to issue
26313 on current cycle due to decoder. */
26314 static void
26315 core2i7_first_cycle_multipass_filter_ready_try
26316 (const_ix86_first_cycle_multipass_data_t data,
26317 char *ready_try, int n_ready, bool first_cycle_insn_p)
26318 {
26319 while (n_ready--)
26320 {
26321 rtx insn;
26322 int insn_size;
26323
26324 if (ready_try[n_ready])
26325 continue;
26326
26327 insn = get_ready_element (n_ready);
26328 insn_size = min_insn_size (insn);
26329
26330 if (/* If this insn is too long for a secondary decoder ... */
26331 (!first_cycle_insn_p
26332 && insn_size > core2i7_secondary_decoder_max_insn_size)
26333 /* ... or it would not fit into the ifetch block ... */
26334 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
26335 /* ... or the decoder is full already ... */
26336 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
26337 /* ... mask the insn out. */
26338 {
26339 ready_try[n_ready] = 1;
26340
26341 if (data->ready_try_change)
26342 bitmap_set_bit (data->ready_try_change, n_ready);
26343 }
26344 }
26345 }
26346
26347 /* Prepare for a new round of multipass lookahead scheduling. */
26348 static void
26349 core2i7_first_cycle_multipass_begin (void *_data, char *ready_try, int n_ready,
26350 bool first_cycle_insn_p)
26351 {
26352 ix86_first_cycle_multipass_data_t data
26353 = (ix86_first_cycle_multipass_data_t) _data;
26354 const_ix86_first_cycle_multipass_data_t prev_data
26355 = ix86_first_cycle_multipass_data;
26356
26357 /* Restore the state from the end of the previous round. */
26358 data->ifetch_block_len = prev_data->ifetch_block_len;
26359 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
26360
26361 /* Filter instructions that cannot be issued on current cycle due to
26362 decoder restrictions. */
26363 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
26364 first_cycle_insn_p);
26365 }
26366
26367 /* INSN is being issued in current solution. Account for its impact on
26368 the decoder model. */
26369 static void
26370 core2i7_first_cycle_multipass_issue (void *_data, char *ready_try, int n_ready,
26371 rtx insn, const void *_prev_data)
26372 {
26373 ix86_first_cycle_multipass_data_t data
26374 = (ix86_first_cycle_multipass_data_t) _data;
26375 const_ix86_first_cycle_multipass_data_t prev_data
26376 = (const_ix86_first_cycle_multipass_data_t) _prev_data;
26377
26378 int insn_size = min_insn_size (insn);
26379
26380 data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
26381 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
26382 gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
26383 && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
26384
26385 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
26386 if (!data->ready_try_change)
26387 {
26388 data->ready_try_change = sbitmap_alloc (n_ready);
26389 data->ready_try_change_size = n_ready;
26390 }
26391 else if (data->ready_try_change_size < n_ready)
26392 {
26393 data->ready_try_change = sbitmap_resize (data->ready_try_change,
26394 n_ready, 0);
26395 data->ready_try_change_size = n_ready;
26396 }
26397 bitmap_clear (data->ready_try_change);
26398
26399 /* Filter out insns from ready_try that the core will not be able to issue
26400 on current cycle due to decoder. */
26401 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
26402 false);
26403 }
26404
26405 /* Revert the effect on ready_try. */
26406 static void
26407 core2i7_first_cycle_multipass_backtrack (const void *_data,
26408 char *ready_try,
26409 int n_ready ATTRIBUTE_UNUSED)
26410 {
26411 const_ix86_first_cycle_multipass_data_t data
26412 = (const_ix86_first_cycle_multipass_data_t) _data;
26413 unsigned int i = 0;
26414 sbitmap_iterator sbi;
26415
26416 gcc_assert (bitmap_last_set_bit (data->ready_try_change) < n_ready);
26417 EXECUTE_IF_SET_IN_BITMAP (data->ready_try_change, 0, i, sbi)
26418 {
26419 ready_try[i] = 0;
26420 }
26421 }
26422
26423 /* Save the result of multipass lookahead scheduling for the next round. */
26424 static void
26425 core2i7_first_cycle_multipass_end (const void *_data)
26426 {
26427 const_ix86_first_cycle_multipass_data_t data
26428 = (const_ix86_first_cycle_multipass_data_t) _data;
26429 ix86_first_cycle_multipass_data_t next_data
26430 = ix86_first_cycle_multipass_data;
26431
26432 if (data != NULL)
26433 {
26434 next_data->ifetch_block_len = data->ifetch_block_len;
26435 next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
26436 }
26437 }
26438
26439 /* Deallocate target data. */
26440 static void
26441 core2i7_first_cycle_multipass_fini (void *_data)
26442 {
26443 ix86_first_cycle_multipass_data_t data
26444 = (ix86_first_cycle_multipass_data_t) _data;
26445
26446 if (data->ready_try_change)
26447 {
26448 sbitmap_free (data->ready_try_change);
26449 data->ready_try_change = NULL;
26450 data->ready_try_change_size = 0;
26451 }
26452 }
26453
26454 /* Prepare for scheduling pass. */
26455 static void
26456 ix86_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
26457 int verbose ATTRIBUTE_UNUSED,
26458 int max_uid ATTRIBUTE_UNUSED)
26459 {
26460 /* Install scheduling hooks for current CPU. Some of these hooks are used
26461 in time-critical parts of the scheduler, so we only set them up when
26462 they are actually used. */
26463 switch (ix86_tune)
26464 {
26465 case PROCESSOR_CORE2:
26466 case PROCESSOR_NEHALEM:
26467 case PROCESSOR_SANDYBRIDGE:
26468 case PROCESSOR_HASWELL:
26469 /* Do not perform multipass scheduling for pre-reload schedule
26470 to save compile time. */
26471 if (reload_completed)
26472 {
26473 targetm.sched.dfa_post_advance_cycle
26474 = core2i7_dfa_post_advance_cycle;
26475 targetm.sched.first_cycle_multipass_init
26476 = core2i7_first_cycle_multipass_init;
26477 targetm.sched.first_cycle_multipass_begin
26478 = core2i7_first_cycle_multipass_begin;
26479 targetm.sched.first_cycle_multipass_issue
26480 = core2i7_first_cycle_multipass_issue;
26481 targetm.sched.first_cycle_multipass_backtrack
26482 = core2i7_first_cycle_multipass_backtrack;
26483 targetm.sched.first_cycle_multipass_end
26484 = core2i7_first_cycle_multipass_end;
26485 targetm.sched.first_cycle_multipass_fini
26486 = core2i7_first_cycle_multipass_fini;
26487
26488 /* Set decoder parameters. */
26489 core2i7_secondary_decoder_max_insn_size = 8;
26490 core2i7_ifetch_block_size = 16;
26491 core2i7_ifetch_block_max_insns = 6;
26492 break;
26493 }
26494 /* ... Fall through ... */
26495 default:
26496 targetm.sched.dfa_post_advance_cycle = NULL;
26497 targetm.sched.first_cycle_multipass_init = NULL;
26498 targetm.sched.first_cycle_multipass_begin = NULL;
26499 targetm.sched.first_cycle_multipass_issue = NULL;
26500 targetm.sched.first_cycle_multipass_backtrack = NULL;
26501 targetm.sched.first_cycle_multipass_end = NULL;
26502 targetm.sched.first_cycle_multipass_fini = NULL;
26503 break;
26504 }
26505 }
26506
26507 \f
26508 /* Compute the alignment given to a constant that is being placed in memory.
26509 EXP is the constant and ALIGN is the alignment that the object would
26510 ordinarily have.
26511 The value of this function is used instead of that alignment to align
26512 the object. */
26513
26514 int
26515 ix86_constant_alignment (tree exp, int align)
26516 {
26517 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
26518 || TREE_CODE (exp) == INTEGER_CST)
26519 {
26520 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
26521 return 64;
26522 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
26523 return 128;
26524 }
26525 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
26526 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
26527 return BITS_PER_WORD;
26528
26529 return align;
26530 }
26531
26532 /* Compute the alignment for a static variable.
26533 TYPE is the data type, and ALIGN is the alignment that
26534 the object would ordinarily have. The value of this function is used
26535 instead of that alignment to align the object. */
26536
26537 int
26538 ix86_data_alignment (tree type, int align, bool opt)
26539 {
26540 /* GCC 4.8 and earlier used to incorrectly assume this alignment even
26541 for symbols from other compilation units or symbols that don't need
26542 to bind locally. In order to preserve some ABI compatibility with
26543 those compilers, ensure we don't decrease alignment from what we
26544 used to assume. */
26545
26546 int max_align_compat
26547 = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
26548
26549 /* A data structure equal to or greater than the size of a cache line
26550 (64 bytes in the Pentium 4 and other recent Intel processors, including
26551 processors based on the Intel Core microarchitecture) should be aligned
26552 so that its base address is a multiple of the cache line size. */
26553
26554 int max_align
26555 = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);
26556
26557 if (max_align < BITS_PER_WORD)
26558 max_align = BITS_PER_WORD;
26559
26560 if (opt
26561 && AGGREGATE_TYPE_P (type)
26562 && TYPE_SIZE (type)
26563 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
26564 {
26565 if ((TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align_compat
26566 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
26567 && align < max_align_compat)
26568 align = max_align_compat;
26569 if ((TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
26570 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
26571 && align < max_align)
26572 align = max_align;
26573 }
26574
26575 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
26576 to 16byte boundary. */
26577 if (TARGET_64BIT)
26578 {
26579 if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
26580 && TYPE_SIZE (type)
26581 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
26582 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
26583 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
26584 return 128;
26585 }
26586
26587 if (!opt)
26588 return align;
26589
26590 if (TREE_CODE (type) == ARRAY_TYPE)
26591 {
26592 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
26593 return 64;
26594 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
26595 return 128;
26596 }
26597 else if (TREE_CODE (type) == COMPLEX_TYPE)
26598 {
26599
26600 if (TYPE_MODE (type) == DCmode && align < 64)
26601 return 64;
26602 if ((TYPE_MODE (type) == XCmode
26603 || TYPE_MODE (type) == TCmode) && align < 128)
26604 return 128;
26605 }
26606 else if ((TREE_CODE (type) == RECORD_TYPE
26607 || TREE_CODE (type) == UNION_TYPE
26608 || TREE_CODE (type) == QUAL_UNION_TYPE)
26609 && TYPE_FIELDS (type))
26610 {
26611 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
26612 return 64;
26613 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
26614 return 128;
26615 }
26616 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
26617 || TREE_CODE (type) == INTEGER_TYPE)
26618 {
26619 if (TYPE_MODE (type) == DFmode && align < 64)
26620 return 64;
26621 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
26622 return 128;
26623 }
26624
26625 return align;
26626 }
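
/* For instance, when not optimizing for size and with the default
   64-byte prefetch block, a static aggregate of 64 bytes or more is
   aligned to the cache line (512 bits), one of 32 bytes or more keeps
   the 256-bit alignment that GCC 4.8 and earlier assumed, and on
   x86-64 any array of at least 16 bytes gets at least 128-bit
   alignment.  */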
26627
26628 /* Compute the alignment for a local variable or a stack slot. EXP is
26629 the data type or decl itself, MODE is the widest mode available and
26630 ALIGN is the alignment that the object would ordinarily have. The
26631 value of this macro is used instead of that alignment to align the
26632 object. */
26633
26634 unsigned int
26635 ix86_local_alignment (tree exp, enum machine_mode mode,
26636 unsigned int align)
26637 {
26638 tree type, decl;
26639
26640 if (exp && DECL_P (exp))
26641 {
26642 type = TREE_TYPE (exp);
26643 decl = exp;
26644 }
26645 else
26646 {
26647 type = exp;
26648 decl = NULL;
26649 }
26650
26651 /* Don't do dynamic stack realignment for long long objects with
26652 -mpreferred-stack-boundary=2. */
26653 if (!TARGET_64BIT
26654 && align == 64
26655 && ix86_preferred_stack_boundary < 64
26656 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
26657 && (!type || !TYPE_USER_ALIGN (type))
26658 && (!decl || !DECL_USER_ALIGN (decl)))
26659 align = 32;
26660
26661 /* If TYPE is NULL, we are allocating a stack slot for caller-save
26662 register in MODE. We will return the largest alignment of XF
26663 and DF. */
26664 if (!type)
26665 {
26666 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
26667 align = GET_MODE_ALIGNMENT (DFmode);
26668 return align;
26669 }
26670
26671 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
26672 to 16byte boundary. Exact wording is:
26673
26674 An array uses the same alignment as its elements, except that a local or
26675 global array variable of length at least 16 bytes or
26676 a C99 variable-length array variable always has alignment of at least 16 bytes.
26677
26678 This was added to allow use of aligned SSE instructions on arrays. This
26679 rule is meant for static storage (where the compiler cannot do the analysis
26680 by itself). We follow it for automatic variables only when convenient.
26681 We fully control everything in the compiled function, and functions from
26682 other units cannot rely on the alignment.
26683
26684 Exclude the va_list type. It is the common case of a local array where
26685 we cannot benefit from the alignment.
26686
26687 TODO: Probably one should optimize for size only when the variable does not escape. */
26688 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
26689 && TARGET_SSE)
26690 {
26691 if (AGGREGATE_TYPE_P (type)
26692 && (va_list_type_node == NULL_TREE
26693 || (TYPE_MAIN_VARIANT (type)
26694 != TYPE_MAIN_VARIANT (va_list_type_node)))
26695 && TYPE_SIZE (type)
26696 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
26697 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
26698 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
26699 return 128;
26700 }
26701 if (TREE_CODE (type) == ARRAY_TYPE)
26702 {
26703 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
26704 return 64;
26705 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
26706 return 128;
26707 }
26708 else if (TREE_CODE (type) == COMPLEX_TYPE)
26709 {
26710 if (TYPE_MODE (type) == DCmode && align < 64)
26711 return 64;
26712 if ((TYPE_MODE (type) == XCmode
26713 || TYPE_MODE (type) == TCmode) && align < 128)
26714 return 128;
26715 }
26716 else if ((TREE_CODE (type) == RECORD_TYPE
26717 || TREE_CODE (type) == UNION_TYPE
26718 || TREE_CODE (type) == QUAL_UNION_TYPE)
26719 && TYPE_FIELDS (type))
26720 {
26721 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
26722 return 64;
26723 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
26724 return 128;
26725 }
26726 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
26727 || TREE_CODE (type) == INTEGER_TYPE)
26728 {
26729
26730 if (TYPE_MODE (type) == DFmode && align < 64)
26731 return 64;
26732 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
26733 return 128;
26734 }
26735 return align;
26736 }
26737
26738 /* Compute the minimum required alignment for dynamic stack realignment
26739 purposes for a local variable, parameter or a stack slot. EXP is
26740 the data type or decl itself, MODE is its mode and ALIGN is the
26741 alignment that the object would ordinarily have. */
26742
26743 unsigned int
26744 ix86_minimum_alignment (tree exp, enum machine_mode mode,
26745 unsigned int align)
26746 {
26747 tree type, decl;
26748
26749 if (exp && DECL_P (exp))
26750 {
26751 type = TREE_TYPE (exp);
26752 decl = exp;
26753 }
26754 else
26755 {
26756 type = exp;
26757 decl = NULL;
26758 }
26759
26760 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
26761 return align;
26762
26763 /* Don't do dynamic stack realignment for long long objects with
26764 -mpreferred-stack-boundary=2. */
26765 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
26766 && (!type || !TYPE_USER_ALIGN (type))
26767 && (!decl || !DECL_USER_ALIGN (decl)))
26768 return 32;
26769
26770 return align;
26771 }
26772 \f
26773 /* Find a location for the static chain incoming to a nested function.
26774 This is a register, unless all free registers are used by arguments. */
26775
26776 static rtx
26777 ix86_static_chain (const_tree fndecl, bool incoming_p)
26778 {
26779 unsigned regno;
26780
26781 if (!DECL_STATIC_CHAIN (fndecl))
26782 return NULL;
26783
26784 if (TARGET_64BIT)
26785 {
26786 /* We always use R10 in 64-bit mode. */
26787 regno = R10_REG;
26788 }
26789 else
26790 {
26791 tree fntype;
26792 unsigned int ccvt;
26793
26794 /* By default in 32-bit mode we use ECX to pass the static chain. */
26795 regno = CX_REG;
26796
26797 fntype = TREE_TYPE (fndecl);
26798 ccvt = ix86_get_callcvt (fntype);
26799 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
26800 {
26801 /* Fastcall functions use ecx/edx for arguments, which leaves
26802 us with EAX for the static chain.
26803 Thiscall functions use ecx for arguments, which also
26804 leaves us with EAX for the static chain. */
26805 regno = AX_REG;
26806 }
26807 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
26808 {
26809 /* Thiscall functions use ecx for arguments, which leaves
26810 us with EAX and EDX for the static chain.
26811 For ABI compatibility we use EAX. */
26812 regno = AX_REG;
26813 }
26814 else if (ix86_function_regparm (fntype, fndecl) == 3)
26815 {
26816 /* For regparm 3, we have no free call-clobbered registers in
26817 which to store the static chain. In order to implement this,
26818 we have the trampoline push the static chain to the stack.
26819 However, we can't push a value below the return address when
26820 we call the nested function directly, so we have to use an
26821 alternate entry point. For this we use ESI, and have the
26822 alternate entry point push ESI, so that things appear the
26823 same once we're executing the nested function. */
26824 if (incoming_p)
26825 {
26826 if (fndecl == current_function_decl)
26827 ix86_static_chain_on_stack = true;
26828 return gen_frame_mem (SImode,
26829 plus_constant (Pmode,
26830 arg_pointer_rtx, -8));
26831 }
26832 regno = SI_REG;
26833 }
26834 }
26835
26836 return gen_rtx_REG (Pmode, regno);
26837 }
26838
26839 /* Emit RTL insns to initialize the variable parts of a trampoline.
26840 FNDECL is the decl of the target address; M_TRAMP is a MEM for
26841 the trampoline, and CHAIN_VALUE is an RTX for the static chain
26842 to be passed to the target function. */
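
/* Illustrative sketch of the 64-bit trampoline laid out by the code below
   when both immediates are emitted with movabs (the exact shape depends on
   ptr_mode and on whether FNADDR fits in 32 bits; the emit_move_insn calls
   below are authoritative):

	0:  49 bb <imm64>	movabs $<fnaddr>, %r11
	a:  49 ba <imm64>	movabs $<chain>,  %r10
       14:  49 ff e3		jmp    *%r11
       17:  90			nop, pads the last write to a full SImode store  */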
26843
26844 static void
26845 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
26846 {
26847 rtx mem, fnaddr;
26848 int opcode;
26849 int offset = 0;
26850
26851 fnaddr = XEXP (DECL_RTL (fndecl), 0);
26852
26853 if (TARGET_64BIT)
26854 {
26855 int size;
26856
26857 	 /* Load the function address into r11.  Try to load the address
26858 	 using the shorter movl instead of movabs.  We may want to support
26859 	 movq for kernel mode, but the kernel does not use trampolines at
26860 	 the moment.  FNADDR is a 32-bit address and may not be in
26861 	 DImode when ptr_mode == SImode.  Always use movl in this
26862 	 case.  */
26863 if (ptr_mode == SImode
26864 || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
26865 {
26866 fnaddr = copy_addr_to_reg (fnaddr);
26867
26868 mem = adjust_address (m_tramp, HImode, offset);
26869 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
26870
26871 mem = adjust_address (m_tramp, SImode, offset + 2);
26872 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
26873 offset += 6;
26874 }
26875 else
26876 {
26877 mem = adjust_address (m_tramp, HImode, offset);
26878 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
26879
26880 mem = adjust_address (m_tramp, DImode, offset + 2);
26881 emit_move_insn (mem, fnaddr);
26882 offset += 10;
26883 }
26884
26885       /* Load the static chain into r10 using movabs.  Use the shorter movl
26886 instead of movabs when ptr_mode == SImode. */
26887 if (ptr_mode == SImode)
26888 {
26889 opcode = 0xba41;
26890 size = 6;
26891 }
26892 else
26893 {
26894 opcode = 0xba49;
26895 size = 10;
26896 }
26897
26898 mem = adjust_address (m_tramp, HImode, offset);
26899 emit_move_insn (mem, gen_int_mode (opcode, HImode));
26900
26901 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
26902 emit_move_insn (mem, chain_value);
26903 offset += size;
26904
26905 /* Jump to r11; the last (unused) byte is a nop, only there to
26906 pad the write out to a single 32-bit store. */
26907 mem = adjust_address (m_tramp, SImode, offset);
26908 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
26909 offset += 4;
26910 }
26911 else
26912 {
26913 rtx disp, chain;
26914
26915 /* Depending on the static chain location, either load a register
26916 with a constant, or push the constant to the stack. All of the
26917 instructions are the same size. */
26918 chain = ix86_static_chain (fndecl, true);
26919 if (REG_P (chain))
26920 {
26921 switch (REGNO (chain))
26922 {
26923 case AX_REG:
26924 opcode = 0xb8; break;
26925 case CX_REG:
26926 opcode = 0xb9; break;
26927 default:
26928 gcc_unreachable ();
26929 }
26930 }
26931 else
26932 opcode = 0x68;
26933
26934 mem = adjust_address (m_tramp, QImode, offset);
26935 emit_move_insn (mem, gen_int_mode (opcode, QImode));
26936
26937 mem = adjust_address (m_tramp, SImode, offset + 1);
26938 emit_move_insn (mem, chain_value);
26939 offset += 5;
26940
26941 mem = adjust_address (m_tramp, QImode, offset);
26942 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
26943
26944 mem = adjust_address (m_tramp, SImode, offset + 1);
26945
26946 /* Compute offset from the end of the jmp to the target function.
26947 In the case in which the trampoline stores the static chain on
26948 the stack, we need to skip the first insn which pushes the
26949 (call-saved) register static chain; this push is 1 byte. */
26950 offset += 5;
26951 disp = expand_binop (SImode, sub_optab, fnaddr,
26952 plus_constant (Pmode, XEXP (m_tramp, 0),
26953 offset - (MEM_P (chain) ? 1 : 0)),
26954 NULL_RTX, 1, OPTAB_DIRECT);
26955 emit_move_insn (mem, disp);
26956 }
26957
26958 gcc_assert (offset <= TRAMPOLINE_SIZE);
26959
26960 #ifdef HAVE_ENABLE_EXECUTE_STACK
26961 #ifdef CHECK_EXECUTE_STACK_ENABLED
26962 if (CHECK_EXECUTE_STACK_ENABLED)
26963 #endif
26964 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
26965 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
26966 #endif
26967 }
26968 \f
26969 /* The following file contains several enumerations and data structures
26970 built from the definitions in i386-builtin-types.def. */
26971
26972 #include "i386-builtin-types.inc"
26973
26974 /* Table for the ix86 builtin non-function types. */
26975 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
26976
26977 /* Retrieve an element from the above table, building some of
26978 the types lazily. */
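
/* For example, assuming the usual entries generated from
   i386-builtin-types.def, a vector code such as IX86_BT_V4SF is built here
   lazily as build_vector_type_for_mode of its element type (IX86_BT_FLOAT)
   and its mode (V4SFmode); the actual element types and modes come from
   ix86_builtin_type_vect_base and ix86_builtin_type_vect_mode.  */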
26979
26980 static tree
26981 ix86_get_builtin_type (enum ix86_builtin_type tcode)
26982 {
26983 unsigned int index;
26984 tree type, itype;
26985
26986 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
26987
26988 type = ix86_builtin_type_tab[(int) tcode];
26989 if (type != NULL)
26990 return type;
26991
26992 gcc_assert (tcode > IX86_BT_LAST_PRIM);
26993 if (tcode <= IX86_BT_LAST_VECT)
26994 {
26995 enum machine_mode mode;
26996
26997 index = tcode - IX86_BT_LAST_PRIM - 1;
26998 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
26999 mode = ix86_builtin_type_vect_mode[index];
27000
27001 type = build_vector_type_for_mode (itype, mode);
27002 }
27003 else
27004 {
27005 int quals;
27006
27007 index = tcode - IX86_BT_LAST_VECT - 1;
27008 if (tcode <= IX86_BT_LAST_PTR)
27009 quals = TYPE_UNQUALIFIED;
27010 else
27011 quals = TYPE_QUAL_CONST;
27012
27013 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
27014 if (quals != TYPE_UNQUALIFIED)
27015 itype = build_qualified_type (itype, quals);
27016
27017 type = build_pointer_type (itype);
27018 }
27019
27020 ix86_builtin_type_tab[(int) tcode] = type;
27021 return type;
27022 }
27023
27024 /* Table for the ix86 builtin function types. */
27025 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
27026
27027 /* Retrieve an element from the above table, building some of
27028 the types lazily. */
27029
27030 static tree
27031 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
27032 {
27033 tree type;
27034
27035 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
27036
27037 type = ix86_builtin_func_type_tab[(int) tcode];
27038 if (type != NULL)
27039 return type;
27040
27041 if (tcode <= IX86_BT_LAST_FUNC)
27042 {
27043 unsigned start = ix86_builtin_func_start[(int) tcode];
27044 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
27045 tree rtype, atype, args = void_list_node;
27046 unsigned i;
27047
27048 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
27049 for (i = after - 1; i > start; --i)
27050 {
27051 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
27052 args = tree_cons (NULL, atype, args);
27053 }
27054
27055 type = build_function_type (rtype, args);
27056 }
27057 else
27058 {
27059 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
27060 enum ix86_builtin_func_type icode;
27061
27062 icode = ix86_builtin_func_alias_base[index];
27063 type = ix86_get_builtin_func_type (icode);
27064 }
27065
27066 ix86_builtin_func_type_tab[(int) tcode] = type;
27067 return type;
27068 }
27069
27070
27071 /* Codes for all the SSE/MMX builtins. */
27072 enum ix86_builtins
27073 {
27074 IX86_BUILTIN_ADDPS,
27075 IX86_BUILTIN_ADDSS,
27076 IX86_BUILTIN_DIVPS,
27077 IX86_BUILTIN_DIVSS,
27078 IX86_BUILTIN_MULPS,
27079 IX86_BUILTIN_MULSS,
27080 IX86_BUILTIN_SUBPS,
27081 IX86_BUILTIN_SUBSS,
27082
27083 IX86_BUILTIN_CMPEQPS,
27084 IX86_BUILTIN_CMPLTPS,
27085 IX86_BUILTIN_CMPLEPS,
27086 IX86_BUILTIN_CMPGTPS,
27087 IX86_BUILTIN_CMPGEPS,
27088 IX86_BUILTIN_CMPNEQPS,
27089 IX86_BUILTIN_CMPNLTPS,
27090 IX86_BUILTIN_CMPNLEPS,
27091 IX86_BUILTIN_CMPNGTPS,
27092 IX86_BUILTIN_CMPNGEPS,
27093 IX86_BUILTIN_CMPORDPS,
27094 IX86_BUILTIN_CMPUNORDPS,
27095 IX86_BUILTIN_CMPEQSS,
27096 IX86_BUILTIN_CMPLTSS,
27097 IX86_BUILTIN_CMPLESS,
27098 IX86_BUILTIN_CMPNEQSS,
27099 IX86_BUILTIN_CMPNLTSS,
27100 IX86_BUILTIN_CMPNLESS,
27101 IX86_BUILTIN_CMPORDSS,
27102 IX86_BUILTIN_CMPUNORDSS,
27103
27104 IX86_BUILTIN_COMIEQSS,
27105 IX86_BUILTIN_COMILTSS,
27106 IX86_BUILTIN_COMILESS,
27107 IX86_BUILTIN_COMIGTSS,
27108 IX86_BUILTIN_COMIGESS,
27109 IX86_BUILTIN_COMINEQSS,
27110 IX86_BUILTIN_UCOMIEQSS,
27111 IX86_BUILTIN_UCOMILTSS,
27112 IX86_BUILTIN_UCOMILESS,
27113 IX86_BUILTIN_UCOMIGTSS,
27114 IX86_BUILTIN_UCOMIGESS,
27115 IX86_BUILTIN_UCOMINEQSS,
27116
27117 IX86_BUILTIN_CVTPI2PS,
27118 IX86_BUILTIN_CVTPS2PI,
27119 IX86_BUILTIN_CVTSI2SS,
27120 IX86_BUILTIN_CVTSI642SS,
27121 IX86_BUILTIN_CVTSS2SI,
27122 IX86_BUILTIN_CVTSS2SI64,
27123 IX86_BUILTIN_CVTTPS2PI,
27124 IX86_BUILTIN_CVTTSS2SI,
27125 IX86_BUILTIN_CVTTSS2SI64,
27126
27127 IX86_BUILTIN_MAXPS,
27128 IX86_BUILTIN_MAXSS,
27129 IX86_BUILTIN_MINPS,
27130 IX86_BUILTIN_MINSS,
27131
27132 IX86_BUILTIN_LOADUPS,
27133 IX86_BUILTIN_STOREUPS,
27134 IX86_BUILTIN_MOVSS,
27135
27136 IX86_BUILTIN_MOVHLPS,
27137 IX86_BUILTIN_MOVLHPS,
27138 IX86_BUILTIN_LOADHPS,
27139 IX86_BUILTIN_LOADLPS,
27140 IX86_BUILTIN_STOREHPS,
27141 IX86_BUILTIN_STORELPS,
27142
27143 IX86_BUILTIN_MASKMOVQ,
27144 IX86_BUILTIN_MOVMSKPS,
27145 IX86_BUILTIN_PMOVMSKB,
27146
27147 IX86_BUILTIN_MOVNTPS,
27148 IX86_BUILTIN_MOVNTQ,
27149
27150 IX86_BUILTIN_LOADDQU,
27151 IX86_BUILTIN_STOREDQU,
27152
27153 IX86_BUILTIN_PACKSSWB,
27154 IX86_BUILTIN_PACKSSDW,
27155 IX86_BUILTIN_PACKUSWB,
27156
27157 IX86_BUILTIN_PADDB,
27158 IX86_BUILTIN_PADDW,
27159 IX86_BUILTIN_PADDD,
27160 IX86_BUILTIN_PADDQ,
27161 IX86_BUILTIN_PADDSB,
27162 IX86_BUILTIN_PADDSW,
27163 IX86_BUILTIN_PADDUSB,
27164 IX86_BUILTIN_PADDUSW,
27165 IX86_BUILTIN_PSUBB,
27166 IX86_BUILTIN_PSUBW,
27167 IX86_BUILTIN_PSUBD,
27168 IX86_BUILTIN_PSUBQ,
27169 IX86_BUILTIN_PSUBSB,
27170 IX86_BUILTIN_PSUBSW,
27171 IX86_BUILTIN_PSUBUSB,
27172 IX86_BUILTIN_PSUBUSW,
27173
27174 IX86_BUILTIN_PAND,
27175 IX86_BUILTIN_PANDN,
27176 IX86_BUILTIN_POR,
27177 IX86_BUILTIN_PXOR,
27178
27179 IX86_BUILTIN_PAVGB,
27180 IX86_BUILTIN_PAVGW,
27181
27182 IX86_BUILTIN_PCMPEQB,
27183 IX86_BUILTIN_PCMPEQW,
27184 IX86_BUILTIN_PCMPEQD,
27185 IX86_BUILTIN_PCMPGTB,
27186 IX86_BUILTIN_PCMPGTW,
27187 IX86_BUILTIN_PCMPGTD,
27188
27189 IX86_BUILTIN_PMADDWD,
27190
27191 IX86_BUILTIN_PMAXSW,
27192 IX86_BUILTIN_PMAXUB,
27193 IX86_BUILTIN_PMINSW,
27194 IX86_BUILTIN_PMINUB,
27195
27196 IX86_BUILTIN_PMULHUW,
27197 IX86_BUILTIN_PMULHW,
27198 IX86_BUILTIN_PMULLW,
27199
27200 IX86_BUILTIN_PSADBW,
27201 IX86_BUILTIN_PSHUFW,
27202
27203 IX86_BUILTIN_PSLLW,
27204 IX86_BUILTIN_PSLLD,
27205 IX86_BUILTIN_PSLLQ,
27206 IX86_BUILTIN_PSRAW,
27207 IX86_BUILTIN_PSRAD,
27208 IX86_BUILTIN_PSRLW,
27209 IX86_BUILTIN_PSRLD,
27210 IX86_BUILTIN_PSRLQ,
27211 IX86_BUILTIN_PSLLWI,
27212 IX86_BUILTIN_PSLLDI,
27213 IX86_BUILTIN_PSLLQI,
27214 IX86_BUILTIN_PSRAWI,
27215 IX86_BUILTIN_PSRADI,
27216 IX86_BUILTIN_PSRLWI,
27217 IX86_BUILTIN_PSRLDI,
27218 IX86_BUILTIN_PSRLQI,
27219
27220 IX86_BUILTIN_PUNPCKHBW,
27221 IX86_BUILTIN_PUNPCKHWD,
27222 IX86_BUILTIN_PUNPCKHDQ,
27223 IX86_BUILTIN_PUNPCKLBW,
27224 IX86_BUILTIN_PUNPCKLWD,
27225 IX86_BUILTIN_PUNPCKLDQ,
27226
27227 IX86_BUILTIN_SHUFPS,
27228
27229 IX86_BUILTIN_RCPPS,
27230 IX86_BUILTIN_RCPSS,
27231 IX86_BUILTIN_RSQRTPS,
27232 IX86_BUILTIN_RSQRTPS_NR,
27233 IX86_BUILTIN_RSQRTSS,
27234 IX86_BUILTIN_RSQRTF,
27235 IX86_BUILTIN_SQRTPS,
27236 IX86_BUILTIN_SQRTPS_NR,
27237 IX86_BUILTIN_SQRTSS,
27238
27239 IX86_BUILTIN_UNPCKHPS,
27240 IX86_BUILTIN_UNPCKLPS,
27241
27242 IX86_BUILTIN_ANDPS,
27243 IX86_BUILTIN_ANDNPS,
27244 IX86_BUILTIN_ORPS,
27245 IX86_BUILTIN_XORPS,
27246
27247 IX86_BUILTIN_EMMS,
27248 IX86_BUILTIN_LDMXCSR,
27249 IX86_BUILTIN_STMXCSR,
27250 IX86_BUILTIN_SFENCE,
27251
27252 IX86_BUILTIN_FXSAVE,
27253 IX86_BUILTIN_FXRSTOR,
27254 IX86_BUILTIN_FXSAVE64,
27255 IX86_BUILTIN_FXRSTOR64,
27256
27257 IX86_BUILTIN_XSAVE,
27258 IX86_BUILTIN_XRSTOR,
27259 IX86_BUILTIN_XSAVE64,
27260 IX86_BUILTIN_XRSTOR64,
27261
27262 IX86_BUILTIN_XSAVEOPT,
27263 IX86_BUILTIN_XSAVEOPT64,
27264
27265 /* 3DNow! Original */
27266 IX86_BUILTIN_FEMMS,
27267 IX86_BUILTIN_PAVGUSB,
27268 IX86_BUILTIN_PF2ID,
27269 IX86_BUILTIN_PFACC,
27270 IX86_BUILTIN_PFADD,
27271 IX86_BUILTIN_PFCMPEQ,
27272 IX86_BUILTIN_PFCMPGE,
27273 IX86_BUILTIN_PFCMPGT,
27274 IX86_BUILTIN_PFMAX,
27275 IX86_BUILTIN_PFMIN,
27276 IX86_BUILTIN_PFMUL,
27277 IX86_BUILTIN_PFRCP,
27278 IX86_BUILTIN_PFRCPIT1,
27279 IX86_BUILTIN_PFRCPIT2,
27280 IX86_BUILTIN_PFRSQIT1,
27281 IX86_BUILTIN_PFRSQRT,
27282 IX86_BUILTIN_PFSUB,
27283 IX86_BUILTIN_PFSUBR,
27284 IX86_BUILTIN_PI2FD,
27285 IX86_BUILTIN_PMULHRW,
27286
27287 /* 3DNow! Athlon Extensions */
27288 IX86_BUILTIN_PF2IW,
27289 IX86_BUILTIN_PFNACC,
27290 IX86_BUILTIN_PFPNACC,
27291 IX86_BUILTIN_PI2FW,
27292 IX86_BUILTIN_PSWAPDSI,
27293 IX86_BUILTIN_PSWAPDSF,
27294
27295 /* SSE2 */
27296 IX86_BUILTIN_ADDPD,
27297 IX86_BUILTIN_ADDSD,
27298 IX86_BUILTIN_DIVPD,
27299 IX86_BUILTIN_DIVSD,
27300 IX86_BUILTIN_MULPD,
27301 IX86_BUILTIN_MULSD,
27302 IX86_BUILTIN_SUBPD,
27303 IX86_BUILTIN_SUBSD,
27304
27305 IX86_BUILTIN_CMPEQPD,
27306 IX86_BUILTIN_CMPLTPD,
27307 IX86_BUILTIN_CMPLEPD,
27308 IX86_BUILTIN_CMPGTPD,
27309 IX86_BUILTIN_CMPGEPD,
27310 IX86_BUILTIN_CMPNEQPD,
27311 IX86_BUILTIN_CMPNLTPD,
27312 IX86_BUILTIN_CMPNLEPD,
27313 IX86_BUILTIN_CMPNGTPD,
27314 IX86_BUILTIN_CMPNGEPD,
27315 IX86_BUILTIN_CMPORDPD,
27316 IX86_BUILTIN_CMPUNORDPD,
27317 IX86_BUILTIN_CMPEQSD,
27318 IX86_BUILTIN_CMPLTSD,
27319 IX86_BUILTIN_CMPLESD,
27320 IX86_BUILTIN_CMPNEQSD,
27321 IX86_BUILTIN_CMPNLTSD,
27322 IX86_BUILTIN_CMPNLESD,
27323 IX86_BUILTIN_CMPORDSD,
27324 IX86_BUILTIN_CMPUNORDSD,
27325
27326 IX86_BUILTIN_COMIEQSD,
27327 IX86_BUILTIN_COMILTSD,
27328 IX86_BUILTIN_COMILESD,
27329 IX86_BUILTIN_COMIGTSD,
27330 IX86_BUILTIN_COMIGESD,
27331 IX86_BUILTIN_COMINEQSD,
27332 IX86_BUILTIN_UCOMIEQSD,
27333 IX86_BUILTIN_UCOMILTSD,
27334 IX86_BUILTIN_UCOMILESD,
27335 IX86_BUILTIN_UCOMIGTSD,
27336 IX86_BUILTIN_UCOMIGESD,
27337 IX86_BUILTIN_UCOMINEQSD,
27338
27339 IX86_BUILTIN_MAXPD,
27340 IX86_BUILTIN_MAXSD,
27341 IX86_BUILTIN_MINPD,
27342 IX86_BUILTIN_MINSD,
27343
27344 IX86_BUILTIN_ANDPD,
27345 IX86_BUILTIN_ANDNPD,
27346 IX86_BUILTIN_ORPD,
27347 IX86_BUILTIN_XORPD,
27348
27349 IX86_BUILTIN_SQRTPD,
27350 IX86_BUILTIN_SQRTSD,
27351
27352 IX86_BUILTIN_UNPCKHPD,
27353 IX86_BUILTIN_UNPCKLPD,
27354
27355 IX86_BUILTIN_SHUFPD,
27356
27357 IX86_BUILTIN_LOADUPD,
27358 IX86_BUILTIN_STOREUPD,
27359 IX86_BUILTIN_MOVSD,
27360
27361 IX86_BUILTIN_LOADHPD,
27362 IX86_BUILTIN_LOADLPD,
27363
27364 IX86_BUILTIN_CVTDQ2PD,
27365 IX86_BUILTIN_CVTDQ2PS,
27366
27367 IX86_BUILTIN_CVTPD2DQ,
27368 IX86_BUILTIN_CVTPD2PI,
27369 IX86_BUILTIN_CVTPD2PS,
27370 IX86_BUILTIN_CVTTPD2DQ,
27371 IX86_BUILTIN_CVTTPD2PI,
27372
27373 IX86_BUILTIN_CVTPI2PD,
27374 IX86_BUILTIN_CVTSI2SD,
27375 IX86_BUILTIN_CVTSI642SD,
27376
27377 IX86_BUILTIN_CVTSD2SI,
27378 IX86_BUILTIN_CVTSD2SI64,
27379 IX86_BUILTIN_CVTSD2SS,
27380 IX86_BUILTIN_CVTSS2SD,
27381 IX86_BUILTIN_CVTTSD2SI,
27382 IX86_BUILTIN_CVTTSD2SI64,
27383
27384 IX86_BUILTIN_CVTPS2DQ,
27385 IX86_BUILTIN_CVTPS2PD,
27386 IX86_BUILTIN_CVTTPS2DQ,
27387
27388 IX86_BUILTIN_MOVNTI,
27389 IX86_BUILTIN_MOVNTI64,
27390 IX86_BUILTIN_MOVNTPD,
27391 IX86_BUILTIN_MOVNTDQ,
27392
27393 IX86_BUILTIN_MOVQ128,
27394
27395 /* SSE2 MMX */
27396 IX86_BUILTIN_MASKMOVDQU,
27397 IX86_BUILTIN_MOVMSKPD,
27398 IX86_BUILTIN_PMOVMSKB128,
27399
27400 IX86_BUILTIN_PACKSSWB128,
27401 IX86_BUILTIN_PACKSSDW128,
27402 IX86_BUILTIN_PACKUSWB128,
27403
27404 IX86_BUILTIN_PADDB128,
27405 IX86_BUILTIN_PADDW128,
27406 IX86_BUILTIN_PADDD128,
27407 IX86_BUILTIN_PADDQ128,
27408 IX86_BUILTIN_PADDSB128,
27409 IX86_BUILTIN_PADDSW128,
27410 IX86_BUILTIN_PADDUSB128,
27411 IX86_BUILTIN_PADDUSW128,
27412 IX86_BUILTIN_PSUBB128,
27413 IX86_BUILTIN_PSUBW128,
27414 IX86_BUILTIN_PSUBD128,
27415 IX86_BUILTIN_PSUBQ128,
27416 IX86_BUILTIN_PSUBSB128,
27417 IX86_BUILTIN_PSUBSW128,
27418 IX86_BUILTIN_PSUBUSB128,
27419 IX86_BUILTIN_PSUBUSW128,
27420
27421 IX86_BUILTIN_PAND128,
27422 IX86_BUILTIN_PANDN128,
27423 IX86_BUILTIN_POR128,
27424 IX86_BUILTIN_PXOR128,
27425
27426 IX86_BUILTIN_PAVGB128,
27427 IX86_BUILTIN_PAVGW128,
27428
27429 IX86_BUILTIN_PCMPEQB128,
27430 IX86_BUILTIN_PCMPEQW128,
27431 IX86_BUILTIN_PCMPEQD128,
27432 IX86_BUILTIN_PCMPGTB128,
27433 IX86_BUILTIN_PCMPGTW128,
27434 IX86_BUILTIN_PCMPGTD128,
27435
27436 IX86_BUILTIN_PMADDWD128,
27437
27438 IX86_BUILTIN_PMAXSW128,
27439 IX86_BUILTIN_PMAXUB128,
27440 IX86_BUILTIN_PMINSW128,
27441 IX86_BUILTIN_PMINUB128,
27442
27443 IX86_BUILTIN_PMULUDQ,
27444 IX86_BUILTIN_PMULUDQ128,
27445 IX86_BUILTIN_PMULHUW128,
27446 IX86_BUILTIN_PMULHW128,
27447 IX86_BUILTIN_PMULLW128,
27448
27449 IX86_BUILTIN_PSADBW128,
27450 IX86_BUILTIN_PSHUFHW,
27451 IX86_BUILTIN_PSHUFLW,
27452 IX86_BUILTIN_PSHUFD,
27453
27454 IX86_BUILTIN_PSLLDQI128,
27455 IX86_BUILTIN_PSLLWI128,
27456 IX86_BUILTIN_PSLLDI128,
27457 IX86_BUILTIN_PSLLQI128,
27458 IX86_BUILTIN_PSRAWI128,
27459 IX86_BUILTIN_PSRADI128,
27460 IX86_BUILTIN_PSRLDQI128,
27461 IX86_BUILTIN_PSRLWI128,
27462 IX86_BUILTIN_PSRLDI128,
27463 IX86_BUILTIN_PSRLQI128,
27464
27465 IX86_BUILTIN_PSLLDQ128,
27466 IX86_BUILTIN_PSLLW128,
27467 IX86_BUILTIN_PSLLD128,
27468 IX86_BUILTIN_PSLLQ128,
27469 IX86_BUILTIN_PSRAW128,
27470 IX86_BUILTIN_PSRAD128,
27471 IX86_BUILTIN_PSRLW128,
27472 IX86_BUILTIN_PSRLD128,
27473 IX86_BUILTIN_PSRLQ128,
27474
27475 IX86_BUILTIN_PUNPCKHBW128,
27476 IX86_BUILTIN_PUNPCKHWD128,
27477 IX86_BUILTIN_PUNPCKHDQ128,
27478 IX86_BUILTIN_PUNPCKHQDQ128,
27479 IX86_BUILTIN_PUNPCKLBW128,
27480 IX86_BUILTIN_PUNPCKLWD128,
27481 IX86_BUILTIN_PUNPCKLDQ128,
27482 IX86_BUILTIN_PUNPCKLQDQ128,
27483
27484 IX86_BUILTIN_CLFLUSH,
27485 IX86_BUILTIN_MFENCE,
27486 IX86_BUILTIN_LFENCE,
27487 IX86_BUILTIN_PAUSE,
27488
27489 IX86_BUILTIN_FNSTENV,
27490 IX86_BUILTIN_FLDENV,
27491 IX86_BUILTIN_FNSTSW,
27492 IX86_BUILTIN_FNCLEX,
27493
27494 IX86_BUILTIN_BSRSI,
27495 IX86_BUILTIN_BSRDI,
27496 IX86_BUILTIN_RDPMC,
27497 IX86_BUILTIN_RDTSC,
27498 IX86_BUILTIN_RDTSCP,
27499 IX86_BUILTIN_ROLQI,
27500 IX86_BUILTIN_ROLHI,
27501 IX86_BUILTIN_RORQI,
27502 IX86_BUILTIN_RORHI,
27503
27504 /* SSE3. */
27505 IX86_BUILTIN_ADDSUBPS,
27506 IX86_BUILTIN_HADDPS,
27507 IX86_BUILTIN_HSUBPS,
27508 IX86_BUILTIN_MOVSHDUP,
27509 IX86_BUILTIN_MOVSLDUP,
27510 IX86_BUILTIN_ADDSUBPD,
27511 IX86_BUILTIN_HADDPD,
27512 IX86_BUILTIN_HSUBPD,
27513 IX86_BUILTIN_LDDQU,
27514
27515 IX86_BUILTIN_MONITOR,
27516 IX86_BUILTIN_MWAIT,
27517
27518 /* SSSE3. */
27519 IX86_BUILTIN_PHADDW,
27520 IX86_BUILTIN_PHADDD,
27521 IX86_BUILTIN_PHADDSW,
27522 IX86_BUILTIN_PHSUBW,
27523 IX86_BUILTIN_PHSUBD,
27524 IX86_BUILTIN_PHSUBSW,
27525 IX86_BUILTIN_PMADDUBSW,
27526 IX86_BUILTIN_PMULHRSW,
27527 IX86_BUILTIN_PSHUFB,
27528 IX86_BUILTIN_PSIGNB,
27529 IX86_BUILTIN_PSIGNW,
27530 IX86_BUILTIN_PSIGND,
27531 IX86_BUILTIN_PALIGNR,
27532 IX86_BUILTIN_PABSB,
27533 IX86_BUILTIN_PABSW,
27534 IX86_BUILTIN_PABSD,
27535
27536 IX86_BUILTIN_PHADDW128,
27537 IX86_BUILTIN_PHADDD128,
27538 IX86_BUILTIN_PHADDSW128,
27539 IX86_BUILTIN_PHSUBW128,
27540 IX86_BUILTIN_PHSUBD128,
27541 IX86_BUILTIN_PHSUBSW128,
27542 IX86_BUILTIN_PMADDUBSW128,
27543 IX86_BUILTIN_PMULHRSW128,
27544 IX86_BUILTIN_PSHUFB128,
27545 IX86_BUILTIN_PSIGNB128,
27546 IX86_BUILTIN_PSIGNW128,
27547 IX86_BUILTIN_PSIGND128,
27548 IX86_BUILTIN_PALIGNR128,
27549 IX86_BUILTIN_PABSB128,
27550 IX86_BUILTIN_PABSW128,
27551 IX86_BUILTIN_PABSD128,
27552
27553 /* AMDFAM10 - SSE4A New Instructions. */
27554 IX86_BUILTIN_MOVNTSD,
27555 IX86_BUILTIN_MOVNTSS,
27556 IX86_BUILTIN_EXTRQI,
27557 IX86_BUILTIN_EXTRQ,
27558 IX86_BUILTIN_INSERTQI,
27559 IX86_BUILTIN_INSERTQ,
27560
27561 /* SSE4.1. */
27562 IX86_BUILTIN_BLENDPD,
27563 IX86_BUILTIN_BLENDPS,
27564 IX86_BUILTIN_BLENDVPD,
27565 IX86_BUILTIN_BLENDVPS,
27566 IX86_BUILTIN_PBLENDVB128,
27567 IX86_BUILTIN_PBLENDW128,
27568
27569 IX86_BUILTIN_DPPD,
27570 IX86_BUILTIN_DPPS,
27571
27572 IX86_BUILTIN_INSERTPS128,
27573
27574 IX86_BUILTIN_MOVNTDQA,
27575 IX86_BUILTIN_MPSADBW128,
27576 IX86_BUILTIN_PACKUSDW128,
27577 IX86_BUILTIN_PCMPEQQ,
27578 IX86_BUILTIN_PHMINPOSUW128,
27579
27580 IX86_BUILTIN_PMAXSB128,
27581 IX86_BUILTIN_PMAXSD128,
27582 IX86_BUILTIN_PMAXUD128,
27583 IX86_BUILTIN_PMAXUW128,
27584
27585 IX86_BUILTIN_PMINSB128,
27586 IX86_BUILTIN_PMINSD128,
27587 IX86_BUILTIN_PMINUD128,
27588 IX86_BUILTIN_PMINUW128,
27589
27590 IX86_BUILTIN_PMOVSXBW128,
27591 IX86_BUILTIN_PMOVSXBD128,
27592 IX86_BUILTIN_PMOVSXBQ128,
27593 IX86_BUILTIN_PMOVSXWD128,
27594 IX86_BUILTIN_PMOVSXWQ128,
27595 IX86_BUILTIN_PMOVSXDQ128,
27596
27597 IX86_BUILTIN_PMOVZXBW128,
27598 IX86_BUILTIN_PMOVZXBD128,
27599 IX86_BUILTIN_PMOVZXBQ128,
27600 IX86_BUILTIN_PMOVZXWD128,
27601 IX86_BUILTIN_PMOVZXWQ128,
27602 IX86_BUILTIN_PMOVZXDQ128,
27603
27604 IX86_BUILTIN_PMULDQ128,
27605 IX86_BUILTIN_PMULLD128,
27606
27607 IX86_BUILTIN_ROUNDSD,
27608 IX86_BUILTIN_ROUNDSS,
27609
27610 IX86_BUILTIN_ROUNDPD,
27611 IX86_BUILTIN_ROUNDPS,
27612
27613 IX86_BUILTIN_FLOORPD,
27614 IX86_BUILTIN_CEILPD,
27615 IX86_BUILTIN_TRUNCPD,
27616 IX86_BUILTIN_RINTPD,
27617 IX86_BUILTIN_ROUNDPD_AZ,
27618
27619 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX,
27620 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX,
27621 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX,
27622
27623 IX86_BUILTIN_FLOORPS,
27624 IX86_BUILTIN_CEILPS,
27625 IX86_BUILTIN_TRUNCPS,
27626 IX86_BUILTIN_RINTPS,
27627 IX86_BUILTIN_ROUNDPS_AZ,
27628
27629 IX86_BUILTIN_FLOORPS_SFIX,
27630 IX86_BUILTIN_CEILPS_SFIX,
27631 IX86_BUILTIN_ROUNDPS_AZ_SFIX,
27632
27633 IX86_BUILTIN_PTESTZ,
27634 IX86_BUILTIN_PTESTC,
27635 IX86_BUILTIN_PTESTNZC,
27636
27637 IX86_BUILTIN_VEC_INIT_V2SI,
27638 IX86_BUILTIN_VEC_INIT_V4HI,
27639 IX86_BUILTIN_VEC_INIT_V8QI,
27640 IX86_BUILTIN_VEC_EXT_V2DF,
27641 IX86_BUILTIN_VEC_EXT_V2DI,
27642 IX86_BUILTIN_VEC_EXT_V4SF,
27643 IX86_BUILTIN_VEC_EXT_V4SI,
27644 IX86_BUILTIN_VEC_EXT_V8HI,
27645 IX86_BUILTIN_VEC_EXT_V2SI,
27646 IX86_BUILTIN_VEC_EXT_V4HI,
27647 IX86_BUILTIN_VEC_EXT_V16QI,
27648 IX86_BUILTIN_VEC_SET_V2DI,
27649 IX86_BUILTIN_VEC_SET_V4SF,
27650 IX86_BUILTIN_VEC_SET_V4SI,
27651 IX86_BUILTIN_VEC_SET_V8HI,
27652 IX86_BUILTIN_VEC_SET_V4HI,
27653 IX86_BUILTIN_VEC_SET_V16QI,
27654
27655 IX86_BUILTIN_VEC_PACK_SFIX,
27656 IX86_BUILTIN_VEC_PACK_SFIX256,
27657
27658 /* SSE4.2. */
27659 IX86_BUILTIN_CRC32QI,
27660 IX86_BUILTIN_CRC32HI,
27661 IX86_BUILTIN_CRC32SI,
27662 IX86_BUILTIN_CRC32DI,
27663
27664 IX86_BUILTIN_PCMPESTRI128,
27665 IX86_BUILTIN_PCMPESTRM128,
27666 IX86_BUILTIN_PCMPESTRA128,
27667 IX86_BUILTIN_PCMPESTRC128,
27668 IX86_BUILTIN_PCMPESTRO128,
27669 IX86_BUILTIN_PCMPESTRS128,
27670 IX86_BUILTIN_PCMPESTRZ128,
27671 IX86_BUILTIN_PCMPISTRI128,
27672 IX86_BUILTIN_PCMPISTRM128,
27673 IX86_BUILTIN_PCMPISTRA128,
27674 IX86_BUILTIN_PCMPISTRC128,
27675 IX86_BUILTIN_PCMPISTRO128,
27676 IX86_BUILTIN_PCMPISTRS128,
27677 IX86_BUILTIN_PCMPISTRZ128,
27678
27679 IX86_BUILTIN_PCMPGTQ,
27680
27681 /* AES instructions */
27682 IX86_BUILTIN_AESENC128,
27683 IX86_BUILTIN_AESENCLAST128,
27684 IX86_BUILTIN_AESDEC128,
27685 IX86_BUILTIN_AESDECLAST128,
27686 IX86_BUILTIN_AESIMC128,
27687 IX86_BUILTIN_AESKEYGENASSIST128,
27688
27689 /* PCLMUL instruction */
27690 IX86_BUILTIN_PCLMULQDQ128,
27691
27692 /* AVX */
27693 IX86_BUILTIN_ADDPD256,
27694 IX86_BUILTIN_ADDPS256,
27695 IX86_BUILTIN_ADDSUBPD256,
27696 IX86_BUILTIN_ADDSUBPS256,
27697 IX86_BUILTIN_ANDPD256,
27698 IX86_BUILTIN_ANDPS256,
27699 IX86_BUILTIN_ANDNPD256,
27700 IX86_BUILTIN_ANDNPS256,
27701 IX86_BUILTIN_BLENDPD256,
27702 IX86_BUILTIN_BLENDPS256,
27703 IX86_BUILTIN_BLENDVPD256,
27704 IX86_BUILTIN_BLENDVPS256,
27705 IX86_BUILTIN_DIVPD256,
27706 IX86_BUILTIN_DIVPS256,
27707 IX86_BUILTIN_DPPS256,
27708 IX86_BUILTIN_HADDPD256,
27709 IX86_BUILTIN_HADDPS256,
27710 IX86_BUILTIN_HSUBPD256,
27711 IX86_BUILTIN_HSUBPS256,
27712 IX86_BUILTIN_MAXPD256,
27713 IX86_BUILTIN_MAXPS256,
27714 IX86_BUILTIN_MINPD256,
27715 IX86_BUILTIN_MINPS256,
27716 IX86_BUILTIN_MULPD256,
27717 IX86_BUILTIN_MULPS256,
27718 IX86_BUILTIN_ORPD256,
27719 IX86_BUILTIN_ORPS256,
27720 IX86_BUILTIN_SHUFPD256,
27721 IX86_BUILTIN_SHUFPS256,
27722 IX86_BUILTIN_SUBPD256,
27723 IX86_BUILTIN_SUBPS256,
27724 IX86_BUILTIN_XORPD256,
27725 IX86_BUILTIN_XORPS256,
27726 IX86_BUILTIN_CMPSD,
27727 IX86_BUILTIN_CMPSS,
27728 IX86_BUILTIN_CMPPD,
27729 IX86_BUILTIN_CMPPS,
27730 IX86_BUILTIN_CMPPD256,
27731 IX86_BUILTIN_CMPPS256,
27732 IX86_BUILTIN_CVTDQ2PD256,
27733 IX86_BUILTIN_CVTDQ2PS256,
27734 IX86_BUILTIN_CVTPD2PS256,
27735 IX86_BUILTIN_CVTPS2DQ256,
27736 IX86_BUILTIN_CVTPS2PD256,
27737 IX86_BUILTIN_CVTTPD2DQ256,
27738 IX86_BUILTIN_CVTPD2DQ256,
27739 IX86_BUILTIN_CVTTPS2DQ256,
27740 IX86_BUILTIN_EXTRACTF128PD256,
27741 IX86_BUILTIN_EXTRACTF128PS256,
27742 IX86_BUILTIN_EXTRACTF128SI256,
27743 IX86_BUILTIN_VZEROALL,
27744 IX86_BUILTIN_VZEROUPPER,
27745 IX86_BUILTIN_VPERMILVARPD,
27746 IX86_BUILTIN_VPERMILVARPS,
27747 IX86_BUILTIN_VPERMILVARPD256,
27748 IX86_BUILTIN_VPERMILVARPS256,
27749 IX86_BUILTIN_VPERMILPD,
27750 IX86_BUILTIN_VPERMILPS,
27751 IX86_BUILTIN_VPERMILPD256,
27752 IX86_BUILTIN_VPERMILPS256,
27753 IX86_BUILTIN_VPERMIL2PD,
27754 IX86_BUILTIN_VPERMIL2PS,
27755 IX86_BUILTIN_VPERMIL2PD256,
27756 IX86_BUILTIN_VPERMIL2PS256,
27757 IX86_BUILTIN_VPERM2F128PD256,
27758 IX86_BUILTIN_VPERM2F128PS256,
27759 IX86_BUILTIN_VPERM2F128SI256,
27760 IX86_BUILTIN_VBROADCASTSS,
27761 IX86_BUILTIN_VBROADCASTSD256,
27762 IX86_BUILTIN_VBROADCASTSS256,
27763 IX86_BUILTIN_VBROADCASTPD256,
27764 IX86_BUILTIN_VBROADCASTPS256,
27765 IX86_BUILTIN_VINSERTF128PD256,
27766 IX86_BUILTIN_VINSERTF128PS256,
27767 IX86_BUILTIN_VINSERTF128SI256,
27768 IX86_BUILTIN_LOADUPD256,
27769 IX86_BUILTIN_LOADUPS256,
27770 IX86_BUILTIN_STOREUPD256,
27771 IX86_BUILTIN_STOREUPS256,
27772 IX86_BUILTIN_LDDQU256,
27773 IX86_BUILTIN_MOVNTDQ256,
27774 IX86_BUILTIN_MOVNTPD256,
27775 IX86_BUILTIN_MOVNTPS256,
27776 IX86_BUILTIN_LOADDQU256,
27777 IX86_BUILTIN_STOREDQU256,
27778 IX86_BUILTIN_MASKLOADPD,
27779 IX86_BUILTIN_MASKLOADPS,
27780 IX86_BUILTIN_MASKSTOREPD,
27781 IX86_BUILTIN_MASKSTOREPS,
27782 IX86_BUILTIN_MASKLOADPD256,
27783 IX86_BUILTIN_MASKLOADPS256,
27784 IX86_BUILTIN_MASKSTOREPD256,
27785 IX86_BUILTIN_MASKSTOREPS256,
27786 IX86_BUILTIN_MOVSHDUP256,
27787 IX86_BUILTIN_MOVSLDUP256,
27788 IX86_BUILTIN_MOVDDUP256,
27789
27790 IX86_BUILTIN_SQRTPD256,
27791 IX86_BUILTIN_SQRTPS256,
27792 IX86_BUILTIN_SQRTPS_NR256,
27793 IX86_BUILTIN_RSQRTPS256,
27794 IX86_BUILTIN_RSQRTPS_NR256,
27795
27796 IX86_BUILTIN_RCPPS256,
27797
27798 IX86_BUILTIN_ROUNDPD256,
27799 IX86_BUILTIN_ROUNDPS256,
27800
27801 IX86_BUILTIN_FLOORPD256,
27802 IX86_BUILTIN_CEILPD256,
27803 IX86_BUILTIN_TRUNCPD256,
27804 IX86_BUILTIN_RINTPD256,
27805 IX86_BUILTIN_ROUNDPD_AZ256,
27806
27807 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256,
27808 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256,
27809 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256,
27810
27811 IX86_BUILTIN_FLOORPS256,
27812 IX86_BUILTIN_CEILPS256,
27813 IX86_BUILTIN_TRUNCPS256,
27814 IX86_BUILTIN_RINTPS256,
27815 IX86_BUILTIN_ROUNDPS_AZ256,
27816
27817 IX86_BUILTIN_FLOORPS_SFIX256,
27818 IX86_BUILTIN_CEILPS_SFIX256,
27819 IX86_BUILTIN_ROUNDPS_AZ_SFIX256,
27820
27821 IX86_BUILTIN_UNPCKHPD256,
27822 IX86_BUILTIN_UNPCKLPD256,
27823 IX86_BUILTIN_UNPCKHPS256,
27824 IX86_BUILTIN_UNPCKLPS256,
27825
27826 IX86_BUILTIN_SI256_SI,
27827 IX86_BUILTIN_PS256_PS,
27828 IX86_BUILTIN_PD256_PD,
27829 IX86_BUILTIN_SI_SI256,
27830 IX86_BUILTIN_PS_PS256,
27831 IX86_BUILTIN_PD_PD256,
27832
27833 IX86_BUILTIN_VTESTZPD,
27834 IX86_BUILTIN_VTESTCPD,
27835 IX86_BUILTIN_VTESTNZCPD,
27836 IX86_BUILTIN_VTESTZPS,
27837 IX86_BUILTIN_VTESTCPS,
27838 IX86_BUILTIN_VTESTNZCPS,
27839 IX86_BUILTIN_VTESTZPD256,
27840 IX86_BUILTIN_VTESTCPD256,
27841 IX86_BUILTIN_VTESTNZCPD256,
27842 IX86_BUILTIN_VTESTZPS256,
27843 IX86_BUILTIN_VTESTCPS256,
27844 IX86_BUILTIN_VTESTNZCPS256,
27845 IX86_BUILTIN_PTESTZ256,
27846 IX86_BUILTIN_PTESTC256,
27847 IX86_BUILTIN_PTESTNZC256,
27848
27849 IX86_BUILTIN_MOVMSKPD256,
27850 IX86_BUILTIN_MOVMSKPS256,
27851
27852 /* AVX2 */
27853 IX86_BUILTIN_MPSADBW256,
27854 IX86_BUILTIN_PABSB256,
27855 IX86_BUILTIN_PABSW256,
27856 IX86_BUILTIN_PABSD256,
27857 IX86_BUILTIN_PACKSSDW256,
27858 IX86_BUILTIN_PACKSSWB256,
27859 IX86_BUILTIN_PACKUSDW256,
27860 IX86_BUILTIN_PACKUSWB256,
27861 IX86_BUILTIN_PADDB256,
27862 IX86_BUILTIN_PADDW256,
27863 IX86_BUILTIN_PADDD256,
27864 IX86_BUILTIN_PADDQ256,
27865 IX86_BUILTIN_PADDSB256,
27866 IX86_BUILTIN_PADDSW256,
27867 IX86_BUILTIN_PADDUSB256,
27868 IX86_BUILTIN_PADDUSW256,
27869 IX86_BUILTIN_PALIGNR256,
27870 IX86_BUILTIN_AND256I,
27871 IX86_BUILTIN_ANDNOT256I,
27872 IX86_BUILTIN_PAVGB256,
27873 IX86_BUILTIN_PAVGW256,
27874 IX86_BUILTIN_PBLENDVB256,
27875 IX86_BUILTIN_PBLENDVW256,
27876 IX86_BUILTIN_PCMPEQB256,
27877 IX86_BUILTIN_PCMPEQW256,
27878 IX86_BUILTIN_PCMPEQD256,
27879 IX86_BUILTIN_PCMPEQQ256,
27880 IX86_BUILTIN_PCMPGTB256,
27881 IX86_BUILTIN_PCMPGTW256,
27882 IX86_BUILTIN_PCMPGTD256,
27883 IX86_BUILTIN_PCMPGTQ256,
27884 IX86_BUILTIN_PHADDW256,
27885 IX86_BUILTIN_PHADDD256,
27886 IX86_BUILTIN_PHADDSW256,
27887 IX86_BUILTIN_PHSUBW256,
27888 IX86_BUILTIN_PHSUBD256,
27889 IX86_BUILTIN_PHSUBSW256,
27890 IX86_BUILTIN_PMADDUBSW256,
27891 IX86_BUILTIN_PMADDWD256,
27892 IX86_BUILTIN_PMAXSB256,
27893 IX86_BUILTIN_PMAXSW256,
27894 IX86_BUILTIN_PMAXSD256,
27895 IX86_BUILTIN_PMAXUB256,
27896 IX86_BUILTIN_PMAXUW256,
27897 IX86_BUILTIN_PMAXUD256,
27898 IX86_BUILTIN_PMINSB256,
27899 IX86_BUILTIN_PMINSW256,
27900 IX86_BUILTIN_PMINSD256,
27901 IX86_BUILTIN_PMINUB256,
27902 IX86_BUILTIN_PMINUW256,
27903 IX86_BUILTIN_PMINUD256,
27904 IX86_BUILTIN_PMOVMSKB256,
27905 IX86_BUILTIN_PMOVSXBW256,
27906 IX86_BUILTIN_PMOVSXBD256,
27907 IX86_BUILTIN_PMOVSXBQ256,
27908 IX86_BUILTIN_PMOVSXWD256,
27909 IX86_BUILTIN_PMOVSXWQ256,
27910 IX86_BUILTIN_PMOVSXDQ256,
27911 IX86_BUILTIN_PMOVZXBW256,
27912 IX86_BUILTIN_PMOVZXBD256,
27913 IX86_BUILTIN_PMOVZXBQ256,
27914 IX86_BUILTIN_PMOVZXWD256,
27915 IX86_BUILTIN_PMOVZXWQ256,
27916 IX86_BUILTIN_PMOVZXDQ256,
27917 IX86_BUILTIN_PMULDQ256,
27918 IX86_BUILTIN_PMULHRSW256,
27919 IX86_BUILTIN_PMULHUW256,
27920 IX86_BUILTIN_PMULHW256,
27921 IX86_BUILTIN_PMULLW256,
27922 IX86_BUILTIN_PMULLD256,
27923 IX86_BUILTIN_PMULUDQ256,
27924 IX86_BUILTIN_POR256,
27925 IX86_BUILTIN_PSADBW256,
27926 IX86_BUILTIN_PSHUFB256,
27927 IX86_BUILTIN_PSHUFD256,
27928 IX86_BUILTIN_PSHUFHW256,
27929 IX86_BUILTIN_PSHUFLW256,
27930 IX86_BUILTIN_PSIGNB256,
27931 IX86_BUILTIN_PSIGNW256,
27932 IX86_BUILTIN_PSIGND256,
27933 IX86_BUILTIN_PSLLDQI256,
27934 IX86_BUILTIN_PSLLWI256,
27935 IX86_BUILTIN_PSLLW256,
27936 IX86_BUILTIN_PSLLDI256,
27937 IX86_BUILTIN_PSLLD256,
27938 IX86_BUILTIN_PSLLQI256,
27939 IX86_BUILTIN_PSLLQ256,
27940 IX86_BUILTIN_PSRAWI256,
27941 IX86_BUILTIN_PSRAW256,
27942 IX86_BUILTIN_PSRADI256,
27943 IX86_BUILTIN_PSRAD256,
27944 IX86_BUILTIN_PSRLDQI256,
27945 IX86_BUILTIN_PSRLWI256,
27946 IX86_BUILTIN_PSRLW256,
27947 IX86_BUILTIN_PSRLDI256,
27948 IX86_BUILTIN_PSRLD256,
27949 IX86_BUILTIN_PSRLQI256,
27950 IX86_BUILTIN_PSRLQ256,
27951 IX86_BUILTIN_PSUBB256,
27952 IX86_BUILTIN_PSUBW256,
27953 IX86_BUILTIN_PSUBD256,
27954 IX86_BUILTIN_PSUBQ256,
27955 IX86_BUILTIN_PSUBSB256,
27956 IX86_BUILTIN_PSUBSW256,
27957 IX86_BUILTIN_PSUBUSB256,
27958 IX86_BUILTIN_PSUBUSW256,
27959 IX86_BUILTIN_PUNPCKHBW256,
27960 IX86_BUILTIN_PUNPCKHWD256,
27961 IX86_BUILTIN_PUNPCKHDQ256,
27962 IX86_BUILTIN_PUNPCKHQDQ256,
27963 IX86_BUILTIN_PUNPCKLBW256,
27964 IX86_BUILTIN_PUNPCKLWD256,
27965 IX86_BUILTIN_PUNPCKLDQ256,
27966 IX86_BUILTIN_PUNPCKLQDQ256,
27967 IX86_BUILTIN_PXOR256,
27968 IX86_BUILTIN_MOVNTDQA256,
27969 IX86_BUILTIN_VBROADCASTSS_PS,
27970 IX86_BUILTIN_VBROADCASTSS_PS256,
27971 IX86_BUILTIN_VBROADCASTSD_PD256,
27972 IX86_BUILTIN_VBROADCASTSI256,
27973 IX86_BUILTIN_PBLENDD256,
27974 IX86_BUILTIN_PBLENDD128,
27975 IX86_BUILTIN_PBROADCASTB256,
27976 IX86_BUILTIN_PBROADCASTW256,
27977 IX86_BUILTIN_PBROADCASTD256,
27978 IX86_BUILTIN_PBROADCASTQ256,
27979 IX86_BUILTIN_PBROADCASTB128,
27980 IX86_BUILTIN_PBROADCASTW128,
27981 IX86_BUILTIN_PBROADCASTD128,
27982 IX86_BUILTIN_PBROADCASTQ128,
27983 IX86_BUILTIN_VPERMVARSI256,
27984 IX86_BUILTIN_VPERMDF256,
27985 IX86_BUILTIN_VPERMVARSF256,
27986 IX86_BUILTIN_VPERMDI256,
27987 IX86_BUILTIN_VPERMTI256,
27988 IX86_BUILTIN_VEXTRACT128I256,
27989 IX86_BUILTIN_VINSERT128I256,
27990 IX86_BUILTIN_MASKLOADD,
27991 IX86_BUILTIN_MASKLOADQ,
27992 IX86_BUILTIN_MASKLOADD256,
27993 IX86_BUILTIN_MASKLOADQ256,
27994 IX86_BUILTIN_MASKSTORED,
27995 IX86_BUILTIN_MASKSTOREQ,
27996 IX86_BUILTIN_MASKSTORED256,
27997 IX86_BUILTIN_MASKSTOREQ256,
27998 IX86_BUILTIN_PSLLVV4DI,
27999 IX86_BUILTIN_PSLLVV2DI,
28000 IX86_BUILTIN_PSLLVV8SI,
28001 IX86_BUILTIN_PSLLVV4SI,
28002 IX86_BUILTIN_PSRAVV8SI,
28003 IX86_BUILTIN_PSRAVV4SI,
28004 IX86_BUILTIN_PSRLVV4DI,
28005 IX86_BUILTIN_PSRLVV2DI,
28006 IX86_BUILTIN_PSRLVV8SI,
28007 IX86_BUILTIN_PSRLVV4SI,
28008
28009 IX86_BUILTIN_GATHERSIV2DF,
28010 IX86_BUILTIN_GATHERSIV4DF,
28011 IX86_BUILTIN_GATHERDIV2DF,
28012 IX86_BUILTIN_GATHERDIV4DF,
28013 IX86_BUILTIN_GATHERSIV4SF,
28014 IX86_BUILTIN_GATHERSIV8SF,
28015 IX86_BUILTIN_GATHERDIV4SF,
28016 IX86_BUILTIN_GATHERDIV8SF,
28017 IX86_BUILTIN_GATHERSIV2DI,
28018 IX86_BUILTIN_GATHERSIV4DI,
28019 IX86_BUILTIN_GATHERDIV2DI,
28020 IX86_BUILTIN_GATHERDIV4DI,
28021 IX86_BUILTIN_GATHERSIV4SI,
28022 IX86_BUILTIN_GATHERSIV8SI,
28023 IX86_BUILTIN_GATHERDIV4SI,
28024 IX86_BUILTIN_GATHERDIV8SI,
28025
28026 /* AVX512F */
28027 IX86_BUILTIN_ADDPD512,
28028 IX86_BUILTIN_ADDPS512,
28029 IX86_BUILTIN_ADDSD_ROUND,
28030 IX86_BUILTIN_ADDSS_ROUND,
28031 IX86_BUILTIN_ALIGND512,
28032 IX86_BUILTIN_ALIGNQ512,
28033 IX86_BUILTIN_BLENDMD512,
28034 IX86_BUILTIN_BLENDMPD512,
28035 IX86_BUILTIN_BLENDMPS512,
28036 IX86_BUILTIN_BLENDMQ512,
28037 IX86_BUILTIN_BROADCASTF32X4_512,
28038 IX86_BUILTIN_BROADCASTF64X4_512,
28039 IX86_BUILTIN_BROADCASTI32X4_512,
28040 IX86_BUILTIN_BROADCASTI64X4_512,
28041 IX86_BUILTIN_BROADCASTSD512,
28042 IX86_BUILTIN_BROADCASTSS512,
28043 IX86_BUILTIN_CMPD512,
28044 IX86_BUILTIN_CMPPD512,
28045 IX86_BUILTIN_CMPPS512,
28046 IX86_BUILTIN_CMPQ512,
28047 IX86_BUILTIN_CMPSD_MASK,
28048 IX86_BUILTIN_CMPSS_MASK,
28049 IX86_BUILTIN_COMIDF,
28050 IX86_BUILTIN_COMISF,
28051 IX86_BUILTIN_COMPRESSPD512,
28052 IX86_BUILTIN_COMPRESSPDSTORE512,
28053 IX86_BUILTIN_COMPRESSPS512,
28054 IX86_BUILTIN_COMPRESSPSSTORE512,
28055 IX86_BUILTIN_CVTDQ2PD512,
28056 IX86_BUILTIN_CVTDQ2PS512,
28057 IX86_BUILTIN_CVTPD2DQ512,
28058 IX86_BUILTIN_CVTPD2PS512,
28059 IX86_BUILTIN_CVTPD2UDQ512,
28060 IX86_BUILTIN_CVTPH2PS512,
28061 IX86_BUILTIN_CVTPS2DQ512,
28062 IX86_BUILTIN_CVTPS2PD512,
28063 IX86_BUILTIN_CVTPS2PH512,
28064 IX86_BUILTIN_CVTPS2UDQ512,
28065 IX86_BUILTIN_CVTSD2SS_ROUND,
28066 IX86_BUILTIN_CVTSI2SD64,
28067 IX86_BUILTIN_CVTSI2SS32,
28068 IX86_BUILTIN_CVTSI2SS64,
28069 IX86_BUILTIN_CVTSS2SD_ROUND,
28070 IX86_BUILTIN_CVTTPD2DQ512,
28071 IX86_BUILTIN_CVTTPD2UDQ512,
28072 IX86_BUILTIN_CVTTPS2DQ512,
28073 IX86_BUILTIN_CVTTPS2UDQ512,
28074 IX86_BUILTIN_CVTUDQ2PD512,
28075 IX86_BUILTIN_CVTUDQ2PS512,
28076 IX86_BUILTIN_CVTUSI2SD32,
28077 IX86_BUILTIN_CVTUSI2SD64,
28078 IX86_BUILTIN_CVTUSI2SS32,
28079 IX86_BUILTIN_CVTUSI2SS64,
28080 IX86_BUILTIN_DIVPD512,
28081 IX86_BUILTIN_DIVPS512,
28082 IX86_BUILTIN_DIVSD_ROUND,
28083 IX86_BUILTIN_DIVSS_ROUND,
28084 IX86_BUILTIN_EXPANDPD512,
28085 IX86_BUILTIN_EXPANDPD512Z,
28086 IX86_BUILTIN_EXPANDPDLOAD512,
28087 IX86_BUILTIN_EXPANDPDLOAD512Z,
28088 IX86_BUILTIN_EXPANDPS512,
28089 IX86_BUILTIN_EXPANDPS512Z,
28090 IX86_BUILTIN_EXPANDPSLOAD512,
28091 IX86_BUILTIN_EXPANDPSLOAD512Z,
28092 IX86_BUILTIN_EXTRACTF32X4,
28093 IX86_BUILTIN_EXTRACTF64X4,
28094 IX86_BUILTIN_EXTRACTI32X4,
28095 IX86_BUILTIN_EXTRACTI64X4,
28096 IX86_BUILTIN_FIXUPIMMPD512_MASK,
28097 IX86_BUILTIN_FIXUPIMMPD512_MASKZ,
28098 IX86_BUILTIN_FIXUPIMMPS512_MASK,
28099 IX86_BUILTIN_FIXUPIMMPS512_MASKZ,
28100 IX86_BUILTIN_FIXUPIMMSD128_MASK,
28101 IX86_BUILTIN_FIXUPIMMSD128_MASKZ,
28102 IX86_BUILTIN_FIXUPIMMSS128_MASK,
28103 IX86_BUILTIN_FIXUPIMMSS128_MASKZ,
28104 IX86_BUILTIN_GETEXPPD512,
28105 IX86_BUILTIN_GETEXPPS512,
28106 IX86_BUILTIN_GETEXPSD128,
28107 IX86_BUILTIN_GETEXPSS128,
28108 IX86_BUILTIN_GETMANTPD512,
28109 IX86_BUILTIN_GETMANTPS512,
28110 IX86_BUILTIN_GETMANTSD128,
28111 IX86_BUILTIN_GETMANTSS128,
28112 IX86_BUILTIN_INSERTF32X4,
28113 IX86_BUILTIN_INSERTF64X4,
28114 IX86_BUILTIN_INSERTI32X4,
28115 IX86_BUILTIN_INSERTI64X4,
28116 IX86_BUILTIN_LOADAPD512,
28117 IX86_BUILTIN_LOADAPS512,
28118 IX86_BUILTIN_LOADDQUDI512,
28119 IX86_BUILTIN_LOADDQUSI512,
28120 IX86_BUILTIN_LOADUPD512,
28121 IX86_BUILTIN_LOADUPS512,
28122 IX86_BUILTIN_MAXPD512,
28123 IX86_BUILTIN_MAXPS512,
28124 IX86_BUILTIN_MAXSD_ROUND,
28125 IX86_BUILTIN_MAXSS_ROUND,
28126 IX86_BUILTIN_MINPD512,
28127 IX86_BUILTIN_MINPS512,
28128 IX86_BUILTIN_MINSD_ROUND,
28129 IX86_BUILTIN_MINSS_ROUND,
28130 IX86_BUILTIN_MOVAPD512,
28131 IX86_BUILTIN_MOVAPS512,
28132 IX86_BUILTIN_MOVDDUP512,
28133 IX86_BUILTIN_MOVDQA32LOAD512,
28134 IX86_BUILTIN_MOVDQA32STORE512,
28135 IX86_BUILTIN_MOVDQA32_512,
28136 IX86_BUILTIN_MOVDQA64LOAD512,
28137 IX86_BUILTIN_MOVDQA64STORE512,
28138 IX86_BUILTIN_MOVDQA64_512,
28139 IX86_BUILTIN_MOVNTDQ512,
28140 IX86_BUILTIN_MOVNTDQA512,
28141 IX86_BUILTIN_MOVNTPD512,
28142 IX86_BUILTIN_MOVNTPS512,
28143 IX86_BUILTIN_MOVSHDUP512,
28144 IX86_BUILTIN_MOVSLDUP512,
28145 IX86_BUILTIN_MULPD512,
28146 IX86_BUILTIN_MULPS512,
28147 IX86_BUILTIN_MULSD_ROUND,
28148 IX86_BUILTIN_MULSS_ROUND,
28149 IX86_BUILTIN_PABSD512,
28150 IX86_BUILTIN_PABSQ512,
28151 IX86_BUILTIN_PADDD512,
28152 IX86_BUILTIN_PADDQ512,
28153 IX86_BUILTIN_PANDD512,
28154 IX86_BUILTIN_PANDND512,
28155 IX86_BUILTIN_PANDNQ512,
28156 IX86_BUILTIN_PANDQ512,
28157 IX86_BUILTIN_PBROADCASTD512,
28158 IX86_BUILTIN_PBROADCASTD512_GPR,
28159 IX86_BUILTIN_PBROADCASTMB512,
28160 IX86_BUILTIN_PBROADCASTMW512,
28161 IX86_BUILTIN_PBROADCASTQ512,
28162 IX86_BUILTIN_PBROADCASTQ512_GPR,
28163 IX86_BUILTIN_PBROADCASTQ512_MEM,
28164 IX86_BUILTIN_PCMPEQD512_MASK,
28165 IX86_BUILTIN_PCMPEQQ512_MASK,
28166 IX86_BUILTIN_PCMPGTD512_MASK,
28167 IX86_BUILTIN_PCMPGTQ512_MASK,
28168 IX86_BUILTIN_PCOMPRESSD512,
28169 IX86_BUILTIN_PCOMPRESSDSTORE512,
28170 IX86_BUILTIN_PCOMPRESSQ512,
28171 IX86_BUILTIN_PCOMPRESSQSTORE512,
28172 IX86_BUILTIN_PEXPANDD512,
28173 IX86_BUILTIN_PEXPANDD512Z,
28174 IX86_BUILTIN_PEXPANDDLOAD512,
28175 IX86_BUILTIN_PEXPANDDLOAD512Z,
28176 IX86_BUILTIN_PEXPANDQ512,
28177 IX86_BUILTIN_PEXPANDQ512Z,
28178 IX86_BUILTIN_PEXPANDQLOAD512,
28179 IX86_BUILTIN_PEXPANDQLOAD512Z,
28180 IX86_BUILTIN_PMAXSD512,
28181 IX86_BUILTIN_PMAXSQ512,
28182 IX86_BUILTIN_PMAXUD512,
28183 IX86_BUILTIN_PMAXUQ512,
28184 IX86_BUILTIN_PMINSD512,
28185 IX86_BUILTIN_PMINSQ512,
28186 IX86_BUILTIN_PMINUD512,
28187 IX86_BUILTIN_PMINUQ512,
28188 IX86_BUILTIN_PMOVDB512,
28189 IX86_BUILTIN_PMOVDB512_MEM,
28190 IX86_BUILTIN_PMOVDW512,
28191 IX86_BUILTIN_PMOVDW512_MEM,
28192 IX86_BUILTIN_PMOVQB512,
28193 IX86_BUILTIN_PMOVQB512_MEM,
28194 IX86_BUILTIN_PMOVQD512,
28195 IX86_BUILTIN_PMOVQD512_MEM,
28196 IX86_BUILTIN_PMOVQW512,
28197 IX86_BUILTIN_PMOVQW512_MEM,
28198 IX86_BUILTIN_PMOVSDB512,
28199 IX86_BUILTIN_PMOVSDB512_MEM,
28200 IX86_BUILTIN_PMOVSDW512,
28201 IX86_BUILTIN_PMOVSDW512_MEM,
28202 IX86_BUILTIN_PMOVSQB512,
28203 IX86_BUILTIN_PMOVSQB512_MEM,
28204 IX86_BUILTIN_PMOVSQD512,
28205 IX86_BUILTIN_PMOVSQD512_MEM,
28206 IX86_BUILTIN_PMOVSQW512,
28207 IX86_BUILTIN_PMOVSQW512_MEM,
28208 IX86_BUILTIN_PMOVSXBD512,
28209 IX86_BUILTIN_PMOVSXBQ512,
28210 IX86_BUILTIN_PMOVSXDQ512,
28211 IX86_BUILTIN_PMOVSXWD512,
28212 IX86_BUILTIN_PMOVSXWQ512,
28213 IX86_BUILTIN_PMOVUSDB512,
28214 IX86_BUILTIN_PMOVUSDB512_MEM,
28215 IX86_BUILTIN_PMOVUSDW512,
28216 IX86_BUILTIN_PMOVUSDW512_MEM,
28217 IX86_BUILTIN_PMOVUSQB512,
28218 IX86_BUILTIN_PMOVUSQB512_MEM,
28219 IX86_BUILTIN_PMOVUSQD512,
28220 IX86_BUILTIN_PMOVUSQD512_MEM,
28221 IX86_BUILTIN_PMOVUSQW512,
28222 IX86_BUILTIN_PMOVUSQW512_MEM,
28223 IX86_BUILTIN_PMOVZXBD512,
28224 IX86_BUILTIN_PMOVZXBQ512,
28225 IX86_BUILTIN_PMOVZXDQ512,
28226 IX86_BUILTIN_PMOVZXWD512,
28227 IX86_BUILTIN_PMOVZXWQ512,
28228 IX86_BUILTIN_PMULDQ512,
28229 IX86_BUILTIN_PMULLD512,
28230 IX86_BUILTIN_PMULUDQ512,
28231 IX86_BUILTIN_PORD512,
28232 IX86_BUILTIN_PORQ512,
28233 IX86_BUILTIN_PROLD512,
28234 IX86_BUILTIN_PROLQ512,
28235 IX86_BUILTIN_PROLVD512,
28236 IX86_BUILTIN_PROLVQ512,
28237 IX86_BUILTIN_PRORD512,
28238 IX86_BUILTIN_PRORQ512,
28239 IX86_BUILTIN_PRORVD512,
28240 IX86_BUILTIN_PRORVQ512,
28241 IX86_BUILTIN_PSHUFD512,
28242 IX86_BUILTIN_PSLLD512,
28243 IX86_BUILTIN_PSLLDI512,
28244 IX86_BUILTIN_PSLLQ512,
28245 IX86_BUILTIN_PSLLQI512,
28246 IX86_BUILTIN_PSLLVV16SI,
28247 IX86_BUILTIN_PSLLVV8DI,
28248 IX86_BUILTIN_PSRAD512,
28249 IX86_BUILTIN_PSRADI512,
28250 IX86_BUILTIN_PSRAQ512,
28251 IX86_BUILTIN_PSRAQI512,
28252 IX86_BUILTIN_PSRAVV16SI,
28253 IX86_BUILTIN_PSRAVV8DI,
28254 IX86_BUILTIN_PSRLD512,
28255 IX86_BUILTIN_PSRLDI512,
28256 IX86_BUILTIN_PSRLQ512,
28257 IX86_BUILTIN_PSRLQI512,
28258 IX86_BUILTIN_PSRLVV16SI,
28259 IX86_BUILTIN_PSRLVV8DI,
28260 IX86_BUILTIN_PSUBD512,
28261 IX86_BUILTIN_PSUBQ512,
28262 IX86_BUILTIN_PTESTMD512,
28263 IX86_BUILTIN_PTESTMQ512,
28264 IX86_BUILTIN_PTESTNMD512,
28265 IX86_BUILTIN_PTESTNMQ512,
28266 IX86_BUILTIN_PUNPCKHDQ512,
28267 IX86_BUILTIN_PUNPCKHQDQ512,
28268 IX86_BUILTIN_PUNPCKLDQ512,
28269 IX86_BUILTIN_PUNPCKLQDQ512,
28270 IX86_BUILTIN_PXORD512,
28271 IX86_BUILTIN_PXORQ512,
28272 IX86_BUILTIN_RCP14PD512,
28273 IX86_BUILTIN_RCP14PS512,
28274 IX86_BUILTIN_RCP14SD,
28275 IX86_BUILTIN_RCP14SS,
28276 IX86_BUILTIN_RNDSCALEPD,
28277 IX86_BUILTIN_RNDSCALEPS,
28278 IX86_BUILTIN_RNDSCALESD,
28279 IX86_BUILTIN_RNDSCALESS,
28280 IX86_BUILTIN_RSQRT14PD512,
28281 IX86_BUILTIN_RSQRT14PS512,
28282 IX86_BUILTIN_RSQRT14SD,
28283 IX86_BUILTIN_RSQRT14SS,
28284 IX86_BUILTIN_SCALEFPD512,
28285 IX86_BUILTIN_SCALEFPS512,
28286 IX86_BUILTIN_SCALEFSD,
28287 IX86_BUILTIN_SCALEFSS,
28288 IX86_BUILTIN_SHUFPD512,
28289 IX86_BUILTIN_SHUFPS512,
28290 IX86_BUILTIN_SHUF_F32x4,
28291 IX86_BUILTIN_SHUF_F64x2,
28292 IX86_BUILTIN_SHUF_I32x4,
28293 IX86_BUILTIN_SHUF_I64x2,
28294 IX86_BUILTIN_SQRTPD512,
28295 IX86_BUILTIN_SQRTPD512_MASK,
28296 IX86_BUILTIN_SQRTPS512_MASK,
28297 IX86_BUILTIN_SQRTPS_NR512,
28298 IX86_BUILTIN_SQRTSD_ROUND,
28299 IX86_BUILTIN_SQRTSS_ROUND,
28300 IX86_BUILTIN_STOREAPD512,
28301 IX86_BUILTIN_STOREAPS512,
28302 IX86_BUILTIN_STOREDQUDI512,
28303 IX86_BUILTIN_STOREDQUSI512,
28304 IX86_BUILTIN_STOREUPD512,
28305 IX86_BUILTIN_STOREUPS512,
28306 IX86_BUILTIN_SUBPD512,
28307 IX86_BUILTIN_SUBPS512,
28308 IX86_BUILTIN_SUBSD_ROUND,
28309 IX86_BUILTIN_SUBSS_ROUND,
28310 IX86_BUILTIN_UCMPD512,
28311 IX86_BUILTIN_UCMPQ512,
28312 IX86_BUILTIN_UNPCKHPD512,
28313 IX86_BUILTIN_UNPCKHPS512,
28314 IX86_BUILTIN_UNPCKLPD512,
28315 IX86_BUILTIN_UNPCKLPS512,
28316 IX86_BUILTIN_VCVTSD2SI32,
28317 IX86_BUILTIN_VCVTSD2SI64,
28318 IX86_BUILTIN_VCVTSD2USI32,
28319 IX86_BUILTIN_VCVTSD2USI64,
28320 IX86_BUILTIN_VCVTSS2SI32,
28321 IX86_BUILTIN_VCVTSS2SI64,
28322 IX86_BUILTIN_VCVTSS2USI32,
28323 IX86_BUILTIN_VCVTSS2USI64,
28324 IX86_BUILTIN_VCVTTSD2SI32,
28325 IX86_BUILTIN_VCVTTSD2SI64,
28326 IX86_BUILTIN_VCVTTSD2USI32,
28327 IX86_BUILTIN_VCVTTSD2USI64,
28328 IX86_BUILTIN_VCVTTSS2SI32,
28329 IX86_BUILTIN_VCVTTSS2SI64,
28330 IX86_BUILTIN_VCVTTSS2USI32,
28331 IX86_BUILTIN_VCVTTSS2USI64,
28332 IX86_BUILTIN_VFMADDPD512_MASK,
28333 IX86_BUILTIN_VFMADDPD512_MASK3,
28334 IX86_BUILTIN_VFMADDPD512_MASKZ,
28335 IX86_BUILTIN_VFMADDPS512_MASK,
28336 IX86_BUILTIN_VFMADDPS512_MASK3,
28337 IX86_BUILTIN_VFMADDPS512_MASKZ,
28338 IX86_BUILTIN_VFMADDSD3_ROUND,
28339 IX86_BUILTIN_VFMADDSS3_ROUND,
28340 IX86_BUILTIN_VFMADDSUBPD512_MASK,
28341 IX86_BUILTIN_VFMADDSUBPD512_MASK3,
28342 IX86_BUILTIN_VFMADDSUBPD512_MASKZ,
28343 IX86_BUILTIN_VFMADDSUBPS512_MASK,
28344 IX86_BUILTIN_VFMADDSUBPS512_MASK3,
28345 IX86_BUILTIN_VFMADDSUBPS512_MASKZ,
28346 IX86_BUILTIN_VFMSUBADDPD512_MASK3,
28347 IX86_BUILTIN_VFMSUBADDPS512_MASK3,
28348 IX86_BUILTIN_VFMSUBPD512_MASK3,
28349 IX86_BUILTIN_VFMSUBPS512_MASK3,
28350 IX86_BUILTIN_VFMSUBSD3_MASK3,
28351 IX86_BUILTIN_VFMSUBSS3_MASK3,
28352 IX86_BUILTIN_VFNMADDPD512_MASK,
28353 IX86_BUILTIN_VFNMADDPS512_MASK,
28354 IX86_BUILTIN_VFNMSUBPD512_MASK,
28355 IX86_BUILTIN_VFNMSUBPD512_MASK3,
28356 IX86_BUILTIN_VFNMSUBPS512_MASK,
28357 IX86_BUILTIN_VFNMSUBPS512_MASK3,
28358 IX86_BUILTIN_VPCLZCNTD512,
28359 IX86_BUILTIN_VPCLZCNTQ512,
28360 IX86_BUILTIN_VPCONFLICTD512,
28361 IX86_BUILTIN_VPCONFLICTQ512,
28362 IX86_BUILTIN_VPERMDF512,
28363 IX86_BUILTIN_VPERMDI512,
28364 IX86_BUILTIN_VPERMI2VARD512,
28365 IX86_BUILTIN_VPERMI2VARPD512,
28366 IX86_BUILTIN_VPERMI2VARPS512,
28367 IX86_BUILTIN_VPERMI2VARQ512,
28368 IX86_BUILTIN_VPERMILPD512,
28369 IX86_BUILTIN_VPERMILPS512,
28370 IX86_BUILTIN_VPERMILVARPD512,
28371 IX86_BUILTIN_VPERMILVARPS512,
28372 IX86_BUILTIN_VPERMT2VARD512,
28373 IX86_BUILTIN_VPERMT2VARD512_MASKZ,
28374 IX86_BUILTIN_VPERMT2VARPD512,
28375 IX86_BUILTIN_VPERMT2VARPD512_MASKZ,
28376 IX86_BUILTIN_VPERMT2VARPS512,
28377 IX86_BUILTIN_VPERMT2VARPS512_MASKZ,
28378 IX86_BUILTIN_VPERMT2VARQ512,
28379 IX86_BUILTIN_VPERMT2VARQ512_MASKZ,
28380 IX86_BUILTIN_VPERMVARDF512,
28381 IX86_BUILTIN_VPERMVARDI512,
28382 IX86_BUILTIN_VPERMVARSF512,
28383 IX86_BUILTIN_VPERMVARSI512,
28384 IX86_BUILTIN_VTERNLOGD512_MASK,
28385 IX86_BUILTIN_VTERNLOGD512_MASKZ,
28386 IX86_BUILTIN_VTERNLOGQ512_MASK,
28387 IX86_BUILTIN_VTERNLOGQ512_MASKZ,
28388
28389 /* Mask arithmetic operations */
28390 IX86_BUILTIN_KAND16,
28391 IX86_BUILTIN_KANDN16,
28392 IX86_BUILTIN_KNOT16,
28393 IX86_BUILTIN_KOR16,
28394 IX86_BUILTIN_KORTESTC16,
28395 IX86_BUILTIN_KORTESTZ16,
28396 IX86_BUILTIN_KUNPCKBW,
28397 IX86_BUILTIN_KXNOR16,
28398 IX86_BUILTIN_KXOR16,
28399 IX86_BUILTIN_KMOV16,
28400
28401 /* Alternate 4 and 8 element gather/scatter for the vectorizer
28402 where all operands are 32-byte or 64-byte wide respectively. */
28403 IX86_BUILTIN_GATHERALTSIV4DF,
28404 IX86_BUILTIN_GATHERALTDIV8SF,
28405 IX86_BUILTIN_GATHERALTSIV4DI,
28406 IX86_BUILTIN_GATHERALTDIV8SI,
28407 IX86_BUILTIN_GATHER3ALTDIV16SF,
28408 IX86_BUILTIN_GATHER3ALTDIV16SI,
28409 IX86_BUILTIN_GATHER3ALTSIV8DF,
28410 IX86_BUILTIN_GATHER3ALTSIV8DI,
28411 IX86_BUILTIN_GATHER3DIV16SF,
28412 IX86_BUILTIN_GATHER3DIV16SI,
28413 IX86_BUILTIN_GATHER3DIV8DF,
28414 IX86_BUILTIN_GATHER3DIV8DI,
28415 IX86_BUILTIN_GATHER3SIV16SF,
28416 IX86_BUILTIN_GATHER3SIV16SI,
28417 IX86_BUILTIN_GATHER3SIV8DF,
28418 IX86_BUILTIN_GATHER3SIV8DI,
28419 IX86_BUILTIN_SCATTERDIV16SF,
28420 IX86_BUILTIN_SCATTERDIV16SI,
28421 IX86_BUILTIN_SCATTERDIV8DF,
28422 IX86_BUILTIN_SCATTERDIV8DI,
28423 IX86_BUILTIN_SCATTERSIV16SF,
28424 IX86_BUILTIN_SCATTERSIV16SI,
28425 IX86_BUILTIN_SCATTERSIV8DF,
28426 IX86_BUILTIN_SCATTERSIV8DI,
28427
28428 /* AVX512PF */
28429 IX86_BUILTIN_GATHERPFQPD,
28430 IX86_BUILTIN_GATHERPFDPS,
28431 IX86_BUILTIN_GATHERPFDPD,
28432 IX86_BUILTIN_GATHERPFQPS,
28433 IX86_BUILTIN_SCATTERPFDPD,
28434 IX86_BUILTIN_SCATTERPFDPS,
28435 IX86_BUILTIN_SCATTERPFQPD,
28436 IX86_BUILTIN_SCATTERPFQPS,
28437
28438 /* AVX-512ER */
28439 IX86_BUILTIN_EXP2PD_MASK,
28440 IX86_BUILTIN_EXP2PS_MASK,
28441 IX86_BUILTIN_EXP2PS,
28442 IX86_BUILTIN_RCP28PD,
28443 IX86_BUILTIN_RCP28PS,
28444 IX86_BUILTIN_RCP28SD,
28445 IX86_BUILTIN_RCP28SS,
28446 IX86_BUILTIN_RSQRT28PD,
28447 IX86_BUILTIN_RSQRT28PS,
28448 IX86_BUILTIN_RSQRT28SD,
28449 IX86_BUILTIN_RSQRT28SS,
28450
28451 /* SHA builtins. */
28452 IX86_BUILTIN_SHA1MSG1,
28453 IX86_BUILTIN_SHA1MSG2,
28454 IX86_BUILTIN_SHA1NEXTE,
28455 IX86_BUILTIN_SHA1RNDS4,
28456 IX86_BUILTIN_SHA256MSG1,
28457 IX86_BUILTIN_SHA256MSG2,
28458 IX86_BUILTIN_SHA256RNDS2,
28459
28460 /* TFmode support builtins. */
28461 IX86_BUILTIN_INFQ,
28462 IX86_BUILTIN_HUGE_VALQ,
28463 IX86_BUILTIN_FABSQ,
28464 IX86_BUILTIN_COPYSIGNQ,
28465
28466 /* Vectorizer support builtins. */
28467 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512,
28468 IX86_BUILTIN_CPYSGNPS,
28469 IX86_BUILTIN_CPYSGNPD,
28470 IX86_BUILTIN_CPYSGNPS256,
28471 IX86_BUILTIN_CPYSGNPS512,
28472 IX86_BUILTIN_CPYSGNPD256,
28473 IX86_BUILTIN_CPYSGNPD512,
28474 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512,
28475 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512,
28476
28477
28478 /* FMA4 instructions. */
28479 IX86_BUILTIN_VFMADDSS,
28480 IX86_BUILTIN_VFMADDSD,
28481 IX86_BUILTIN_VFMADDPS,
28482 IX86_BUILTIN_VFMADDPD,
28483 IX86_BUILTIN_VFMADDPS256,
28484 IX86_BUILTIN_VFMADDPD256,
28485 IX86_BUILTIN_VFMADDSUBPS,
28486 IX86_BUILTIN_VFMADDSUBPD,
28487 IX86_BUILTIN_VFMADDSUBPS256,
28488 IX86_BUILTIN_VFMADDSUBPD256,
28489
28490 /* FMA3 instructions. */
28491 IX86_BUILTIN_VFMADDSS3,
28492 IX86_BUILTIN_VFMADDSD3,
28493
28494 /* XOP instructions. */
28495 IX86_BUILTIN_VPCMOV,
28496 IX86_BUILTIN_VPCMOV_V2DI,
28497 IX86_BUILTIN_VPCMOV_V4SI,
28498 IX86_BUILTIN_VPCMOV_V8HI,
28499 IX86_BUILTIN_VPCMOV_V16QI,
28500 IX86_BUILTIN_VPCMOV_V4SF,
28501 IX86_BUILTIN_VPCMOV_V2DF,
28502 IX86_BUILTIN_VPCMOV256,
28503 IX86_BUILTIN_VPCMOV_V4DI256,
28504 IX86_BUILTIN_VPCMOV_V8SI256,
28505 IX86_BUILTIN_VPCMOV_V16HI256,
28506 IX86_BUILTIN_VPCMOV_V32QI256,
28507 IX86_BUILTIN_VPCMOV_V8SF256,
28508 IX86_BUILTIN_VPCMOV_V4DF256,
28509
28510 IX86_BUILTIN_VPPERM,
28511
28512 IX86_BUILTIN_VPMACSSWW,
28513 IX86_BUILTIN_VPMACSWW,
28514 IX86_BUILTIN_VPMACSSWD,
28515 IX86_BUILTIN_VPMACSWD,
28516 IX86_BUILTIN_VPMACSSDD,
28517 IX86_BUILTIN_VPMACSDD,
28518 IX86_BUILTIN_VPMACSSDQL,
28519 IX86_BUILTIN_VPMACSSDQH,
28520 IX86_BUILTIN_VPMACSDQL,
28521 IX86_BUILTIN_VPMACSDQH,
28522 IX86_BUILTIN_VPMADCSSWD,
28523 IX86_BUILTIN_VPMADCSWD,
28524
28525 IX86_BUILTIN_VPHADDBW,
28526 IX86_BUILTIN_VPHADDBD,
28527 IX86_BUILTIN_VPHADDBQ,
28528 IX86_BUILTIN_VPHADDWD,
28529 IX86_BUILTIN_VPHADDWQ,
28530 IX86_BUILTIN_VPHADDDQ,
28531 IX86_BUILTIN_VPHADDUBW,
28532 IX86_BUILTIN_VPHADDUBD,
28533 IX86_BUILTIN_VPHADDUBQ,
28534 IX86_BUILTIN_VPHADDUWD,
28535 IX86_BUILTIN_VPHADDUWQ,
28536 IX86_BUILTIN_VPHADDUDQ,
28537 IX86_BUILTIN_VPHSUBBW,
28538 IX86_BUILTIN_VPHSUBWD,
28539 IX86_BUILTIN_VPHSUBDQ,
28540
28541 IX86_BUILTIN_VPROTB,
28542 IX86_BUILTIN_VPROTW,
28543 IX86_BUILTIN_VPROTD,
28544 IX86_BUILTIN_VPROTQ,
28545 IX86_BUILTIN_VPROTB_IMM,
28546 IX86_BUILTIN_VPROTW_IMM,
28547 IX86_BUILTIN_VPROTD_IMM,
28548 IX86_BUILTIN_VPROTQ_IMM,
28549
28550 IX86_BUILTIN_VPSHLB,
28551 IX86_BUILTIN_VPSHLW,
28552 IX86_BUILTIN_VPSHLD,
28553 IX86_BUILTIN_VPSHLQ,
28554 IX86_BUILTIN_VPSHAB,
28555 IX86_BUILTIN_VPSHAW,
28556 IX86_BUILTIN_VPSHAD,
28557 IX86_BUILTIN_VPSHAQ,
28558
28559 IX86_BUILTIN_VFRCZSS,
28560 IX86_BUILTIN_VFRCZSD,
28561 IX86_BUILTIN_VFRCZPS,
28562 IX86_BUILTIN_VFRCZPD,
28563 IX86_BUILTIN_VFRCZPS256,
28564 IX86_BUILTIN_VFRCZPD256,
28565
28566 IX86_BUILTIN_VPCOMEQUB,
28567 IX86_BUILTIN_VPCOMNEUB,
28568 IX86_BUILTIN_VPCOMLTUB,
28569 IX86_BUILTIN_VPCOMLEUB,
28570 IX86_BUILTIN_VPCOMGTUB,
28571 IX86_BUILTIN_VPCOMGEUB,
28572 IX86_BUILTIN_VPCOMFALSEUB,
28573 IX86_BUILTIN_VPCOMTRUEUB,
28574
28575 IX86_BUILTIN_VPCOMEQUW,
28576 IX86_BUILTIN_VPCOMNEUW,
28577 IX86_BUILTIN_VPCOMLTUW,
28578 IX86_BUILTIN_VPCOMLEUW,
28579 IX86_BUILTIN_VPCOMGTUW,
28580 IX86_BUILTIN_VPCOMGEUW,
28581 IX86_BUILTIN_VPCOMFALSEUW,
28582 IX86_BUILTIN_VPCOMTRUEUW,
28583
28584 IX86_BUILTIN_VPCOMEQUD,
28585 IX86_BUILTIN_VPCOMNEUD,
28586 IX86_BUILTIN_VPCOMLTUD,
28587 IX86_BUILTIN_VPCOMLEUD,
28588 IX86_BUILTIN_VPCOMGTUD,
28589 IX86_BUILTIN_VPCOMGEUD,
28590 IX86_BUILTIN_VPCOMFALSEUD,
28591 IX86_BUILTIN_VPCOMTRUEUD,
28592
28593 IX86_BUILTIN_VPCOMEQUQ,
28594 IX86_BUILTIN_VPCOMNEUQ,
28595 IX86_BUILTIN_VPCOMLTUQ,
28596 IX86_BUILTIN_VPCOMLEUQ,
28597 IX86_BUILTIN_VPCOMGTUQ,
28598 IX86_BUILTIN_VPCOMGEUQ,
28599 IX86_BUILTIN_VPCOMFALSEUQ,
28600 IX86_BUILTIN_VPCOMTRUEUQ,
28601
28602 IX86_BUILTIN_VPCOMEQB,
28603 IX86_BUILTIN_VPCOMNEB,
28604 IX86_BUILTIN_VPCOMLTB,
28605 IX86_BUILTIN_VPCOMLEB,
28606 IX86_BUILTIN_VPCOMGTB,
28607 IX86_BUILTIN_VPCOMGEB,
28608 IX86_BUILTIN_VPCOMFALSEB,
28609 IX86_BUILTIN_VPCOMTRUEB,
28610
28611 IX86_BUILTIN_VPCOMEQW,
28612 IX86_BUILTIN_VPCOMNEW,
28613 IX86_BUILTIN_VPCOMLTW,
28614 IX86_BUILTIN_VPCOMLEW,
28615 IX86_BUILTIN_VPCOMGTW,
28616 IX86_BUILTIN_VPCOMGEW,
28617 IX86_BUILTIN_VPCOMFALSEW,
28618 IX86_BUILTIN_VPCOMTRUEW,
28619
28620 IX86_BUILTIN_VPCOMEQD,
28621 IX86_BUILTIN_VPCOMNED,
28622 IX86_BUILTIN_VPCOMLTD,
28623 IX86_BUILTIN_VPCOMLED,
28624 IX86_BUILTIN_VPCOMGTD,
28625 IX86_BUILTIN_VPCOMGED,
28626 IX86_BUILTIN_VPCOMFALSED,
28627 IX86_BUILTIN_VPCOMTRUED,
28628
28629 IX86_BUILTIN_VPCOMEQQ,
28630 IX86_BUILTIN_VPCOMNEQ,
28631 IX86_BUILTIN_VPCOMLTQ,
28632 IX86_BUILTIN_VPCOMLEQ,
28633 IX86_BUILTIN_VPCOMGTQ,
28634 IX86_BUILTIN_VPCOMGEQ,
28635 IX86_BUILTIN_VPCOMFALSEQ,
28636 IX86_BUILTIN_VPCOMTRUEQ,
28637
28638 /* LWP instructions. */
28639 IX86_BUILTIN_LLWPCB,
28640 IX86_BUILTIN_SLWPCB,
28641 IX86_BUILTIN_LWPVAL32,
28642 IX86_BUILTIN_LWPVAL64,
28643 IX86_BUILTIN_LWPINS32,
28644 IX86_BUILTIN_LWPINS64,
28645
28646 IX86_BUILTIN_CLZS,
28647
28648 /* RTM */
28649 IX86_BUILTIN_XBEGIN,
28650 IX86_BUILTIN_XEND,
28651 IX86_BUILTIN_XABORT,
28652 IX86_BUILTIN_XTEST,
28653
28654 /* BMI instructions. */
28655 IX86_BUILTIN_BEXTR32,
28656 IX86_BUILTIN_BEXTR64,
28657 IX86_BUILTIN_CTZS,
28658
28659 /* TBM instructions. */
28660 IX86_BUILTIN_BEXTRI32,
28661 IX86_BUILTIN_BEXTRI64,
28662
28663 /* BMI2 instructions. */
28664 IX86_BUILTIN_BZHI32,
28665 IX86_BUILTIN_BZHI64,
28666 IX86_BUILTIN_PDEP32,
28667 IX86_BUILTIN_PDEP64,
28668 IX86_BUILTIN_PEXT32,
28669 IX86_BUILTIN_PEXT64,
28670
28671 /* ADX instructions. */
28672 IX86_BUILTIN_ADDCARRYX32,
28673 IX86_BUILTIN_ADDCARRYX64,
28674
28675 /* FSGSBASE instructions. */
28676 IX86_BUILTIN_RDFSBASE32,
28677 IX86_BUILTIN_RDFSBASE64,
28678 IX86_BUILTIN_RDGSBASE32,
28679 IX86_BUILTIN_RDGSBASE64,
28680 IX86_BUILTIN_WRFSBASE32,
28681 IX86_BUILTIN_WRFSBASE64,
28682 IX86_BUILTIN_WRGSBASE32,
28683 IX86_BUILTIN_WRGSBASE64,
28684
28685 /* RDRND instructions. */
28686 IX86_BUILTIN_RDRAND16_STEP,
28687 IX86_BUILTIN_RDRAND32_STEP,
28688 IX86_BUILTIN_RDRAND64_STEP,
28689
28690 /* RDSEED instructions. */
28691 IX86_BUILTIN_RDSEED16_STEP,
28692 IX86_BUILTIN_RDSEED32_STEP,
28693 IX86_BUILTIN_RDSEED64_STEP,
28694
28695 /* F16C instructions. */
28696 IX86_BUILTIN_CVTPH2PS,
28697 IX86_BUILTIN_CVTPH2PS256,
28698 IX86_BUILTIN_CVTPS2PH,
28699 IX86_BUILTIN_CVTPS2PH256,
28700
28701 /* CFString built-in for darwin */
28702 IX86_BUILTIN_CFSTRING,
28703
28704 /* Builtins to get CPU type and supported features. */
28705 IX86_BUILTIN_CPU_INIT,
28706 IX86_BUILTIN_CPU_IS,
28707 IX86_BUILTIN_CPU_SUPPORTS,
28708
28709 /* Read/write FLAGS register built-ins. */
28710 IX86_BUILTIN_READ_FLAGS,
28711 IX86_BUILTIN_WRITE_FLAGS,
28712
28713 IX86_BUILTIN_MAX
28714 };
28715
28716 /* Table for the ix86 builtin decls. */
28717 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
28718
28719 /* Table of all of the builtin functions that are possible with different ISAs
28720 but are waiting to be built until a function is declared to use that
28721 ISA. */
28722 struct builtin_isa {
28723 const char *name; /* function name */
28724 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
28725 HOST_WIDE_INT isa; /* isa_flags this builtin is defined for */
28726 bool const_p; /* true if the declaration is constant */
28727 bool set_and_not_built_p;
28728 };
28729
28730 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
28731
28732
28733 /* Add an ix86 target builtin function with CODE, NAME and TCODE.  Save MASK,
28734    the isa_flags it requires, in the ix86_builtins_isa array.  Stores the
28735 function decl in the ix86_builtins array. Returns the function decl or
28736    NULL_TREE if the builtin was not added.
28737
28738 If the front end has a special hook for builtin functions, delay adding
28739 builtin functions that aren't in the current ISA until the ISA is changed
28740    with function-specific optimization.  Doing so can save about 300K for the
28741 default compiler. When the builtin is expanded, check at that time whether
28742 it is valid.
28743
28744    If the front end doesn't have a special hook, record all builtins, even
28745    those whose instruction set is not in the current ISA, in case the user
28746    uses function-specific options for a different ISA; that way we don't get
28747    scope errors if a builtin is added in the middle of a function scope.  */
28748
28749 static inline tree
28750 def_builtin (HOST_WIDE_INT mask, const char *name,
28751 enum ix86_builtin_func_type tcode,
28752 enum ix86_builtins code)
28753 {
28754 tree decl = NULL_TREE;
28755
28756 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
28757 {
28758 ix86_builtins_isa[(int) code].isa = mask;
28759
28760 mask &= ~OPTION_MASK_ISA_64BIT;
28761 if (mask == 0
28762 || (mask & ix86_isa_flags) != 0
28763 || (lang_hooks.builtin_function
28764 == lang_hooks.builtin_function_ext_scope))
28766 	{
28767 tree type = ix86_get_builtin_func_type (tcode);
28768 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
28769 NULL, NULL_TREE);
28770 ix86_builtins[(int) code] = decl;
28771 ix86_builtins_isa[(int) code].set_and_not_built_p = false;
28772 }
28773 else
28774 {
28775 ix86_builtins[(int) code] = NULL_TREE;
28776 ix86_builtins_isa[(int) code].tcode = tcode;
28777 ix86_builtins_isa[(int) code].name = name;
28778 ix86_builtins_isa[(int) code].const_p = false;
28779 ix86_builtins_isa[(int) code].set_and_not_built_p = true;
28780 }
28781 }
28782
28783 return decl;
28784 }
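
/* A minimal usage sketch (illustrative only -- the builtin name and enum
   value below are made up; the real registrations are done further down in
   this file, e.g. from ix86_init_mmx_sse_builtins):

     def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_example",
		  V2DF_FTYPE_V2DF, IX86_BUILTIN_EXAMPLE);

   If SSE2 is already enabled in ix86_isa_flags, or the front end's
   builtin_function hook is the extern-scope variant, the decl is built
   immediately; otherwise the request is parked in ix86_builtins_isa and only
   materialized later by ix86_add_new_builtins once the ISA is enabled.  */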
28785
28786 /* Like def_builtin, but also marks the function decl "const". */
28787
28788 static inline tree
28789 def_builtin_const (HOST_WIDE_INT mask, const char *name,
28790 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
28791 {
28792 tree decl = def_builtin (mask, name, tcode, code);
28793 if (decl)
28794 TREE_READONLY (decl) = 1;
28795 else
28796 ix86_builtins_isa[(int) code].const_p = true;
28797
28798 return decl;
28799 }
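
/* Sketch of the "const" variant (names again illustrative): marking the decl
   TREE_READONLY tells the middle end that calls have no side effects, so
   duplicate calls can be CSEd; if the decl was deferred, const_p records the
   request for ix86_add_new_builtins to apply later.

     def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_example2",
			V2DF_FTYPE_V2DF_V2DF, IX86_BUILTIN_EXAMPLE2);  */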
28800
28801 /* Add any new builtin functions for a given ISA that may not have been
28802    declared yet.  This saves a bit of space compared to adding all of the
28803    declarations to the tree up front, whether or not they are used.  */
28804
28805 static void
28806 ix86_add_new_builtins (HOST_WIDE_INT isa)
28807 {
28808 int i;
28809
28810 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
28811 {
28812 if ((ix86_builtins_isa[i].isa & isa) != 0
28813 && ix86_builtins_isa[i].set_and_not_built_p)
28814 {
28815 tree decl, type;
28816
28817 /* Don't define the builtin again. */
28818 ix86_builtins_isa[i].set_and_not_built_p = false;
28819
28820 type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
28821 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
28822 type, i, BUILT_IN_MD, NULL,
28823 NULL_TREE);
28824
28825 ix86_builtins[i] = decl;
28826 if (ix86_builtins_isa[i].const_p)
28827 TREE_READONLY (decl) = 1;
28828 }
28829 }
28830 }
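
/* Sketch of when the deferral is resolved (this mirrors how the target
   attribute/pragma machinery later in this file is expected to call it;
   treat the exact call site as an assumption):

     ix86_add_new_builtins (ix86_isa_flags);

   i.e. once the effective ISA flags change, every builtin that def_builtin
   parked for one of the newly enabled ISAs gets a real decl.  */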
28831
28832 /* Bits for builtin_description.flag. */
28833
28834 /* Set when we don't support the comparison natively, and should
28835    swap the comparison operands in order to support it.  */
28836 #define BUILTIN_DESC_SWAP_OPERANDS 1
28837
28838 struct builtin_description
28839 {
28840 const HOST_WIDE_INT mask;
28841 const enum insn_code icode;
28842 const char *const name;
28843 const enum ix86_builtins code;
28844 const enum rtx_code comparison;
28845 const int flag;
28846 };
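
/* Informal note on how the bdesc_* tables below are read: MASK gates the
   entry on ISA flags, ICODE names the insn pattern used when the builtin is
   expanded, NAME and CODE give the user-visible builtin and its enum value,
   COMPARISON optionally supplies an rtx comparison code, and FLAG holds
   per-table extra data -- BUILTIN_DESC_SWAP_OPERANDS bits for bdesc_comi, a
   flags-register mode for the pcmpestr/pcmpistr tables, and the
   ix86_builtin_func_type (cast to int) for bdesc_special_args and
   bdesc_args.  */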
28847
28848 static const struct builtin_description bdesc_comi[] =
28849 {
28850 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
28851 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
28852 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
28853 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
28854 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
28855 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
28856 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
28857 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
28858 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
28859 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
28860 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
28861 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
28862 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
28863 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
28864 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
28865 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
28866 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
28867 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
28868 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
28869 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
28870 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
28871 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
28872 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
28873 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
28874 };
28875
28876 static const struct builtin_description bdesc_pcmpestr[] =
28877 {
28878 /* SSE4.2 */
28879 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
28880 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
28881 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
28882 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
28883 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
28884 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
28885 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
28886 };
28887
28888 static const struct builtin_description bdesc_pcmpistr[] =
28889 {
28890 /* SSE4.2 */
28891 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
28892 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
28893 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
28894 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
28895 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
28896 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
28897 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
28898 };
28899
28900 /* Special builtins with variable number of arguments. */
28901 static const struct builtin_description bdesc_special_args[] =
28902 {
28903 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
28904 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
28905 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },
28906
28907   /* 80387 (used internally for atomic compound assignment).  */
28908 { 0, CODE_FOR_fnstenv, "__builtin_ia32_fnstenv", IX86_BUILTIN_FNSTENV, UNKNOWN, (int) VOID_FTYPE_PVOID },
28909 { 0, CODE_FOR_fldenv, "__builtin_ia32_fldenv", IX86_BUILTIN_FLDENV, UNKNOWN, (int) VOID_FTYPE_PCVOID },
28910 { 0, CODE_FOR_fnstsw, "__builtin_ia32_fnstsw", IX86_BUILTIN_FNSTSW, UNKNOWN, (int) VOID_FTYPE_PUSHORT },
28911 { 0, CODE_FOR_fnclex, "__builtin_ia32_fnclex", IX86_BUILTIN_FNCLEX, UNKNOWN, (int) VOID_FTYPE_VOID },
28912
28913 /* MMX */
28914 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
28915
28916 /* 3DNow! */
28917 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
28918
28919 /* FXSR, XSAVE and XSAVEOPT */
28920 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxsave", IX86_BUILTIN_FXSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID },
28921 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxrstor", IX86_BUILTIN_FXRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID },
28922 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xsave", IX86_BUILTIN_XSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
28923 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xrstor", IX86_BUILTIN_XRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
28924 { OPTION_MASK_ISA_XSAVEOPT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt", IX86_BUILTIN_XSAVEOPT, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
28925
28926 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxsave64", IX86_BUILTIN_FXSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID },
28927 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxrstor64", IX86_BUILTIN_FXRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID },
28928 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsave64", IX86_BUILTIN_XSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
28929 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstor64", IX86_BUILTIN_XRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
28930 { OPTION_MASK_ISA_XSAVEOPT | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt64", IX86_BUILTIN_XSAVEOPT64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
28931
28932 /* SSE */
28933 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storeups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
28934 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
28935 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
28936
28937 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
28938 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
28939 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
28940 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
28941
28942 /* SSE or 3DNow!A */
28943 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
28944 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntq, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
28945
28946 /* SSE2 */
28947 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
28948 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
28949 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storeupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
28950 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storedquv16qi, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
28951 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
28952 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
28953 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
28954 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG },
28955 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
28956 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loaddquv16qi, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
28957
28958 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
28959 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
28960
28961 /* SSE3 */
28962 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
28963
28964 /* SSE4.1 */
28965 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
28966
28967 /* SSE4A */
28968 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
28969 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
28970
28971 /* AVX */
28972 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
28973 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
28974
28975 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
28976 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
28977 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
28978 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
28979 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
28980
28981 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
28982 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
28983 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
28984 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
28985 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loaddquv32qi, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
28986 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storedquv32qi, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
28987 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
28988
28989 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
28990 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
28991 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
28992
28993 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
28994 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
28995 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
28996 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
28997 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
28998 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
28999 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
29000 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },
29001
29002 /* AVX2 */
29003 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_movntdqa, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256, UNKNOWN, (int) V4DI_FTYPE_PV4DI },
29004 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI },
29005 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI },
29006 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd256, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI },
29007 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq256, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI },
29008 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_V4SI },
29009 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_V2DI },
29010 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI },
29011 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI },
29012
29013 /* AVX512F */
29014 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16sf_mask, "__builtin_ia32_compressstoresf512_mask", IX86_BUILTIN_COMPRESSPSSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
29015 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16si_mask, "__builtin_ia32_compressstoresi512_mask", IX86_BUILTIN_PCOMPRESSDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
29016 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8df_mask, "__builtin_ia32_compressstoredf512_mask", IX86_BUILTIN_COMPRESSPDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
29017 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8di_mask, "__builtin_ia32_compressstoredi512_mask", IX86_BUILTIN_PCOMPRESSQSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
29018 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandloadsf512_mask", IX86_BUILTIN_EXPANDPSLOAD512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29019 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandloadsf512_maskz", IX86_BUILTIN_EXPANDPSLOAD512Z, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29020 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandloadsi512_mask", IX86_BUILTIN_PEXPANDDLOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29021 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandloadsi512_maskz", IX86_BUILTIN_PEXPANDDLOAD512Z, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29022 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expandloaddf512_mask", IX86_BUILTIN_EXPANDPDLOAD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29023 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expandloaddf512_maskz", IX86_BUILTIN_EXPANDPDLOAD512Z, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29024 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expandloaddi512_mask", IX86_BUILTIN_PEXPANDQLOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29025 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expandloaddi512_maskz", IX86_BUILTIN_PEXPANDQLOAD512Z, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29026 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv16si_mask, "__builtin_ia32_loaddqusi512_mask", IX86_BUILTIN_LOADDQUSI512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29027 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv8di_mask, "__builtin_ia32_loaddqudi512_mask", IX86_BUILTIN_LOADDQUDI512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29028 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadupd512_mask, "__builtin_ia32_loadupd512_mask", IX86_BUILTIN_LOADUPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29029 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadups512_mask, "__builtin_ia32_loadups512_mask", IX86_BUILTIN_LOADUPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29030 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_loadaps512_mask", IX86_BUILTIN_LOADAPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29031 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32load512_mask", IX86_BUILTIN_MOVDQA32LOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29032 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_loadapd512_mask", IX86_BUILTIN_LOADAPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29033 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64load512_mask", IX86_BUILTIN_MOVDQA64LOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29034 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv16sf, "__builtin_ia32_movntps512", IX86_BUILTIN_MOVNTPS512, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V16SF },
29035 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8df, "__builtin_ia32_movntpd512", IX86_BUILTIN_MOVNTPD512, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V8DF },
29036 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8di, "__builtin_ia32_movntdq512", IX86_BUILTIN_MOVNTDQ512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI },
29037 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntdqa, "__builtin_ia32_movntdqa512", IX86_BUILTIN_MOVNTDQA512, UNKNOWN, (int) V8DI_FTYPE_PV8DI },
29038 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv16si_mask, "__builtin_ia32_storedqusi512_mask", IX86_BUILTIN_STOREDQUSI512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
29039 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv8di_mask, "__builtin_ia32_storedqudi512_mask", IX86_BUILTIN_STOREDQUDI512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
29040 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeupd512_mask, "__builtin_ia32_storeupd512_mask", IX86_BUILTIN_STOREUPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
29041 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask_store, "__builtin_ia32_pmovusqd512mem_mask", IX86_BUILTIN_PMOVUSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
29042 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask_store, "__builtin_ia32_pmovsqd512mem_mask", IX86_BUILTIN_PMOVSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
29043 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask_store, "__builtin_ia32_pmovqd512mem_mask", IX86_BUILTIN_PMOVQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
29044 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovusqw512mem_mask", IX86_BUILTIN_PMOVUSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
29045 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovsqw512mem_mask", IX86_BUILTIN_PMOVSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
29046 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovqw512mem_mask", IX86_BUILTIN_PMOVQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
29047 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovusdw512mem_mask", IX86_BUILTIN_PMOVUSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
29048 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovsdw512mem_mask", IX86_BUILTIN_PMOVSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
29049 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovdw512mem_mask", IX86_BUILTIN_PMOVDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
29050 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovqb512mem_mask", IX86_BUILTIN_PMOVQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
29051 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovusqb512mem_mask", IX86_BUILTIN_PMOVUSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
29052 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovsqb512mem_mask", IX86_BUILTIN_PMOVSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
29053 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovusdb512mem_mask", IX86_BUILTIN_PMOVUSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
29054 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovsdb512mem_mask", IX86_BUILTIN_PMOVSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
29055 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovdb512mem_mask", IX86_BUILTIN_PMOVDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
29056 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeups512_mask, "__builtin_ia32_storeups512_mask", IX86_BUILTIN_STOREUPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
29057 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16sf_mask, "__builtin_ia32_storeaps512_mask", IX86_BUILTIN_STOREAPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
29058 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16si_mask, "__builtin_ia32_movdqa32store512_mask", IX86_BUILTIN_MOVDQA32STORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
29059 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8df_mask, "__builtin_ia32_storeapd512_mask", IX86_BUILTIN_STOREAPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
29060 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8di_mask, "__builtin_ia32_movdqa64store512_mask", IX86_BUILTIN_MOVDQA64STORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
29061
29062 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
29063 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
29064 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
29065 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
29066 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
29067 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
29068
29069 /* FSGSBASE */
29070 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
29071 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
29072 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
29073 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
29074 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
29075 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
29076 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
29077 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
29078
29079 /* RTM */
29080 { OPTION_MASK_ISA_RTM, CODE_FOR_xbegin, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
29081 { OPTION_MASK_ISA_RTM, CODE_FOR_xend, "__builtin_ia32_xend", IX86_BUILTIN_XEND, UNKNOWN, (int) VOID_FTYPE_VOID },
29082 { OPTION_MASK_ISA_RTM, CODE_FOR_xtest, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST, UNKNOWN, (int) INT_FTYPE_VOID },
29083 };
29084
29085 /* Builtins with variable number of arguments. */
29086 static const struct builtin_description bdesc_args[] =
29087 {
29088 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
29089 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
29090 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
29091 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
29092 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
29093 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
29094 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
29095
29096 /* MMX */
29097 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29098 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29099 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29100 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29101 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29102 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29103
29104 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29105 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29106 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29107 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29108 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29109 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29110 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29111 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29112
29113 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29114 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29115
29116 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29117 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29118 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29119 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29120
29121 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29122 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29123 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29124 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29125 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29126 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29127
29128 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29129 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29130 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29131 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29132   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29133   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29134
29135 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
29136 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
29137 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
29138
29139 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
29140
29141 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
29142 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
29143 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
29144 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
29145 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
29146 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
29147
29148 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
29149 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
29150 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
29151 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
29152 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
29153 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
29154
29155 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
29156 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
29157 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
29158 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
29159
29160 /* 3DNow! */
29161 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
29162 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
29163 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
29164 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
29165
29166 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29167 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29168 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29169 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
29170 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
29171 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
29172 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29173 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29174 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29175 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29176 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29177 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29178 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29179 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29180 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29181
29182 /* 3DNow!A */
29183 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
29184 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
29185 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
29186 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
29187 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29188 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29189
29190 /* SSE */
29191 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
29192 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29193 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29194 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29195 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29196 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29197 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
29198 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
29199 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
29200 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
29201 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
29202 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
29203
29204 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29205
29206 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29207 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29208 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29209 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29210 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29211 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29212 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29213 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29214
29215 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
29216 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
29217 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
29218 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29219 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29220 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29221 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
29222 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
29223 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
29224 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29225   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29226 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29227 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
29228 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
29229 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
29230 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29231 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
29232 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
29233 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
29234 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29235
29236 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29237 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29238 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29239 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29240
29241 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29242 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29243 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29244 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29245
29246 { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29247
29248 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29249 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29250 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29251 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29252 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29253
29254 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
29255 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
29256   { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },
29257
29258 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
29259
29260 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
29261 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
29262 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
29263
29264 { OPTION_MASK_ISA_SSE, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
29265 { OPTION_MASK_ISA_SSE, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
29266
29267   /* SSE MMX or 3DNow!A */
29268 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29269 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29270 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29271
29272 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29273 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29274 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29275 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29276
29277 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
29278 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
29279
29280 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
29281
29282 /* SSE2 */
29283 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29284
29285 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
29286 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
29287 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
29288 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
29289 { OPTION_MASK_ISA_SSE2, CODE_FOR_floatv4siv4sf2, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
29290
29291 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
29292 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
29293 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
29294 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
29295 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
29296
29297 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
29298
29299 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
29300 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
29301 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
29302 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
29303
29304 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_fix_notruncv4sfv4si, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
29305 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
29306 { OPTION_MASK_ISA_SSE2, CODE_FOR_fix_truncv4sfv4si2, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
29307
29308 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29309 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29310 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29311 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29312 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29313 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29314 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29315 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29316
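/* The compare builtins all reuse the maskcmp pattern; the rtx code in the
   fifth field selects the predicate.  Entries whose type ends in _SWAP
   (cmpgtpd, cmpgepd, ...) appear to be expanded with their two operands
   exchanged, so "greater" is implemented as "less" on swapped inputs,
   while the negated forms (cmpnltpd, cmpnlepd) use the unordered codes
   UNGE/UNGT.  */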
29317 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
29318 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
29319 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
29320 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29321 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP},
29322 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29323 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
29324 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
29325 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
29326 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29327 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29328 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29329 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
29330 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
29331 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
29332 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29333 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
29334 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
29335 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
29336 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29337
29338 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29339 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29340 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29341 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29342
29343 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29344 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29345 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29346 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29347
29348 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29349
29350 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29351 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29352 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29353
29354 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
29355
29356 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29357 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29358 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29359 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29360 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29361 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29362 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29363 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29364
29365 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29366 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29367 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29368 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29369 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29370 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29371 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29372 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29373
29374 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29375 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29376
29377 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29378 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29379 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29380 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29381
29382 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29383 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29384
29385 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29386 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29387 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29388 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29389 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29390 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29391
29392 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29393 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29394 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29395 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29396
29397 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29398 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29399 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29400 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29401 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29402 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29403 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29404 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29405
29406 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
29407 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
29408 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
29409
29410 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29411 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
29412
29413 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
29414 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_widen_umult_even_v4si, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
29415
29416 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
29417
29418 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
29419 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
29420 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
29421 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
29422
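/* Shift builtins: the _SI_COUNT variants take an integer shift count,
   while the _V*I_COUNT variants take the count in the low part of a
   vector register, matching the immediate and register forms of the
   instructions.  The pslldqi/psrldqi entries are typed V2DI but use the
   V1TI shift patterns; the _INT_CONVERT suffix appears to tell the
   expander to reinterpret the operands in the mode the pattern wants.  */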
29423 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
29424 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
29425 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
29426 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
29427 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
29428 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
29429 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
29430
29431 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
29432 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
29433 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
29434 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
29435 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
29436 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
29437 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
29438
29439 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
29440 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
29441 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
29442 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
29443
29444 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
29445 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
29446 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
29447
29448 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
29449
29450 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
29451
29452 /* SSE2 MMX */
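/* These are the 64-bit (MMX register) forms of paddq/psubq, which only
   exist once SSE2 is available, hence the V1DI types under an SSE2
   mask.  */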
29453 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
29454 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
29455
29456 /* SSE3 */
29457 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29458 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29459
29460 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29461 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29462 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29463 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29464 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29465 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29466
29467 /* SSSE3 */
29468 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
29469 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
29470 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
29471 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
29472 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
29473 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
29474
29475 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29476 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29477 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29478 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29479 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29480 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29481 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29482 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29483 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29484 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29485 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29486 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29487 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
29488 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
29489 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29490 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29491 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29492 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29493 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29494 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29495 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29496 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29497 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29498 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29499
29500 /* SSSE3. */
29501 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
29502 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
29503
29504 /* SSE4.1 */
29505 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29506 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29507 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
29508 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
29509 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29510 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29511 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29512 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
29513 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
29514 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
29515
29516 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
29517 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
29518 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
29519 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
29520 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
29521 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
29522 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
29523 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
29524 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
29525 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
29526 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
29527 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
29528 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
29529
29530 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
29531 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29532 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29533 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29534 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29535 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29536 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29537 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29538 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29539 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29540 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
29541 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29542
29543 /* SSE4.1 */
29544 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
29545 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
29546 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29547 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29548
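/* For the floor/ceil/trunc/rint entries the comparison-code slot is
   reused to carry a ROUND_* constant; the _ROUND function types appear
   to make the expander pass that constant as the rounding-mode immediate
   of the underlying roundpd/roundps pattern.  */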
29549 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
29550 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
29551 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
29552 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },
29553
29554 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
29555 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
29556
29557 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF },
29558 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
29559
29560 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
29561 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
29562 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
29563 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },
29564
29565 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V4SF_ROUND },
29566 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V4SF_ROUND },
29567
29568 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29569 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2_sfix, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX, UNKNOWN, (int) V4SI_FTYPE_V4SF },
29570
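/* For the _PTEST types the rtx code selects which flag of the ptest
   result is returned: EQ tests ZF (ptestz), LTU tests CF (ptestc), and
   GTU tests that neither flag is set (ptestnzc).  The vtest* AVX entries
   further down follow the same convention.  */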
29571 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
29572 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
29573 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
29574
29575 /* SSE4.2 */
29576 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29577 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
29578 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
29579 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29580 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
29581
29582 /* SSE4A */
29583 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
29584 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
29585 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
29586 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29587
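/* The AES and PCLMUL entries below have a null name: the user-visible
   builtins appear to be declared elsewhere under their own ISA masks,
   and these rows only supply the insn patterns used when expanding
   them.  */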
29588 /* AES */
29589 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
29590 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
29591
29592 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29593 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29594 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29595 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29596
29597 /* PCLMUL */
29598 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
29599
29600 /* AVX */
29601 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29602 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29603 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29604 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29605 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29606 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29607 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29608 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29609 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29610 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29611 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29612 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29613 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29614 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29615 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29616 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29617 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29618 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29619 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29620 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29621 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29622 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29623 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29624 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29625 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29626 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29627
29628 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
29629 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
29630 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
29631 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
29632
29633 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29634 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29635 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
29636 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
29637 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29638 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29639 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29640 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29641 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29642 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29643 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29644 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29645 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29646 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
29647 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
29648 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
29649 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv4siv4df2, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
29650 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv8siv8sf2, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
29651 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
29652 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_fix_notruncv8sfv8si, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
29653 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
29654 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv4dfv4si2, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
29655 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
29656 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv8sfv8si2, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
29657 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29658 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29659 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
29660 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
29661 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
29662 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
29663 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
29664 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
29665 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
29666 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
29667
29668 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29669 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29670 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
29671
29672 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
29673 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29674 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29675 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29676 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29677
29678 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29679
29680 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
29681 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
29682
29683 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
29684 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
29685 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
29686 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },
29687
29688 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
29689 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
29690
29691 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
29692 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
29693
29694 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
29695 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
29696 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
29697 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },
29698
29699 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V8SF_ROUND },
29700 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V8SF_ROUND },
29701
29702 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29703 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2_sfix, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
29704
29705 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29706 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29707 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29708 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29709
29710 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
29711 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
29712 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
29713 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
29714 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
29715 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
29716
29717 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
29718 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
29719 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
29720 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
29721 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
29722 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
29723 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
29724 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
29725 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
29726 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
29727 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
29728 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
29729 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
29730 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
29731 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
29732
29733 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
29734 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
29735
29736 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29737 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29738
29739 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_pack_sfix_v4df, "__builtin_ia32_vec_pack_sfix256", IX86_BUILTIN_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
29740
29741 /* AVX2 */
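/* The AVX2 entries largely mirror the 128-bit SSE2/SSSE3/SSE4.1 integer
   builtins above, widened to 256 bits; palignr256 again uses an
   _INT_CONVERT type since the underlying pattern works on V2TImode.  */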
29742 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT },
29743 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI },
29744 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv16hi2, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI },
29745 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv8si2, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI },
29746 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packssdw, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
29747 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packsswb, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
29748 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packusdw, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
29749 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packuswb, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
29750 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv32qi3, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29751 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv16hi3, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29752 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv8si3, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29753 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv4di3, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29754 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv32qi3, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29755 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv16hi3, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29756 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv32qi3, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29757 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv16hi3, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29758 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_palignrv2ti, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT },
29759 { OPTION_MASK_ISA_AVX2, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29760 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29761 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29762 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29763 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI },
29764 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT },
29765 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv32qi3, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29766 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv16hi3, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29767 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv8si3, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29768 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv4di3, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29769 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv32qi3, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29770 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv16hi3, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29771 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv8si3, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29772 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv4di3, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29773 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddwv16hi3, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29774 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phadddv8si3, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29775 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddswv16hi3, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29776 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubwv16hi3, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29777 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubdv8si3, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29778 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubswv16hi3, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29779 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddubsw256, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
29780 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddwd, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI },
29781 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv32qi3, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29782 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv16hi3, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29783 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv8si3 , "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29784 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv32qi3, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29785 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv16hi3, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29786 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv8si3 , "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29787 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv32qi3, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29788 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv16hi3, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29789 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv8si3 , "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29790 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv32qi3, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29791 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv16hi3, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29792 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv8si3 , "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29793 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmovmskb, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256, UNKNOWN, (int) INT_FTYPE_V32QI },
29794 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv16qiv16hi2, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
29795 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8qiv8si2 , "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
29796 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4qiv4di2 , "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
29797 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8hiv8si2 , "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
29798 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4hiv4di2 , "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
29799 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4siv4di2 , "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
29800 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv16qiv16hi2, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
29801 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8qiv8si2 , "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
29802 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4qiv4di2 , "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
29803 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8hiv8si2 , "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
29804 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2 , "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
29805 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2 , "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
29806 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_smult_even_v8si, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
29807 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmulhrswv16hi3 , "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29808 { OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256" , IX86_BUILTIN_PMULHUW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29809 { OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256" , IX86_BUILTIN_PMULHW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29810 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256" , IX86_BUILTIN_PMULLW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29811 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256" , IX86_BUILTIN_PMULLD256 , UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29812 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_umult_even_v8si, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
29813 { OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29814 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
29815 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29816 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufdv3, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT },
29817 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufhwv3, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
29818 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshuflwv3, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
29819 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv32qi3, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29820 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv16hi3, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29821 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv8si3 , "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29822 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlv2ti3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
29823 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
29824 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
29825 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
29826 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
29827 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
29828 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
29829 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
29830 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
29831 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
29832 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
29833 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrv2ti3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
29834 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
29835 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
29836 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
29837 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
29838 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
29839 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
29840 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv32qi3, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29841 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv16hi3, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29842 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv8si3, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29843 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv4di3, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29844 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv32qi3, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29845 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv16hi3, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29846 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv32qi3, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29847 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv16hi3, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29848 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv32qi, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29849 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv16hi, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29850 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv8si, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29851 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv4di, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29852 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv32qi, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29853 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv16hi, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29854 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv8si, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29855 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv4di, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29856 { OPTION_MASK_ISA_AVX2, CODE_FOR_xorv4di3, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29857 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4sf, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29858 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv8sf, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256, UNKNOWN, (int) V8SF_FTYPE_V4SF },
29859 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4df, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256, UNKNOWN, (int) V4DF_FTYPE_V2DF },
29860 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vbroadcasti128_v4di, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
29861 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv4si, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
29862 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv8si, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
29863 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv32qi, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256, UNKNOWN, (int) V32QI_FTYPE_V16QI },
29864 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16hi, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256, UNKNOWN, (int) V16HI_FTYPE_V8HI },
29865 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8si, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256, UNKNOWN, (int) V8SI_FTYPE_V4SI },
29866 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4di, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
29867 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16qi, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
29868 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8hi, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
29869 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4si, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
29870 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
29871 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29872 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
29873 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
29874 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
29875 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
29876 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_extracti128, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
29877 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_inserti128, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
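/* Variable shifts: the psllv/psrlv/psrav builtins below take a vector of
   per-element shift counts rather than a single scalar count.  */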
29878 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29879 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29880 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29881 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4si, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29882 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv8si, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29883 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv4si, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29884 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4di, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29885 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv2di, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29886 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv8si, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29887 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4si, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29888
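/* LZCNT */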
29889 { OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
29890
29891 /* BMI */
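/* bextr_u32/bextr_u64 extract a bit field whose starting bit and length
   are packed into the low two bytes of the second operand; __builtin_ctzs
   is the 16-bit count-trailing-zeros form.  */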
29892 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29893 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
29894 { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
29895
29896 /* TBM */
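/* bextri is the TBM form of BEXTR that takes the start/length control as
   an immediate instead of a register operand.  */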
29897 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29898 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
29899
29900 /* F16C */
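/* Half-precision (16-bit float) conversions; vcvtps2ph takes an immediate
   rounding-control operand (the trailing INT in its FTYPE).  */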
29901 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
29902 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
29903 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
29904 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
29905
29906 /* BMI2 */
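/* bzhi zeros all bits above a given bit index; pdep/pext are the parallel
   bit deposit/extract operations.  */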
29907 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29908 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
29909 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29910 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
29911 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29912 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
29913
29914 /* AVX512F */
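/* Most of the "_mask" builtins below follow the usual AVX-512 masking
   convention: the trailing vector operand is the merge (pass-through)
   source and the final QI/HI operand is the write mask, one bit per
   destination element.  E.g. _mm512_mask_add_epi32 (W, U, A, B) in
   avx512fintrin.h is implemented on top of
   __builtin_ia32_paddd512_mask (A, B, W, U).  */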
29915 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv16si_mask, "__builtin_ia32_alignd512_mask", IX86_BUILTIN_ALIGND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
29916 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv8di_mask, "__builtin_ia32_alignq512_mask", IX86_BUILTIN_ALIGNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
29917 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16si, "__builtin_ia32_blendmd_512_mask", IX86_BUILTIN_BLENDMD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
29918 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8df, "__builtin_ia32_blendmpd_512_mask", IX86_BUILTIN_BLENDMPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
29919 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16sf, "__builtin_ia32_blendmps_512_mask", IX86_BUILTIN_BLENDMPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
29920 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8di, "__builtin_ia32_blendmq_512_mask", IX86_BUILTIN_BLENDMQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
29921 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x4_512", IX86_BUILTIN_BROADCASTF32X4_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
29922 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8df_mask, "__builtin_ia32_broadcastf64x4_512", IX86_BUILTIN_BROADCASTF64X4_512, UNKNOWN, (int) V8DF_FTYPE_V4DF_V8DF_QI },
29923 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16si_mask, "__builtin_ia32_broadcasti32x4_512", IX86_BUILTIN_BROADCASTI32X4_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
29924 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8di_mask, "__builtin_ia32_broadcasti64x4_512", IX86_BUILTIN_BROADCASTI64X4_512, UNKNOWN, (int) V8DI_FTYPE_V4DI_V8DI_QI },
29925 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8df_mask, "__builtin_ia32_broadcastsd512", IX86_BUILTIN_BROADCASTSD512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_QI },
29926 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16sf_mask, "__builtin_ia32_broadcastss512", IX86_BUILTIN_BROADCASTSS512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
29927 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16si3_mask, "__builtin_ia32_cmpd512_mask", IX86_BUILTIN_CMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
29928 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8di3_mask, "__builtin_ia32_cmpq512_mask", IX86_BUILTIN_CMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
29929 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8df_mask, "__builtin_ia32_compressdf512_mask", IX86_BUILTIN_COMPRESSPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
29930 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16sf_mask, "__builtin_ia32_compresssf512_mask", IX86_BUILTIN_COMPRESSPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
29931 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv8siv8df2_mask, "__builtin_ia32_cvtdq2pd512_mask", IX86_BUILTIN_CVTDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
29932 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtps2ph512_mask, "__builtin_ia32_vcvtps2ph512_mask", IX86_BUILTIN_CVTPS2PH512, UNKNOWN, (int) V16HI_FTYPE_V16SF_INT_V16HI_HI },
29933 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv8siv8df_mask, "__builtin_ia32_cvtudq2pd512_mask", IX86_BUILTIN_CVTUDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
29934 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2sd32, "__builtin_ia32_cvtusi2sd32", IX86_BUILTIN_CVTUSI2SD32, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT },
29935 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expanddf512_mask", IX86_BUILTIN_EXPANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
29936 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expanddf512_maskz", IX86_BUILTIN_EXPANDPD512Z, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
29937 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandsf512_mask", IX86_BUILTIN_EXPANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
29938 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandsf512_maskz", IX86_BUILTIN_EXPANDPS512Z, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
29939 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf32x4_mask, "__builtin_ia32_extractf32x4_mask", IX86_BUILTIN_EXTRACTF32X4, UNKNOWN, (int) V4SF_FTYPE_V16SF_INT_V4SF_QI },
29940 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf64x4_mask, "__builtin_ia32_extractf64x4_mask", IX86_BUILTIN_EXTRACTF64X4, UNKNOWN, (int) V4DF_FTYPE_V8DF_INT_V4DF_QI },
29941 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti32x4_mask, "__builtin_ia32_extracti32x4_mask", IX86_BUILTIN_EXTRACTI32X4, UNKNOWN, (int) V4SI_FTYPE_V16SI_INT_V4SI_QI },
29942 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti64x4_mask, "__builtin_ia32_extracti64x4_mask", IX86_BUILTIN_EXTRACTI64X4, UNKNOWN, (int) V4DI_FTYPE_V8DI_INT_V4DI_QI },
29943 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf32x4_mask, "__builtin_ia32_insertf32x4_mask", IX86_BUILTIN_INSERTF32X4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI },
29944 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf64x4_mask, "__builtin_ia32_insertf64x4_mask", IX86_BUILTIN_INSERTF64X4, UNKNOWN, (int) V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI },
29945 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti32x4_mask, "__builtin_ia32_inserti32x4_mask", IX86_BUILTIN_INSERTI32X4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI },
29946 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti64x4_mask, "__builtin_ia32_inserti64x4_mask", IX86_BUILTIN_INSERTI64X4, UNKNOWN, (int) V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI },
29947 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_movapd512_mask", IX86_BUILTIN_MOVAPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
29948 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_movaps512_mask", IX86_BUILTIN_MOVAPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
29949 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movddup512_mask, "__builtin_ia32_movddup512_mask", IX86_BUILTIN_MOVDDUP512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
29950 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32_512_mask", IX86_BUILTIN_MOVDQA32_512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
29951 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64_512_mask", IX86_BUILTIN_MOVDQA64_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
29952 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movshdup512_mask, "__builtin_ia32_movshdup512_mask", IX86_BUILTIN_MOVSHDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
29953 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movsldup512_mask, "__builtin_ia32_movsldup512_mask", IX86_BUILTIN_MOVSLDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
29954 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv16si2_mask, "__builtin_ia32_pabsd512_mask", IX86_BUILTIN_PABSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
29955 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv8di2_mask, "__builtin_ia32_pabsq512_mask", IX86_BUILTIN_PABSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
29956 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16si3_mask, "__builtin_ia32_paddd512_mask", IX86_BUILTIN_PADDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
29957 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8di3_mask, "__builtin_ia32_paddq512_mask", IX86_BUILTIN_PADDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
29958 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv16si3_mask, "__builtin_ia32_pandd512_mask", IX86_BUILTIN_PANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
29959 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv16si3_mask, "__builtin_ia32_pandnd512_mask", IX86_BUILTIN_PANDND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
29960 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv8di3_mask, "__builtin_ia32_pandnq512_mask", IX86_BUILTIN_PANDNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
29961 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv8di3_mask, "__builtin_ia32_pandq512_mask", IX86_BUILTIN_PANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
29962 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16si_mask, "__builtin_ia32_pbroadcastd512", IX86_BUILTIN_PBROADCASTD512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
29963 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv16si_mask, "__builtin_ia32_pbroadcastd512_gpr_mask", IX86_BUILTIN_PBROADCASTD512_GPR, UNKNOWN, (int) V16SI_FTYPE_SI_V16SI_HI },
29964 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv8di, "__builtin_ia32_broadcastmb512", IX86_BUILTIN_PBROADCASTMB512, UNKNOWN, (int) V8DI_FTYPE_QI },
29965 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv16si, "__builtin_ia32_broadcastmw512", IX86_BUILTIN_PBROADCASTMW512, UNKNOWN, (int) V16SI_FTYPE_HI },
29966 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8di_mask, "__builtin_ia32_pbroadcastq512", IX86_BUILTIN_PBROADCASTQ512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI },
29967 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vec_dup_gprv8di_mask, "__builtin_ia32_pbroadcastq512_gpr_mask", IX86_BUILTIN_PBROADCASTQ512_GPR, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
29968 { OPTION_MASK_ISA_AVX512F & ~OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vec_dup_memv8di_mask, "__builtin_ia32_pbroadcastq512_mem_mask", IX86_BUILTIN_PBROADCASTQ512_MEM, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
29969 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv16si3_mask, "__builtin_ia32_pcmpeqd512_mask", IX86_BUILTIN_PCMPEQD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
29970 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv8di3_mask, "__builtin_ia32_pcmpeqq512_mask", IX86_BUILTIN_PCMPEQQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
29971 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv16si3_mask, "__builtin_ia32_pcmpgtd512_mask", IX86_BUILTIN_PCMPGTD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
29972 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv8di3_mask, "__builtin_ia32_pcmpgtq512_mask", IX86_BUILTIN_PCMPGTQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
29973 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16si_mask, "__builtin_ia32_compresssi512_mask", IX86_BUILTIN_PCOMPRESSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
29974 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8di_mask, "__builtin_ia32_compressdi512_mask", IX86_BUILTIN_PCOMPRESSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
29975 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandsi512_mask", IX86_BUILTIN_PEXPANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
29976 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandsi512_maskz", IX86_BUILTIN_PEXPANDD512Z, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
29977 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expanddi512_mask", IX86_BUILTIN_PEXPANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
29978 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expanddi512_maskz", IX86_BUILTIN_PEXPANDQ512Z, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
29979 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16si3_mask, "__builtin_ia32_pmaxsd512_mask", IX86_BUILTIN_PMAXSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
29980 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8di3_mask, "__builtin_ia32_pmaxsq512_mask", IX86_BUILTIN_PMAXSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
29981 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv16si3_mask, "__builtin_ia32_pmaxud512_mask", IX86_BUILTIN_PMAXUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
29982 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv8di3_mask, "__builtin_ia32_pmaxuq512_mask", IX86_BUILTIN_PMAXUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
29983 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16si3_mask, "__builtin_ia32_pminsd512_mask", IX86_BUILTIN_PMINSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
29984 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8di3_mask, "__builtin_ia32_pminsq512_mask", IX86_BUILTIN_PMINSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
29985 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv16si3_mask, "__builtin_ia32_pminud512_mask", IX86_BUILTIN_PMINUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
29986 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv8di3_mask, "__builtin_ia32_pminuq512_mask", IX86_BUILTIN_PMINUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
29987 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask, "__builtin_ia32_pmovdb512_mask", IX86_BUILTIN_PMOVDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
29988 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask, "__builtin_ia32_pmovdw512_mask", IX86_BUILTIN_PMOVDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
29989 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask, "__builtin_ia32_pmovqb512_mask", IX86_BUILTIN_PMOVQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
29990 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask, "__builtin_ia32_pmovqd512_mask", IX86_BUILTIN_PMOVQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
29991 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask, "__builtin_ia32_pmovqw512_mask", IX86_BUILTIN_PMOVQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
29992 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask, "__builtin_ia32_pmovsdb512_mask", IX86_BUILTIN_PMOVSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
29993 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask, "__builtin_ia32_pmovsdw512_mask", IX86_BUILTIN_PMOVSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
29994 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask, "__builtin_ia32_pmovsqb512_mask", IX86_BUILTIN_PMOVSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
29995 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask, "__builtin_ia32_pmovsqd512_mask", IX86_BUILTIN_PMOVSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
29996 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask, "__builtin_ia32_pmovsqw512_mask", IX86_BUILTIN_PMOVSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
29997 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16qiv16si2_mask, "__builtin_ia32_pmovsxbd512_mask", IX86_BUILTIN_PMOVSXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
29998 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8qiv8di2_mask, "__builtin_ia32_pmovsxbq512_mask", IX86_BUILTIN_PMOVSXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
29999 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8siv8di2_mask, "__builtin_ia32_pmovsxdq512_mask", IX86_BUILTIN_PMOVSXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
30000 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16hiv16si2_mask, "__builtin_ia32_pmovsxwd512_mask", IX86_BUILTIN_PMOVSXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
30001 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8hiv8di2_mask, "__builtin_ia32_pmovsxwq512_mask", IX86_BUILTIN_PMOVSXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
30002 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask, "__builtin_ia32_pmovusdb512_mask", IX86_BUILTIN_PMOVUSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
30003 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask, "__builtin_ia32_pmovusdw512_mask", IX86_BUILTIN_PMOVUSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
30004 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask, "__builtin_ia32_pmovusqb512_mask", IX86_BUILTIN_PMOVUSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
30005 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask, "__builtin_ia32_pmovusqd512_mask", IX86_BUILTIN_PMOVUSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
30006 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask, "__builtin_ia32_pmovusqw512_mask", IX86_BUILTIN_PMOVUSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
30007 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16qiv16si2_mask, "__builtin_ia32_pmovzxbd512_mask", IX86_BUILTIN_PMOVZXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
30008 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8qiv8di2_mask, "__builtin_ia32_pmovzxbq512_mask", IX86_BUILTIN_PMOVZXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
30009 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8siv8di2_mask, "__builtin_ia32_pmovzxdq512_mask", IX86_BUILTIN_PMOVZXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
30010 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16hiv16si2_mask, "__builtin_ia32_pmovzxwd512_mask", IX86_BUILTIN_PMOVZXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
30011 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8hiv8di2_mask, "__builtin_ia32_pmovzxwq512_mask", IX86_BUILTIN_PMOVZXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
30012 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_smult_even_v16si_mask, "__builtin_ia32_pmuldq512_mask", IX86_BUILTIN_PMULDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
30013 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16si3_mask, "__builtin_ia32_pmulld512_mask" , IX86_BUILTIN_PMULLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30014 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_umult_even_v16si_mask, "__builtin_ia32_pmuludq512_mask", IX86_BUILTIN_PMULUDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
30015 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv16si3_mask, "__builtin_ia32_pord512_mask", IX86_BUILTIN_PORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30016 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv8di3_mask, "__builtin_ia32_porq512_mask", IX86_BUILTIN_PORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30017 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv16si_mask, "__builtin_ia32_prold512_mask", IX86_BUILTIN_PROLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30018 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv8di_mask, "__builtin_ia32_prolq512_mask", IX86_BUILTIN_PROLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30019 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv16si_mask, "__builtin_ia32_prolvd512_mask", IX86_BUILTIN_PROLVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30020 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv8di_mask, "__builtin_ia32_prolvq512_mask", IX86_BUILTIN_PROLVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30021 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv16si_mask, "__builtin_ia32_prord512_mask", IX86_BUILTIN_PRORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30022 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv8di_mask, "__builtin_ia32_prorq512_mask", IX86_BUILTIN_PRORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30023 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv16si_mask, "__builtin_ia32_prorvd512_mask", IX86_BUILTIN_PRORVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30024 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv8di_mask, "__builtin_ia32_prorvq512_mask", IX86_BUILTIN_PRORVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30025 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pshufdv3_mask, "__builtin_ia32_pshufd512_mask", IX86_BUILTIN_PSHUFD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30026 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslld512_mask", IX86_BUILTIN_PSLLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
30027 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslldi512_mask", IX86_BUILTIN_PSLLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30028 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllq512_mask", IX86_BUILTIN_PSLLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
30029 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllqi512_mask", IX86_BUILTIN_PSLLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30030 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv16si_mask, "__builtin_ia32_psllv16si_mask", IX86_BUILTIN_PSLLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30031 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv8di_mask, "__builtin_ia32_psllv8di_mask", IX86_BUILTIN_PSLLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30032 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psrad512_mask", IX86_BUILTIN_PSRAD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
30033 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psradi512_mask", IX86_BUILTIN_PSRADI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30034 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraq512_mask", IX86_BUILTIN_PSRAQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
30035 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraqi512_mask", IX86_BUILTIN_PSRAQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30036 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv16si_mask, "__builtin_ia32_psrav16si_mask", IX86_BUILTIN_PSRAVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30037 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv8di_mask, "__builtin_ia32_psrav8di_mask", IX86_BUILTIN_PSRAVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30038 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrld512_mask", IX86_BUILTIN_PSRLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
30039 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrldi512_mask", IX86_BUILTIN_PSRLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30040 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlq512_mask", IX86_BUILTIN_PSRLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
30041 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlqi512_mask", IX86_BUILTIN_PSRLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30042 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv16si_mask, "__builtin_ia32_psrlv16si_mask", IX86_BUILTIN_PSRLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30043 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv8di_mask, "__builtin_ia32_psrlv8di_mask", IX86_BUILTIN_PSRLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30044 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16si3_mask, "__builtin_ia32_psubd512_mask", IX86_BUILTIN_PSUBD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30045 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8di3_mask, "__builtin_ia32_psubq512_mask", IX86_BUILTIN_PSUBQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30046 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv16si3_mask, "__builtin_ia32_ptestmd512", IX86_BUILTIN_PTESTMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
30047 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv8di3_mask, "__builtin_ia32_ptestmq512", IX86_BUILTIN_PTESTMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
30048 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512f_testnmv16si3_mask, "__builtin_ia32_ptestnmd512", IX86_BUILTIN_PTESTNMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
30049 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512f_testnmv8di3_mask, "__builtin_ia32_ptestnmq512", IX86_BUILTIN_PTESTNMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
30050 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv16si_mask, "__builtin_ia32_punpckhdq512_mask", IX86_BUILTIN_PUNPCKHDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30051 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv8di_mask, "__builtin_ia32_punpckhqdq512_mask", IX86_BUILTIN_PUNPCKHQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30052 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv16si_mask, "__builtin_ia32_punpckldq512_mask", IX86_BUILTIN_PUNPCKLDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30053 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv8di_mask, "__builtin_ia32_punpcklqdq512_mask", IX86_BUILTIN_PUNPCKLQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30054 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv16si3_mask, "__builtin_ia32_pxord512_mask", IX86_BUILTIN_PXORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30055 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv8di3_mask, "__builtin_ia32_pxorq512_mask", IX86_BUILTIN_PXORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30056 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v8df_mask, "__builtin_ia32_rcp14pd512_mask", IX86_BUILTIN_RCP14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30057 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v16sf_mask, "__builtin_ia32_rcp14ps512_mask", IX86_BUILTIN_RCP14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30058 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v2df, "__builtin_ia32_rcp14sd", IX86_BUILTIN_RCP14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
30059 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v4sf, "__builtin_ia32_rcp14ss", IX86_BUILTIN_RCP14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30060 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v8df_mask, "__builtin_ia32_rsqrt14pd512_mask", IX86_BUILTIN_RSQRT14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30061 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v16sf_mask, "__builtin_ia32_rsqrt14ps512_mask", IX86_BUILTIN_RSQRT14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30062 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v2df, "__builtin_ia32_rsqrt14sd", IX86_BUILTIN_RSQRT14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
30063 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v4sf, "__builtin_ia32_rsqrt14ss", IX86_BUILTIN_RSQRT14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30064 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufpd512_mask, "__builtin_ia32_shufpd512_mask", IX86_BUILTIN_SHUFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
30065 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufps512_mask, "__builtin_ia32_shufps512_mask", IX86_BUILTIN_SHUFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
30066 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_mask", IX86_BUILTIN_SHUF_F32x4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
30067 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_mask", IX86_BUILTIN_SHUF_F64x2, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
30068 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_mask", IX86_BUILTIN_SHUF_I32x4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
30069 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_mask", IX86_BUILTIN_SHUF_I64x2, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
30070 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv16si3_mask, "__builtin_ia32_ucmpd512_mask", IX86_BUILTIN_UCMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
30071 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv8di3_mask, "__builtin_ia32_ucmpq512_mask", IX86_BUILTIN_UCMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
30072 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhpd512_mask, "__builtin_ia32_unpckhpd512_mask", IX86_BUILTIN_UNPCKHPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
30073 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhps512_mask, "__builtin_ia32_unpckhps512_mask", IX86_BUILTIN_UNPCKHPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
30074 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklpd512_mask, "__builtin_ia32_unpcklpd512_mask", IX86_BUILTIN_UNPCKLPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
30075 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklps512_mask, "__builtin_ia32_unpcklps512_mask", IX86_BUILTIN_UNPCKLPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
30076 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv16si2_mask, "__builtin_ia32_vplzcntd_512_mask", IX86_BUILTIN_VPCLZCNTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30077 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv8di2_mask, "__builtin_ia32_vplzcntq_512_mask", IX86_BUILTIN_VPCLZCNTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30078 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv16si_mask, "__builtin_ia32_vpconflictsi_512_mask", IX86_BUILTIN_VPCONFLICTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30079 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv8di_mask, "__builtin_ia32_vpconflictdi_512_mask", IX86_BUILTIN_VPCONFLICTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30080 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8df_mask, "__builtin_ia32_permdf512_mask", IX86_BUILTIN_VPERMDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
30081 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8di_mask, "__builtin_ia32_permdi512_mask", IX86_BUILTIN_VPERMDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30082 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16si3_mask, "__builtin_ia32_vpermi2vard512_mask", IX86_BUILTIN_VPERMI2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30083 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8df3_mask, "__builtin_ia32_vpermi2varpd512_mask", IX86_BUILTIN_VPERMI2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
30084 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16sf3_mask, "__builtin_ia32_vpermi2varps512_mask", IX86_BUILTIN_VPERMI2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
30085 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8di3_mask, "__builtin_ia32_vpermi2varq512_mask", IX86_BUILTIN_VPERMI2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30086 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv8df_mask, "__builtin_ia32_vpermilpd512_mask", IX86_BUILTIN_VPERMILPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
30087 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv16sf_mask, "__builtin_ia32_vpermilps512_mask", IX86_BUILTIN_VPERMILPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI },
30088 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv8df3_mask, "__builtin_ia32_vpermilvarpd512_mask", IX86_BUILTIN_VPERMILVARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
30089 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv16sf3_mask, "__builtin_ia32_vpermilvarps512_mask", IX86_BUILTIN_VPERMILVARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
30090 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_mask, "__builtin_ia32_vpermt2vard512_mask", IX86_BUILTIN_VPERMT2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30091 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_maskz, "__builtin_ia32_vpermt2vard512_maskz", IX86_BUILTIN_VPERMT2VARD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30092 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_mask, "__builtin_ia32_vpermt2varpd512_mask", IX86_BUILTIN_VPERMT2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
30093 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_maskz, "__builtin_ia32_vpermt2varpd512_maskz", IX86_BUILTIN_VPERMT2VARPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
30094 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_mask, "__builtin_ia32_vpermt2varps512_mask", IX86_BUILTIN_VPERMT2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
30095 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_maskz, "__builtin_ia32_vpermt2varps512_maskz", IX86_BUILTIN_VPERMT2VARPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
30096 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_mask, "__builtin_ia32_vpermt2varq512_mask", IX86_BUILTIN_VPERMT2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30097 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_maskz, "__builtin_ia32_vpermt2varq512_maskz", IX86_BUILTIN_VPERMT2VARQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30098 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8df_mask, "__builtin_ia32_permvardf512_mask", IX86_BUILTIN_VPERMVARDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
30099 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8di_mask, "__builtin_ia32_permvardi512_mask", IX86_BUILTIN_VPERMVARDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30100 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16sf_mask, "__builtin_ia32_permvarsf512_mask", IX86_BUILTIN_VPERMVARSF512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
30101 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16si_mask, "__builtin_ia32_permvarsi512_mask", IX86_BUILTIN_VPERMVARSI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30102 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_mask, "__builtin_ia32_pternlogd512_mask", IX86_BUILTIN_VTERNLOGD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
30103 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_maskz, "__builtin_ia32_pternlogd512_maskz", IX86_BUILTIN_VTERNLOGD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
30104 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_mask, "__builtin_ia32_pternlogq512_mask", IX86_BUILTIN_VTERNLOGQ512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
30105 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_maskz, "__builtin_ia32_pternlogq512_maskz", IX86_BUILTIN_VTERNLOGQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
30106
30107 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv16sf3, "__builtin_ia32_copysignps512", IX86_BUILTIN_CPYSGNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF },
30108 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv8df3, "__builtin_ia32_copysignpd512", IX86_BUILTIN_CPYSGNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF },
30109 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2, "__builtin_ia32_sqrtpd512", IX86_BUILTIN_SQRTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF },
30110 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sqrtv16sf2, "__builtin_ia32_sqrtps512", IX86_BUILTIN_SQRTPS_NR512, UNKNOWN, (int) V16SF_FTYPE_V16SF },
30111 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf, "__builtin_ia32_exp2ps", IX86_BUILTIN_EXP2PS, UNKNOWN, (int) V16SF_FTYPE_V16SF },
30112 { OPTION_MASK_ISA_AVX512F, CODE_FOR_roundv8df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix512", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF },
30113 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_floorpd_vec_pack_sfix512", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
30114 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_ceilpd_vec_pack_sfix512", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
30115
30116 /* Mask arithmetic operations */
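/* These operate on the AVX-512 mask (k) registers; the 16-bit masks are
   represented here as HImode values (HI_FTYPE_*).  */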
30117 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andhi3, "__builtin_ia32_kandhi", IX86_BUILTIN_KAND16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30118 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kandnhi, "__builtin_ia32_kandnhi", IX86_BUILTIN_KANDN16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30119 { OPTION_MASK_ISA_AVX512F, CODE_FOR_one_cmplhi2, "__builtin_ia32_knothi", IX86_BUILTIN_KNOT16, UNKNOWN, (int) HI_FTYPE_HI },
30120 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorhi3, "__builtin_ia32_korhi", IX86_BUILTIN_KOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30121 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestchi, "__builtin_ia32_kortestchi", IX86_BUILTIN_KORTESTC16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30122 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestzhi, "__builtin_ia32_kortestzhi", IX86_BUILTIN_KORTESTZ16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30123 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kunpckhi, "__builtin_ia32_kunpckhi", IX86_BUILTIN_KUNPCKBW, UNKNOWN, (int) HI_FTYPE_HI_HI },
30124 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kxnorhi, "__builtin_ia32_kxnorhi", IX86_BUILTIN_KXNOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30125 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorhi3, "__builtin_ia32_kxorhi", IX86_BUILTIN_KXOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30126 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kmovw, "__builtin_ia32_kmov16", IX86_BUILTIN_KMOV16, UNKNOWN, (int) HI_FTYPE_HI },
30127
30128 /* SHA */
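/* SHA-NI helpers for the SHA-1/SHA-256 message schedule and round
   computations.  */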
30129 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg1, 0, IX86_BUILTIN_SHA1MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30130 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg2, 0, IX86_BUILTIN_SHA1MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30131 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1nexte, 0, IX86_BUILTIN_SHA1NEXTE, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30132 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1rnds4, 0, IX86_BUILTIN_SHA1RNDS4, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
30133 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg1, 0, IX86_BUILTIN_SHA256MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30134 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg2, 0, IX86_BUILTIN_SHA256MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30135 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256rnds2, 0, IX86_BUILTIN_SHA256RNDS2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
30136 };
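/* Illustrative note, not part of GCC: the mask-arithmetic entries above are
   what the <immintrin.h> mask intrinsics typically expand to.  A minimal
   sketch, assuming an AVX-512F-capable compiler; combine_masks is a
   hypothetical name used only for this example:

     #include <immintrin.h>

     __mmask16
     combine_masks (__mmask16 a, __mmask16 b, __mmask16 c)
     {
       // (a & b) | c on 16-bit masks; typically lowered via
       // __builtin_ia32_kandhi and __builtin_ia32_korhi.
       return _mm512_kor (_mm512_kand (a, b), c);
     }

   Build with -mavx512f (or an equivalent target attribute).  */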
30137
30138 /* Builtins with rounding support. */
30139 static const struct builtin_description bdesc_round_args[] =
30140 {
30141 /* AVX512F */
30142 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8df3_mask_round, "__builtin_ia32_addpd512_mask", IX86_BUILTIN_ADDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30143 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16sf3_mask_round, "__builtin_ia32_addps512_mask", IX86_BUILTIN_ADDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30144 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmaddv2df3_round, "__builtin_ia32_addsd_round", IX86_BUILTIN_ADDSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30145 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmaddv4sf3_round, "__builtin_ia32_addss_round", IX86_BUILTIN_ADDSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30146 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8df3_mask_round, "__builtin_ia32_cmppd512_mask", IX86_BUILTIN_CMPPD512, UNKNOWN, (int) QI_FTYPE_V8DF_V8DF_INT_QI_INT },
30147 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16sf3_mask_round, "__builtin_ia32_cmpps512_mask", IX86_BUILTIN_CMPPS512, UNKNOWN, (int) HI_FTYPE_V16SF_V16SF_INT_HI_INT },
30148 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv2df3_mask_round, "__builtin_ia32_cmpsd_mask", IX86_BUILTIN_CMPSD_MASK, UNKNOWN, (int) QI_FTYPE_V2DF_V2DF_INT_QI_INT },
30149 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv4sf3_mask_round, "__builtin_ia32_cmpss_mask", IX86_BUILTIN_CMPSS_MASK, UNKNOWN, (int) QI_FTYPE_V4SF_V4SF_INT_QI_INT },
30150 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_comi_round, "__builtin_ia32_vcomisd", IX86_BUILTIN_COMIDF, UNKNOWN, (int) INT_FTYPE_V2DF_V2DF_INT_INT },
30151 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_comi_round, "__builtin_ia32_vcomiss", IX86_BUILTIN_COMISF, UNKNOWN, (int) INT_FTYPE_V4SF_V4SF_INT_INT },
30152 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv16siv16sf2_mask_round, "__builtin_ia32_cvtdq2ps512_mask", IX86_BUILTIN_CVTDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
30153 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2dq512_mask_round, "__builtin_ia32_cvtpd2dq512_mask", IX86_BUILTIN_CVTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30154 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2ps512_mask_round, "__builtin_ia32_cvtpd2ps512_mask", IX86_BUILTIN_CVTPD2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DF_V8SF_QI_INT },
30155 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv8dfv8si_mask_round, "__builtin_ia32_cvtpd2udq512_mask", IX86_BUILTIN_CVTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30156 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtph2ps512_mask_round, "__builtin_ia32_vcvtph2ps512_mask", IX86_BUILTIN_CVTPH2PS512, UNKNOWN, (int) V16SF_FTYPE_V16HI_V16SF_HI_INT },
30157 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2dq512_mask", IX86_BUILTIN_CVTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30158 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtps2pd512_mask_round, "__builtin_ia32_cvtps2pd512_mask", IX86_BUILTIN_CVTPS2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SF_V8DF_QI_INT },
30159 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2udq512_mask", IX86_BUILTIN_CVTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30160 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2ss_round, "__builtin_ia32_cvtsd2ss_round", IX86_BUILTIN_CVTSD2SS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF_INT },
30161 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq_round, "__builtin_ia32_cvtsi2sd64", IX86_BUILTIN_CVTSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT64_INT },
30162 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtsi2ss_round, "__builtin_ia32_cvtsi2ss32", IX86_BUILTIN_CVTSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_INT },
30163 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq_round, "__builtin_ia32_cvtsi2ss64", IX86_BUILTIN_CVTSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT64_INT },
30164 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtss2sd_round, "__builtin_ia32_cvtss2sd_round", IX86_BUILTIN_CVTSS2SD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF_INT },
30165 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2dq512_mask", IX86_BUILTIN_CVTTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30166 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2udq512_mask", IX86_BUILTIN_CVTTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30167 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2dq512_mask", IX86_BUILTIN_CVTTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30168 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2udq512_mask", IX86_BUILTIN_CVTTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30169 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv16siv16sf2_mask_round, "__builtin_ia32_cvtudq2ps512_mask", IX86_BUILTIN_CVTUDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
30170 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2sd64_round, "__builtin_ia32_cvtusi2sd64", IX86_BUILTIN_CVTUSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT64_INT },
30171 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2ss32_round, "__builtin_ia32_cvtusi2ss32", IX86_BUILTIN_CVTUSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT_INT },
30172 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2ss64_round, "__builtin_ia32_cvtusi2ss64", IX86_BUILTIN_CVTUSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT64_INT },
30173 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv8df3_mask_round, "__builtin_ia32_divpd512_mask", IX86_BUILTIN_DIVPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30174 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv16sf3_mask_round, "__builtin_ia32_divps512_mask", IX86_BUILTIN_DIVPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30175 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmdivv2df3_round, "__builtin_ia32_divsd_round", IX86_BUILTIN_DIVSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30176 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmdivv4sf3_round, "__builtin_ia32_divss_round", IX86_BUILTIN_DIVSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30177 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_mask_round, "__builtin_ia32_fixupimmpd512_mask", IX86_BUILTIN_FIXUPIMMPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
30178 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_maskz_round, "__builtin_ia32_fixupimmpd512_maskz", IX86_BUILTIN_FIXUPIMMPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
30179 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_mask_round, "__builtin_ia32_fixupimmps512_mask", IX86_BUILTIN_FIXUPIMMPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
30180 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_maskz_round, "__builtin_ia32_fixupimmps512_maskz", IX86_BUILTIN_FIXUPIMMPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
30181 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_mask_round, "__builtin_ia32_fixupimmsd_mask", IX86_BUILTIN_FIXUPIMMSD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
30182 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_maskz_round, "__builtin_ia32_fixupimmsd_maskz", IX86_BUILTIN_FIXUPIMMSD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
30183 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_mask_round, "__builtin_ia32_fixupimmss_mask", IX86_BUILTIN_FIXUPIMMSS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
30184 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_maskz_round, "__builtin_ia32_fixupimmss_maskz", IX86_BUILTIN_FIXUPIMMSS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
30185 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv8df_mask_round, "__builtin_ia32_getexppd512_mask", IX86_BUILTIN_GETEXPPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30186 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv16sf_mask_round, "__builtin_ia32_getexpps512_mask", IX86_BUILTIN_GETEXPPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30187 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv2df_round, "__builtin_ia32_getexpsd128_round", IX86_BUILTIN_GETEXPSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30188 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv4sf_round, "__builtin_ia32_getexpss128_round", IX86_BUILTIN_GETEXPSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30189 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv8df_mask_round, "__builtin_ia32_getmantpd512_mask", IX86_BUILTIN_GETMANTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
30190 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv16sf_mask_round, "__builtin_ia32_getmantps512_mask", IX86_BUILTIN_GETMANTPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
30191 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv2df_round, "__builtin_ia32_getmantsd_round", IX86_BUILTIN_GETMANTSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
30192 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv4sf_round, "__builtin_ia32_getmantss_round", IX86_BUILTIN_GETMANTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
30193 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8df3_mask_round, "__builtin_ia32_maxpd512_mask", IX86_BUILTIN_MAXPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30194 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16sf3_mask_round, "__builtin_ia32_maxps512_mask", IX86_BUILTIN_MAXPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30195 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsmaxv2df3_round, "__builtin_ia32_maxsd_round", IX86_BUILTIN_MAXSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30196 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsmaxv4sf3_round, "__builtin_ia32_maxss_round", IX86_BUILTIN_MAXSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30197 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8df3_mask_round, "__builtin_ia32_minpd512_mask", IX86_BUILTIN_MINPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30198 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16sf3_mask_round, "__builtin_ia32_minps512_mask", IX86_BUILTIN_MINPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30199 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsminv2df3_round, "__builtin_ia32_minsd_round", IX86_BUILTIN_MINSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30200 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsminv4sf3_round, "__builtin_ia32_minss_round", IX86_BUILTIN_MINSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30201 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv8df3_mask_round, "__builtin_ia32_mulpd512_mask", IX86_BUILTIN_MULPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30202 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16sf3_mask_round, "__builtin_ia32_mulps512_mask", IX86_BUILTIN_MULPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30203 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmmulv2df3_round, "__builtin_ia32_mulsd_round", IX86_BUILTIN_MULSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30204 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmmulv4sf3_round, "__builtin_ia32_mulss_round", IX86_BUILTIN_MULSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30205 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev8df_mask_round, "__builtin_ia32_rndscalepd_mask", IX86_BUILTIN_RNDSCALEPD, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
30206 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev16sf_mask_round, "__builtin_ia32_rndscaleps_mask", IX86_BUILTIN_RNDSCALEPS, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
30207 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev2df_round, "__builtin_ia32_rndscalesd_round", IX86_BUILTIN_RNDSCALESD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
30208 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev4sf_round, "__builtin_ia32_rndscaless_round", IX86_BUILTIN_RNDSCALESS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
30209 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv8df_mask_round, "__builtin_ia32_scalefpd512_mask", IX86_BUILTIN_SCALEFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30210 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv16sf_mask_round, "__builtin_ia32_scalefps512_mask", IX86_BUILTIN_SCALEFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30211 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv2df_round, "__builtin_ia32_scalefsd_round", IX86_BUILTIN_SCALEFSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30212 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv4sf_round, "__builtin_ia32_scalefss_round", IX86_BUILTIN_SCALEFSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30213 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2_mask_round, "__builtin_ia32_sqrtpd512_mask", IX86_BUILTIN_SQRTPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30214 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv16sf2_mask_round, "__builtin_ia32_sqrtps512_mask", IX86_BUILTIN_SQRTPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30215 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsqrtv2df2_round, "__builtin_ia32_sqrtsd_round", IX86_BUILTIN_SQRTSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30216 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsqrtv4sf2_round, "__builtin_ia32_sqrtss_round", IX86_BUILTIN_SQRTSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30217 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8df3_mask_round, "__builtin_ia32_subpd512_mask", IX86_BUILTIN_SUBPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30218 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16sf3_mask_round, "__builtin_ia32_subps512_mask", IX86_BUILTIN_SUBPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30219 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsubv2df3_round, "__builtin_ia32_subsd_round", IX86_BUILTIN_SUBSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30220 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsubv4sf3_round, "__builtin_ia32_subss_round", IX86_BUILTIN_SUBSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30221 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2si_round, "__builtin_ia32_vcvtsd2si32", IX86_BUILTIN_VCVTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
30222 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq_round, "__builtin_ia32_vcvtsd2si64", IX86_BUILTIN_VCVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
30223 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtsd2usi_round, "__builtin_ia32_vcvtsd2usi32", IX86_BUILTIN_VCVTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
30224 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtsd2usiq_round, "__builtin_ia32_vcvtsd2usi64", IX86_BUILTIN_VCVTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
30225 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtss2si_round, "__builtin_ia32_vcvtss2si32", IX86_BUILTIN_VCVTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
30226 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq_round, "__builtin_ia32_vcvtss2si64", IX86_BUILTIN_VCVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
30227 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtss2usi_round, "__builtin_ia32_vcvtss2usi32", IX86_BUILTIN_VCVTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
30228 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtss2usiq_round, "__builtin_ia32_vcvtss2usi64", IX86_BUILTIN_VCVTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
30229 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvttsd2si_round, "__builtin_ia32_vcvttsd2si32", IX86_BUILTIN_VCVTTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
30230 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq_round, "__builtin_ia32_vcvttsd2si64", IX86_BUILTIN_VCVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
30231 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttsd2usi_round, "__builtin_ia32_vcvttsd2usi32", IX86_BUILTIN_VCVTTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
30232 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttsd2usiq_round, "__builtin_ia32_vcvttsd2usi64", IX86_BUILTIN_VCVTTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
30233 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvttss2si_round, "__builtin_ia32_vcvttss2si32", IX86_BUILTIN_VCVTTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
30234 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq_round, "__builtin_ia32_vcvttss2si64", IX86_BUILTIN_VCVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
30235 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttss2usi_round, "__builtin_ia32_vcvttss2usi32", IX86_BUILTIN_VCVTTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
30236 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttss2usiq_round, "__builtin_ia32_vcvttss2usi64", IX86_BUILTIN_VCVTTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
30237 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask_round, "__builtin_ia32_vfmaddpd512_mask", IX86_BUILTIN_VFMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30238 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask3_round, "__builtin_ia32_vfmaddpd512_mask3", IX86_BUILTIN_VFMADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30239 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_maskz_round, "__builtin_ia32_vfmaddpd512_maskz", IX86_BUILTIN_VFMADDPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30240 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask_round, "__builtin_ia32_vfmaddps512_mask", IX86_BUILTIN_VFMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30241 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask3_round, "__builtin_ia32_vfmaddps512_mask3", IX86_BUILTIN_VFMADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30242 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_maskz_round, "__builtin_ia32_vfmaddps512_maskz", IX86_BUILTIN_VFMADDPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30243 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v2df_round, "__builtin_ia32_vfmaddsd3_round", IX86_BUILTIN_VFMADDSD3_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_INT },
30244 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v4sf_round, "__builtin_ia32_vfmaddss3_round", IX86_BUILTIN_VFMADDSS3_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_INT },
30245 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask_round, "__builtin_ia32_vfmaddsubpd512_mask", IX86_BUILTIN_VFMADDSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30246 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask3_round, "__builtin_ia32_vfmaddsubpd512_mask3", IX86_BUILTIN_VFMADDSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30247 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_maskz_round, "__builtin_ia32_vfmaddsubpd512_maskz", IX86_BUILTIN_VFMADDSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30248 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask_round, "__builtin_ia32_vfmaddsubps512_mask", IX86_BUILTIN_VFMADDSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30249 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask3_round, "__builtin_ia32_vfmaddsubps512_mask3", IX86_BUILTIN_VFMADDSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30250 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_maskz_round, "__builtin_ia32_vfmaddsubps512_maskz", IX86_BUILTIN_VFMADDSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30251 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v8df_mask3_round, "__builtin_ia32_vfmsubaddpd512_mask3", IX86_BUILTIN_VFMSUBADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30252 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v16sf_mask3_round, "__builtin_ia32_vfmsubaddps512_mask3", IX86_BUILTIN_VFMSUBADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30253 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v8df_mask3_round, "__builtin_ia32_vfmsubpd512_mask3", IX86_BUILTIN_VFMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30254 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v16sf_mask3_round, "__builtin_ia32_vfmsubps512_mask3", IX86_BUILTIN_VFMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30255 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v8df_mask_round, "__builtin_ia32_vfnmaddpd512_mask", IX86_BUILTIN_VFNMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30256 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v16sf_mask_round, "__builtin_ia32_vfnmaddps512_mask", IX86_BUILTIN_VFNMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30257 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask_round, "__builtin_ia32_vfnmsubpd512_mask", IX86_BUILTIN_VFNMSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30258 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask3_round, "__builtin_ia32_vfnmsubpd512_mask3", IX86_BUILTIN_VFNMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30259 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask_round, "__builtin_ia32_vfnmsubps512_mask", IX86_BUILTIN_VFNMSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30260 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask3_round, "__builtin_ia32_vfnmsubps512_mask3", IX86_BUILTIN_VFNMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30261
30262 /* AVX512ER */
30263 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v8df_mask_round, "__builtin_ia32_exp2pd_mask", IX86_BUILTIN_EXP2PD_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30264 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf_mask_round, "__builtin_ia32_exp2ps_mask", IX86_BUILTIN_EXP2PS_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30265 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v8df_mask_round, "__builtin_ia32_rcp28pd_mask", IX86_BUILTIN_RCP28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30266 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v16sf_mask_round, "__builtin_ia32_rcp28ps_mask", IX86_BUILTIN_RCP28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30267 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v2df_round, "__builtin_ia32_rcp28sd_round", IX86_BUILTIN_RCP28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30268 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v4sf_round, "__builtin_ia32_rcp28ss_round", IX86_BUILTIN_RCP28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30269 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v8df_mask_round, "__builtin_ia32_rsqrt28pd_mask", IX86_BUILTIN_RSQRT28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30270 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v16sf_mask_round, "__builtin_ia32_rsqrt28ps_mask", IX86_BUILTIN_RSQRT28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30271 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v2df_round, "__builtin_ia32_rsqrt28sd_round", IX86_BUILTIN_RSQRT28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30272 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v4sf_round, "__builtin_ia32_rsqrt28ss_round", IX86_BUILTIN_RSQRT28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30273 };
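/* Illustrative note, not part of GCC: the rounding-form entries above back
   the ..._round_... intrinsics, which take an extra immediate selecting the
   rounding mode and exception behaviour.  A minimal sketch, assuming an
   AVX-512F-capable compiler; add_rne is a hypothetical name:

     #include <immintrin.h>

     __m512d
     add_rne (__m512d a, __m512d b)
     {
       // Round to nearest even with exceptions suppressed; the rounding
       // selector is carried as the trailing immediate operand.
       return _mm512_add_round_pd (a, b,
                                   _MM_FROUND_TO_NEAREST_INT
                                   | _MM_FROUND_NO_EXC);
     }

   Build with -mavx512f.  */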
30274
30275 /* FMA4 and XOP. */
30276 #define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
30277 #define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
30278 #define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
30279 #define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
30280 #define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
30281 #define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
30282 #define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
30283 #define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
30284 #define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
30285 #define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
30286 #define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
30287 #define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
30288 #define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
30289 #define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
30290 #define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
30291 #define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
30292 #define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
30293 #define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
30294 #define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
30295 #define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
30296 #define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
30297 #define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
30298 #define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
30299 #define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
30300 #define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
30301 #define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
30302 #define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
30303 #define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
30304 #define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
30305 #define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
30306 #define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
30307 #define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
30308 #define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
30309 #define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
30310 #define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
30311 #define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
30312 #define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
30313 #define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
30314 #define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
30315 #define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
30316 #define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
30317 #define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
30318 #define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
30319 #define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
30320 #define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
30321 #define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
30322 #define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
30323 #define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
30324 #define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
30325 #define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
30326 #define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
30327 #define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
30328
30329 static const struct builtin_description bdesc_multi_arg[] =
30330 {
30331 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
30332 "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
30333 UNKNOWN, (int)MULTI_ARG_3_SF },
30334 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
30335 "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
30336 UNKNOWN, (int)MULTI_ARG_3_DF },
30337
30338 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf,
30339 "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3,
30340 UNKNOWN, (int)MULTI_ARG_3_SF },
30341 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df,
30342 "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3,
30343 UNKNOWN, (int)MULTI_ARG_3_DF },
30344
30345 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
30346 "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
30347 UNKNOWN, (int)MULTI_ARG_3_SF },
30348 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
30349 "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
30350 UNKNOWN, (int)MULTI_ARG_3_DF },
30351 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
30352 "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
30353 UNKNOWN, (int)MULTI_ARG_3_SF2 },
30354 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
30355 "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
30356 UNKNOWN, (int)MULTI_ARG_3_DF2 },
30357
30358 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
30359 "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
30360 UNKNOWN, (int)MULTI_ARG_3_SF },
30361 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
30362 "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
30363 UNKNOWN, (int)MULTI_ARG_3_DF },
30364 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
30365 "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
30366 UNKNOWN, (int)MULTI_ARG_3_SF2 },
30367 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
30368 "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
30369 UNKNOWN, (int)MULTI_ARG_3_DF2 },
30370
30371 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
30372 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
30373 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
30374 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
30375 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi",IX86_BUILTIN_VPCMOV_V16QI,UNKNOWN, (int)MULTI_ARG_3_QI },
30376 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
30377 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
30378
30379 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
30380 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
30381 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
30382 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
30383 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
30384 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
30385 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
30386
30387 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
30388
30389 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
30390 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
30391 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30392 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30393 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
30394 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
30395 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30396 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30397 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30398 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30399 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30400 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30401
30402 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
30403 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
30404 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
30405 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
30406 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
30407 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
30408 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
30409 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
30410 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
30411 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
30412 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
30413 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
30414 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
30415 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
30416 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
30417 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
30418
30419 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_1_SF },
30420 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_1_DF },
30421 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
30422 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
30423 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
30424 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
30425
30426 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
30427 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
30428 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
30429 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
30430 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
30431 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
30432 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
30433 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
30434 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
30435 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
30436 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
30437 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
30438 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
30439 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
30440 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
30441
30442 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
30443 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
30444 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
30445 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
30446 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
30447 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
30448 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
30449
30450 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
30451 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
30452 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
30453 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
30454 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
30455 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
30456 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
30457
30458 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
30459 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
30460 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
30461 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
30462 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
30463 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
30464 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
30465
30466 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
30467 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
30468 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
30469 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
30470 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
30471 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
30472 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
30473
30474 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
30475 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
30476 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
30477 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
30478 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
30479 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
30480 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
30481
30482 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
30483 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
30484 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
30485 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
30486 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
30487 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
30488 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
30489
30490 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
30491 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
30492 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
30493 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
30494 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
30495 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
30496 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
30497
30498 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
30499 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
30500 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
30501 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
30502 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
30503 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
30504 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
30505
30506 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
30507 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
30508 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
30509 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
30510 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
30511 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
30512 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
30513 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
30514
30515 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
30516 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
30517 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
30518 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
30519 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
30520 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
30521 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
30522 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
30523
30524 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
30525 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
30526 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
30527 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
30528
30529 };
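/* Illustrative note, not part of GCC: the FMA4/XOP entries above back the
   intrinsics provided through <x86intrin.h>.  A minimal sketch, assuming an
   XOP-capable compiler; select_bits is a hypothetical name:

     #include <x86intrin.h>

     __m128i
     select_bits (__m128i a, __m128i b, __m128i mask)
     {
       // XOP vpcmov: bitwise select between a and b under mask, typically
       // lowered via __builtin_ia32_vpcmov.
       return _mm_cmov_si128 (a, b, mask);
     }

   Build with -mxop.  */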
30530 \f
30531 /* TM vector builtins. */
30532
30533 /* Reuse the existing x86-specific `struct builtin_description' because
30534    we're lazy.  Add casts to make them fit.  */
30535 static const struct builtin_description bdesc_tm[] =
30536 {
30537 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
30538 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
30539 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
30540 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30541 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30542 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30543 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30544
30545 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
30546 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
30547 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
30548 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30549 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30550 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30551 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30552
30553 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
30554 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
30555 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
30556 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30557 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30558 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30559 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30560
30561 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
30562 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
30563 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
30564 };
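/* Illustrative note, not part of GCC: the entries above are the vector
   variants of the libitm read/write/log barriers.  With -fgnu-tm, vector
   accesses inside a transaction are instrumented with these builtins.  A
   minimal sketch; the v4sf typedef and store_in_txn are hypothetical names:

     typedef float v4sf __attribute__ ((vector_size (16)));

     void
     store_in_txn (v4sf *p, v4sf v)
     {
       // The vector store is rewritten into a _ITM_W* barrier call
       // (the M128 variant when SSE is enabled).
       __transaction_atomic { *p = v; }
     }

   Build with -fgnu-tm.  */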
30565
30566 /* TM callbacks. */
30567
30568 /* Return the builtin decl needed to load a vector of TYPE. */
30569
30570 static tree
30571 ix86_builtin_tm_load (tree type)
30572 {
30573 if (TREE_CODE (type) == VECTOR_TYPE)
30574 {
30575 switch (tree_to_uhwi (TYPE_SIZE (type)))
30576 {
30577 case 64:
30578 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64);
30579 case 128:
30580 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128);
30581 case 256:
30582 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256);
30583 }
30584 }
30585 return NULL_TREE;
30586 }
30587
30588 /* Return the builtin decl needed to store a vector of TYPE. */
30589
30590 static tree
30591 ix86_builtin_tm_store (tree type)
30592 {
30593 if (TREE_CODE (type) == VECTOR_TYPE)
30594 {
30595 switch (tree_to_uhwi (TYPE_SIZE (type)))
30596 {
30597 case 64:
30598 return builtin_decl_explicit (BUILT_IN_TM_STORE_M64);
30599 case 128:
30600 return builtin_decl_explicit (BUILT_IN_TM_STORE_M128);
30601 case 256:
30602 return builtin_decl_explicit (BUILT_IN_TM_STORE_M256);
30603 }
30604 }
30605 return NULL_TREE;
30606 }
30607 \f
30608 /* Initialize the transactional memory vector load/store builtins. */
30609
30610 static void
30611 ix86_init_tm_builtins (void)
30612 {
30613 enum ix86_builtin_func_type ftype;
30614 const struct builtin_description *d;
30615 size_t i;
30616 tree decl;
30617 tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
30618 tree attrs_log, attrs_type_log;
30619
30620 if (!flag_tm)
30621 return;
30622
30623 /* If there are no builtins defined, we must be compiling in a
30624 language without trans-mem support. */
30625 if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
30626 return;
30627
30628 /* Use whatever attributes a normal TM load has. */
30629 decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
30630 attrs_load = DECL_ATTRIBUTES (decl);
30631 attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
30632 /* Use whatever attributes a normal TM store has. */
30633 decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
30634 attrs_store = DECL_ATTRIBUTES (decl);
30635 attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
30636 /* Use whatever attributes a normal TM log has. */
30637 decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
30638 attrs_log = DECL_ATTRIBUTES (decl);
30639 attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));
30640
30641 for (i = 0, d = bdesc_tm;
30642 i < ARRAY_SIZE (bdesc_tm);
30643 i++, d++)
30644 {
30645 if ((d->mask & ix86_isa_flags) != 0
30646 || (lang_hooks.builtin_function
30647 == lang_hooks.builtin_function_ext_scope))
30648 {
30649 tree type, attrs, attrs_type;
30650 enum built_in_function code = (enum built_in_function) d->code;
30651
30652 ftype = (enum ix86_builtin_func_type) d->flag;
30653 type = ix86_get_builtin_func_type (ftype);
30654
30655 if (BUILTIN_TM_LOAD_P (code))
30656 {
30657 attrs = attrs_load;
30658 attrs_type = attrs_type_load;
30659 }
30660 else if (BUILTIN_TM_STORE_P (code))
30661 {
30662 attrs = attrs_store;
30663 attrs_type = attrs_type_store;
30664 }
30665 else
30666 {
30667 attrs = attrs_log;
30668 attrs_type = attrs_type_log;
30669 }
30670 decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
30671 /* The name of the builtin, without the
30672    "__builtin_" prefix, for calling it directly.  */
30673 d->name + strlen ("__builtin_"),
30674 attrs);
30675 /* add_builtin_function() will set the DECL_ATTRIBUTES, now
30676 set the TYPE_ATTRIBUTES. */
30677 decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);
30678
30679 set_builtin_decl (code, decl, false);
30680 }
30681 }
30682 }
30683
30684 /* Set up all the MMX/SSE builtins, even builtins for instructions that are
30685    not in the current target ISA, so that the user can compile particular
30686    modules with target-specific options that differ from the command-line
30687    options.  */
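/* Illustrative note, not part of GCC: because the builtins are registered
   regardless of the command-line ISA, an intrinsic can still be used in a
   function that enables the ISA locally.  A minimal sketch; add8 is a
   hypothetical name:

     #include <immintrin.h>

     __attribute__ ((target ("avx2")))
     __m256i
     add8 (__m256i a, __m256i b)
     {
       // Valid even when the translation unit is compiled without -mavx2,
       // because the builtin behind _mm256_add_epi32 is still defined.
       return _mm256_add_epi32 (a, b);
     }
   */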
30688 static void
30689 ix86_init_mmx_sse_builtins (void)
30690 {
30691 const struct builtin_description * d;
30692 enum ix86_builtin_func_type ftype;
30693 size_t i;
30694
30695 /* Add all special builtins with a variable number of operands. */
30696 for (i = 0, d = bdesc_special_args;
30697 i < ARRAY_SIZE (bdesc_special_args);
30698 i++, d++)
30699 {
30700 if (d->name == 0)
30701 continue;
30702
30703 ftype = (enum ix86_builtin_func_type) d->flag;
30704 def_builtin (d->mask, d->name, ftype, d->code);
30705 }
30706
30707 /* Add all builtins with a variable number of operands. */
30708 for (i = 0, d = bdesc_args;
30709 i < ARRAY_SIZE (bdesc_args);
30710 i++, d++)
30711 {
30712 if (d->name == 0)
30713 continue;
30714
30715 ftype = (enum ix86_builtin_func_type) d->flag;
30716 def_builtin_const (d->mask, d->name, ftype, d->code);
30717 }
30718
30719 /* Add all builtins with rounding. */
30720 for (i = 0, d = bdesc_round_args;
30721 i < ARRAY_SIZE (bdesc_round_args);
30722 i++, d++)
30723 {
30724 if (d->name == 0)
30725 continue;
30726
30727 ftype = (enum ix86_builtin_func_type) d->flag;
30728 def_builtin_const (d->mask, d->name, ftype, d->code);
30729 }
30730
30731 /* pcmpestr[im] insns. */
30732 for (i = 0, d = bdesc_pcmpestr;
30733 i < ARRAY_SIZE (bdesc_pcmpestr);
30734 i++, d++)
30735 {
30736 if (d->code == IX86_BUILTIN_PCMPESTRM128)
30737 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
30738 else
30739 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
30740 def_builtin_const (d->mask, d->name, ftype, d->code);
30741 }
30742
30743 /* pcmpistr[im] insns. */
30744 for (i = 0, d = bdesc_pcmpistr;
30745 i < ARRAY_SIZE (bdesc_pcmpistr);
30746 i++, d++)
30747 {
30748 if (d->code == IX86_BUILTIN_PCMPISTRM128)
30749 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
30750 else
30751 ftype = INT_FTYPE_V16QI_V16QI_INT;
30752 def_builtin_const (d->mask, d->name, ftype, d->code);
30753 }
30754
30755 /* comi/ucomi insns. */
30756 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
30757 {
30758 if (d->mask == OPTION_MASK_ISA_SSE2)
30759 ftype = INT_FTYPE_V2DF_V2DF;
30760 else
30761 ftype = INT_FTYPE_V4SF_V4SF;
30762 def_builtin_const (d->mask, d->name, ftype, d->code);
30763 }
30764
30765 /* SSE */
30766 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
30767 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
30768 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
30769 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
30770
30771 /* SSE or 3DNow!A */
30772 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
30773 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
30774 IX86_BUILTIN_MASKMOVQ);
30775
30776 /* SSE2 */
30777 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
30778 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
30779
30780 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
30781 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
30782 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
30783 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
30784
30785 /* SSE3. */
30786 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
30787 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
30788 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
30789 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
30790
30791 /* AES */
30792 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
30793 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
30794 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
30795 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
30796 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
30797 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
30798 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
30799 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
30800 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
30801 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
30802 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
30803 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
30804
30805 /* PCLMUL */
30806 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
30807 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
30808
30809 /* RDRND */
30810 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
30811 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
30812 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
30813 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
30814 def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
30815 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
30816 IX86_BUILTIN_RDRAND64_STEP);
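  /* A usage sketch (not part of the original source): each *_step builtin
     stores a hardware random value through its pointer argument and returns
     the carry flag, i.e. nonzero on success:
       unsigned int val;
       if (__builtin_ia32_rdrand32_step (&val))
         ... use val ...
     <immintrin.h> wraps these as _rdrand16_step, _rdrand32_step and
     _rdrand64_step.  */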
30817
30818 /* AVX2 */
30819 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
30820 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
30821 IX86_BUILTIN_GATHERSIV2DF);
30822
30823 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
30824 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
30825 IX86_BUILTIN_GATHERSIV4DF);
30826
30827 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
30828 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
30829 IX86_BUILTIN_GATHERDIV2DF);
30830
30831 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
30832 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
30833 IX86_BUILTIN_GATHERDIV4DF);
30834
30835 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
30836 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
30837 IX86_BUILTIN_GATHERSIV4SF);
30838
30839 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
30840 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
30841 IX86_BUILTIN_GATHERSIV8SF);
30842
30843 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
30844 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
30845 IX86_BUILTIN_GATHERDIV4SF);
30846
30847 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
30848 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
30849 IX86_BUILTIN_GATHERDIV8SF);
30850
30851 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
30852 V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
30853 IX86_BUILTIN_GATHERSIV2DI);
30854
30855 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
30856 V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
30857 IX86_BUILTIN_GATHERSIV4DI);
30858
30859 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
30860 V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
30861 IX86_BUILTIN_GATHERDIV2DI);
30862
30863 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
30864 V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
30865 IX86_BUILTIN_GATHERDIV4DI);
30866
30867 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
30868 V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
30869 IX86_BUILTIN_GATHERSIV4SI);
30870
30871 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
30872 V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
30873 IX86_BUILTIN_GATHERSIV8SI);
30874
30875 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
30876 V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
30877 IX86_BUILTIN_GATHERDIV4SI);
30878
30879 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
30880 V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
30881 IX86_BUILTIN_GATHERDIV8SI);
30882
30883 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df ",
30884 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
30885 IX86_BUILTIN_GATHERALTSIV4DF);
30886
30887 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256 ",
30888 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
30889 IX86_BUILTIN_GATHERALTDIV8SF);
30890
30891 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di ",
30892 V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
30893 IX86_BUILTIN_GATHERALTSIV4DI);
30894
30895 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256 ",
30896 V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
30897 IX86_BUILTIN_GATHERALTDIV8SI);
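  /* A descriptive note (not from the original source): the function types
     above encode the gather operand order as
       (source/pass-through vector, const base pointer, index vector,
        mask vector, scale),
     matching the argument order of the <immintrin.h> wrappers such as
     _mm_mask_i32gather_pd.  */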
30898
30899 /* AVX512F */
30900 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16sf",
30901 V16SF_FTYPE_V16SF_PCFLOAT_V16SI_HI_INT,
30902 IX86_BUILTIN_GATHER3SIV16SF);
30903
30904 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8df",
30905 V8DF_FTYPE_V8DF_PCDOUBLE_V8SI_QI_INT,
30906 IX86_BUILTIN_GATHER3SIV8DF);
30907
30908 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16sf",
30909 V8SF_FTYPE_V8SF_PCFLOAT_V8DI_QI_INT,
30910 IX86_BUILTIN_GATHER3DIV16SF);
30911
30912 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8df",
30913 V8DF_FTYPE_V8DF_PCDOUBLE_V8DI_QI_INT,
30914 IX86_BUILTIN_GATHER3DIV8DF);
30915
30916 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16si",
30917 V16SI_FTYPE_V16SI_PCINT_V16SI_HI_INT,
30918 IX86_BUILTIN_GATHER3SIV16SI);
30919
30920 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8di",
30921 V8DI_FTYPE_V8DI_PCINT64_V8SI_QI_INT,
30922 IX86_BUILTIN_GATHER3SIV8DI);
30923
30924 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16si",
30925 V8SI_FTYPE_V8SI_PCINT_V8DI_QI_INT,
30926 IX86_BUILTIN_GATHER3DIV16SI);
30927
30928 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8di",
30929 V8DI_FTYPE_V8DI_PCINT64_V8DI_QI_INT,
30930 IX86_BUILTIN_GATHER3DIV8DI);
30931
30932 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8df ",
30933 V8DF_FTYPE_V8DF_PCDOUBLE_V16SI_QI_INT,
30934 IX86_BUILTIN_GATHER3ALTSIV8DF);
30935
30936 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8sf ",
30937 V16SF_FTYPE_V16SF_PCFLOAT_V8DI_HI_INT,
30938 IX86_BUILTIN_GATHER3ALTDIV16SF);
30939
30940 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8di ",
30941 V8DI_FTYPE_V8DI_PCINT64_V16SI_QI_INT,
30942 IX86_BUILTIN_GATHER3ALTSIV8DI);
30943
30944 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8si ",
30945 V16SI_FTYPE_V16SI_PCINT_V8DI_HI_INT,
30946 IX86_BUILTIN_GATHER3ALTDIV16SI);
30947
30948 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16sf",
30949 VOID_FTYPE_PFLOAT_HI_V16SI_V16SF_INT,
30950 IX86_BUILTIN_SCATTERSIV16SF);
30951
30952 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8df",
30953 VOID_FTYPE_PDOUBLE_QI_V8SI_V8DF_INT,
30954 IX86_BUILTIN_SCATTERSIV8DF);
30955
30956 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16sf",
30957 VOID_FTYPE_PFLOAT_QI_V8DI_V8SF_INT,
30958 IX86_BUILTIN_SCATTERDIV16SF);
30959
30960 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8df",
30961 VOID_FTYPE_PDOUBLE_QI_V8DI_V8DF_INT,
30962 IX86_BUILTIN_SCATTERDIV8DF);
30963
30964 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16si",
30965 VOID_FTYPE_PINT_HI_V16SI_V16SI_INT,
30966 IX86_BUILTIN_SCATTERSIV16SI);
30967
30968 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8di",
30969 VOID_FTYPE_PLONGLONG_QI_V8SI_V8DI_INT,
30970 IX86_BUILTIN_SCATTERSIV8DI);
30971
30972 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16si",
30973 VOID_FTYPE_PINT_QI_V8DI_V8SI_INT,
30974 IX86_BUILTIN_SCATTERDIV16SI);
30975
30976 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8di",
30977 VOID_FTYPE_PLONGLONG_QI_V8DI_V8DI_INT,
30978 IX86_BUILTIN_SCATTERDIV8DI);
30979
30980 /* AVX512PF */
30981 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdpd",
30982 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
30983 IX86_BUILTIN_GATHERPFDPD);
30984 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdps",
30985 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
30986 IX86_BUILTIN_GATHERPFDPS);
30987 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqpd",
30988 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
30989 IX86_BUILTIN_GATHERPFQPD);
30990 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqps",
30991 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
30992 IX86_BUILTIN_GATHERPFQPS);
30993 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdpd",
30994 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
30995 IX86_BUILTIN_SCATTERPFDPD);
30996 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdps",
30997 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
30998 IX86_BUILTIN_SCATTERPFDPS);
30999 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqpd",
31000 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
31001 IX86_BUILTIN_SCATTERPFQPD);
31002 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqps",
31003 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
31004 IX86_BUILTIN_SCATTERPFQPS);
31005
31006 /* SHA */
31007 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg1",
31008 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG1);
31009 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg2",
31010 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG2);
31011 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1nexte",
31012 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1NEXTE);
31013 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1rnds4",
31014 V4SI_FTYPE_V4SI_V4SI_INT, IX86_BUILTIN_SHA1RNDS4);
31015 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg1",
31016 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG1);
31017 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg2",
31018 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG2);
31019 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256rnds2",
31020 V4SI_FTYPE_V4SI_V4SI_V4SI, IX86_BUILTIN_SHA256RNDS2);
31021
31022 /* RTM. */
31023 def_builtin (OPTION_MASK_ISA_RTM, "__builtin_ia32_xabort",
31024 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT);
31025
31026 /* MMX access to the vec_init patterns. */
31027 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
31028 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
31029
31030 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
31031 V4HI_FTYPE_HI_HI_HI_HI,
31032 IX86_BUILTIN_VEC_INIT_V4HI);
31033
31034 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
31035 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
31036 IX86_BUILTIN_VEC_INIT_V8QI);
31037
31038 /* Access to the vec_extract patterns. */
31039 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
31040 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
31041 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
31042 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
31043 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
31044 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
31045 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
31046 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
31047 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
31048 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
31049
31050 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
31051 "__builtin_ia32_vec_ext_v4hi",
31052 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
31053
31054 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
31055 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
31056
31057 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
31058 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
31059
31060 /* Access to the vec_set patterns. */
31061 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
31062 "__builtin_ia32_vec_set_v2di",
31063 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
31064
31065 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
31066 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
31067
31068 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
31069 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
31070
31071 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
31072 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
31073
31074 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
31075 "__builtin_ia32_vec_set_v4hi",
31076 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
31077
31078 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
31079 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
31080
31081 /* RDSEED */
31082 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_hi_step",
31083 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDSEED16_STEP);
31084 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_si_step",
31085 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDSEED32_STEP);
31086 def_builtin (OPTION_MASK_ISA_RDSEED | OPTION_MASK_ISA_64BIT,
31087 "__builtin_ia32_rdseed_di_step",
31088 INT_FTYPE_PULONGLONG, IX86_BUILTIN_RDSEED64_STEP);
31089
31090 /* ADCX */
31091 def_builtin (0, "__builtin_ia32_addcarryx_u32",
31092 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_ADDCARRYX32);
31093 def_builtin (OPTION_MASK_ISA_64BIT,
31094 "__builtin_ia32_addcarryx_u64",
31095 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
31096 IX86_BUILTIN_ADDCARRYX64);
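  /* A usage sketch (hypothetical variable names): the builtin adds its two
     operands plus the incoming carry, stores the low word through the
     pointer and returns the carry out, so multi-word additions chain as
       unsigned int lo, hi;
       unsigned char c = __builtin_ia32_addcarryx_u32 (0, a0, b0, &lo);
       c = __builtin_ia32_addcarryx_u32 (c, a1, b1, &hi);
     <adxintrin.h> exposes this as _addcarryx_u32.  */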
31097
31098 /* Read/write FLAGS. */
31099 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u32",
31100 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
31101 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u64",
31102 UINT64_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
31103 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u32",
31104 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_WRITE_FLAGS);
31105 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u64",
31106 VOID_FTYPE_UINT64, IX86_BUILTIN_WRITE_FLAGS);
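  /* A usage sketch (64-bit variant): the read builtin returns the current
     EFLAGS/RFLAGS value and the write builtin loads it back, e.g.
       unsigned long long flags = __builtin_ia32_readeflags_u64 ();
       __builtin_ia32_writeeflags_u64 (flags);
     The _u32 forms are only available on 32-bit targets, as the ISA masks
     above indicate.  */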
31107
31108
31109   /* Add FMA4 multi-argument instructions.  */
31110 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
31111 {
31112 if (d->name == 0)
31113 continue;
31114
31115 ftype = (enum ix86_builtin_func_type) d->flag;
31116 def_builtin_const (d->mask, d->name, ftype, d->code);
31117 }
31118 }
31119
31120 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
31121 to return a pointer to VERSION_DECL if the outcome of the expression
31122 formed by PREDICATE_CHAIN is true. This function will be called during
31123 version dispatch to decide which function version to execute. It returns
31124 the basic block at the end, to which more conditions can be added. */
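
/* In GIMPLE terms the code built below looks roughly like this (a sketch
   derived from this function, with hypothetical SSA names):

     cond_1 = predicate_1 (arg_1);
     cond_2 = predicate_2 (arg_2);
     and_tmp = MIN_EXPR <cond_2, cond_1>;
     if (and_tmp > 0)
       return (void *) &version_decl;

   Execution falls through to the returned basic block, where the condition
   for the next version can be emitted.  */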
31125
31126 static basic_block
31127 add_condition_to_bb (tree function_decl, tree version_decl,
31128 tree predicate_chain, basic_block new_bb)
31129 {
31130 gimple return_stmt;
31131 tree convert_expr, result_var;
31132 gimple convert_stmt;
31133 gimple call_cond_stmt;
31134 gimple if_else_stmt;
31135
31136 basic_block bb1, bb2, bb3;
31137 edge e12, e23;
31138
31139 tree cond_var, and_expr_var = NULL_TREE;
31140 gimple_seq gseq;
31141
31142 tree predicate_decl, predicate_arg;
31143
31144 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
31145
31146 gcc_assert (new_bb != NULL);
31147 gseq = bb_seq (new_bb);
31148
31149
31150 convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
31151 build_fold_addr_expr (version_decl));
31152 result_var = create_tmp_var (ptr_type_node, NULL);
31153 convert_stmt = gimple_build_assign (result_var, convert_expr);
31154 return_stmt = gimple_build_return (result_var);
31155
31156 if (predicate_chain == NULL_TREE)
31157 {
31158 gimple_seq_add_stmt (&gseq, convert_stmt);
31159 gimple_seq_add_stmt (&gseq, return_stmt);
31160 set_bb_seq (new_bb, gseq);
31161 gimple_set_bb (convert_stmt, new_bb);
31162 gimple_set_bb (return_stmt, new_bb);
31163 pop_cfun ();
31164 return new_bb;
31165 }
31166
31167 while (predicate_chain != NULL)
31168 {
31169 cond_var = create_tmp_var (integer_type_node, NULL);
31170 predicate_decl = TREE_PURPOSE (predicate_chain);
31171 predicate_arg = TREE_VALUE (predicate_chain);
31172 call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
31173 gimple_call_set_lhs (call_cond_stmt, cond_var);
31174
31175 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
31176 gimple_set_bb (call_cond_stmt, new_bb);
31177 gimple_seq_add_stmt (&gseq, call_cond_stmt);
31178
31179 predicate_chain = TREE_CHAIN (predicate_chain);
31180
31181 if (and_expr_var == NULL)
31182 and_expr_var = cond_var;
31183 else
31184 {
31185 gimple assign_stmt;
31186 	  /* Use MIN_EXPR to AND the predicate results; the result is zero if
31187 	     any of them is zero: and_expr_var = MIN_EXPR <cond_var, and_expr_var>.  */
31188 assign_stmt = gimple_build_assign (and_expr_var,
31189 build2 (MIN_EXPR, integer_type_node,
31190 cond_var, and_expr_var));
31191
31192 gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
31193 gimple_set_bb (assign_stmt, new_bb);
31194 gimple_seq_add_stmt (&gseq, assign_stmt);
31195 }
31196 }
31197
31198 if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
31199 integer_zero_node,
31200 NULL_TREE, NULL_TREE);
31201 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
31202 gimple_set_bb (if_else_stmt, new_bb);
31203 gimple_seq_add_stmt (&gseq, if_else_stmt);
31204
31205 gimple_seq_add_stmt (&gseq, convert_stmt);
31206 gimple_seq_add_stmt (&gseq, return_stmt);
31207 set_bb_seq (new_bb, gseq);
31208
31209 bb1 = new_bb;
31210 e12 = split_block (bb1, if_else_stmt);
31211 bb2 = e12->dest;
31212 e12->flags &= ~EDGE_FALLTHRU;
31213 e12->flags |= EDGE_TRUE_VALUE;
31214
31215 e23 = split_block (bb2, return_stmt);
31216
31217 gimple_set_bb (convert_stmt, bb2);
31218 gimple_set_bb (return_stmt, bb2);
31219
31220 bb3 = e23->dest;
31221 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
31222
31223 remove_edge (e23);
31224 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
31225
31226 pop_cfun ();
31227
31228 return bb3;
31229 }
31230
31231 /* This parses the attribute arguments to target in DECL and determines
31232 the right builtin to use to match the platform specification.
31233 It returns the priority value for this version decl. If PREDICATE_LIST
31234 is not NULL, it stores the list of cpu features that need to be checked
31235 before dispatching this function. */
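
/* For example (a sketch): a version declared with
   __attribute__ ((target ("arch=core2"))) yields a predicate chain that
   calls __builtin_cpu_is ("core2") and the priority P_PROC_SSSE3, while a
   plain "sse4.2" version yields a __builtin_cpu_supports ("sse4.2") check
   with priority P_SSE4_2, per the feature table and arch switch below.  */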
31236
31237 static unsigned int
31238 get_builtin_code_for_version (tree decl, tree *predicate_list)
31239 {
31240 tree attrs;
31241 struct cl_target_option cur_target;
31242 tree target_node;
31243 struct cl_target_option *new_target;
31244 const char *arg_str = NULL;
31245 const char *attrs_str = NULL;
31246 char *tok_str = NULL;
31247 char *token;
31248
31249 /* Priority of i386 features, greater value is higher priority. This is
31250 used to decide the order in which function dispatch must happen. For
31251 instance, a version specialized for SSE4.2 should be checked for dispatch
31252 before a version for SSE3, as SSE4.2 implies SSE3. */
31253 enum feature_priority
31254 {
31255 P_ZERO = 0,
31256 P_MMX,
31257 P_SSE,
31258 P_SSE2,
31259 P_SSE3,
31260 P_SSSE3,
31261 P_PROC_SSSE3,
31262 P_SSE4_A,
31263 P_PROC_SSE4_A,
31264 P_SSE4_1,
31265 P_SSE4_2,
31266 P_PROC_SSE4_2,
31267 P_POPCNT,
31268 P_AVX,
31269 P_PROC_AVX,
31270 P_FMA4,
31271 P_XOP,
31272 P_PROC_XOP,
31273 P_FMA,
31274 P_PROC_FMA,
31275 P_AVX2,
31276 P_PROC_AVX2
31277 };
31278
31279 enum feature_priority priority = P_ZERO;
31280
31281 /* These are the target attribute strings for which a dispatcher is
31282 available, from fold_builtin_cpu. */
31283
31284 static struct _feature_list
31285 {
31286 const char *const name;
31287 const enum feature_priority priority;
31288 }
31289 const feature_list[] =
31290 {
31291 {"mmx", P_MMX},
31292 {"sse", P_SSE},
31293 {"sse2", P_SSE2},
31294 {"sse3", P_SSE3},
31295 {"sse4a", P_SSE4_A},
31296 {"ssse3", P_SSSE3},
31297 {"sse4.1", P_SSE4_1},
31298 {"sse4.2", P_SSE4_2},
31299 {"popcnt", P_POPCNT},
31300 {"avx", P_AVX},
31301 {"fma4", P_FMA4},
31302 {"xop", P_XOP},
31303 {"fma", P_FMA},
31304 {"avx2", P_AVX2}
31305 };
31306
31307
31308 static unsigned int NUM_FEATURES
31309 = sizeof (feature_list) / sizeof (struct _feature_list);
31310
31311 unsigned int i;
31312
31313 tree predicate_chain = NULL_TREE;
31314 tree predicate_decl, predicate_arg;
31315
31316 attrs = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
31317 gcc_assert (attrs != NULL);
31318
31319 attrs = TREE_VALUE (TREE_VALUE (attrs));
31320
31321 gcc_assert (TREE_CODE (attrs) == STRING_CST);
31322 attrs_str = TREE_STRING_POINTER (attrs);
31323
31324 /* Return priority zero for default function. */
31325 if (strcmp (attrs_str, "default") == 0)
31326 return 0;
31327
31328 /* Handle arch= if specified. For priority, set it to be 1 more than
31329 the best instruction set the processor can handle. For instance, if
31330 there is a version for atom and a version for ssse3 (the highest ISA
31331 priority for atom), the atom version must be checked for dispatch
31332 before the ssse3 version. */
31333 if (strstr (attrs_str, "arch=") != NULL)
31334 {
31335 cl_target_option_save (&cur_target, &global_options);
31336 target_node = ix86_valid_target_attribute_tree (attrs, &global_options,
31337 &global_options_set);
31338
31339 gcc_assert (target_node);
31340 new_target = TREE_TARGET_OPTION (target_node);
31341 gcc_assert (new_target);
31342
31343 if (new_target->arch_specified && new_target->arch > 0)
31344 {
31345 switch (new_target->arch)
31346 {
31347 case PROCESSOR_CORE2:
31348 arg_str = "core2";
31349 priority = P_PROC_SSSE3;
31350 break;
31351 case PROCESSOR_NEHALEM:
31352 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AES)
31353 arg_str = "westmere";
31354 else
31355 		/* We translate "arch=corei7" and "arch=nehalem" to
31356 		   "corei7" so that they map to the M_INTEL_COREI7 cpu
31357 		   type, covering all M_INTEL_COREI7_XXX subtypes.  */
31358 arg_str = "corei7";
31359 priority = P_PROC_SSE4_2;
31360 break;
31361 case PROCESSOR_SANDYBRIDGE:
31362 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_F16C)
31363 arg_str = "ivybridge";
31364 else
31365 arg_str = "sandybridge";
31366 priority = P_PROC_AVX;
31367 break;
31368 case PROCESSOR_HASWELL:
31369 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_ADX)
31370 arg_str = "broadwell";
31371 else
31372 arg_str = "haswell";
31373 priority = P_PROC_AVX2;
31374 break;
31375 case PROCESSOR_BONNELL:
31376 arg_str = "bonnell";
31377 priority = P_PROC_SSSE3;
31378 break;
31379 case PROCESSOR_SILVERMONT:
31380 arg_str = "silvermont";
31381 priority = P_PROC_SSE4_2;
31382 break;
31383 case PROCESSOR_AMDFAM10:
31384 arg_str = "amdfam10h";
31385 priority = P_PROC_SSE4_A;
31386 break;
31387 case PROCESSOR_BTVER1:
31388 arg_str = "btver1";
31389 priority = P_PROC_SSE4_A;
31390 break;
31391 case PROCESSOR_BTVER2:
31392 arg_str = "btver2";
31393 priority = P_PROC_AVX;
31394 break;
31395 case PROCESSOR_BDVER1:
31396 arg_str = "bdver1";
31397 priority = P_PROC_XOP;
31398 break;
31399 case PROCESSOR_BDVER2:
31400 arg_str = "bdver2";
31401 priority = P_PROC_FMA;
31402 break;
31403 case PROCESSOR_BDVER3:
31404 arg_str = "bdver3";
31405 priority = P_PROC_FMA;
31406 break;
31407 case PROCESSOR_BDVER4:
31408 arg_str = "bdver4";
31409 priority = P_PROC_AVX2;
31410 break;
31411 }
31412 }
31413
31414 cl_target_option_restore (&global_options, &cur_target);
31415
31416 if (predicate_list && arg_str == NULL)
31417 {
31418 error_at (DECL_SOURCE_LOCATION (decl),
31419 "No dispatcher found for the versioning attributes");
31420 return 0;
31421 }
31422
31423 if (predicate_list)
31424 {
31425 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_IS];
31426 /* For a C string literal the length includes the trailing NULL. */
31427 predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
31428 predicate_chain = tree_cons (predicate_decl, predicate_arg,
31429 predicate_chain);
31430 }
31431 }
31432
31433 /* Process feature name. */
31434 tok_str = (char *) xmalloc (strlen (attrs_str) + 1);
31435 strcpy (tok_str, attrs_str);
31436 token = strtok (tok_str, ",");
31437 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_SUPPORTS];
31438
31439 while (token != NULL)
31440 {
31441       /* Do not process "arch="; it was handled above.  */
31442 if (strncmp (token, "arch=", 5) == 0)
31443 {
31444 token = strtok (NULL, ",");
31445 continue;
31446 }
31447 for (i = 0; i < NUM_FEATURES; ++i)
31448 {
31449 if (strcmp (token, feature_list[i].name) == 0)
31450 {
31451 if (predicate_list)
31452 {
31453 predicate_arg = build_string_literal (
31454 strlen (feature_list[i].name) + 1,
31455 feature_list[i].name);
31456 predicate_chain = tree_cons (predicate_decl, predicate_arg,
31457 predicate_chain);
31458 }
31459 /* Find the maximum priority feature. */
31460 if (feature_list[i].priority > priority)
31461 priority = feature_list[i].priority;
31462
31463 break;
31464 }
31465 }
31466 if (predicate_list && i == NUM_FEATURES)
31467 {
31468 error_at (DECL_SOURCE_LOCATION (decl),
31469 "No dispatcher found for %s", token);
31470 return 0;
31471 }
31472 token = strtok (NULL, ",");
31473 }
31474 free (tok_str);
31475
31476 if (predicate_list && predicate_chain == NULL_TREE)
31477 {
31478 error_at (DECL_SOURCE_LOCATION (decl),
31479 "No dispatcher found for the versioning attributes : %s",
31480 attrs_str);
31481 return 0;
31482 }
31483 else if (predicate_list)
31484 {
31485 predicate_chain = nreverse (predicate_chain);
31486 *predicate_list = predicate_chain;
31487 }
31488
31489 return priority;
31490 }
31491
31492 /* This compares the priority of target features in function DECL1
31493 and DECL2. It returns positive value if DECL1 is higher priority,
31494 negative value if DECL2 is higher priority and 0 if they are the
31495 same. */
31496
31497 static int
31498 ix86_compare_version_priority (tree decl1, tree decl2)
31499 {
31500 unsigned int priority1 = get_builtin_code_for_version (decl1, NULL);
31501 unsigned int priority2 = get_builtin_code_for_version (decl2, NULL);
31502
31503 return (int)priority1 - (int)priority2;
31504 }
31505
31506 /* V1 and V2 point to function versions with different priorities
31507 based on the target ISA. This function compares their priorities. */
31508
31509 static int
31510 feature_compare (const void *v1, const void *v2)
31511 {
31512 typedef struct _function_version_info
31513 {
31514 tree version_decl;
31515 tree predicate_chain;
31516 unsigned int dispatch_priority;
31517 } function_version_info;
31518
31519 const function_version_info c1 = *(const function_version_info *)v1;
31520 const function_version_info c2 = *(const function_version_info *)v2;
31521 return (c2.dispatch_priority - c1.dispatch_priority);
31522 }
31523
31524 /* This function generates the dispatch function for
31525 multi-versioned functions. DISPATCH_DECL is the function which will
31526    contain the dispatch logic.  FNDECLS is a vector of the function choices
31527    for dispatch.  EMPTY_BB is the basic block pointer
31528 in DISPATCH_DECL in which the dispatch code is generated. */
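
/* A user-level sketch of the input that reaches this point (C++ function
   multiversioning, hypothetical names):

     __attribute__ ((target ("default"))) int foo () { return 0; }
     __attribute__ ((target ("sse4.2"))) int foo () { return 1; }
     __attribute__ ((target ("arch=corei7"))) int foo () { return 2; }

   The resolver body generated here first calls __builtin_cpu_init, then
   tests the versions in decreasing priority order and returns the address
   of the first one whose predicates hold, with the default version last.  */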
31529
31530 static int
31531 dispatch_function_versions (tree dispatch_decl,
31532 void *fndecls_p,
31533 basic_block *empty_bb)
31534 {
31535 tree default_decl;
31536 gimple ifunc_cpu_init_stmt;
31537 gimple_seq gseq;
31538 int ix;
31539 tree ele;
31540 vec<tree> *fndecls;
31541 unsigned int num_versions = 0;
31542 unsigned int actual_versions = 0;
31543 unsigned int i;
31544
31545 struct _function_version_info
31546 {
31547 tree version_decl;
31548 tree predicate_chain;
31549 unsigned int dispatch_priority;
31550   } *function_version_info;
31551
31552 gcc_assert (dispatch_decl != NULL
31553 && fndecls_p != NULL
31554 && empty_bb != NULL);
31555
31556   /* fndecls_p is actually a vector.  */
31557 fndecls = static_cast<vec<tree> *> (fndecls_p);
31558
31559 /* At least one more version other than the default. */
31560 num_versions = fndecls->length ();
31561 gcc_assert (num_versions >= 2);
31562
31563 function_version_info = (struct _function_version_info *)
31564 XNEWVEC (struct _function_version_info, (num_versions - 1));
31565
31566 /* The first version in the vector is the default decl. */
31567 default_decl = (*fndecls)[0];
31568
31569 push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));
31570
31571 gseq = bb_seq (*empty_bb);
31572 /* Function version dispatch is via IFUNC. IFUNC resolvers fire before
31573      constructors, so explicitly call __builtin_cpu_init here.  */
31574 ifunc_cpu_init_stmt = gimple_build_call_vec (
31575 ix86_builtins [(int) IX86_BUILTIN_CPU_INIT], vNULL);
31576 gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
31577 gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
31578 set_bb_seq (*empty_bb, gseq);
31579
31580 pop_cfun ();
31581
31582
31583 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
31584 {
31585 tree version_decl = ele;
31586 tree predicate_chain = NULL_TREE;
31587 unsigned int priority;
31588 /* Get attribute string, parse it and find the right predicate decl.
31589 The predicate function could be a lengthy combination of many
31590 features, like arch-type and various isa-variants. */
31591 priority = get_builtin_code_for_version (version_decl,
31592 &predicate_chain);
31593
31594 if (predicate_chain == NULL_TREE)
31595 continue;
31596
31597 function_version_info [actual_versions].version_decl = version_decl;
31598 function_version_info [actual_versions].predicate_chain
31599 = predicate_chain;
31600 function_version_info [actual_versions].dispatch_priority = priority;
31601 actual_versions++;
31602 }
31603
31604 /* Sort the versions according to descending order of dispatch priority. The
31605 priority is based on the ISA. This is not a perfect solution. There
31606 could still be ambiguity. If more than one function version is suitable
31607 to execute, which one should be dispatched? In future, allow the user
31608 to specify a dispatch priority next to the version. */
31609 qsort (function_version_info, actual_versions,
31610 sizeof (struct _function_version_info), feature_compare);
31611
31612 for (i = 0; i < actual_versions; ++i)
31613 *empty_bb = add_condition_to_bb (dispatch_decl,
31614 function_version_info[i].version_decl,
31615 function_version_info[i].predicate_chain,
31616 *empty_bb);
31617
31618   /* Dispatch the default version at the end.  */
31619 *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
31620 NULL, *empty_bb);
31621
31622 free (function_version_info);
31623 return 0;
31624 }
31625
31626 /* Comparator function used by qsort to sort the attribute specification
31627    strings for the "target" attribute.  */
31628
31629 static int
31630 attr_strcmp (const void *v1, const void *v2)
31631 {
31632 const char *c1 = *(char *const*)v1;
31633 const char *c2 = *(char *const*)v2;
31634 return strcmp (c1, c2);
31635 }
31636
31637 /* ARGLIST is the argument list of the target attribute.  This function
31638    tokenizes the comma separated arguments, sorts them and returns a
31639    string which is a unique identifier for those arguments.  It also
31640    replaces the non-identifier characters "=,-" with "_".  */
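
/* For example (a sketch): the attribute arguments "avx2" and "arch=core2"
   are first joined into "avx2,arch_core2", then tokenized, sorted and
   rejoined as "arch_core2_avx2".  */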
31641
31642 static char *
31643 sorted_attr_string (tree arglist)
31644 {
31645 tree arg;
31646 size_t str_len_sum = 0;
31647 char **args = NULL;
31648 char *attr_str, *ret_str;
31649 char *attr = NULL;
31650 unsigned int argnum = 1;
31651 unsigned int i;
31652
31653 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
31654 {
31655 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
31656 size_t len = strlen (str);
31657 str_len_sum += len + 1;
31658 if (arg != arglist)
31659 argnum++;
31660 for (i = 0; i < strlen (str); i++)
31661 if (str[i] == ',')
31662 argnum++;
31663 }
31664
31665 attr_str = XNEWVEC (char, str_len_sum);
31666 str_len_sum = 0;
31667 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
31668 {
31669 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
31670 size_t len = strlen (str);
31671 memcpy (attr_str + str_len_sum, str, len);
31672 attr_str[str_len_sum + len] = TREE_CHAIN (arg) ? ',' : '\0';
31673 str_len_sum += len + 1;
31674 }
31675
31676   /* Replace "=" and "-" with "_".  */
31677 for (i = 0; i < strlen (attr_str); i++)
31678 if (attr_str[i] == '=' || attr_str[i]== '-')
31679 attr_str[i] = '_';
31680
31681 if (argnum == 1)
31682 return attr_str;
31683
31684 args = XNEWVEC (char *, argnum);
31685
31686 i = 0;
31687 attr = strtok (attr_str, ",");
31688 while (attr != NULL)
31689 {
31690 args[i] = attr;
31691 i++;
31692 attr = strtok (NULL, ",");
31693 }
31694
31695 qsort (args, argnum, sizeof (char *), attr_strcmp);
31696
31697 ret_str = XNEWVEC (char, str_len_sum);
31698 str_len_sum = 0;
31699 for (i = 0; i < argnum; i++)
31700 {
31701 size_t len = strlen (args[i]);
31702 memcpy (ret_str + str_len_sum, args[i], len);
31703 ret_str[str_len_sum + len] = i < argnum - 1 ? '_' : '\0';
31704 str_len_sum += len + 1;
31705 }
31706
31707 XDELETEVEC (args);
31708 XDELETEVEC (attr_str);
31709 return ret_str;
31710 }
31711
31712 /* This function changes the assembler name of function versions.  If
31713    DECL is a function version and has a "target" attribute, the sorted
31714    attribute string is appended to its assembler name.  */
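
/* For example (a sketch, using C++ mangled names): a version of foo ()
   declared with __attribute__ ((target ("arch=core2"))) gets the assembler
   name "_Z3foov.arch_core2", while the "default" version keeps plain
   "_Z3foov".  */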
31715
31716 static tree
31717 ix86_mangle_function_version_assembler_name (tree decl, tree id)
31718 {
31719 tree version_attr;
31720 const char *orig_name, *version_string;
31721 char *attr_str, *assembler_name;
31722
31723 if (DECL_DECLARED_INLINE_P (decl)
31724 && lookup_attribute ("gnu_inline",
31725 DECL_ATTRIBUTES (decl)))
31726 error_at (DECL_SOURCE_LOCATION (decl),
31727 "Function versions cannot be marked as gnu_inline,"
31728 " bodies have to be generated");
31729
31730 if (DECL_VIRTUAL_P (decl)
31731 || DECL_VINDEX (decl))
31732 sorry ("Virtual function multiversioning not supported");
31733
31734 version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
31735
31736   /* The target attribute string cannot be NULL.  */
31737 gcc_assert (version_attr != NULL_TREE);
31738
31739 orig_name = IDENTIFIER_POINTER (id);
31740 version_string
31741 = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr)));
31742
31743 if (strcmp (version_string, "default") == 0)
31744 return id;
31745
31746 attr_str = sorted_attr_string (TREE_VALUE (version_attr));
31747 assembler_name = XNEWVEC (char, strlen (orig_name) + strlen (attr_str) + 2);
31748
31749 sprintf (assembler_name, "%s.%s", orig_name, attr_str);
31750
31751 /* Allow assembler name to be modified if already set. */
31752 if (DECL_ASSEMBLER_NAME_SET_P (decl))
31753 SET_DECL_RTL (decl, NULL);
31754
31755 tree ret = get_identifier (assembler_name);
31756 XDELETEVEC (attr_str);
31757 XDELETEVEC (assembler_name);
31758 return ret;
31759 }
31760
31761 /* This function returns true if FN1 and FN2 are versions of the same function,
31762 that is, the target strings of the function decls are different. This assumes
31763 that FN1 and FN2 have the same signature. */
31764
31765 static bool
31766 ix86_function_versions (tree fn1, tree fn2)
31767 {
31768 tree attr1, attr2;
31769 char *target1, *target2;
31770 bool result;
31771
31772 if (TREE_CODE (fn1) != FUNCTION_DECL
31773 || TREE_CODE (fn2) != FUNCTION_DECL)
31774 return false;
31775
31776 attr1 = lookup_attribute ("target", DECL_ATTRIBUTES (fn1));
31777 attr2 = lookup_attribute ("target", DECL_ATTRIBUTES (fn2));
31778
31779 /* At least one function decl should have the target attribute specified. */
31780 if (attr1 == NULL_TREE && attr2 == NULL_TREE)
31781 return false;
31782
31783 /* Diagnose missing target attribute if one of the decls is already
31784 multi-versioned. */
31785 if (attr1 == NULL_TREE || attr2 == NULL_TREE)
31786 {
31787 if (DECL_FUNCTION_VERSIONED (fn1) || DECL_FUNCTION_VERSIONED (fn2))
31788 {
31789 if (attr2 != NULL_TREE)
31790 {
31791 tree tem = fn1;
31792 fn1 = fn2;
31793 fn2 = tem;
31794 attr1 = attr2;
31795 }
31796 error_at (DECL_SOURCE_LOCATION (fn2),
31797 "missing %<target%> attribute for multi-versioned %D",
31798 fn2);
31799 inform (DECL_SOURCE_LOCATION (fn1),
31800 "previous declaration of %D", fn1);
31801 /* Prevent diagnosing of the same error multiple times. */
31802 DECL_ATTRIBUTES (fn2)
31803 = tree_cons (get_identifier ("target"),
31804 copy_node (TREE_VALUE (attr1)),
31805 DECL_ATTRIBUTES (fn2));
31806 }
31807 return false;
31808 }
31809
31810 target1 = sorted_attr_string (TREE_VALUE (attr1));
31811 target2 = sorted_attr_string (TREE_VALUE (attr2));
31812
31813 /* The sorted target strings must be different for fn1 and fn2
31814 to be versions. */
31815 if (strcmp (target1, target2) == 0)
31816 result = false;
31817 else
31818 result = true;
31819
31820 XDELETEVEC (target1);
31821 XDELETEVEC (target2);
31822
31823 return result;
31824 }
31825
31826 static tree
31827 ix86_mangle_decl_assembler_name (tree decl, tree id)
31828 {
31829 /* For function version, add the target suffix to the assembler name. */
31830 if (TREE_CODE (decl) == FUNCTION_DECL
31831 && DECL_FUNCTION_VERSIONED (decl))
31832 id = ix86_mangle_function_version_assembler_name (decl, id);
31833 #ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
31834 id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id);
31835 #endif
31836
31837 return id;
31838 }
31839
31840 /* Return a new name by appending SUFFIX to the DECL name.  If MAKE_UNIQUE
31841    is true, also append a unique name derived from the source file.  */
31842
31843 static char *
31844 make_name (tree decl, const char *suffix, bool make_unique)
31845 {
31846 char *global_var_name;
31847 int name_len;
31848 const char *name;
31849 const char *unique_name = NULL;
31850
31851 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
31852
31853 /* Get a unique name that can be used globally without any chances
31854 of collision at link time. */
31855 if (make_unique)
31856 unique_name = IDENTIFIER_POINTER (get_file_function_name ("\0"));
31857
31858 name_len = strlen (name) + strlen (suffix) + 2;
31859
31860 if (make_unique)
31861 name_len += strlen (unique_name) + 1;
31862 global_var_name = XNEWVEC (char, name_len);
31863
31864 /* Use '.' to concatenate names as it is demangler friendly. */
31865 if (make_unique)
31866 snprintf (global_var_name, name_len, "%s.%s.%s", name, unique_name,
31867 suffix);
31868 else
31869 snprintf (global_var_name, name_len, "%s.%s", name, suffix);
31870
31871 return global_var_name;
31872 }
31873
31874 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
31875
31876 /* Make a dispatcher declaration for the multi-versioned function DECL.
31877 Calls to DECL function will be replaced with calls to the dispatcher
31878 by the front-end. Return the decl created. */
31879
31880 static tree
31881 make_dispatcher_decl (const tree decl)
31882 {
31883 tree func_decl;
31884 char *func_name;
31885 tree fn_type, func_type;
31886 bool is_uniq = false;
31887
31888 if (TREE_PUBLIC (decl) == 0)
31889 is_uniq = true;
31890
31891 func_name = make_name (decl, "ifunc", is_uniq);
31892
31893 fn_type = TREE_TYPE (decl);
31894 func_type = build_function_type (TREE_TYPE (fn_type),
31895 TYPE_ARG_TYPES (fn_type));
31896
31897 func_decl = build_fn_decl (func_name, func_type);
31898 XDELETEVEC (func_name);
31899 TREE_USED (func_decl) = 1;
31900 DECL_CONTEXT (func_decl) = NULL_TREE;
31901 DECL_INITIAL (func_decl) = error_mark_node;
31902 DECL_ARTIFICIAL (func_decl) = 1;
31903   /* Mark this function as external; the resolver will flip it again if
31904      it gets generated.  */
31905 DECL_EXTERNAL (func_decl) = 1;
31906   /* This will be an IFUNC; IFUNCs have to be externally visible.  */
31907 TREE_PUBLIC (func_decl) = 1;
31908
31909 return func_decl;
31910 }
31911
31912 #endif
31913
31914 /* Returns true if DECL is multi-versioned and is the default version,
31915    that is, it is not tagged with a target specific optimization.  */
31916
31917 static bool
31918 is_function_default_version (const tree decl)
31919 {
31920 if (TREE_CODE (decl) != FUNCTION_DECL
31921 || !DECL_FUNCTION_VERSIONED (decl))
31922 return false;
31923 tree attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
31924 gcc_assert (attr);
31925 attr = TREE_VALUE (TREE_VALUE (attr));
31926 return (TREE_CODE (attr) == STRING_CST
31927 && strcmp (TREE_STRING_POINTER (attr), "default") == 0);
31928 }
31929
31930 /* Make a dispatcher declaration for the multi-versioned function DECL.
31931 Calls to DECL function will be replaced with calls to the dispatcher
31932 by the front-end. Returns the decl of the dispatcher function. */
31933
31934 static tree
31935 ix86_get_function_versions_dispatcher (void *decl)
31936 {
31937 tree fn = (tree) decl;
31938 struct cgraph_node *node = NULL;
31939 struct cgraph_node *default_node = NULL;
31940 struct cgraph_function_version_info *node_v = NULL;
31941 struct cgraph_function_version_info *first_v = NULL;
31942
31943 tree dispatch_decl = NULL;
31944
31945 struct cgraph_function_version_info *default_version_info = NULL;
31946
31947 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
31948
31949 node = cgraph_get_node (fn);
31950 gcc_assert (node != NULL);
31951
31952 node_v = get_cgraph_node_version (node);
31953 gcc_assert (node_v != NULL);
31954
31955 if (node_v->dispatcher_resolver != NULL)
31956 return node_v->dispatcher_resolver;
31957
31958 /* Find the default version and make it the first node. */
31959 first_v = node_v;
31960 /* Go to the beginning of the chain. */
31961 while (first_v->prev != NULL)
31962 first_v = first_v->prev;
31963 default_version_info = first_v;
31964 while (default_version_info != NULL)
31965 {
31966 if (is_function_default_version
31967 (default_version_info->this_node->decl))
31968 break;
31969 default_version_info = default_version_info->next;
31970 }
31971
31972 /* If there is no default node, just return NULL. */
31973 if (default_version_info == NULL)
31974 return NULL;
31975
31976 /* Make default info the first node. */
31977 if (first_v != default_version_info)
31978 {
31979 default_version_info->prev->next = default_version_info->next;
31980 if (default_version_info->next)
31981 default_version_info->next->prev = default_version_info->prev;
31982 first_v->prev = default_version_info;
31983 default_version_info->next = first_v;
31984 default_version_info->prev = NULL;
31985 }
31986
31987 default_node = default_version_info->this_node;
31988
31989 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
31990 if (targetm.has_ifunc_p ())
31991 {
31992 struct cgraph_function_version_info *it_v = NULL;
31993 struct cgraph_node *dispatcher_node = NULL;
31994 struct cgraph_function_version_info *dispatcher_version_info = NULL;
31995
31996 /* Right now, the dispatching is done via ifunc. */
31997 dispatch_decl = make_dispatcher_decl (default_node->decl);
31998
31999 dispatcher_node = cgraph_get_create_node (dispatch_decl);
32000 gcc_assert (dispatcher_node != NULL);
32001 dispatcher_node->dispatcher_function = 1;
32002 dispatcher_version_info
32003 = insert_new_cgraph_node_version (dispatcher_node);
32004 dispatcher_version_info->next = default_version_info;
32005 dispatcher_node->definition = 1;
32006
32007 /* Set the dispatcher for all the versions. */
32008 it_v = default_version_info;
32009 while (it_v != NULL)
32010 {
32011 it_v->dispatcher_resolver = dispatch_decl;
32012 it_v = it_v->next;
32013 }
32014 }
32015 else
32016 #endif
32017 {
32018 error_at (DECL_SOURCE_LOCATION (default_node->decl),
32019 "multiversioning needs ifunc which is not supported "
32020 "on this target");
32021 }
32022
32023 return dispatch_decl;
32024 }
32025
32026 /* Makes a function attribute of the form NAME(ARG_NAME) and chains
32027 it to CHAIN. */
32028
32029 static tree
32030 make_attribute (const char *name, const char *arg_name, tree chain)
32031 {
32032 tree attr_name;
32033 tree attr_arg_name;
32034 tree attr_args;
32035 tree attr;
32036
32037 attr_name = get_identifier (name);
32038 attr_arg_name = build_string (strlen (arg_name), arg_name);
32039 attr_args = tree_cons (NULL_TREE, attr_arg_name, NULL_TREE);
32040 attr = tree_cons (attr_name, attr_args, chain);
32041 return attr;
32042 }
32043
32044 /* Make the resolver function decl to dispatch the versions of
32045 a multi-versioned function, DEFAULT_DECL. Create an
32046 empty basic block in the resolver and store the pointer in
32047 EMPTY_BB. Return the decl of the resolver function. */
32048
32049 static tree
32050 make_resolver_func (const tree default_decl,
32051 const tree dispatch_decl,
32052 basic_block *empty_bb)
32053 {
32054 char *resolver_name;
32055 tree decl, type, decl_name, t;
32056 bool is_uniq = false;
32057
32058   /* IFUNCs have to be globally visible.  So, if the default_decl is
32059 not, then the name of the IFUNC should be made unique. */
32060 if (TREE_PUBLIC (default_decl) == 0)
32061 is_uniq = true;
32062
32063 /* Append the filename to the resolver function if the versions are
32064 not externally visible. This is because the resolver function has
32065 to be externally visible for the loader to find it. So, appending
32066 the filename will prevent conflicts with a resolver function from
32067 another module which is based on the same version name. */
32068 resolver_name = make_name (default_decl, "resolver", is_uniq);
32069
32070 /* The resolver function should return a (void *). */
32071 type = build_function_type_list (ptr_type_node, NULL_TREE);
32072
32073 decl = build_fn_decl (resolver_name, type);
32074 decl_name = get_identifier (resolver_name);
32075 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
32076
32077 DECL_NAME (decl) = decl_name;
32078 TREE_USED (decl) = 1;
32079 DECL_ARTIFICIAL (decl) = 1;
32080 DECL_IGNORED_P (decl) = 0;
32081 /* IFUNC resolvers have to be externally visible. */
32082 TREE_PUBLIC (decl) = 1;
32083 DECL_UNINLINABLE (decl) = 1;
32084
32085   /* Resolver is not external; its body is generated.  */
32086 DECL_EXTERNAL (decl) = 0;
32087 DECL_EXTERNAL (dispatch_decl) = 0;
32088
32089 DECL_CONTEXT (decl) = NULL_TREE;
32090 DECL_INITIAL (decl) = make_node (BLOCK);
32091 DECL_STATIC_CONSTRUCTOR (decl) = 0;
32092
32093 if (DECL_COMDAT_GROUP (default_decl)
32094 || TREE_PUBLIC (default_decl))
32095 {
32096 /* In this case, each translation unit with a call to this
32097 versioned function will put out a resolver. Ensure it
32098 is comdat to keep just one copy. */
32099 DECL_COMDAT (decl) = 1;
32100 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
32101 }
32102 /* Build result decl and add to function_decl. */
32103 t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
32104 DECL_ARTIFICIAL (t) = 1;
32105 DECL_IGNORED_P (t) = 1;
32106 DECL_RESULT (decl) = t;
32107
32108 gimplify_function_tree (decl);
32109 push_cfun (DECL_STRUCT_FUNCTION (decl));
32110 *empty_bb = init_lowered_empty_function (decl, false);
32111
32112 cgraph_add_new_function (decl, true);
32113 cgraph_call_function_insertion_hooks (cgraph_get_create_node (decl));
32114
32115 pop_cfun ();
32116
32117 gcc_assert (dispatch_decl != NULL);
32118 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
32119 DECL_ATTRIBUTES (dispatch_decl)
32120 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
32121
32122 /* Create the alias for dispatch to resolver here. */
32123 /*cgraph_create_function_alias (dispatch_decl, decl);*/
32124 cgraph_same_body_alias (NULL, dispatch_decl, decl);
32125 XDELETEVEC (resolver_name);
32126 return decl;
32127 }
32128
32129 /* Generate the dispatching code body to dispatch multi-versioned function
32130 DECL. The target hook is called to process the "target" attributes and
32131 provide the code to dispatch the right function at run-time. NODE points
32132 to the dispatcher decl whose body will be created. */
32133
32134 static tree
32135 ix86_generate_version_dispatcher_body (void *node_p)
32136 {
32137 tree resolver_decl;
32138 basic_block empty_bb;
32139 tree default_ver_decl;
32140 struct cgraph_node *versn;
32141 struct cgraph_node *node;
32142
32143 struct cgraph_function_version_info *node_version_info = NULL;
32144 struct cgraph_function_version_info *versn_info = NULL;
32145
32146 node = (cgraph_node *)node_p;
32147
32148 node_version_info = get_cgraph_node_version (node);
32149 gcc_assert (node->dispatcher_function
32150 && node_version_info != NULL);
32151
32152 if (node_version_info->dispatcher_resolver)
32153 return node_version_info->dispatcher_resolver;
32154
32155 /* The first version in the chain corresponds to the default version. */
32156 default_ver_decl = node_version_info->next->this_node->decl;
32157
32158 /* node is going to be an alias, so remove the finalized bit. */
32159 node->definition = false;
32160
32161 resolver_decl = make_resolver_func (default_ver_decl,
32162 node->decl, &empty_bb);
32163
32164 node_version_info->dispatcher_resolver = resolver_decl;
32165
32166 push_cfun (DECL_STRUCT_FUNCTION (resolver_decl));
32167
32168 auto_vec<tree, 2> fn_ver_vec;
32169
32170 for (versn_info = node_version_info->next; versn_info;
32171 versn_info = versn_info->next)
32172 {
32173 versn = versn_info->this_node;
32174 /* Check for virtual functions here again, as by this time it should
32175 have been determined if this function needs a vtable index or
32176 not. This happens for methods in derived classes that override
32177 virtual methods in base classes but are not explicitly marked as
32178 virtual. */
32179 if (DECL_VINDEX (versn->decl))
32180 sorry ("Virtual function multiversioning not supported");
32181
32182 fn_ver_vec.safe_push (versn->decl);
32183 }
32184
32185 dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
32186 rebuild_cgraph_edges ();
32187 pop_cfun ();
32188 return resolver_decl;
32189 }
32190 /* This builds the processor_model struct type defined in
32191    libgcc/config/i386/cpuinfo.c.  */
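
/* The layout built here mirrors the libgcc definition, roughly:

     struct __processor_model
     {
       unsigned int __cpu_vendor;
       unsigned int __cpu_type;
       unsigned int __cpu_subtype;
       unsigned int __cpu_features[1];
     };  */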
32192
32193 static tree
32194 build_processor_model_struct (void)
32195 {
32196 const char *field_name[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
32197 "__cpu_features"};
32198 tree field = NULL_TREE, field_chain = NULL_TREE;
32199 int i;
32200 tree type = make_node (RECORD_TYPE);
32201
32202 /* The first 3 fields are unsigned int. */
32203 for (i = 0; i < 3; ++i)
32204 {
32205 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
32206 get_identifier (field_name[i]), unsigned_type_node);
32207 if (field_chain != NULL_TREE)
32208 DECL_CHAIN (field) = field_chain;
32209 field_chain = field;
32210 }
32211
32212 /* The last field is an array of unsigned integers of size one. */
32213 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
32214 get_identifier (field_name[3]),
32215 build_array_type (unsigned_type_node,
32216 build_index_type (size_one_node)));
32217 if (field_chain != NULL_TREE)
32218 DECL_CHAIN (field) = field_chain;
32219 field_chain = field;
32220
32221 finish_builtin_struct (type, "__processor_model", field_chain, NULL_TREE);
32222 return type;
32223 }
32224
32225 /* Returns an extern, comdat VAR_DECL of type TYPE and name NAME.  */
32226
32227 static tree
32228 make_var_decl (tree type, const char *name)
32229 {
32230 tree new_decl;
32231
32232 new_decl = build_decl (UNKNOWN_LOCATION,
32233 VAR_DECL,
32234 get_identifier(name),
32235 type);
32236
32237 DECL_EXTERNAL (new_decl) = 1;
32238 TREE_STATIC (new_decl) = 1;
32239 TREE_PUBLIC (new_decl) = 1;
32240 DECL_INITIAL (new_decl) = 0;
32241 DECL_ARTIFICIAL (new_decl) = 0;
32242 DECL_PRESERVE_P (new_decl) = 1;
32243
32244 make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl));
32245 assemble_variable (new_decl, 0, 0, 0);
32246
32247 return new_decl;
32248 }
32249
32250 /* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call, folded into
32251    a check against the __cpu_model variable from libgcc/config/i386/cpuinfo.c.  */
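
/* Typical calls that reach this folding (a sketch):

     if (__builtin_cpu_is ("corei7")) ...
     if (__builtin_cpu_supports ("avx2")) ...

   Both are lowered to reads of the __cpu_model variable that libgcc's
   __cpu_indicator_init constructor fills in at program startup.  */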
32252
32253 static tree
32254 fold_builtin_cpu (tree fndecl, tree *args)
32255 {
32256 unsigned int i;
32257 enum ix86_builtins fn_code = (enum ix86_builtins)
32258 DECL_FUNCTION_CODE (fndecl);
32259 tree param_string_cst = NULL;
32260
32261 /* This is the order of bit-fields in __processor_features in cpuinfo.c */
32262 enum processor_features
32263 {
32264 F_CMOV = 0,
32265 F_MMX,
32266 F_POPCNT,
32267 F_SSE,
32268 F_SSE2,
32269 F_SSE3,
32270 F_SSSE3,
32271 F_SSE4_1,
32272 F_SSE4_2,
32273 F_AVX,
32274 F_AVX2,
32275 F_SSE4_A,
32276 F_FMA4,
32277 F_XOP,
32278 F_FMA,
32279 F_MAX
32280 };
32281
32282   /* These are the values for vendor types and cpu types and subtypes
32283      in cpuinfo.c.  The corresponding start value must be subtracted
32284      from cpu types and subtypes before indexing the struct field.  */
32285 enum processor_model
32286 {
32287 M_INTEL = 1,
32288 M_AMD,
32289 M_CPU_TYPE_START,
32290 M_INTEL_BONNELL,
32291 M_INTEL_CORE2,
32292 M_INTEL_COREI7,
32293 M_AMDFAM10H,
32294 M_AMDFAM15H,
32295 M_INTEL_SILVERMONT,
32296 M_AMD_BTVER1,
32297 M_AMD_BTVER2,
32298 M_CPU_SUBTYPE_START,
32299 M_INTEL_COREI7_NEHALEM,
32300 M_INTEL_COREI7_WESTMERE,
32301 M_INTEL_COREI7_SANDYBRIDGE,
32302 M_AMDFAM10H_BARCELONA,
32303 M_AMDFAM10H_SHANGHAI,
32304 M_AMDFAM10H_ISTANBUL,
32305 M_AMDFAM15H_BDVER1,
32306 M_AMDFAM15H_BDVER2,
32307 M_AMDFAM15H_BDVER3,
32308 M_AMDFAM15H_BDVER4,
32309 M_INTEL_COREI7_IVYBRIDGE,
32310 M_INTEL_COREI7_HASWELL
32311 };
32312
32313 static struct _arch_names_table
32314 {
32315 const char *const name;
32316 const enum processor_model model;
32317 }
32318 const arch_names_table[] =
32319 {
32320 {"amd", M_AMD},
32321 {"intel", M_INTEL},
32322 {"atom", M_INTEL_BONNELL},
32323 {"slm", M_INTEL_SILVERMONT},
32324 {"core2", M_INTEL_CORE2},
32325 {"corei7", M_INTEL_COREI7},
32326 {"nehalem", M_INTEL_COREI7_NEHALEM},
32327 {"westmere", M_INTEL_COREI7_WESTMERE},
32328 {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE},
32329 {"ivybridge", M_INTEL_COREI7_IVYBRIDGE},
32330 {"haswell", M_INTEL_COREI7_HASWELL},
32331 {"bonnell", M_INTEL_BONNELL},
32332 {"silvermont", M_INTEL_SILVERMONT},
32333 {"amdfam10h", M_AMDFAM10H},
32334 {"barcelona", M_AMDFAM10H_BARCELONA},
32335 {"shanghai", M_AMDFAM10H_SHANGHAI},
32336 {"istanbul", M_AMDFAM10H_ISTANBUL},
32337 {"btver1", M_AMD_BTVER1},
32338 {"amdfam15h", M_AMDFAM15H},
32339 {"bdver1", M_AMDFAM15H_BDVER1},
32340 {"bdver2", M_AMDFAM15H_BDVER2},
32341 {"bdver3", M_AMDFAM15H_BDVER3},
32342 {"bdver4", M_AMDFAM15H_BDVER4},
32343 {"btver2", M_AMD_BTVER2},
32344 };
32345
32346 static struct _isa_names_table
32347 {
32348 const char *const name;
32349 const enum processor_features feature;
32350 }
32351 const isa_names_table[] =
32352 {
32353 {"cmov", F_CMOV},
32354 {"mmx", F_MMX},
32355 {"popcnt", F_POPCNT},
32356 {"sse", F_SSE},
32357 {"sse2", F_SSE2},
32358 {"sse3", F_SSE3},
32359 {"ssse3", F_SSSE3},
32360 {"sse4a", F_SSE4_A},
32361 {"sse4.1", F_SSE4_1},
32362 {"sse4.2", F_SSE4_2},
32363 {"avx", F_AVX},
32364 {"fma4", F_FMA4},
32365 {"xop", F_XOP},
32366 {"fma", F_FMA},
32367 {"avx2", F_AVX2}
32368 };
32369
32370 tree __processor_model_type = build_processor_model_struct ();
32371 tree __cpu_model_var = make_var_decl (__processor_model_type,
32372 "__cpu_model");
32373
32374
32375 varpool_add_new_variable (__cpu_model_var);
32376
32377 gcc_assert ((args != NULL) && (*args != NULL));
32378
32379 param_string_cst = *args;
32380 while (param_string_cst
32381 && TREE_CODE (param_string_cst) != STRING_CST)
32382 {
32383 /* *args must be an expr that can contain other EXPRS leading to a
32384 STRING_CST. */
32385 if (!EXPR_P (param_string_cst))
32386 {
32387 error ("Parameter to builtin must be a string constant or literal");
32388 return integer_zero_node;
32389 }
32390 param_string_cst = TREE_OPERAND (EXPR_CHECK (param_string_cst), 0);
32391 }
32392
32393 gcc_assert (param_string_cst);
32394
32395 if (fn_code == IX86_BUILTIN_CPU_IS)
32396 {
32397 tree ref;
32398 tree field;
32399 tree final;
32400
32401 unsigned int field_val = 0;
32402 unsigned int NUM_ARCH_NAMES
32403 = sizeof (arch_names_table) / sizeof (struct _arch_names_table);
32404
32405 for (i = 0; i < NUM_ARCH_NAMES; i++)
32406 if (strcmp (arch_names_table[i].name,
32407 TREE_STRING_POINTER (param_string_cst)) == 0)
32408 break;
32409
32410 if (i == NUM_ARCH_NAMES)
32411 {
32412 error ("Parameter to builtin not valid: %s",
32413 TREE_STRING_POINTER (param_string_cst));
32414 return integer_zero_node;
32415 }
32416
32417 field = TYPE_FIELDS (__processor_model_type);
32418 field_val = arch_names_table[i].model;
32419
32420 /* CPU types are stored in the next field. */
32421 if (field_val > M_CPU_TYPE_START
32422 && field_val < M_CPU_SUBTYPE_START)
32423 {
32424 field = DECL_CHAIN (field);
32425 field_val -= M_CPU_TYPE_START;
32426 }
32427
32428 /* CPU subtypes are stored in the next field. */
32429 if (field_val > M_CPU_SUBTYPE_START)
32430 {
32431 field = DECL_CHAIN (DECL_CHAIN (field));
32432 field_val -= M_CPU_SUBTYPE_START;
32433 }
32434
32435 /* Get the appropriate field in __cpu_model. */
32436 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
32437 field, NULL_TREE);
32438
32439 /* Check the value. */
32440 final = build2 (EQ_EXPR, unsigned_type_node, ref,
32441 build_int_cstu (unsigned_type_node, field_val));
32442 return build1 (CONVERT_EXPR, integer_type_node, final);
32443 }
32444 else if (fn_code == IX86_BUILTIN_CPU_SUPPORTS)
32445 {
32446 tree ref;
32447 tree array_elt;
32448 tree field;
32449 tree final;
32450
32451 unsigned int field_val = 0;
32452 unsigned int NUM_ISA_NAMES
32453 = sizeof (isa_names_table) / sizeof (struct _isa_names_table);
32454
32455 for (i = 0; i < NUM_ISA_NAMES; i++)
32456 if (strcmp (isa_names_table[i].name,
32457 TREE_STRING_POINTER (param_string_cst)) == 0)
32458 break;
32459
32460 if (i == NUM_ISA_NAMES)
32461 {
32462 error ("Parameter to builtin not valid: %s",
32463 TREE_STRING_POINTER (param_string_cst));
32464 return integer_zero_node;
32465 }
32466
32467 field = TYPE_FIELDS (__processor_model_type);
32468 /* Get the last field, which is __cpu_features. */
32469 while (DECL_CHAIN (field))
32470 field = DECL_CHAIN (field);
32471
32472 /* Get the appropriate field: __cpu_model.__cpu_features */
32473 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
32474 field, NULL_TREE);
32475
32476 /* Access the 0th element of __cpu_features array. */
32477 array_elt = build4 (ARRAY_REF, unsigned_type_node, ref,
32478 integer_zero_node, NULL_TREE, NULL_TREE);
32479
32480 field_val = (1 << isa_names_table[i].feature);
32481 /* Return __cpu_model.__cpu_features[0] & field_val */
32482 final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
32483 build_int_cstu (unsigned_type_node, field_val));
32484 return build1 (CONVERT_EXPR, integer_type_node, final);
32485 }
32486 gcc_unreachable ();
32487 }
32488
32489 static tree
32490 ix86_fold_builtin (tree fndecl, int n_args,
32491 tree *args, bool ignore ATTRIBUTE_UNUSED)
32492 {
32493 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
32494 {
32495 enum ix86_builtins fn_code = (enum ix86_builtins)
32496 DECL_FUNCTION_CODE (fndecl);
32497 if (fn_code == IX86_BUILTIN_CPU_IS
32498 || fn_code == IX86_BUILTIN_CPU_SUPPORTS)
32499 {
32500 gcc_assert (n_args == 1);
32501 return fold_builtin_cpu (fndecl, args);
32502 }
32503 }
32504
32505 #ifdef SUBTARGET_FOLD_BUILTIN
32506 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
32507 #endif
32508
32509 return NULL_TREE;
32510 }
32511
32512 /* Make builtins to detect cpu type and features supported. NAME is
32513 the builtin name, CODE is the builtin code, and FTYPE is the function
32514 type of the builtin. */
32515
32516 static void
32517 make_cpu_type_builtin (const char* name, int code,
32518 enum ix86_builtin_func_type ftype, bool is_const)
32519 {
32520 tree decl;
32521 tree type;
32522
32523 type = ix86_get_builtin_func_type (ftype);
32524 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
32525 NULL, NULL_TREE);
32526 gcc_assert (decl != NULL_TREE);
32527 ix86_builtins[(int) code] = decl;
32528 TREE_READONLY (decl) = is_const;
32529 }
32530
32531 /* Make builtins to get CPU type and features supported. The created
32532 builtins are:
32533
32534 __builtin_cpu_init (), to detect cpu type and features,
32535 __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
32536 __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>
32537 */
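/* For example (illustrative only), these can be used for runtime dispatch:

     if (__builtin_cpu_supports ("avx2"))
       ... use the AVX2 path ...
     else if (__builtin_cpu_is ("bonnell"))
       ... use the Bonnell-tuned path ...

   Code that may run before constructors (e.g. an ifunc resolver) must call
   __builtin_cpu_init () first so that __cpu_model is initialized.  */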
32538
32539 static void
32540 ix86_init_platform_type_builtins (void)
32541 {
32542 make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT,
32543 INT_FTYPE_VOID, false);
32544 make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS,
32545 INT_FTYPE_PCCHAR, true);
32546 make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS,
32547 INT_FTYPE_PCCHAR, true);
32548 }
32549
32550 /* Internal method for ix86_init_builtins. */
32551
32552 static void
32553 ix86_init_builtins_va_builtins_abi (void)
32554 {
32555 tree ms_va_ref, sysv_va_ref;
32556 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
32557 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
32558 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
32559 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
32560
32561 if (!TARGET_64BIT)
32562 return;
32563 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
32564 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
32565 ms_va_ref = build_reference_type (ms_va_list_type_node);
32566 sysv_va_ref =
32567 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
32568
32569 fnvoid_va_end_ms =
32570 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
32571 fnvoid_va_start_ms =
32572 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
32573 fnvoid_va_end_sysv =
32574 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
32575 fnvoid_va_start_sysv =
32576 build_varargs_function_type_list (void_type_node, sysv_va_ref,
32577 NULL_TREE);
32578 fnvoid_va_copy_ms =
32579 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
32580 NULL_TREE);
32581 fnvoid_va_copy_sysv =
32582 build_function_type_list (void_type_node, sysv_va_ref,
32583 sysv_va_ref, NULL_TREE);
32584
32585 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
32586 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
32587 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
32588 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
32589 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
32590 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
32591 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
32592 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
32593 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
32594 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
32595 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
32596 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
32597 }
32598
32599 static void
32600 ix86_init_builtin_types (void)
32601 {
32602 tree float128_type_node, float80_type_node;
32603
32604 /* The __float80 type. */
32605 float80_type_node = long_double_type_node;
32606 if (TYPE_MODE (float80_type_node) != XFmode)
32607 {
32608 /* The __float80 type. */
32609 float80_type_node = make_node (REAL_TYPE);
32610
32611 TYPE_PRECISION (float80_type_node) = 80;
32612 layout_type (float80_type_node);
32613 }
32614 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
32615
32616 /* The __float128 type. */
32617 float128_type_node = make_node (REAL_TYPE);
32618 TYPE_PRECISION (float128_type_node) = 128;
32619 layout_type (float128_type_node);
32620 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
32621
32622 /* This macro is built by i386-builtin-types.awk. */
32623 DEFINE_BUILTIN_PRIMITIVE_TYPES;
32624 }
32625
32626 static void
32627 ix86_init_builtins (void)
32628 {
32629 tree t;
32630
32631 ix86_init_builtin_types ();
32632
32633 /* Builtins to get CPU type and features. */
32634 ix86_init_platform_type_builtins ();
32635
32636 /* TFmode support builtins. */
32637 def_builtin_const (0, "__builtin_infq",
32638 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
32639 def_builtin_const (0, "__builtin_huge_valq",
32640 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
32641
32642 /* We will expand them to normal calls if SSE isn't available since
32643 they are used by libgcc. */
32644 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
32645 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
32646 BUILT_IN_MD, "__fabstf2", NULL_TREE);
32647 TREE_READONLY (t) = 1;
32648 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
32649
32650 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
32651 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
32652 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
32653 TREE_READONLY (t) = 1;
32654 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
32655
32656 ix86_init_tm_builtins ();
32657 ix86_init_mmx_sse_builtins ();
32658
32659 if (TARGET_LP64)
32660 ix86_init_builtins_va_builtins_abi ();
32661
32662 #ifdef SUBTARGET_INIT_BUILTINS
32663 SUBTARGET_INIT_BUILTINS;
32664 #endif
32665 }
32666
32667 /* Return the ix86 builtin for CODE. */
32668
32669 static tree
32670 ix86_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
32671 {
32672 if (code >= IX86_BUILTIN_MAX)
32673 return error_mark_node;
32674
32675 return ix86_builtins[code];
32676 }
32677
32678 /* Errors in the source file can cause expand_expr to return const0_rtx
32679 where we expect a vector. To avoid crashing, use one of the vector
32680 clear instructions. */
32681 static rtx
32682 safe_vector_operand (rtx x, enum machine_mode mode)
32683 {
32684 if (x == const0_rtx)
32685 x = CONST0_RTX (mode);
32686 return x;
32687 }
32688
32689 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
32690
32691 static rtx
32692 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
32693 {
32694 rtx pat;
32695 tree arg0 = CALL_EXPR_ARG (exp, 0);
32696 tree arg1 = CALL_EXPR_ARG (exp, 1);
32697 rtx op0 = expand_normal (arg0);
32698 rtx op1 = expand_normal (arg1);
32699 enum machine_mode tmode = insn_data[icode].operand[0].mode;
32700 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
32701 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
32702
32703 if (VECTOR_MODE_P (mode0))
32704 op0 = safe_vector_operand (op0, mode0);
32705 if (VECTOR_MODE_P (mode1))
32706 op1 = safe_vector_operand (op1, mode1);
32707
32708 if (optimize || !target
32709 || GET_MODE (target) != tmode
32710 || !insn_data[icode].operand[0].predicate (target, tmode))
32711 target = gen_reg_rtx (tmode);
32712
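/* An SImode operand where the insn expects TImode is loaded into the low
   element of a V4SImode register and then viewed as TImode.  */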
32713 if (GET_MODE (op1) == SImode && mode1 == TImode)
32714 {
32715 rtx x = gen_reg_rtx (V4SImode);
32716 emit_insn (gen_sse2_loadd (x, op1));
32717 op1 = gen_lowpart (TImode, x);
32718 }
32719
32720 if (!insn_data[icode].operand[1].predicate (op0, mode0))
32721 op0 = copy_to_mode_reg (mode0, op0);
32722 if (!insn_data[icode].operand[2].predicate (op1, mode1))
32723 op1 = copy_to_mode_reg (mode1, op1);
32724
32725 pat = GEN_FCN (icode) (target, op0, op1);
32726 if (! pat)
32727 return 0;
32728
32729 emit_insn (pat);
32730
32731 return target;
32732 }
32733
32734 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
32735
32736 static rtx
32737 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
32738 enum ix86_builtin_func_type m_type,
32739 enum rtx_code sub_code)
32740 {
32741 rtx pat;
32742 int i;
32743 int nargs;
32744 bool comparison_p = false;
32745 bool tf_p = false;
32746 bool last_arg_constant = false;
32747 int num_memory = 0;
32748 struct {
32749 rtx op;
32750 enum machine_mode mode;
32751 } args[4];
32752
32753 enum machine_mode tmode = insn_data[icode].operand[0].mode;
32754
32755 switch (m_type)
32756 {
32757 case MULTI_ARG_4_DF2_DI_I:
32758 case MULTI_ARG_4_DF2_DI_I1:
32759 case MULTI_ARG_4_SF2_SI_I:
32760 case MULTI_ARG_4_SF2_SI_I1:
32761 nargs = 4;
32762 last_arg_constant = true;
32763 break;
32764
32765 case MULTI_ARG_3_SF:
32766 case MULTI_ARG_3_DF:
32767 case MULTI_ARG_3_SF2:
32768 case MULTI_ARG_3_DF2:
32769 case MULTI_ARG_3_DI:
32770 case MULTI_ARG_3_SI:
32771 case MULTI_ARG_3_SI_DI:
32772 case MULTI_ARG_3_HI:
32773 case MULTI_ARG_3_HI_SI:
32774 case MULTI_ARG_3_QI:
32775 case MULTI_ARG_3_DI2:
32776 case MULTI_ARG_3_SI2:
32777 case MULTI_ARG_3_HI2:
32778 case MULTI_ARG_3_QI2:
32779 nargs = 3;
32780 break;
32781
32782 case MULTI_ARG_2_SF:
32783 case MULTI_ARG_2_DF:
32784 case MULTI_ARG_2_DI:
32785 case MULTI_ARG_2_SI:
32786 case MULTI_ARG_2_HI:
32787 case MULTI_ARG_2_QI:
32788 nargs = 2;
32789 break;
32790
32791 case MULTI_ARG_2_DI_IMM:
32792 case MULTI_ARG_2_SI_IMM:
32793 case MULTI_ARG_2_HI_IMM:
32794 case MULTI_ARG_2_QI_IMM:
32795 nargs = 2;
32796 last_arg_constant = true;
32797 break;
32798
32799 case MULTI_ARG_1_SF:
32800 case MULTI_ARG_1_DF:
32801 case MULTI_ARG_1_SF2:
32802 case MULTI_ARG_1_DF2:
32803 case MULTI_ARG_1_DI:
32804 case MULTI_ARG_1_SI:
32805 case MULTI_ARG_1_HI:
32806 case MULTI_ARG_1_QI:
32807 case MULTI_ARG_1_SI_DI:
32808 case MULTI_ARG_1_HI_DI:
32809 case MULTI_ARG_1_HI_SI:
32810 case MULTI_ARG_1_QI_DI:
32811 case MULTI_ARG_1_QI_SI:
32812 case MULTI_ARG_1_QI_HI:
32813 nargs = 1;
32814 break;
32815
32816 case MULTI_ARG_2_DI_CMP:
32817 case MULTI_ARG_2_SI_CMP:
32818 case MULTI_ARG_2_HI_CMP:
32819 case MULTI_ARG_2_QI_CMP:
32820 nargs = 2;
32821 comparison_p = true;
32822 break;
32823
32824 case MULTI_ARG_2_SF_TF:
32825 case MULTI_ARG_2_DF_TF:
32826 case MULTI_ARG_2_DI_TF:
32827 case MULTI_ARG_2_SI_TF:
32828 case MULTI_ARG_2_HI_TF:
32829 case MULTI_ARG_2_QI_TF:
32830 nargs = 2;
32831 tf_p = true;
32832 break;
32833
32834 default:
32835 gcc_unreachable ();
32836 }
32837
32838 if (optimize || !target
32839 || GET_MODE (target) != tmode
32840 || !insn_data[icode].operand[0].predicate (target, tmode))
32841 target = gen_reg_rtx (tmode);
32842
32843 gcc_assert (nargs <= 4);
32844
32845 for (i = 0; i < nargs; i++)
32846 {
32847 tree arg = CALL_EXPR_ARG (exp, i);
32848 rtx op = expand_normal (arg);
32849 int adjust = (comparison_p) ? 1 : 0;
32850 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
32851
32852 if (last_arg_constant && i == nargs - 1)
32853 {
32854 if (!insn_data[icode].operand[i + 1].predicate (op, mode))
32855 {
32856 enum insn_code new_icode = icode;
32857 switch (icode)
32858 {
32859 case CODE_FOR_xop_vpermil2v2df3:
32860 case CODE_FOR_xop_vpermil2v4sf3:
32861 case CODE_FOR_xop_vpermil2v4df3:
32862 case CODE_FOR_xop_vpermil2v8sf3:
32863 error ("the last argument must be a 2-bit immediate");
32864 return gen_reg_rtx (tmode);
32865 case CODE_FOR_xop_rotlv2di3:
32866 new_icode = CODE_FOR_rotlv2di3;
32867 goto xop_rotl;
32868 case CODE_FOR_xop_rotlv4si3:
32869 new_icode = CODE_FOR_rotlv4si3;
32870 goto xop_rotl;
32871 case CODE_FOR_xop_rotlv8hi3:
32872 new_icode = CODE_FOR_rotlv8hi3;
32873 goto xop_rotl;
32874 case CODE_FOR_xop_rotlv16qi3:
32875 new_icode = CODE_FOR_rotlv16qi3;
32876 xop_rotl:
32877 if (CONST_INT_P (op))
32878 {
32879 int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
32880 op = GEN_INT (INTVAL (op) & mask);
32881 gcc_checking_assert
32882 (insn_data[icode].operand[i + 1].predicate (op, mode));
32883 }
32884 else
32885 {
32886 gcc_checking_assert
32887 (nargs == 2
32888 && insn_data[new_icode].operand[0].mode == tmode
32889 && insn_data[new_icode].operand[1].mode == tmode
32890 && insn_data[new_icode].operand[2].mode == mode
32891 && insn_data[new_icode].operand[0].predicate
32892 == insn_data[icode].operand[0].predicate
32893 && insn_data[new_icode].operand[1].predicate
32894 == insn_data[icode].operand[1].predicate);
32895 icode = new_icode;
32896 goto non_constant;
32897 }
32898 break;
32899 default:
32900 gcc_unreachable ();
32901 }
32902 }
32903 }
32904 else
32905 {
32906 non_constant:
32907 if (VECTOR_MODE_P (mode))
32908 op = safe_vector_operand (op, mode);
32909
32910 /* If we aren't optimizing, only allow one memory operand to be
32911 generated. */
32912 if (memory_operand (op, mode))
32913 num_memory++;
32914
32915 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
32916
32917 if (optimize
32918 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
32919 || num_memory > 1)
32920 op = force_reg (mode, op);
32921 }
32922
32923 args[i].op = op;
32924 args[i].mode = mode;
32925 }
32926
32927 switch (nargs)
32928 {
32929 case 1:
32930 pat = GEN_FCN (icode) (target, args[0].op);
32931 break;
32932
32933 case 2:
32934 if (tf_p)
32935 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
32936 GEN_INT ((int)sub_code));
32937 else if (! comparison_p)
32938 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
32939 else
32940 {
32941 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
32942 args[0].op,
32943 args[1].op);
32944
32945 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
32946 }
32947 break;
32948
32949 case 3:
32950 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
32951 break;
32952
32953 case 4:
32954 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
32955 break;
32956
32957 default:
32958 gcc_unreachable ();
32959 }
32960
32961 if (! pat)
32962 return 0;
32963
32964 emit_insn (pat);
32965 return target;
32966 }
32967
32968 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
32969 insns with vec_merge. */
32970
32971 static rtx
32972 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
32973 rtx target)
32974 {
32975 rtx pat;
32976 tree arg0 = CALL_EXPR_ARG (exp, 0);
32977 rtx op1, op0 = expand_normal (arg0);
32978 enum machine_mode tmode = insn_data[icode].operand[0].mode;
32979 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
32980
32981 if (optimize || !target
32982 || GET_MODE (target) != tmode
32983 || !insn_data[icode].operand[0].predicate (target, tmode))
32984 target = gen_reg_rtx (tmode);
32985
32986 if (VECTOR_MODE_P (mode0))
32987 op0 = safe_vector_operand (op0, mode0);
32988
32989 if ((optimize && !register_operand (op0, mode0))
32990 || !insn_data[icode].operand[1].predicate (op0, mode0))
32991 op0 = copy_to_mode_reg (mode0, op0);
32992
32993 op1 = op0;
32994 if (!insn_data[icode].operand[2].predicate (op1, mode0))
32995 op1 = copy_to_mode_reg (mode0, op1);
32996
32997 pat = GEN_FCN (icode) (target, op0, op1);
32998 if (! pat)
32999 return 0;
33000 emit_insn (pat);
33001 return target;
33002 }
33003
33004 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
33005
33006 static rtx
33007 ix86_expand_sse_compare (const struct builtin_description *d,
33008 tree exp, rtx target, bool swap)
33009 {
33010 rtx pat;
33011 tree arg0 = CALL_EXPR_ARG (exp, 0);
33012 tree arg1 = CALL_EXPR_ARG (exp, 1);
33013 rtx op0 = expand_normal (arg0);
33014 rtx op1 = expand_normal (arg1);
33015 rtx op2;
33016 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
33017 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
33018 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
33019 enum rtx_code comparison = d->comparison;
33020
33021 if (VECTOR_MODE_P (mode0))
33022 op0 = safe_vector_operand (op0, mode0);
33023 if (VECTOR_MODE_P (mode1))
33024 op1 = safe_vector_operand (op1, mode1);
33025
33026 /* Swap operands if we have a comparison that isn't available in
33027 hardware. */
33028 if (swap)
33029 {
33030 rtx tmp = gen_reg_rtx (mode1);
33031 emit_move_insn (tmp, op1);
33032 op1 = op0;
33033 op0 = tmp;
33034 }
33035
33036 if (optimize || !target
33037 || GET_MODE (target) != tmode
33038 || !insn_data[d->icode].operand[0].predicate (target, tmode))
33039 target = gen_reg_rtx (tmode);
33040
33041 if ((optimize && !register_operand (op0, mode0))
33042 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
33043 op0 = copy_to_mode_reg (mode0, op0);
33044 if ((optimize && !register_operand (op1, mode1))
33045 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
33046 op1 = copy_to_mode_reg (mode1, op1);
33047
33048 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
33049 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
33050 if (! pat)
33051 return 0;
33052 emit_insn (pat);
33053 return target;
33054 }
33055
33056 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
33057
33058 static rtx
33059 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
33060 rtx target)
33061 {
33062 rtx pat;
33063 tree arg0 = CALL_EXPR_ARG (exp, 0);
33064 tree arg1 = CALL_EXPR_ARG (exp, 1);
33065 rtx op0 = expand_normal (arg0);
33066 rtx op1 = expand_normal (arg1);
33067 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
33068 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
33069 enum rtx_code comparison = d->comparison;
33070
33071 if (VECTOR_MODE_P (mode0))
33072 op0 = safe_vector_operand (op0, mode0);
33073 if (VECTOR_MODE_P (mode1))
33074 op1 = safe_vector_operand (op1, mode1);
33075
33076 /* Swap operands if we have a comparison that isn't available in
33077 hardware. */
33078 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
33079 {
33080 rtx tmp = op1;
33081 op1 = op0;
33082 op0 = tmp;
33083 }
33084
33085 target = gen_reg_rtx (SImode);
33086 emit_move_insn (target, const0_rtx);
33087 target = gen_rtx_SUBREG (QImode, target, 0);
33088
33089 if ((optimize && !register_operand (op0, mode0))
33090 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33091 op0 = copy_to_mode_reg (mode0, op0);
33092 if ((optimize && !register_operand (op1, mode1))
33093 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
33094 op1 = copy_to_mode_reg (mode1, op1);
33095
33096 pat = GEN_FCN (d->icode) (op0, op1);
33097 if (! pat)
33098 return 0;
33099 emit_insn (pat);
33100 emit_insn (gen_rtx_SET (VOIDmode,
33101 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33102 gen_rtx_fmt_ee (comparison, QImode,
33103 SET_DEST (pat),
33104 const0_rtx)));
33105
33106 return SUBREG_REG (target);
33107 }
33108
33109 /* Subroutines of ix86_expand_args_builtin to take care of round insns. */
33110
33111 static rtx
33112 ix86_expand_sse_round (const struct builtin_description *d, tree exp,
33113 rtx target)
33114 {
33115 rtx pat;
33116 tree arg0 = CALL_EXPR_ARG (exp, 0);
33117 rtx op1, op0 = expand_normal (arg0);
33118 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
33119 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
33120
33121 if (optimize || target == 0
33122 || GET_MODE (target) != tmode
33123 || !insn_data[d->icode].operand[0].predicate (target, tmode))
33124 target = gen_reg_rtx (tmode);
33125
33126 if (VECTOR_MODE_P (mode0))
33127 op0 = safe_vector_operand (op0, mode0);
33128
33129 if ((optimize && !register_operand (op0, mode0))
33130 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33131 op0 = copy_to_mode_reg (mode0, op0);
33132
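/* For the round insns the comparison field of the builtin description holds
   the rounding immediate rather than an rtx_code.  */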
33133 op1 = GEN_INT (d->comparison);
33134
33135 pat = GEN_FCN (d->icode) (target, op0, op1);
33136 if (! pat)
33137 return 0;
33138 emit_insn (pat);
33139 return target;
33140 }
33141
33142 static rtx
33143 ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d,
33144 tree exp, rtx target)
33145 {
33146 rtx pat;
33147 tree arg0 = CALL_EXPR_ARG (exp, 0);
33148 tree arg1 = CALL_EXPR_ARG (exp, 1);
33149 rtx op0 = expand_normal (arg0);
33150 rtx op1 = expand_normal (arg1);
33151 rtx op2;
33152 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
33153 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
33154 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
33155
33156 if (optimize || target == 0
33157 || GET_MODE (target) != tmode
33158 || !insn_data[d->icode].operand[0].predicate (target, tmode))
33159 target = gen_reg_rtx (tmode);
33160
33161 op0 = safe_vector_operand (op0, mode0);
33162 op1 = safe_vector_operand (op1, mode1);
33163
33164 if ((optimize && !register_operand (op0, mode0))
33165 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33166 op0 = copy_to_mode_reg (mode0, op0);
33167 if ((optimize && !register_operand (op1, mode1))
33168 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
33169 op1 = copy_to_mode_reg (mode1, op1);
33170
33171 op2 = GEN_INT (d->comparison);
33172
33173 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
33174 if (! pat)
33175 return 0;
33176 emit_insn (pat);
33177 return target;
33178 }
33179
33180 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
33181
33182 static rtx
33183 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
33184 rtx target)
33185 {
33186 rtx pat;
33187 tree arg0 = CALL_EXPR_ARG (exp, 0);
33188 tree arg1 = CALL_EXPR_ARG (exp, 1);
33189 rtx op0 = expand_normal (arg0);
33190 rtx op1 = expand_normal (arg1);
33191 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
33192 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
33193 enum rtx_code comparison = d->comparison;
33194
33195 if (VECTOR_MODE_P (mode0))
33196 op0 = safe_vector_operand (op0, mode0);
33197 if (VECTOR_MODE_P (mode1))
33198 op1 = safe_vector_operand (op1, mode1);
33199
33200 target = gen_reg_rtx (SImode);
33201 emit_move_insn (target, const0_rtx);
33202 target = gen_rtx_SUBREG (QImode, target, 0);
33203
33204 if ((optimize && !register_operand (op0, mode0))
33205 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33206 op0 = copy_to_mode_reg (mode0, op0);
33207 if ((optimize && !register_operand (op1, mode1))
33208 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
33209 op1 = copy_to_mode_reg (mode1, op1);
33210
33211 pat = GEN_FCN (d->icode) (op0, op1);
33212 if (! pat)
33213 return 0;
33214 emit_insn (pat);
33215 emit_insn (gen_rtx_SET (VOIDmode,
33216 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33217 gen_rtx_fmt_ee (comparison, QImode,
33218 SET_DEST (pat),
33219 const0_rtx)));
33220
33221 return SUBREG_REG (target);
33222 }
33223
33224 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
33225
33226 static rtx
33227 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
33228 tree exp, rtx target)
33229 {
33230 rtx pat;
33231 tree arg0 = CALL_EXPR_ARG (exp, 0);
33232 tree arg1 = CALL_EXPR_ARG (exp, 1);
33233 tree arg2 = CALL_EXPR_ARG (exp, 2);
33234 tree arg3 = CALL_EXPR_ARG (exp, 3);
33235 tree arg4 = CALL_EXPR_ARG (exp, 4);
33236 rtx scratch0, scratch1;
33237 rtx op0 = expand_normal (arg0);
33238 rtx op1 = expand_normal (arg1);
33239 rtx op2 = expand_normal (arg2);
33240 rtx op3 = expand_normal (arg3);
33241 rtx op4 = expand_normal (arg4);
33242 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
33243
33244 tmode0 = insn_data[d->icode].operand[0].mode;
33245 tmode1 = insn_data[d->icode].operand[1].mode;
33246 modev2 = insn_data[d->icode].operand[2].mode;
33247 modei3 = insn_data[d->icode].operand[3].mode;
33248 modev4 = insn_data[d->icode].operand[4].mode;
33249 modei5 = insn_data[d->icode].operand[5].mode;
33250 modeimm = insn_data[d->icode].operand[6].mode;
33251
33252 if (VECTOR_MODE_P (modev2))
33253 op0 = safe_vector_operand (op0, modev2);
33254 if (VECTOR_MODE_P (modev4))
33255 op2 = safe_vector_operand (op2, modev4);
33256
33257 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
33258 op0 = copy_to_mode_reg (modev2, op0);
33259 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
33260 op1 = copy_to_mode_reg (modei3, op1);
33261 if ((optimize && !register_operand (op2, modev4))
33262 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
33263 op2 = copy_to_mode_reg (modev4, op2);
33264 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
33265 op3 = copy_to_mode_reg (modei5, op3);
33266
33267 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
33268 {
33269 error ("the fifth argument must be an 8-bit immediate");
33270 return const0_rtx;
33271 }
33272
33273 if (d->code == IX86_BUILTIN_PCMPESTRI128)
33274 {
33275 if (optimize || !target
33276 || GET_MODE (target) != tmode0
33277 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
33278 target = gen_reg_rtx (tmode0);
33279
33280 scratch1 = gen_reg_rtx (tmode1);
33281
33282 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
33283 }
33284 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
33285 {
33286 if (optimize || !target
33287 || GET_MODE (target) != tmode1
33288 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
33289 target = gen_reg_rtx (tmode1);
33290
33291 scratch0 = gen_reg_rtx (tmode0);
33292
33293 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
33294 }
33295 else
33296 {
33297 gcc_assert (d->flag);
33298
33299 scratch0 = gen_reg_rtx (tmode0);
33300 scratch1 = gen_reg_rtx (tmode1);
33301
33302 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
33303 }
33304
33305 if (! pat)
33306 return 0;
33307
33308 emit_insn (pat);
33309
33310 if (d->flag)
33311 {
33312 target = gen_reg_rtx (SImode);
33313 emit_move_insn (target, const0_rtx);
33314 target = gen_rtx_SUBREG (QImode, target, 0);
33315
33316 emit_insn
33317 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33318 gen_rtx_fmt_ee (EQ, QImode,
33319 gen_rtx_REG ((enum machine_mode) d->flag,
33320 FLAGS_REG),
33321 const0_rtx)));
33322 return SUBREG_REG (target);
33323 }
33324 else
33325 return target;
33326 }
33327
33328
33329 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
33330
33331 static rtx
33332 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
33333 tree exp, rtx target)
33334 {
33335 rtx pat;
33336 tree arg0 = CALL_EXPR_ARG (exp, 0);
33337 tree arg1 = CALL_EXPR_ARG (exp, 1);
33338 tree arg2 = CALL_EXPR_ARG (exp, 2);
33339 rtx scratch0, scratch1;
33340 rtx op0 = expand_normal (arg0);
33341 rtx op1 = expand_normal (arg1);
33342 rtx op2 = expand_normal (arg2);
33343 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
33344
33345 tmode0 = insn_data[d->icode].operand[0].mode;
33346 tmode1 = insn_data[d->icode].operand[1].mode;
33347 modev2 = insn_data[d->icode].operand[2].mode;
33348 modev3 = insn_data[d->icode].operand[3].mode;
33349 modeimm = insn_data[d->icode].operand[4].mode;
33350
33351 if (VECTOR_MODE_P (modev2))
33352 op0 = safe_vector_operand (op0, modev2);
33353 if (VECTOR_MODE_P (modev3))
33354 op1 = safe_vector_operand (op1, modev3);
33355
33356 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
33357 op0 = copy_to_mode_reg (modev2, op0);
33358 if ((optimize && !register_operand (op1, modev3))
33359 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
33360 op1 = copy_to_mode_reg (modev3, op1);
33361
33362 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
33363 {
33364 error ("the third argument must be an 8-bit immediate");
33365 return const0_rtx;
33366 }
33367
33368 if (d->code == IX86_BUILTIN_PCMPISTRI128)
33369 {
33370 if (optimize || !target
33371 || GET_MODE (target) != tmode0
33372 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
33373 target = gen_reg_rtx (tmode0);
33374
33375 scratch1 = gen_reg_rtx (tmode1);
33376
33377 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
33378 }
33379 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
33380 {
33381 if (optimize || !target
33382 || GET_MODE (target) != tmode1
33383 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
33384 target = gen_reg_rtx (tmode1);
33385
33386 scratch0 = gen_reg_rtx (tmode0);
33387
33388 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
33389 }
33390 else
33391 {
33392 gcc_assert (d->flag);
33393
33394 scratch0 = gen_reg_rtx (tmode0);
33395 scratch1 = gen_reg_rtx (tmode1);
33396
33397 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
33398 }
33399
33400 if (! pat)
33401 return 0;
33402
33403 emit_insn (pat);
33404
33405 if (d->flag)
33406 {
33407 target = gen_reg_rtx (SImode);
33408 emit_move_insn (target, const0_rtx);
33409 target = gen_rtx_SUBREG (QImode, target, 0);
33410
33411 emit_insn
33412 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33413 gen_rtx_fmt_ee (EQ, QImode,
33414 gen_rtx_REG ((enum machine_mode) d->flag,
33415 FLAGS_REG),
33416 const0_rtx)));
33417 return SUBREG_REG (target);
33418 }
33419 else
33420 return target;
33421 }
33422
33423 /* Subroutine of ix86_expand_builtin to take care of insns with
33424 variable number of operands. */
33425
33426 static rtx
33427 ix86_expand_args_builtin (const struct builtin_description *d,
33428 tree exp, rtx target)
33429 {
33430 rtx pat, real_target;
33431 unsigned int i, nargs;
33432 unsigned int nargs_constant = 0;
33433 unsigned int mask_pos = 0;
33434 int num_memory = 0;
33435 struct
33436 {
33437 rtx op;
33438 enum machine_mode mode;
33439 } args[6];
33440 bool last_arg_count = false;
33441 enum insn_code icode = d->icode;
33442 const struct insn_data_d *insn_p = &insn_data[icode];
33443 enum machine_mode tmode = insn_p->operand[0].mode;
33444 enum machine_mode rmode = VOIDmode;
33445 bool swap = false;
33446 enum rtx_code comparison = d->comparison;
33447
33448 switch ((enum ix86_builtin_func_type) d->flag)
33449 {
33450 case V2DF_FTYPE_V2DF_ROUND:
33451 case V4DF_FTYPE_V4DF_ROUND:
33452 case V4SF_FTYPE_V4SF_ROUND:
33453 case V8SF_FTYPE_V8SF_ROUND:
33454 case V4SI_FTYPE_V4SF_ROUND:
33455 case V8SI_FTYPE_V8SF_ROUND:
33456 return ix86_expand_sse_round (d, exp, target);
33457 case V4SI_FTYPE_V2DF_V2DF_ROUND:
33458 case V8SI_FTYPE_V4DF_V4DF_ROUND:
33459 case V16SI_FTYPE_V8DF_V8DF_ROUND:
33460 return ix86_expand_sse_round_vec_pack_sfix (d, exp, target);
33461 case INT_FTYPE_V8SF_V8SF_PTEST:
33462 case INT_FTYPE_V4DI_V4DI_PTEST:
33463 case INT_FTYPE_V4DF_V4DF_PTEST:
33464 case INT_FTYPE_V4SF_V4SF_PTEST:
33465 case INT_FTYPE_V2DI_V2DI_PTEST:
33466 case INT_FTYPE_V2DF_V2DF_PTEST:
33467 return ix86_expand_sse_ptest (d, exp, target);
33468 case FLOAT128_FTYPE_FLOAT128:
33469 case FLOAT_FTYPE_FLOAT:
33470 case INT_FTYPE_INT:
33471 case UINT64_FTYPE_INT:
33472 case UINT16_FTYPE_UINT16:
33473 case INT64_FTYPE_INT64:
33474 case INT64_FTYPE_V4SF:
33475 case INT64_FTYPE_V2DF:
33476 case INT_FTYPE_V16QI:
33477 case INT_FTYPE_V8QI:
33478 case INT_FTYPE_V8SF:
33479 case INT_FTYPE_V4DF:
33480 case INT_FTYPE_V4SF:
33481 case INT_FTYPE_V2DF:
33482 case INT_FTYPE_V32QI:
33483 case V16QI_FTYPE_V16QI:
33484 case V8SI_FTYPE_V8SF:
33485 case V8SI_FTYPE_V4SI:
33486 case V8HI_FTYPE_V8HI:
33487 case V8HI_FTYPE_V16QI:
33488 case V8QI_FTYPE_V8QI:
33489 case V8SF_FTYPE_V8SF:
33490 case V8SF_FTYPE_V8SI:
33491 case V8SF_FTYPE_V4SF:
33492 case V8SF_FTYPE_V8HI:
33493 case V4SI_FTYPE_V4SI:
33494 case V4SI_FTYPE_V16QI:
33495 case V4SI_FTYPE_V4SF:
33496 case V4SI_FTYPE_V8SI:
33497 case V4SI_FTYPE_V8HI:
33498 case V4SI_FTYPE_V4DF:
33499 case V4SI_FTYPE_V2DF:
33500 case V4HI_FTYPE_V4HI:
33501 case V4DF_FTYPE_V4DF:
33502 case V4DF_FTYPE_V4SI:
33503 case V4DF_FTYPE_V4SF:
33504 case V4DF_FTYPE_V2DF:
33505 case V4SF_FTYPE_V4SF:
33506 case V4SF_FTYPE_V4SI:
33507 case V4SF_FTYPE_V8SF:
33508 case V4SF_FTYPE_V4DF:
33509 case V4SF_FTYPE_V8HI:
33510 case V4SF_FTYPE_V2DF:
33511 case V2DI_FTYPE_V2DI:
33512 case V2DI_FTYPE_V16QI:
33513 case V2DI_FTYPE_V8HI:
33514 case V2DI_FTYPE_V4SI:
33515 case V2DF_FTYPE_V2DF:
33516 case V2DF_FTYPE_V4SI:
33517 case V2DF_FTYPE_V4DF:
33518 case V2DF_FTYPE_V4SF:
33519 case V2DF_FTYPE_V2SI:
33520 case V2SI_FTYPE_V2SI:
33521 case V2SI_FTYPE_V4SF:
33522 case V2SI_FTYPE_V2SF:
33523 case V2SI_FTYPE_V2DF:
33524 case V2SF_FTYPE_V2SF:
33525 case V2SF_FTYPE_V2SI:
33526 case V32QI_FTYPE_V32QI:
33527 case V32QI_FTYPE_V16QI:
33528 case V16HI_FTYPE_V16HI:
33529 case V16HI_FTYPE_V8HI:
33530 case V8SI_FTYPE_V8SI:
33531 case V16HI_FTYPE_V16QI:
33532 case V8SI_FTYPE_V16QI:
33533 case V4DI_FTYPE_V16QI:
33534 case V8SI_FTYPE_V8HI:
33535 case V4DI_FTYPE_V8HI:
33536 case V4DI_FTYPE_V4SI:
33537 case V4DI_FTYPE_V2DI:
33538 case HI_FTYPE_HI:
33539 case UINT_FTYPE_V2DF:
33540 case UINT_FTYPE_V4SF:
33541 case UINT64_FTYPE_V2DF:
33542 case UINT64_FTYPE_V4SF:
33543 case V16QI_FTYPE_V8DI:
33544 case V16HI_FTYPE_V16SI:
33545 case V16SI_FTYPE_HI:
33546 case V16SI_FTYPE_V16SI:
33547 case V16SI_FTYPE_INT:
33548 case V16SF_FTYPE_FLOAT:
33549 case V16SF_FTYPE_V4SF:
33550 case V16SF_FTYPE_V16SF:
33551 case V8HI_FTYPE_V8DI:
33552 case V8UHI_FTYPE_V8UHI:
33553 case V8SI_FTYPE_V8DI:
33554 case V8USI_FTYPE_V8USI:
33555 case V8SF_FTYPE_V8DF:
33556 case V8DI_FTYPE_QI:
33557 case V8DI_FTYPE_INT64:
33558 case V8DI_FTYPE_V4DI:
33559 case V8DI_FTYPE_V8DI:
33560 case V8DF_FTYPE_DOUBLE:
33561 case V8DF_FTYPE_V4DF:
33562 case V8DF_FTYPE_V8DF:
33563 case V8DF_FTYPE_V8SI:
33564 nargs = 1;
33565 break;
33566 case V4SF_FTYPE_V4SF_VEC_MERGE:
33567 case V2DF_FTYPE_V2DF_VEC_MERGE:
33568 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
33569 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
33570 case V16QI_FTYPE_V16QI_V16QI:
33571 case V16QI_FTYPE_V8HI_V8HI:
33572 case V16SI_FTYPE_V16SI_V16SI:
33573 case V16SF_FTYPE_V16SF_V16SF:
33574 case V16SF_FTYPE_V16SF_V16SI:
33575 case V8QI_FTYPE_V8QI_V8QI:
33576 case V8QI_FTYPE_V4HI_V4HI:
33577 case V8HI_FTYPE_V8HI_V8HI:
33578 case V8HI_FTYPE_V16QI_V16QI:
33579 case V8HI_FTYPE_V4SI_V4SI:
33580 case V8SF_FTYPE_V8SF_V8SF:
33581 case V8SF_FTYPE_V8SF_V8SI:
33582 case V8DI_FTYPE_V8DI_V8DI:
33583 case V8DF_FTYPE_V8DF_V8DF:
33584 case V8DF_FTYPE_V8DF_V8DI:
33585 case V4SI_FTYPE_V4SI_V4SI:
33586 case V4SI_FTYPE_V8HI_V8HI:
33587 case V4SI_FTYPE_V4SF_V4SF:
33588 case V4SI_FTYPE_V2DF_V2DF:
33589 case V4HI_FTYPE_V4HI_V4HI:
33590 case V4HI_FTYPE_V8QI_V8QI:
33591 case V4HI_FTYPE_V2SI_V2SI:
33592 case V4DF_FTYPE_V4DF_V4DF:
33593 case V4DF_FTYPE_V4DF_V4DI:
33594 case V4SF_FTYPE_V4SF_V4SF:
33595 case V4SF_FTYPE_V4SF_V4SI:
33596 case V4SF_FTYPE_V4SF_V2SI:
33597 case V4SF_FTYPE_V4SF_V2DF:
33598 case V4SF_FTYPE_V4SF_UINT:
33599 case V4SF_FTYPE_V4SF_UINT64:
33600 case V4SF_FTYPE_V4SF_DI:
33601 case V4SF_FTYPE_V4SF_SI:
33602 case V2DI_FTYPE_V2DI_V2DI:
33603 case V2DI_FTYPE_V16QI_V16QI:
33604 case V2DI_FTYPE_V4SI_V4SI:
33605 case V2UDI_FTYPE_V4USI_V4USI:
33606 case V2DI_FTYPE_V2DI_V16QI:
33607 case V2DI_FTYPE_V2DF_V2DF:
33608 case V2SI_FTYPE_V2SI_V2SI:
33609 case V2SI_FTYPE_V4HI_V4HI:
33610 case V2SI_FTYPE_V2SF_V2SF:
33611 case V2DF_FTYPE_V2DF_V2DF:
33612 case V2DF_FTYPE_V2DF_V4SF:
33613 case V2DF_FTYPE_V2DF_V2DI:
33614 case V2DF_FTYPE_V2DF_DI:
33615 case V2DF_FTYPE_V2DF_SI:
33616 case V2DF_FTYPE_V2DF_UINT:
33617 case V2DF_FTYPE_V2DF_UINT64:
33618 case V2SF_FTYPE_V2SF_V2SF:
33619 case V1DI_FTYPE_V1DI_V1DI:
33620 case V1DI_FTYPE_V8QI_V8QI:
33621 case V1DI_FTYPE_V2SI_V2SI:
33622 case V32QI_FTYPE_V16HI_V16HI:
33623 case V16HI_FTYPE_V8SI_V8SI:
33624 case V32QI_FTYPE_V32QI_V32QI:
33625 case V16HI_FTYPE_V32QI_V32QI:
33626 case V16HI_FTYPE_V16HI_V16HI:
33627 case V8SI_FTYPE_V4DF_V4DF:
33628 case V8SI_FTYPE_V8SI_V8SI:
33629 case V8SI_FTYPE_V16HI_V16HI:
33630 case V4DI_FTYPE_V4DI_V4DI:
33631 case V4DI_FTYPE_V8SI_V8SI:
33632 case V4UDI_FTYPE_V8USI_V8USI:
33633 case QI_FTYPE_V8DI_V8DI:
33634 case HI_FTYPE_V16SI_V16SI:
33635 if (comparison == UNKNOWN)
33636 return ix86_expand_binop_builtin (icode, exp, target);
33637 nargs = 2;
33638 break;
33639 case V4SF_FTYPE_V4SF_V4SF_SWAP:
33640 case V2DF_FTYPE_V2DF_V2DF_SWAP:
33641 gcc_assert (comparison != UNKNOWN);
33642 nargs = 2;
33643 swap = true;
33644 break;
33645 case V16HI_FTYPE_V16HI_V8HI_COUNT:
33646 case V16HI_FTYPE_V16HI_SI_COUNT:
33647 case V8SI_FTYPE_V8SI_V4SI_COUNT:
33648 case V8SI_FTYPE_V8SI_SI_COUNT:
33649 case V4DI_FTYPE_V4DI_V2DI_COUNT:
33650 case V4DI_FTYPE_V4DI_INT_COUNT:
33651 case V8HI_FTYPE_V8HI_V8HI_COUNT:
33652 case V8HI_FTYPE_V8HI_SI_COUNT:
33653 case V4SI_FTYPE_V4SI_V4SI_COUNT:
33654 case V4SI_FTYPE_V4SI_SI_COUNT:
33655 case V4HI_FTYPE_V4HI_V4HI_COUNT:
33656 case V4HI_FTYPE_V4HI_SI_COUNT:
33657 case V2DI_FTYPE_V2DI_V2DI_COUNT:
33658 case V2DI_FTYPE_V2DI_SI_COUNT:
33659 case V2SI_FTYPE_V2SI_V2SI_COUNT:
33660 case V2SI_FTYPE_V2SI_SI_COUNT:
33661 case V1DI_FTYPE_V1DI_V1DI_COUNT:
33662 case V1DI_FTYPE_V1DI_SI_COUNT:
33663 nargs = 2;
33664 last_arg_count = true;
33665 break;
33666 case UINT64_FTYPE_UINT64_UINT64:
33667 case UINT_FTYPE_UINT_UINT:
33668 case UINT_FTYPE_UINT_USHORT:
33669 case UINT_FTYPE_UINT_UCHAR:
33670 case UINT16_FTYPE_UINT16_INT:
33671 case UINT8_FTYPE_UINT8_INT:
33672 case HI_FTYPE_HI_HI:
33673 case V16SI_FTYPE_V8DF_V8DF:
33674 nargs = 2;
33675 break;
33676 case V2DI_FTYPE_V2DI_INT_CONVERT:
33677 nargs = 2;
33678 rmode = V1TImode;
33679 nargs_constant = 1;
33680 break;
33681 case V4DI_FTYPE_V4DI_INT_CONVERT:
33682 nargs = 2;
33683 rmode = V2TImode;
33684 nargs_constant = 1;
33685 break;
33686 case V8HI_FTYPE_V8HI_INT:
33687 case V8HI_FTYPE_V8SF_INT:
33688 case V16HI_FTYPE_V16SF_INT:
33689 case V8HI_FTYPE_V4SF_INT:
33690 case V8SF_FTYPE_V8SF_INT:
33691 case V4SF_FTYPE_V16SF_INT:
33692 case V16SF_FTYPE_V16SF_INT:
33693 case V4SI_FTYPE_V4SI_INT:
33694 case V4SI_FTYPE_V8SI_INT:
33695 case V4HI_FTYPE_V4HI_INT:
33696 case V4DF_FTYPE_V4DF_INT:
33697 case V4DF_FTYPE_V8DF_INT:
33698 case V4SF_FTYPE_V4SF_INT:
33699 case V4SF_FTYPE_V8SF_INT:
33700 case V2DI_FTYPE_V2DI_INT:
33701 case V2DF_FTYPE_V2DF_INT:
33702 case V2DF_FTYPE_V4DF_INT:
33703 case V16HI_FTYPE_V16HI_INT:
33704 case V8SI_FTYPE_V8SI_INT:
33705 case V16SI_FTYPE_V16SI_INT:
33706 case V4SI_FTYPE_V16SI_INT:
33707 case V4DI_FTYPE_V4DI_INT:
33708 case V2DI_FTYPE_V4DI_INT:
33709 case V4DI_FTYPE_V8DI_INT:
33710 case HI_FTYPE_HI_INT:
33711 nargs = 2;
33712 nargs_constant = 1;
33713 break;
33714 case V16QI_FTYPE_V16QI_V16QI_V16QI:
33715 case V8SF_FTYPE_V8SF_V8SF_V8SF:
33716 case V4DF_FTYPE_V4DF_V4DF_V4DF:
33717 case V4SF_FTYPE_V4SF_V4SF_V4SF:
33718 case V2DF_FTYPE_V2DF_V2DF_V2DF:
33719 case V32QI_FTYPE_V32QI_V32QI_V32QI:
33720 case HI_FTYPE_V16SI_V16SI_HI:
33721 case QI_FTYPE_V8DI_V8DI_QI:
33722 case V16HI_FTYPE_V16SI_V16HI_HI:
33723 case V16QI_FTYPE_V16SI_V16QI_HI:
33724 case V16QI_FTYPE_V8DI_V16QI_QI:
33725 case V16SF_FTYPE_V16SF_V16SF_HI:
33726 case V16SF_FTYPE_V16SF_V16SF_V16SF:
33727 case V16SF_FTYPE_V16SF_V16SI_V16SF:
33728 case V16SF_FTYPE_V16SI_V16SF_HI:
33729 case V16SF_FTYPE_V16SI_V16SF_V16SF:
33730 case V16SF_FTYPE_V4SF_V16SF_HI:
33731 case V16SI_FTYPE_SI_V16SI_HI:
33732 case V16SI_FTYPE_V16HI_V16SI_HI:
33733 case V16SI_FTYPE_V16QI_V16SI_HI:
33734 case V16SI_FTYPE_V16SF_V16SI_HI:
33735 case V16SI_FTYPE_V16SI_V16SI_HI:
33736 case V16SI_FTYPE_V16SI_V16SI_V16SI:
33737 case V16SI_FTYPE_V4SI_V16SI_HI:
33738 case V2DI_FTYPE_V2DI_V2DI_V2DI:
33739 case V4DI_FTYPE_V4DI_V4DI_V4DI:
33740 case V8DF_FTYPE_V2DF_V8DF_QI:
33741 case V8DF_FTYPE_V4DF_V8DF_QI:
33742 case V8DF_FTYPE_V8DF_V8DF_QI:
33743 case V8DF_FTYPE_V8DF_V8DF_V8DF:
33744 case V8DF_FTYPE_V8DF_V8DI_V8DF:
33745 case V8DF_FTYPE_V8DI_V8DF_V8DF:
33746 case V8DF_FTYPE_V8SF_V8DF_QI:
33747 case V8DF_FTYPE_V8SI_V8DF_QI:
33748 case V8DI_FTYPE_DI_V8DI_QI:
33749 case V8DI_FTYPE_V16QI_V8DI_QI:
33750 case V8DI_FTYPE_V2DI_V8DI_QI:
33751 case V8DI_FTYPE_V4DI_V8DI_QI:
33752 case V8DI_FTYPE_V8DI_V8DI_QI:
33753 case V8DI_FTYPE_V8DI_V8DI_V8DI:
33754 case V8DI_FTYPE_V8HI_V8DI_QI:
33755 case V8DI_FTYPE_V8SI_V8DI_QI:
33756 case V8HI_FTYPE_V8DI_V8HI_QI:
33757 case V8SF_FTYPE_V8DF_V8SF_QI:
33758 case V8SI_FTYPE_V8DF_V8SI_QI:
33759 case V8SI_FTYPE_V8DI_V8SI_QI:
33760 case V4SI_FTYPE_V4SI_V4SI_V4SI:
33761 nargs = 3;
33762 break;
33763 case V32QI_FTYPE_V32QI_V32QI_INT:
33764 case V16HI_FTYPE_V16HI_V16HI_INT:
33765 case V16QI_FTYPE_V16QI_V16QI_INT:
33766 case V4DI_FTYPE_V4DI_V4DI_INT:
33767 case V8HI_FTYPE_V8HI_V8HI_INT:
33768 case V8SI_FTYPE_V8SI_V8SI_INT:
33769 case V8SI_FTYPE_V8SI_V4SI_INT:
33770 case V8SF_FTYPE_V8SF_V8SF_INT:
33771 case V8SF_FTYPE_V8SF_V4SF_INT:
33772 case V4SI_FTYPE_V4SI_V4SI_INT:
33773 case V4DF_FTYPE_V4DF_V4DF_INT:
33774 case V16SF_FTYPE_V16SF_V16SF_INT:
33775 case V16SF_FTYPE_V16SF_V4SF_INT:
33776 case V16SI_FTYPE_V16SI_V4SI_INT:
33777 case V4DF_FTYPE_V4DF_V2DF_INT:
33778 case V4SF_FTYPE_V4SF_V4SF_INT:
33779 case V2DI_FTYPE_V2DI_V2DI_INT:
33780 case V4DI_FTYPE_V4DI_V2DI_INT:
33781 case V2DF_FTYPE_V2DF_V2DF_INT:
33782 case QI_FTYPE_V8DI_V8DI_INT:
33783 case QI_FTYPE_V8DF_V8DF_INT:
33784 case QI_FTYPE_V2DF_V2DF_INT:
33785 case QI_FTYPE_V4SF_V4SF_INT:
33786 case HI_FTYPE_V16SI_V16SI_INT:
33787 case HI_FTYPE_V16SF_V16SF_INT:
33788 nargs = 3;
33789 nargs_constant = 1;
33790 break;
33791 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
33792 nargs = 3;
33793 rmode = V4DImode;
33794 nargs_constant = 1;
33795 break;
33796 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
33797 nargs = 3;
33798 rmode = V2DImode;
33799 nargs_constant = 1;
33800 break;
33801 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
33802 nargs = 3;
33803 rmode = DImode;
33804 nargs_constant = 1;
33805 break;
33806 case V2DI_FTYPE_V2DI_UINT_UINT:
33807 nargs = 3;
33808 nargs_constant = 2;
33809 break;
33810 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI:
33811 case V16SF_FTYPE_V16SF_V16SI_V16SF_HI:
33812 case V16SF_FTYPE_V16SI_V16SF_V16SF_HI:
33813 case V16SI_FTYPE_V16SI_V16SI_V16SI_HI:
33814 case V16SI_FTYPE_V16SI_V4SI_V16SI_HI:
33815 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI:
33816 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI:
33817 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI:
33818 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI:
33819 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI:
33820 case V8DF_FTYPE_V8DF_V8DI_V8DF_QI:
33821 case V8DF_FTYPE_V8DI_V8DF_V8DF_QI:
33822 case V8DI_FTYPE_V16SI_V16SI_V8DI_QI:
33823 case V8DI_FTYPE_V8DI_SI_V8DI_V8DI:
33824 case V8DI_FTYPE_V8DI_V2DI_V8DI_QI:
33825 case V8DI_FTYPE_V8DI_V8DI_V8DI_QI:
33826 nargs = 4;
33827 break;
33828 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
33829 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
33830 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
33831 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
33832 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT:
33833 nargs = 4;
33834 nargs_constant = 1;
33835 break;
33836 case QI_FTYPE_V2DF_V2DF_INT_QI:
33837 case QI_FTYPE_V4SF_V4SF_INT_QI:
33838 nargs = 4;
33839 mask_pos = 1;
33840 nargs_constant = 1;
33841 break;
33842 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
33843 nargs = 4;
33844 nargs_constant = 2;
33845 break;
33846 case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED:
33847 case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG:
33848 nargs = 4;
33849 break;
33850 case QI_FTYPE_V8DI_V8DI_INT_QI:
33851 case HI_FTYPE_V16SI_V16SI_INT_HI:
33852 case QI_FTYPE_V8DF_V8DF_INT_QI:
33853 case HI_FTYPE_V16SF_V16SF_INT_HI:
33854 mask_pos = 1;
33855 nargs = 4;
33856 nargs_constant = 1;
33857 break;
33858 case V8DF_FTYPE_V8DF_INT_V8DF_QI:
33859 case V16SF_FTYPE_V16SF_INT_V16SF_HI:
33860 case V16HI_FTYPE_V16SF_INT_V16HI_HI:
33861 case V16SI_FTYPE_V16SI_INT_V16SI_HI:
33862 case V4SI_FTYPE_V16SI_INT_V4SI_QI:
33863 case V4DI_FTYPE_V8DI_INT_V4DI_QI:
33864 case V4DF_FTYPE_V8DF_INT_V4DF_QI:
33865 case V4SF_FTYPE_V16SF_INT_V4SF_QI:
33866 case V8DI_FTYPE_V8DI_INT_V8DI_QI:
33867 nargs = 4;
33868 mask_pos = 2;
33869 nargs_constant = 1;
33870 break;
33871 case V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI:
33872 case V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI:
33873 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI:
33874 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI:
33875 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI:
33876 case V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI:
33877 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI:
33878 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI:
33879 case V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI:
33880 case V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI:
33881 nargs = 5;
33882 mask_pos = 2;
33883 nargs_constant = 1;
33884 break;
33885 case V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI:
33886 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI:
33887 case V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI:
33888 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI:
33889 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI:
33890 nargs = 5;
33891 mask_pos = 1;
33892 nargs_constant = 1;
33893 break;
33894
33895 default:
33896 gcc_unreachable ();
33897 }
33898
33899 gcc_assert (nargs <= ARRAY_SIZE (args));
33900
33901 if (comparison != UNKNOWN)
33902 {
33903 gcc_assert (nargs == 2);
33904 return ix86_expand_sse_compare (d, exp, target, swap);
33905 }
33906
33907 if (rmode == VOIDmode || rmode == tmode)
33908 {
33909 if (optimize
33910 || target == 0
33911 || GET_MODE (target) != tmode
33912 || !insn_p->operand[0].predicate (target, tmode))
33913 target = gen_reg_rtx (tmode);
33914 real_target = target;
33915 }
33916 else
33917 {
33918 real_target = gen_reg_rtx (tmode);
33919 target = simplify_gen_subreg (rmode, real_target, tmode, 0);
33920 }
33921
33922 for (i = 0; i < nargs; i++)
33923 {
33924 tree arg = CALL_EXPR_ARG (exp, i);
33925 rtx op = expand_normal (arg);
33926 enum machine_mode mode = insn_p->operand[i + 1].mode;
33927 bool match = insn_p->operand[i + 1].predicate (op, mode);
33928
33929 if (last_arg_count && (i + 1) == nargs)
33930 {
33931 /* SIMD shift insns take either an 8-bit immediate or
33932 register as count. But builtin functions take int as
33933 count. If count doesn't match, we put it in register. */
33934 if (!match)
33935 {
33936 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
33937 if (!insn_p->operand[i + 1].predicate (op, mode))
33938 op = copy_to_reg (op);
33939 }
33940 }
33941 else if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
33942 (!mask_pos && (nargs - i) <= nargs_constant))
33943 {
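/* This operand is one of the immediates: without a trailing mask it is
   among the last NARGS_CONSTANT arguments; with MASK_POS nonzero it is
   followed by MASK_POS merge-source/mask operands.  */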
33944 if (!match)
33945 switch (icode)
33946 {
33947 case CODE_FOR_avx2_inserti128:
33948 case CODE_FOR_avx2_extracti128:
33949 error ("the last argument must be an 1-bit immediate");
33950 return const0_rtx;
33951
33952 case CODE_FOR_avx512f_cmpv8di3_mask:
33953 case CODE_FOR_avx512f_cmpv16si3_mask:
33954 case CODE_FOR_avx512f_ucmpv8di3_mask:
33955 case CODE_FOR_avx512f_ucmpv16si3_mask:
33956 error ("the last argument must be a 3-bit immediate");
33957 return const0_rtx;
33958
33959 case CODE_FOR_sse4_1_roundsd:
33960 case CODE_FOR_sse4_1_roundss:
33961
33962 case CODE_FOR_sse4_1_roundpd:
33963 case CODE_FOR_sse4_1_roundps:
33964 case CODE_FOR_avx_roundpd256:
33965 case CODE_FOR_avx_roundps256:
33966
33967 case CODE_FOR_sse4_1_roundpd_vec_pack_sfix:
33968 case CODE_FOR_sse4_1_roundps_sfix:
33969 case CODE_FOR_avx_roundpd_vec_pack_sfix256:
33970 case CODE_FOR_avx_roundps_sfix256:
33971
33972 case CODE_FOR_sse4_1_blendps:
33973 case CODE_FOR_avx_blendpd256:
33974 case CODE_FOR_avx_vpermilv4df:
33975 case CODE_FOR_avx512f_getmantv8df_mask:
33976 case CODE_FOR_avx512f_getmantv16sf_mask:
33977 error ("the last argument must be a 4-bit immediate");
33978 return const0_rtx;
33979
33980 case CODE_FOR_sha1rnds4:
33981 case CODE_FOR_sse4_1_blendpd:
33982 case CODE_FOR_avx_vpermilv2df:
33983 case CODE_FOR_xop_vpermil2v2df3:
33984 case CODE_FOR_xop_vpermil2v4sf3:
33985 case CODE_FOR_xop_vpermil2v4df3:
33986 case CODE_FOR_xop_vpermil2v8sf3:
33987 case CODE_FOR_avx512f_vinsertf32x4_mask:
33988 case CODE_FOR_avx512f_vinserti32x4_mask:
33989 case CODE_FOR_avx512f_vextractf32x4_mask:
33990 case CODE_FOR_avx512f_vextracti32x4_mask:
33991 error ("the last argument must be a 2-bit immediate");
33992 return const0_rtx;
33993
33994 case CODE_FOR_avx_vextractf128v4df:
33995 case CODE_FOR_avx_vextractf128v8sf:
33996 case CODE_FOR_avx_vextractf128v8si:
33997 case CODE_FOR_avx_vinsertf128v4df:
33998 case CODE_FOR_avx_vinsertf128v8sf:
33999 case CODE_FOR_avx_vinsertf128v8si:
34000 case CODE_FOR_avx512f_vinsertf64x4_mask:
34001 case CODE_FOR_avx512f_vinserti64x4_mask:
34002 case CODE_FOR_avx512f_vextractf64x4_mask:
34003 case CODE_FOR_avx512f_vextracti64x4_mask:
34004 error ("the last argument must be a 1-bit immediate");
34005 return const0_rtx;
34006
34007 case CODE_FOR_avx_vmcmpv2df3:
34008 case CODE_FOR_avx_vmcmpv4sf3:
34009 case CODE_FOR_avx_cmpv2df3:
34010 case CODE_FOR_avx_cmpv4sf3:
34011 case CODE_FOR_avx_cmpv4df3:
34012 case CODE_FOR_avx_cmpv8sf3:
34013 case CODE_FOR_avx512f_cmpv8df3_mask:
34014 case CODE_FOR_avx512f_cmpv16sf3_mask:
34015 case CODE_FOR_avx512f_vmcmpv2df3_mask:
34016 case CODE_FOR_avx512f_vmcmpv4sf3_mask:
34017 error ("the last argument must be a 5-bit immediate");
34018 return const0_rtx;
34019
34020 default:
34021 switch (nargs_constant)
34022 {
34023 case 2:
34024 if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
34025 (!mask_pos && (nargs - i) == nargs_constant))
34026 {
34027 error ("the next to last argument must be an 8-bit immediate");
34028 break;
34029 }
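/* FALLTHRU */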
34030 case 1:
34031 error ("the last argument must be an 8-bit immediate");
34032 break;
34033 default:
34034 gcc_unreachable ();
34035 }
34036 return const0_rtx;
34037 }
34038 }
34039 else
34040 {
34041 if (VECTOR_MODE_P (mode))
34042 op = safe_vector_operand (op, mode);
34043
34044 /* If we aren't optimizing, only allow one memory operand to
34045 be generated. */
34046 if (memory_operand (op, mode))
34047 num_memory++;
34048
34049 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
34050 {
34051 if (optimize || !match || num_memory > 1)
34052 op = copy_to_mode_reg (mode, op);
34053 }
34054 else
34055 {
34056 op = copy_to_reg (op);
34057 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
34058 }
34059 }
34060
34061 args[i].op = op;
34062 args[i].mode = mode;
34063 }
34064
34065 switch (nargs)
34066 {
34067 case 1:
34068 pat = GEN_FCN (icode) (real_target, args[0].op);
34069 break;
34070 case 2:
34071 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
34072 break;
34073 case 3:
34074 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34075 args[2].op);
34076 break;
34077 case 4:
34078 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34079 args[2].op, args[3].op);
34080 break;
34081 case 5:
34082 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34083 args[2].op, args[3].op, args[4].op);
break;
34084 case 6:
34085 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34086 args[2].op, args[3].op, args[4].op,
34087 args[5].op);
34088 break;
34089 default:
34090 gcc_unreachable ();
34091 }
34092
34093 if (! pat)
34094 return 0;
34095
34096 emit_insn (pat);
34097 return target;
34098 }
34099
34100 /* Transform pattern of following layout:
34101 (parallel [
34102 set (A B)
34103 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)
34104 ])
34105 into:
34106 (set (A B))
34107
34108 Or:
34109 (parallel [ A B
34110 ...
34111 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)
34112 ...
34113 ])
34114 into:
34115 (parallel [ A B ... ]) */
34116
34117 static rtx
34118 ix86_erase_embedded_rounding (rtx pat)
34119 {
34120 if (GET_CODE (pat) == INSN)
34121 pat = PATTERN (pat);
34122
34123 gcc_assert (GET_CODE (pat) == PARALLEL);
34124
34125 if (XVECLEN (pat, 0) == 2)
34126 {
34127 rtx p0 = XVECEXP (pat, 0, 0);
34128 rtx p1 = XVECEXP (pat, 0, 1);
34129
34130 gcc_assert (GET_CODE (p0) == SET
34131 && GET_CODE (p1) == UNSPEC
34132 && XINT (p1, 1) == UNSPEC_EMBEDDED_ROUNDING);
34133
34134 return p0;
34135 }
34136 else
34137 {
34138 rtx *res = XALLOCAVEC (rtx, XVECLEN (pat, 0));
34139 int i = 0;
34140 int j = 0;
34141
34142 for (; i < XVECLEN (pat, 0); ++i)
34143 {
34144 rtx elem = XVECEXP (pat, 0, i);
34145 if (GET_CODE (elem) != UNSPEC
34146 || XINT (elem, 1) != UNSPEC_EMBEDDED_ROUNDING)
34147 res [j++] = elem;
34148 }
34149
34150 /* No more than one occurrence was removed.  */
34151 gcc_assert (j >= XVECLEN (pat, 0) - 1);
34152
34153 return gen_rtx_PARALLEL (GET_MODE (pat), gen_rtvec_v (j, res));
34154 }
34155 }
34156
34157 /* Subroutine of ix86_expand_round_builtin to take care of comi insns
34158 with rounding. */
34159 static rtx
34160 ix86_expand_sse_comi_round (const struct builtin_description *d,
34161 tree exp, rtx target)
34162 {
34163 rtx pat, set_dst;
34164 tree arg0 = CALL_EXPR_ARG (exp, 0);
34165 tree arg1 = CALL_EXPR_ARG (exp, 1);
34166 tree arg2 = CALL_EXPR_ARG (exp, 2);
34167 tree arg3 = CALL_EXPR_ARG (exp, 3);
34168 rtx op0 = expand_normal (arg0);
34169 rtx op1 = expand_normal (arg1);
34170 rtx op2 = expand_normal (arg2);
34171 rtx op3 = expand_normal (arg3);
34172 enum insn_code icode = d->icode;
34173 const struct insn_data_d *insn_p = &insn_data[icode];
34174 enum machine_mode mode0 = insn_p->operand[0].mode;
34175 enum machine_mode mode1 = insn_p->operand[1].mode;
34176 enum rtx_code comparison = UNEQ;
34177 bool need_ucomi = false;
34178
34179 /* See avxintrin.h for values. */
34180 enum rtx_code comi_comparisons[32] =
34181 {
34182 UNEQ, GT, GE, UNORDERED, LTGT, UNLE, UNLT, ORDERED, UNEQ, UNLT,
34183 UNLE, LT, LTGT, GE, GT, LT, UNEQ, GT, GE, UNORDERED, LTGT, UNLE,
34184 UNLT, ORDERED, UNEQ, UNLT, UNLE, LT, LTGT, GE, GT, LT
34185 };
34186 bool need_ucomi_values[32] =
34187 {
34188 true, false, false, true, true, false, false, true,
34189 true, false, false, true, true, false, false, true,
34190 false, true, true, false, false, true, true, false,
34191 false, true, true, false, false, true, true, false
34192 };
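/* Predicates with a quiet (_OQ/_UQ) encoding must not signal on quiet
NaNs, so need_ucomi_values selects the non-signalling UCOMI form for
them; the signalling (_OS/_US) predicates keep the ordinary COMI. */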
34193
34194 if (!CONST_INT_P (op2))
34195 {
34196 error ("the third argument must be comparison constant");
34197 return const0_rtx;
34198 }
34199 if (INTVAL (op2) < 0 || INTVAL (op2) >= 32)
34200 {
34201 error ("incorect comparison mode");
34202 return const0_rtx;
34203 }
34204
34205 if (!insn_p->operand[2].predicate (op3, SImode))
34206 {
34207 error ("incorrect rounding operand");
34208 return const0_rtx;
34209 }
34210
34211 comparison = comi_comparisons[INTVAL (op2)];
34212 need_ucomi = need_ucomi_values[INTVAL (op2)];
34213
34214 if (VECTOR_MODE_P (mode0))
34215 op0 = safe_vector_operand (op0, mode0);
34216 if (VECTOR_MODE_P (mode1))
34217 op1 = safe_vector_operand (op1, mode1);
34218
34219 target = gen_reg_rtx (SImode);
34220 emit_move_insn (target, const0_rtx);
34221 target = gen_rtx_SUBREG (QImode, target, 0);
34222
34223 if ((optimize && !register_operand (op0, mode0))
34224 || !insn_p->operand[0].predicate (op0, mode0))
34225 op0 = copy_to_mode_reg (mode0, op0);
34226 if ((optimize && !register_operand (op1, mode1))
34227 || !insn_p->operand[1].predicate (op1, mode1))
34228 op1 = copy_to_mode_reg (mode1, op1);
34229
34230 if (need_ucomi)
34231 icode = icode == CODE_FOR_sse_comi_round
34232 ? CODE_FOR_sse_ucomi_round
34233 : CODE_FOR_sse2_ucomi_round;
34234
34235 pat = GEN_FCN (icode) (op0, op1, op3);
34236 if (! pat)
34237 return 0;
34238
34239 /* Rounding operand can be either NO_ROUND or ROUND_SAE at this point. */
34240 if (INTVAL (op3) == NO_ROUND)
34241 {
34242 pat = ix86_erase_embedded_rounding (pat);
34243 if (! pat)
34244 return 0;
34245
34246 set_dst = SET_DEST (pat);
34247 }
34248 else
34249 {
34250 gcc_assert (GET_CODE (XVECEXP (pat, 0, 0)) == SET);
34251 set_dst = SET_DEST (XVECEXP (pat, 0, 0));
34252 }
34253
34254 emit_insn (pat);
34255 emit_insn (gen_rtx_SET (VOIDmode,
34256 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
34257 gen_rtx_fmt_ee (comparison, QImode,
34258 set_dst,
34259 const0_rtx)));
34260
34261 return SUBREG_REG (target);
34262 }
34263
34264 static rtx
34265 ix86_expand_round_builtin (const struct builtin_description *d,
34266 tree exp, rtx target)
34267 {
34268 rtx pat;
34269 unsigned int i, nargs;
34270 struct
34271 {
34272 rtx op;
34273 enum machine_mode mode;
34274 } args[6];
34275 enum insn_code icode = d->icode;
34276 const struct insn_data_d *insn_p = &insn_data[icode];
34277 enum machine_mode tmode = insn_p->operand[0].mode;
34278 unsigned int nargs_constant = 0;
34279 unsigned int redundant_embed_rnd = 0;
34280
34281 switch ((enum ix86_builtin_func_type) d->flag)
34282 {
34283 case UINT64_FTYPE_V2DF_INT:
34284 case UINT64_FTYPE_V4SF_INT:
34285 case UINT_FTYPE_V2DF_INT:
34286 case UINT_FTYPE_V4SF_INT:
34287 case INT64_FTYPE_V2DF_INT:
34288 case INT64_FTYPE_V4SF_INT:
34289 case INT_FTYPE_V2DF_INT:
34290 case INT_FTYPE_V4SF_INT:
34291 nargs = 2;
34292 break;
34293 case V4SF_FTYPE_V4SF_UINT_INT:
34294 case V4SF_FTYPE_V4SF_UINT64_INT:
34295 case V2DF_FTYPE_V2DF_UINT64_INT:
34296 case V4SF_FTYPE_V4SF_INT_INT:
34297 case V4SF_FTYPE_V4SF_INT64_INT:
34298 case V2DF_FTYPE_V2DF_INT64_INT:
34299 case V4SF_FTYPE_V4SF_V4SF_INT:
34300 case V2DF_FTYPE_V2DF_V2DF_INT:
34301 case V4SF_FTYPE_V4SF_V2DF_INT:
34302 case V2DF_FTYPE_V2DF_V4SF_INT:
34303 nargs = 3;
34304 break;
34305 case V8SF_FTYPE_V8DF_V8SF_QI_INT:
34306 case V8DF_FTYPE_V8DF_V8DF_QI_INT:
34307 case V8SI_FTYPE_V8DF_V8SI_QI_INT:
34308 case V16SF_FTYPE_V16SF_V16SF_HI_INT:
34309 case V16SF_FTYPE_V16SI_V16SF_HI_INT:
34310 case V16SI_FTYPE_V16SF_V16SI_HI_INT:
34311 case V8DF_FTYPE_V8SF_V8DF_QI_INT:
34312 case V16SF_FTYPE_V16HI_V16SF_HI_INT:
34313 case V2DF_FTYPE_V2DF_V2DF_V2DF_INT:
34314 case V4SF_FTYPE_V4SF_V4SF_V4SF_INT:
34315 nargs = 4;
34316 break;
34317 case V4SF_FTYPE_V4SF_V4SF_INT_INT:
34318 case V2DF_FTYPE_V2DF_V2DF_INT_INT:
34319 nargs_constant = 2;
34320 nargs = 4;
34321 break;
34322 case INT_FTYPE_V4SF_V4SF_INT_INT:
34323 case INT_FTYPE_V2DF_V2DF_INT_INT:
34324 return ix86_expand_sse_comi_round (d, exp, target);
34325 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT:
34326 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT:
34327 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT:
34328 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI_INT:
34329 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT:
34330 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI_INT:
34331 nargs = 5;
34332 break;
34333 case V16SF_FTYPE_V16SF_INT_V16SF_HI_INT:
34334 case V8DF_FTYPE_V8DF_INT_V8DF_QI_INT:
34335 nargs_constant = 4;
34336 nargs = 5;
34337 break;
34338 case QI_FTYPE_V8DF_V8DF_INT_QI_INT:
34339 case QI_FTYPE_V2DF_V2DF_INT_QI_INT:
34340 case HI_FTYPE_V16SF_V16SF_INT_HI_INT:
34341 case QI_FTYPE_V4SF_V4SF_INT_QI_INT:
34342 nargs_constant = 3;
34343 nargs = 5;
34344 break;
34345 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI_INT:
34346 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI_INT:
34347 nargs = 6;
34348 nargs_constant = 4;
34349 break;
34350 case V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT:
34351 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT:
34352 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT:
34353 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT:
34354 nargs = 6;
34355 nargs_constant = 3;
34356 break;
34357 default:
34358 gcc_unreachable ();
34359 }
34360 gcc_assert (nargs <= ARRAY_SIZE (args));
34361
34362 if (optimize
34363 || target == 0
34364 || GET_MODE (target) != tmode
34365 || !insn_p->operand[0].predicate (target, tmode))
34366 target = gen_reg_rtx (tmode);
34367
34368 for (i = 0; i < nargs; i++)
34369 {
34370 tree arg = CALL_EXPR_ARG (exp, i);
34371 rtx op = expand_normal (arg);
34372 enum machine_mode mode = insn_p->operand[i + 1].mode;
34373 bool match = insn_p->operand[i + 1].predicate (op, mode);
34374
34375 if (i == nargs - nargs_constant)
34376 {
34377 if (!match)
34378 {
34379 switch (icode)
34380 {
34381 case CODE_FOR_avx512f_getmantv8df_mask_round:
34382 case CODE_FOR_avx512f_getmantv16sf_mask_round:
34383 case CODE_FOR_avx512f_getmantv2df_round:
34384 case CODE_FOR_avx512f_getmantv4sf_round:
34385 error ("the immediate argument must be a 4-bit immediate");
34386 return const0_rtx;
34387 case CODE_FOR_avx512f_cmpv8df3_mask_round:
34388 case CODE_FOR_avx512f_cmpv16sf3_mask_round:
34389 case CODE_FOR_avx512f_vmcmpv2df3_mask_round:
34390 case CODE_FOR_avx512f_vmcmpv4sf3_mask_round:
34391 error ("the immediate argument must be a 5-bit immediate");
34392 return const0_rtx;
34393 default:
34394 error ("the immediate argument must be an 8-bit immediate");
34395 return const0_rtx;
34396 }
34397 }
34398 }
34399 else if (i == nargs - 1)
34400 {
34401 if (!insn_p->operand[nargs].predicate (op, SImode))
34402 {
34403 error ("incorrect rounding operand");
34404 return const0_rtx;
34405 }
34406
34407 /* If there is no rounding, use the normal version of the pattern. */
34408 if (INTVAL (op) == NO_ROUND)
34409 redundant_embed_rnd = 1;
34410 }
34411 else
34412 {
34413 if (VECTOR_MODE_P (mode))
34414 op = safe_vector_operand (op, mode);
34415
34416 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
34417 {
34418 if (optimize || !match)
34419 op = copy_to_mode_reg (mode, op);
34420 }
34421 else
34422 {
34423 op = copy_to_reg (op);
34424 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
34425 }
34426 }
34427
34428 args[i].op = op;
34429 args[i].mode = mode;
34430 }
34431
34432 switch (nargs)
34433 {
34434 case 1:
34435 pat = GEN_FCN (icode) (target, args[0].op);
34436 break;
34437 case 2:
34438 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
34439 break;
34440 case 3:
34441 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34442 args[2].op);
34443 break;
34444 case 4:
34445 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34446 args[2].op, args[3].op);
34447 break;
34448 case 5:
34449 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34450 args[2].op, args[3].op, args[4].op);
break;
34451 case 6:
34452 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34453 args[2].op, args[3].op, args[4].op,
34454 args[5].op);
34455 break;
34456 default:
34457 gcc_unreachable ();
34458 }
34459
34460 if (!pat)
34461 return 0;
34462
34463 if (redundant_embed_rnd)
34464 pat = ix86_erase_embedded_rounding (pat);
34465
34466 emit_insn (pat);
34467 return target;
34468 }
34469
34470 /* Subroutine of ix86_expand_builtin to take care of special insns
34471 with variable number of operands. */
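/* These "special" builtins expand to loads, stores or other
side-effecting instructions rather than plain vector operations:
fences and similar void builtins, non-temporal and masked
loads/stores, and the LWP lwpval/lwpins builtins, among others. */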
34472
34473 static rtx
34474 ix86_expand_special_args_builtin (const struct builtin_description *d,
34475 tree exp, rtx target)
34476 {
34477 tree arg;
34478 rtx pat, op;
34479 unsigned int i, nargs, arg_adjust, memory;
34480 bool aligned_mem = false;
34481 struct
34482 {
34483 rtx op;
34484 enum machine_mode mode;
34485 } args[3];
34486 enum insn_code icode = d->icode;
34487 bool last_arg_constant = false;
34488 const struct insn_data_d *insn_p = &insn_data[icode];
34489 enum machine_mode tmode = insn_p->operand[0].mode;
34490 enum { load, store } klass;
34491
34492 switch ((enum ix86_builtin_func_type) d->flag)
34493 {
34494 case VOID_FTYPE_VOID:
34495 emit_insn (GEN_FCN (icode) (target));
34496 return 0;
34497 case VOID_FTYPE_UINT64:
34498 case VOID_FTYPE_UNSIGNED:
34499 nargs = 0;
34500 klass = store;
34501 memory = 0;
34502 break;
34503
34504 case INT_FTYPE_VOID:
34505 case UINT64_FTYPE_VOID:
34506 case UNSIGNED_FTYPE_VOID:
34507 nargs = 0;
34508 klass = load;
34509 memory = 0;
34510 break;
34511 case UINT64_FTYPE_PUNSIGNED:
34512 case V2DI_FTYPE_PV2DI:
34513 case V4DI_FTYPE_PV4DI:
34514 case V32QI_FTYPE_PCCHAR:
34515 case V16QI_FTYPE_PCCHAR:
34516 case V8SF_FTYPE_PCV4SF:
34517 case V8SF_FTYPE_PCFLOAT:
34518 case V4SF_FTYPE_PCFLOAT:
34519 case V4DF_FTYPE_PCV2DF:
34520 case V4DF_FTYPE_PCDOUBLE:
34521 case V2DF_FTYPE_PCDOUBLE:
34522 case VOID_FTYPE_PVOID:
34523 case V16SI_FTYPE_PV4SI:
34524 case V16SF_FTYPE_PV4SF:
34525 case V8DI_FTYPE_PV4DI:
34526 case V8DI_FTYPE_PV8DI:
34527 case V8DF_FTYPE_PV4DF:
34528 nargs = 1;
34529 klass = load;
34530 memory = 0;
34531 switch (icode)
34532 {
34533 case CODE_FOR_sse4_1_movntdqa:
34534 case CODE_FOR_avx2_movntdqa:
34535 case CODE_FOR_avx512f_movntdqa:
34536 aligned_mem = true;
34537 break;
34538 default:
34539 break;
34540 }
34541 break;
34542 case VOID_FTYPE_PV2SF_V4SF:
34543 case VOID_FTYPE_PV8DI_V8DI:
34544 case VOID_FTYPE_PV4DI_V4DI:
34545 case VOID_FTYPE_PV2DI_V2DI:
34546 case VOID_FTYPE_PCHAR_V32QI:
34547 case VOID_FTYPE_PCHAR_V16QI:
34548 case VOID_FTYPE_PFLOAT_V16SF:
34549 case VOID_FTYPE_PFLOAT_V8SF:
34550 case VOID_FTYPE_PFLOAT_V4SF:
34551 case VOID_FTYPE_PDOUBLE_V8DF:
34552 case VOID_FTYPE_PDOUBLE_V4DF:
34553 case VOID_FTYPE_PDOUBLE_V2DF:
34554 case VOID_FTYPE_PLONGLONG_LONGLONG:
34555 case VOID_FTYPE_PULONGLONG_ULONGLONG:
34556 case VOID_FTYPE_PINT_INT:
34557 nargs = 1;
34558 klass = store;
34559 /* Reserve memory operand for target. */
34560 memory = ARRAY_SIZE (args);
34561 switch (icode)
34562 {
34563 /* These builtins and instructions require the memory
34564 to be properly aligned. */
34565 case CODE_FOR_avx_movntv4di:
34566 case CODE_FOR_sse2_movntv2di:
34567 case CODE_FOR_avx_movntv8sf:
34568 case CODE_FOR_sse_movntv4sf:
34569 case CODE_FOR_sse4a_vmmovntv4sf:
34570 case CODE_FOR_avx_movntv4df:
34571 case CODE_FOR_sse2_movntv2df:
34572 case CODE_FOR_sse4a_vmmovntv2df:
34573 case CODE_FOR_sse2_movntidi:
34574 case CODE_FOR_sse_movntq:
34575 case CODE_FOR_sse2_movntisi:
34576 case CODE_FOR_avx512f_movntv16sf:
34577 case CODE_FOR_avx512f_movntv8df:
34578 case CODE_FOR_avx512f_movntv8di:
34579 aligned_mem = true;
34580 break;
34581 default:
34582 break;
34583 }
34584 break;
34585 case V4SF_FTYPE_V4SF_PCV2SF:
34586 case V2DF_FTYPE_V2DF_PCDOUBLE:
34587 nargs = 2;
34588 klass = load;
34589 memory = 1;
34590 break;
34591 case V8SF_FTYPE_PCV8SF_V8SI:
34592 case V4DF_FTYPE_PCV4DF_V4DI:
34593 case V4SF_FTYPE_PCV4SF_V4SI:
34594 case V2DF_FTYPE_PCV2DF_V2DI:
34595 case V8SI_FTYPE_PCV8SI_V8SI:
34596 case V4DI_FTYPE_PCV4DI_V4DI:
34597 case V4SI_FTYPE_PCV4SI_V4SI:
34598 case V2DI_FTYPE_PCV2DI_V2DI:
34599 nargs = 2;
34600 klass = load;
34601 memory = 0;
34602 break;
34603 case VOID_FTYPE_PV8DF_V8DF_QI:
34604 case VOID_FTYPE_PV16SF_V16SF_HI:
34605 case VOID_FTYPE_PV8DI_V8DI_QI:
34606 case VOID_FTYPE_PV16SI_V16SI_HI:
34607 switch (icode)
34608 {
34609 /* These builtins and instructions require the memory
34610 to be properly aligned. */
34611 case CODE_FOR_avx512f_storev16sf_mask:
34612 case CODE_FOR_avx512f_storev16si_mask:
34613 case CODE_FOR_avx512f_storev8df_mask:
34614 case CODE_FOR_avx512f_storev8di_mask:
34615 aligned_mem = true;
34616 break;
34617 default:
34618 break;
34619 }
34620 /* FALLTHRU */
34621 case VOID_FTYPE_PV8SF_V8SI_V8SF:
34622 case VOID_FTYPE_PV4DF_V4DI_V4DF:
34623 case VOID_FTYPE_PV4SF_V4SI_V4SF:
34624 case VOID_FTYPE_PV2DF_V2DI_V2DF:
34625 case VOID_FTYPE_PV8SI_V8SI_V8SI:
34626 case VOID_FTYPE_PV4DI_V4DI_V4DI:
34627 case VOID_FTYPE_PV4SI_V4SI_V4SI:
34628 case VOID_FTYPE_PV2DI_V2DI_V2DI:
34629 case VOID_FTYPE_PDOUBLE_V2DF_QI:
34630 case VOID_FTYPE_PFLOAT_V4SF_QI:
34631 case VOID_FTYPE_PV8SI_V8DI_QI:
34632 case VOID_FTYPE_PV8HI_V8DI_QI:
34633 case VOID_FTYPE_PV16HI_V16SI_HI:
34634 case VOID_FTYPE_PV16QI_V8DI_QI:
34635 case VOID_FTYPE_PV16QI_V16SI_HI:
34636 nargs = 2;
34637 klass = store;
34638 /* Reserve memory operand for target. */
34639 memory = ARRAY_SIZE (args);
34640 break;
34641 case V16SF_FTYPE_PCV16SF_V16SF_HI:
34642 case V16SI_FTYPE_PCV16SI_V16SI_HI:
34643 case V8DF_FTYPE_PCV8DF_V8DF_QI:
34644 case V8DI_FTYPE_PCV8DI_V8DI_QI:
34645 case V2DF_FTYPE_PCDOUBLE_V2DF_QI:
34646 case V4SF_FTYPE_PCFLOAT_V4SF_QI:
34647 nargs = 3;
34648 klass = load;
34649 memory = 0;
34650 switch (icode)
34651 {
34652 /* These builtins and instructions require the memory
34653 to be properly aligned. */
34654 case CODE_FOR_avx512f_loadv16sf_mask:
34655 case CODE_FOR_avx512f_loadv16si_mask:
34656 case CODE_FOR_avx512f_loadv8df_mask:
34657 case CODE_FOR_avx512f_loadv8di_mask:
34658 aligned_mem = true;
34659 break;
34660 default:
34661 break;
34662 }
34663 break;
34664 case VOID_FTYPE_UINT_UINT_UINT:
34665 case VOID_FTYPE_UINT64_UINT_UINT:
34666 case UCHAR_FTYPE_UINT_UINT_UINT:
34667 case UCHAR_FTYPE_UINT64_UINT_UINT:
34668 nargs = 3;
34669 klass = load;
34670 memory = ARRAY_SIZE (args);
34671 last_arg_constant = true;
34672 break;
34673 default:
34674 gcc_unreachable ();
34675 }
34676
34677 gcc_assert (nargs <= ARRAY_SIZE (args));
34678
34679 if (klass == store)
34680 {
34681 arg = CALL_EXPR_ARG (exp, 0);
34682 op = expand_normal (arg);
34683 gcc_assert (target == 0);
34684 if (memory)
34685 {
34686 op = ix86_zero_extend_to_Pmode (op);
34687 target = gen_rtx_MEM (tmode, op);
34688 /* target at this point has just BITS_PER_UNIT MEM_ALIGN
34689 on it. Try to improve it using get_pointer_alignment,
34690 and if the special builtin is one that requires strict
34691 mode alignment, also from its GET_MODE_ALIGNMENT.
34692 Failure to do so could lead to ix86_legitimate_combined_insn
34693 rejecting all changes to such insns. */
34694 unsigned int align = get_pointer_alignment (arg);
34695 if (aligned_mem && align < GET_MODE_ALIGNMENT (tmode))
34696 align = GET_MODE_ALIGNMENT (tmode);
34697 if (MEM_ALIGN (target) < align)
34698 set_mem_align (target, align);
34699 }
34700 else
34701 target = force_reg (tmode, op);
34702 arg_adjust = 1;
34703 }
34704 else
34705 {
34706 arg_adjust = 0;
34707 if (optimize
34708 || target == 0
34709 || !register_operand (target, tmode)
34710 || GET_MODE (target) != tmode)
34711 target = gen_reg_rtx (tmode);
34712 }
34713
34714 for (i = 0; i < nargs; i++)
34715 {
34716 enum machine_mode mode = insn_p->operand[i + 1].mode;
34717 bool match;
34718
34719 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
34720 op = expand_normal (arg);
34721 match = insn_p->operand[i + 1].predicate (op, mode);
34722
34723 if (last_arg_constant && (i + 1) == nargs)
34724 {
34725 if (!match)
34726 {
34727 if (icode == CODE_FOR_lwp_lwpvalsi3
34728 || icode == CODE_FOR_lwp_lwpinssi3
34729 || icode == CODE_FOR_lwp_lwpvaldi3
34730 || icode == CODE_FOR_lwp_lwpinsdi3)
34731 error ("the last argument must be a 32-bit immediate");
34732 else
34733 error ("the last argument must be an 8-bit immediate");
34734 return const0_rtx;
34735 }
34736 }
34737 else
34738 {
34739 if (i == memory)
34740 {
34741 /* This must be the memory operand. */
34742 op = ix86_zero_extend_to_Pmode (op);
34743 op = gen_rtx_MEM (mode, op);
34744 /* op at this point has just BITS_PER_UNIT MEM_ALIGN
34745 on it. Try to improve it using get_pointer_alignment,
34746 and if the special builtin is one that requires strict
34747 mode alignment, also from its GET_MODE_ALIGNMENT.
34748 Failure to do so could lead to ix86_legitimate_combined_insn
34749 rejecting all changes to such insns. */
34750 unsigned int align = get_pointer_alignment (arg);
34751 if (aligned_mem && align < GET_MODE_ALIGNMENT (mode))
34752 align = GET_MODE_ALIGNMENT (mode);
34753 if (MEM_ALIGN (op) < align)
34754 set_mem_align (op, align);
34755 }
34756 else
34757 {
34758 /* This must be a register. */
34759 if (VECTOR_MODE_P (mode))
34760 op = safe_vector_operand (op, mode);
34761
34762 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
34763 op = copy_to_mode_reg (mode, op);
34764 else
34765 {
34766 op = copy_to_reg (op);
34767 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
34768 }
34769 }
34770 }
34771
34772 args[i].op = op;
34773 args[i].mode = mode;
34774 }
34775
34776 switch (nargs)
34777 {
34778 case 0:
34779 pat = GEN_FCN (icode) (target);
34780 break;
34781 case 1:
34782 pat = GEN_FCN (icode) (target, args[0].op);
34783 break;
34784 case 2:
34785 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
34786 break;
34787 case 3:
34788 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
34789 break;
34790 default:
34791 gcc_unreachable ();
34792 }
34793
34794 if (! pat)
34795 return 0;
34796 emit_insn (pat);
34797 return klass == store ? 0 : target;
34798 }
34799
34800 /* Return the integer constant in ARG. Constrain it to be in the range
34801 of the subparts of VEC_TYPE; issue an error if not. */
34802
34803 static int
34804 get_element_number (tree vec_type, tree arg)
34805 {
34806 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
34807
34808 if (!tree_fits_uhwi_p (arg)
34809 || (elt = tree_to_uhwi (arg), elt > max))
34810 {
34811 error ("selector must be an integer constant in the range 0..%wi", max);
34812 return 0;
34813 }
34814
34815 return elt;
34816 }
34817
34818 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
34819 ix86_expand_vector_init. We DO have language-level syntax for this, in
34820 the form of (type){ init-list }. Except that since we can't place emms
34821 instructions from inside the compiler, we can't allow the use of MMX
34822 registers unless the user explicitly asks for it. So we do *not* define
34823 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
34824 we have builtins invoked by mmintrin.h that give us license to emit
34825 these sorts of instructions. */
34826
34827 static rtx
34828 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
34829 {
34830 enum machine_mode tmode = TYPE_MODE (type);
34831 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
34832 int i, n_elt = GET_MODE_NUNITS (tmode);
34833 rtvec v = rtvec_alloc (n_elt);
34834
34835 gcc_assert (VECTOR_MODE_P (tmode));
34836 gcc_assert (call_expr_nargs (exp) == n_elt);
34837
34838 for (i = 0; i < n_elt; ++i)
34839 {
34840 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
34841 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
34842 }
34843
34844 if (!target || !register_operand (target, tmode))
34845 target = gen_reg_rtx (tmode);
34846
34847 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
34848 return target;
34849 }
34850
34851 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
34852 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
34853 had a language-level syntax for referencing vector elements. */
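/* For example, a call such as __builtin_ia32_vec_ext_v4sf (v, 2)
extracts element 2; the element number must be a constant and is
validated by get_element_number above. */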
34854
34855 static rtx
34856 ix86_expand_vec_ext_builtin (tree exp, rtx target)
34857 {
34858 enum machine_mode tmode, mode0;
34859 tree arg0, arg1;
34860 int elt;
34861 rtx op0;
34862
34863 arg0 = CALL_EXPR_ARG (exp, 0);
34864 arg1 = CALL_EXPR_ARG (exp, 1);
34865
34866 op0 = expand_normal (arg0);
34867 elt = get_element_number (TREE_TYPE (arg0), arg1);
34868
34869 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
34870 mode0 = TYPE_MODE (TREE_TYPE (arg0));
34871 gcc_assert (VECTOR_MODE_P (mode0));
34872
34873 op0 = force_reg (mode0, op0);
34874
34875 if (optimize || !target || !register_operand (target, tmode))
34876 target = gen_reg_rtx (tmode);
34877
34878 ix86_expand_vector_extract (true, target, op0, elt);
34879
34880 return target;
34881 }
34882
34883 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
34884 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
34885 a language-level syntax for referencing vector elements. */
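/* For example, _mm_insert_epi16 is implemented via a call like
__builtin_ia32_vec_set_v8hi (v, x, n); the element number N must be a
constant, validated by get_element_number below. */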
34886
34887 static rtx
34888 ix86_expand_vec_set_builtin (tree exp)
34889 {
34890 enum machine_mode tmode, mode1;
34891 tree arg0, arg1, arg2;
34892 int elt;
34893 rtx op0, op1, target;
34894
34895 arg0 = CALL_EXPR_ARG (exp, 0);
34896 arg1 = CALL_EXPR_ARG (exp, 1);
34897 arg2 = CALL_EXPR_ARG (exp, 2);
34898
34899 tmode = TYPE_MODE (TREE_TYPE (arg0));
34900 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
34901 gcc_assert (VECTOR_MODE_P (tmode));
34902
34903 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
34904 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
34905 elt = get_element_number (TREE_TYPE (arg0), arg2);
34906
34907 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
34908 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
34909
34910 op0 = force_reg (tmode, op0);
34911 op1 = force_reg (mode1, op1);
34912
34913 /* OP0 is the source of these builtin functions and shouldn't be
34914 modified. Create a copy, use it and return it as target. */
34915 target = gen_reg_rtx (tmode);
34916 emit_move_insn (target, op0);
34917 ix86_expand_vector_set (true, target, op1, elt);
34918
34919 return target;
34920 }
34921
34922 /* Expand an expression EXP that calls a built-in function,
34923 with result going to TARGET if that's convenient
34924 (and in mode MODE if that's convenient).
34925 SUBTARGET may be used as the target for computing one of EXP's operands.
34926 IGNORE is nonzero if the value is to be ignored. */
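/* Builtins needing special expansion are handled by the large switch
below; everything else is dispatched through the bdesc_special_args,
bdesc_args, bdesc_comi, bdesc_round_args, bdesc_pcmpestr,
bdesc_pcmpistr and bdesc_multi_arg descriptor tables. */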
34927
34928 static rtx
34929 ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
34930 enum machine_mode mode, int ignore)
34931 {
34932 const struct builtin_description *d;
34933 size_t i;
34934 enum insn_code icode;
34935 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
34936 tree arg0, arg1, arg2, arg3, arg4;
34937 rtx op0, op1, op2, op3, op4, pat, insn;
34938 enum machine_mode mode0, mode1, mode2, mode3, mode4;
34939 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
34940
34941 /* For CPU builtins that can be folded, fold first and expand the fold. */
34942 switch (fcode)
34943 {
34944 case IX86_BUILTIN_CPU_INIT:
34945 {
34946 /* Make it call __cpu_indicator_init in libgcc. */
34947 tree call_expr, fndecl, type;
34948 type = build_function_type_list (integer_type_node, NULL_TREE);
34949 fndecl = build_fn_decl ("__cpu_indicator_init", type);
34950 call_expr = build_call_expr (fndecl, 0);
34951 return expand_expr (call_expr, target, mode, EXPAND_NORMAL);
34952 }
34953 case IX86_BUILTIN_CPU_IS:
34954 case IX86_BUILTIN_CPU_SUPPORTS:
34955 {
34956 tree arg0 = CALL_EXPR_ARG (exp, 0);
34957 tree fold_expr = fold_builtin_cpu (fndecl, &arg0);
34958 gcc_assert (fold_expr != NULL_TREE);
34959 return expand_expr (fold_expr, target, mode, EXPAND_NORMAL);
34960 }
34961 }
34962
34963 /* Determine whether the builtin function is available under the current ISA.
34964 Originally the builtin was not created if it wasn't applicable to the
34965 current ISA based on the command line switches. With function specific
34966 options, we need to check in the context of the function making the call
34967 whether it is supported. */
34968 if (ix86_builtins_isa[fcode].isa
34969 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
34970 {
34971 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
34972 NULL, (enum fpmath_unit) 0, false);
34973
34974 if (!opts)
34975 error ("%qE needs unknown isa option", fndecl);
34976 else
34977 {
34978 gcc_assert (opts != NULL);
34979 error ("%qE needs isa option %s", fndecl, opts);
34980 free (opts);
34981 }
34982 return const0_rtx;
34983 }
34984
34985 switch (fcode)
34986 {
34987 case IX86_BUILTIN_MASKMOVQ:
34988 case IX86_BUILTIN_MASKMOVDQU:
34989 icode = (fcode == IX86_BUILTIN_MASKMOVQ
34990 ? CODE_FOR_mmx_maskmovq
34991 : CODE_FOR_sse2_maskmovdqu);
34992 /* Note the arg order is different from the operand order. */
34993 arg1 = CALL_EXPR_ARG (exp, 0);
34994 arg2 = CALL_EXPR_ARG (exp, 1);
34995 arg0 = CALL_EXPR_ARG (exp, 2);
34996 op0 = expand_normal (arg0);
34997 op1 = expand_normal (arg1);
34998 op2 = expand_normal (arg2);
34999 mode0 = insn_data[icode].operand[0].mode;
35000 mode1 = insn_data[icode].operand[1].mode;
35001 mode2 = insn_data[icode].operand[2].mode;
35002
35003 op0 = ix86_zero_extend_to_Pmode (op0);
35004 op0 = gen_rtx_MEM (mode1, op0);
35005
35006 if (!insn_data[icode].operand[0].predicate (op0, mode0))
35007 op0 = copy_to_mode_reg (mode0, op0);
35008 if (!insn_data[icode].operand[1].predicate (op1, mode1))
35009 op1 = copy_to_mode_reg (mode1, op1);
35010 if (!insn_data[icode].operand[2].predicate (op2, mode2))
35011 op2 = copy_to_mode_reg (mode2, op2);
35012 pat = GEN_FCN (icode) (op0, op1, op2);
35013 if (! pat)
35014 return 0;
35015 emit_insn (pat);
35016 return 0;
35017
35018 case IX86_BUILTIN_LDMXCSR:
35019 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
35020 target = assign_386_stack_local (SImode, SLOT_TEMP);
35021 emit_move_insn (target, op0);
35022 emit_insn (gen_sse_ldmxcsr (target));
35023 return 0;
35024
35025 case IX86_BUILTIN_STMXCSR:
35026 target = assign_386_stack_local (SImode, SLOT_TEMP);
35027 emit_insn (gen_sse_stmxcsr (target));
35028 return copy_to_mode_reg (SImode, target);
35029
35030 case IX86_BUILTIN_CLFLUSH:
35031 arg0 = CALL_EXPR_ARG (exp, 0);
35032 op0 = expand_normal (arg0);
35033 icode = CODE_FOR_sse2_clflush;
35034 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
35035 op0 = ix86_zero_extend_to_Pmode (op0);
35036
35037 emit_insn (gen_sse2_clflush (op0));
35038 return 0;
35039
35040 case IX86_BUILTIN_MONITOR:
35041 arg0 = CALL_EXPR_ARG (exp, 0);
35042 arg1 = CALL_EXPR_ARG (exp, 1);
35043 arg2 = CALL_EXPR_ARG (exp, 2);
35044 op0 = expand_normal (arg0);
35045 op1 = expand_normal (arg1);
35046 op2 = expand_normal (arg2);
35047 if (!REG_P (op0))
35048 op0 = ix86_zero_extend_to_Pmode (op0);
35049 if (!REG_P (op1))
35050 op1 = copy_to_mode_reg (SImode, op1);
35051 if (!REG_P (op2))
35052 op2 = copy_to_mode_reg (SImode, op2);
35053 emit_insn (ix86_gen_monitor (op0, op1, op2));
35054 return 0;
35055
35056 case IX86_BUILTIN_MWAIT:
35057 arg0 = CALL_EXPR_ARG (exp, 0);
35058 arg1 = CALL_EXPR_ARG (exp, 1);
35059 op0 = expand_normal (arg0);
35060 op1 = expand_normal (arg1);
35061 if (!REG_P (op0))
35062 op0 = copy_to_mode_reg (SImode, op0);
35063 if (!REG_P (op1))
35064 op1 = copy_to_mode_reg (SImode, op1);
35065 emit_insn (gen_sse3_mwait (op0, op1));
35066 return 0;
35067
35068 case IX86_BUILTIN_VEC_INIT_V2SI:
35069 case IX86_BUILTIN_VEC_INIT_V4HI:
35070 case IX86_BUILTIN_VEC_INIT_V8QI:
35071 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
35072
35073 case IX86_BUILTIN_VEC_EXT_V2DF:
35074 case IX86_BUILTIN_VEC_EXT_V2DI:
35075 case IX86_BUILTIN_VEC_EXT_V4SF:
35076 case IX86_BUILTIN_VEC_EXT_V4SI:
35077 case IX86_BUILTIN_VEC_EXT_V8HI:
35078 case IX86_BUILTIN_VEC_EXT_V2SI:
35079 case IX86_BUILTIN_VEC_EXT_V4HI:
35080 case IX86_BUILTIN_VEC_EXT_V16QI:
35081 return ix86_expand_vec_ext_builtin (exp, target);
35082
35083 case IX86_BUILTIN_VEC_SET_V2DI:
35084 case IX86_BUILTIN_VEC_SET_V4SF:
35085 case IX86_BUILTIN_VEC_SET_V4SI:
35086 case IX86_BUILTIN_VEC_SET_V8HI:
35087 case IX86_BUILTIN_VEC_SET_V4HI:
35088 case IX86_BUILTIN_VEC_SET_V16QI:
35089 return ix86_expand_vec_set_builtin (exp);
35090
35091 case IX86_BUILTIN_INFQ:
35092 case IX86_BUILTIN_HUGE_VALQ:
35093 {
35094 REAL_VALUE_TYPE inf;
35095 rtx tmp;
35096
35097 real_inf (&inf);
35098 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
35099
35100 tmp = validize_mem (force_const_mem (mode, tmp));
35101
35102 if (target == 0)
35103 target = gen_reg_rtx (mode);
35104
35105 emit_move_insn (target, tmp);
35106 return target;
35107 }
35108
35109 case IX86_BUILTIN_RDPMC:
35110 case IX86_BUILTIN_RDTSC:
35111 case IX86_BUILTIN_RDTSCP:
35112
35113 op0 = gen_reg_rtx (DImode);
35114 op1 = gen_reg_rtx (DImode);
35115
35116 if (fcode == IX86_BUILTIN_RDPMC)
35117 {
35118 arg0 = CALL_EXPR_ARG (exp, 0);
35119 op2 = expand_normal (arg0);
35120 if (!register_operand (op2, SImode))
35121 op2 = copy_to_mode_reg (SImode, op2);
35122
35123 insn = (TARGET_64BIT
35124 ? gen_rdpmc_rex64 (op0, op1, op2)
35125 : gen_rdpmc (op0, op2));
35126 emit_insn (insn);
35127 }
35128 else if (fcode == IX86_BUILTIN_RDTSC)
35129 {
35130 insn = (TARGET_64BIT
35131 ? gen_rdtsc_rex64 (op0, op1)
35132 : gen_rdtsc (op0));
35133 emit_insn (insn);
35134 }
35135 else
35136 {
35137 op2 = gen_reg_rtx (SImode);
35138
35139 insn = (TARGET_64BIT
35140 ? gen_rdtscp_rex64 (op0, op1, op2)
35141 : gen_rdtscp (op0, op2));
35142 emit_insn (insn);
35143
35144 arg0 = CALL_EXPR_ARG (exp, 0);
35145 op4 = expand_normal (arg0);
35146 if (!address_operand (op4, VOIDmode))
35147 {
35148 op4 = convert_memory_address (Pmode, op4);
35149 op4 = copy_addr_to_reg (op4);
35150 }
35151 emit_move_insn (gen_rtx_MEM (SImode, op4), op2);
35152 }
35153
35154 if (target == 0)
35155 {
35156 /* mode is VOIDmode if __builtin_rd* has been called
35157 without lhs. */
35158 if (mode == VOIDmode)
35159 return target;
35160 target = gen_reg_rtx (mode);
35161 }
35162
35163 if (TARGET_64BIT)
35164 {
35165 op1 = expand_simple_binop (DImode, ASHIFT, op1, GEN_INT (32),
35166 op1, 1, OPTAB_DIRECT);
35167 op0 = expand_simple_binop (DImode, IOR, op0, op1,
35168 op0, 1, OPTAB_DIRECT);
35169 }
35170
35171 emit_move_insn (target, op0);
35172 return target;
35173
35174 case IX86_BUILTIN_FXSAVE:
35175 case IX86_BUILTIN_FXRSTOR:
35176 case IX86_BUILTIN_FXSAVE64:
35177 case IX86_BUILTIN_FXRSTOR64:
35178 case IX86_BUILTIN_FNSTENV:
35179 case IX86_BUILTIN_FLDENV:
35180 case IX86_BUILTIN_FNSTSW:
35181 mode0 = BLKmode;
35182 switch (fcode)
35183 {
35184 case IX86_BUILTIN_FXSAVE:
35185 icode = CODE_FOR_fxsave;
35186 break;
35187 case IX86_BUILTIN_FXRSTOR:
35188 icode = CODE_FOR_fxrstor;
35189 break;
35190 case IX86_BUILTIN_FXSAVE64:
35191 icode = CODE_FOR_fxsave64;
35192 break;
35193 case IX86_BUILTIN_FXRSTOR64:
35194 icode = CODE_FOR_fxrstor64;
35195 break;
35196 case IX86_BUILTIN_FNSTENV:
35197 icode = CODE_FOR_fnstenv;
35198 break;
35199 case IX86_BUILTIN_FLDENV:
35200 icode = CODE_FOR_fldenv;
35201 break;
35202 case IX86_BUILTIN_FNSTSW:
35203 icode = CODE_FOR_fnstsw;
35204 mode0 = HImode;
35205 break;
35206 default:
35207 gcc_unreachable ();
35208 }
35209
35210 arg0 = CALL_EXPR_ARG (exp, 0);
35211 op0 = expand_normal (arg0);
35212
35213 if (!address_operand (op0, VOIDmode))
35214 {
35215 op0 = convert_memory_address (Pmode, op0);
35216 op0 = copy_addr_to_reg (op0);
35217 }
35218 op0 = gen_rtx_MEM (mode0, op0);
35219
35220 pat = GEN_FCN (icode) (op0);
35221 if (pat)
35222 emit_insn (pat);
35223 return 0;
35224
35225 case IX86_BUILTIN_XSAVE:
35226 case IX86_BUILTIN_XRSTOR:
35227 case IX86_BUILTIN_XSAVE64:
35228 case IX86_BUILTIN_XRSTOR64:
35229 case IX86_BUILTIN_XSAVEOPT:
35230 case IX86_BUILTIN_XSAVEOPT64:
35231 arg0 = CALL_EXPR_ARG (exp, 0);
35232 arg1 = CALL_EXPR_ARG (exp, 1);
35233 op0 = expand_normal (arg0);
35234 op1 = expand_normal (arg1);
35235
35236 if (!address_operand (op0, VOIDmode))
35237 {
35238 op0 = convert_memory_address (Pmode, op0);
35239 op0 = copy_addr_to_reg (op0);
35240 }
35241 op0 = gen_rtx_MEM (BLKmode, op0);
35242
35243 op1 = force_reg (DImode, op1);
35244
35245 if (TARGET_64BIT)
35246 {
35247 op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32),
35248 NULL, 1, OPTAB_DIRECT);
35249 switch (fcode)
35250 {
35251 case IX86_BUILTIN_XSAVE:
35252 icode = CODE_FOR_xsave_rex64;
35253 break;
35254 case IX86_BUILTIN_XRSTOR:
35255 icode = CODE_FOR_xrstor_rex64;
35256 break;
35257 case IX86_BUILTIN_XSAVE64:
35258 icode = CODE_FOR_xsave64;
35259 break;
35260 case IX86_BUILTIN_XRSTOR64:
35261 icode = CODE_FOR_xrstor64;
35262 break;
35263 case IX86_BUILTIN_XSAVEOPT:
35264 icode = CODE_FOR_xsaveopt_rex64;
35265 break;
35266 case IX86_BUILTIN_XSAVEOPT64:
35267 icode = CODE_FOR_xsaveopt64;
35268 break;
35269 default:
35270 gcc_unreachable ();
35271 }
35272
35273 op2 = gen_lowpart (SImode, op2);
35274 op1 = gen_lowpart (SImode, op1);
35275 pat = GEN_FCN (icode) (op0, op1, op2);
35276 }
35277 else
35278 {
35279 switch (fcode)
35280 {
35281 case IX86_BUILTIN_XSAVE:
35282 icode = CODE_FOR_xsave;
35283 break;
35284 case IX86_BUILTIN_XRSTOR:
35285 icode = CODE_FOR_xrstor;
35286 break;
35287 case IX86_BUILTIN_XSAVEOPT:
35288 icode = CODE_FOR_xsaveopt;
35289 break;
35290 default:
35291 gcc_unreachable ();
35292 }
35293 pat = GEN_FCN (icode) (op0, op1);
35294 }
35295
35296 if (pat)
35297 emit_insn (pat);
35298 return 0;
35299
35300 case IX86_BUILTIN_LLWPCB:
35301 arg0 = CALL_EXPR_ARG (exp, 0);
35302 op0 = expand_normal (arg0);
35303 icode = CODE_FOR_lwp_llwpcb;
35304 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
35305 op0 = ix86_zero_extend_to_Pmode (op0);
35306 emit_insn (gen_lwp_llwpcb (op0));
35307 return 0;
35308
35309 case IX86_BUILTIN_SLWPCB:
35310 icode = CODE_FOR_lwp_slwpcb;
35311 if (!target
35312 || !insn_data[icode].operand[0].predicate (target, Pmode))
35313 target = gen_reg_rtx (Pmode);
35314 emit_insn (gen_lwp_slwpcb (target));
35315 return target;
35316
35317 case IX86_BUILTIN_BEXTRI32:
35318 case IX86_BUILTIN_BEXTRI64:
35319 arg0 = CALL_EXPR_ARG (exp, 0);
35320 arg1 = CALL_EXPR_ARG (exp, 1);
35321 op0 = expand_normal (arg0);
35322 op1 = expand_normal (arg1);
35323 icode = (fcode == IX86_BUILTIN_BEXTRI32
35324 ? CODE_FOR_tbm_bextri_si
35325 : CODE_FOR_tbm_bextri_di);
35326 if (!CONST_INT_P (op1))
35327 {
35328 error ("last argument must be an immediate");
35329 return const0_rtx;
35330 }
35331 else
35332 {
35333 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
35334 unsigned char lsb_index = INTVAL (op1) & 0xFF;
35335 op1 = GEN_INT (length);
35336 op2 = GEN_INT (lsb_index);
35337 pat = GEN_FCN (icode) (target, op0, op1, op2);
35338 if (pat)
35339 emit_insn (pat);
35340 return target;
35341 }
35342
35343 case IX86_BUILTIN_RDRAND16_STEP:
35344 icode = CODE_FOR_rdrandhi_1;
35345 mode0 = HImode;
35346 goto rdrand_step;
35347
35348 case IX86_BUILTIN_RDRAND32_STEP:
35349 icode = CODE_FOR_rdrandsi_1;
35350 mode0 = SImode;
35351 goto rdrand_step;
35352
35353 case IX86_BUILTIN_RDRAND64_STEP:
35354 icode = CODE_FOR_rdranddi_1;
35355 mode0 = DImode;
35356
35357 rdrand_step:
35358 op0 = gen_reg_rtx (mode0);
35359 emit_insn (GEN_FCN (icode) (op0));
35360
35361 arg0 = CALL_EXPR_ARG (exp, 0);
35362 op1 = expand_normal (arg0);
35363 if (!address_operand (op1, VOIDmode))
35364 {
35365 op1 = convert_memory_address (Pmode, op1);
35366 op1 = copy_addr_to_reg (op1);
35367 }
35368 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
35369
35370 op1 = gen_reg_rtx (SImode);
35371 emit_move_insn (op1, CONST1_RTX (SImode));
35372
35373 /* Emit SImode conditional move. On failure RDRAND clears CF and zeroes its destination, so selecting OP2 when CF is clear and 1 otherwise yields the success flag. */
35374 if (mode0 == HImode)
35375 {
35376 op2 = gen_reg_rtx (SImode);
35377 emit_insn (gen_zero_extendhisi2 (op2, op0));
35378 }
35379 else if (mode0 == SImode)
35380 op2 = op0;
35381 else
35382 op2 = gen_rtx_SUBREG (SImode, op0, 0);
35383
35384 if (target == 0)
35385 target = gen_reg_rtx (SImode);
35386
35387 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
35388 const0_rtx);
35389 emit_insn (gen_rtx_SET (VOIDmode, target,
35390 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
35391 return target;
35392
35393 case IX86_BUILTIN_RDSEED16_STEP:
35394 icode = CODE_FOR_rdseedhi_1;
35395 mode0 = HImode;
35396 goto rdseed_step;
35397
35398 case IX86_BUILTIN_RDSEED32_STEP:
35399 icode = CODE_FOR_rdseedsi_1;
35400 mode0 = SImode;
35401 goto rdseed_step;
35402
35403 case IX86_BUILTIN_RDSEED64_STEP:
35404 icode = CODE_FOR_rdseeddi_1;
35405 mode0 = DImode;
35406
35407 rdseed_step:
35408 op0 = gen_reg_rtx (mode0);
35409 emit_insn (GEN_FCN (icode) (op0));
35410
35411 arg0 = CALL_EXPR_ARG (exp, 0);
35412 op1 = expand_normal (arg0);
35413 if (!address_operand (op1, VOIDmode))
35414 {
35415 op1 = convert_memory_address (Pmode, op1);
35416 op1 = copy_addr_to_reg (op1);
35417 }
35418 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
35419
35420 op2 = gen_reg_rtx (QImode);
35421
35422 pat = gen_rtx_LTU (QImode, gen_rtx_REG (CCCmode, FLAGS_REG),
35423 const0_rtx);
35424 emit_insn (gen_rtx_SET (VOIDmode, op2, pat));
35425
35426 if (target == 0)
35427 target = gen_reg_rtx (SImode);
35428
35429 emit_insn (gen_zero_extendqisi2 (target, op2));
35430 return target;
35431
35432 case IX86_BUILTIN_ADDCARRYX32:
35433 icode = TARGET_ADX ? CODE_FOR_adcxsi3 : CODE_FOR_addsi3_carry;
35434 mode0 = SImode;
35435 goto addcarryx;
35436
35437 case IX86_BUILTIN_ADDCARRYX64:
35438 icode = TARGET_ADX ? CODE_FOR_adcxdi3 : CODE_FOR_adddi3_carry;
35439 mode0 = DImode;
35440
35441 addcarryx:
35442 arg0 = CALL_EXPR_ARG (exp, 0); /* unsigned char c_in. */
35443 arg1 = CALL_EXPR_ARG (exp, 1); /* unsigned int src1. */
35444 arg2 = CALL_EXPR_ARG (exp, 2); /* unsigned int src2. */
35445 arg3 = CALL_EXPR_ARG (exp, 3); /* unsigned int *sum_out. */
35446
35447 op0 = gen_reg_rtx (QImode);
35448
35449 /* Generate CF from the input operand: adding 0xff to the QImode value sets the carry flag exactly when c_in is nonzero. */
35450 op1 = expand_normal (arg0);
35451 op1 = copy_to_mode_reg (QImode, convert_to_mode (QImode, op1, 1));
35452 emit_insn (gen_addqi3_cc (op0, op1, constm1_rtx));
35453
35454 /* Generate the add-with-carry instruction (ADCX when available, otherwise ADC) to compute X + Y + CF. */
35455 op2 = expand_normal (arg1);
35456 op3 = expand_normal (arg2);
35457
35458 if (!REG_P (op2))
35459 op2 = copy_to_mode_reg (mode0, op2);
35460 if (!REG_P (op3))
35461 op3 = copy_to_mode_reg (mode0, op3);
35462
35463 op0 = gen_reg_rtx (mode0);
35464
35465 op4 = gen_rtx_REG (CCCmode, FLAGS_REG);
35466 pat = gen_rtx_LTU (VOIDmode, op4, const0_rtx);
35467 emit_insn (GEN_FCN (icode) (op0, op2, op3, op4, pat));
35468
35469 /* Store the result. */
35470 op4 = expand_normal (arg3);
35471 if (!address_operand (op4, VOIDmode))
35472 {
35473 op4 = convert_memory_address (Pmode, op4);
35474 op4 = copy_addr_to_reg (op4);
35475 }
35476 emit_move_insn (gen_rtx_MEM (mode0, op4), op0);
35477
35478 /* Return current CF value. */
35479 if (target == 0)
35480 target = gen_reg_rtx (QImode);
35481
35482 PUT_MODE (pat, QImode);
35483 emit_insn (gen_rtx_SET (VOIDmode, target, pat));
35484 return target;
35485
35486 case IX86_BUILTIN_READ_FLAGS:
35487 emit_insn (gen_push (gen_rtx_REG (word_mode, FLAGS_REG)));
35488
35489 if (optimize
35490 || target == NULL_RTX
35491 || !nonimmediate_operand (target, word_mode)
35492 || GET_MODE (target) != word_mode)
35493 target = gen_reg_rtx (word_mode);
35494
35495 emit_insn (gen_pop (target));
35496 return target;
35497
35498 case IX86_BUILTIN_WRITE_FLAGS:
35499
35500 arg0 = CALL_EXPR_ARG (exp, 0);
35501 op0 = expand_normal (arg0);
35502 if (!general_no_elim_operand (op0, word_mode))
35503 op0 = copy_to_mode_reg (word_mode, op0);
35504
35505 emit_insn (gen_push (op0));
35506 emit_insn (gen_pop (gen_rtx_REG (word_mode, FLAGS_REG)));
35507 return 0;
35508
35509 case IX86_BUILTIN_KORTESTC16:
35510 icode = CODE_FOR_kortestchi;
35511 mode0 = HImode;
35512 mode1 = CCCmode;
35513 goto kortest;
35514
35515 case IX86_BUILTIN_KORTESTZ16:
35516 icode = CODE_FOR_kortestzhi;
35517 mode0 = HImode;
35518 mode1 = CCZmode;
35519
35520 kortest:
35521 arg0 = CALL_EXPR_ARG (exp, 0); /* Mask reg src1. */
35522 arg1 = CALL_EXPR_ARG (exp, 1); /* Mask reg src2. */
35523 op0 = expand_normal (arg0);
35524 op1 = expand_normal (arg1);
35525
35526 op0 = copy_to_reg (op0);
35527 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
35528 op1 = copy_to_reg (op1);
35529 op1 = simplify_gen_subreg (mode0, op1, GET_MODE (op1), 0);
35530
35531 target = gen_reg_rtx (QImode);
35532 emit_insn (gen_rtx_SET (VOIDmode, target, const0_rtx));
35533
35534 /* Emit kortest. */
35535 emit_insn (GEN_FCN (icode) (op0, op1));
35536 /* And use setcc to return result from flags. */
35537 ix86_expand_setcc (target, EQ,
35538 gen_rtx_REG (mode1, FLAGS_REG), const0_rtx);
35539 return target;
35540
35541 case IX86_BUILTIN_GATHERSIV2DF:
35542 icode = CODE_FOR_avx2_gathersiv2df;
35543 goto gather_gen;
35544 case IX86_BUILTIN_GATHERSIV4DF:
35545 icode = CODE_FOR_avx2_gathersiv4df;
35546 goto gather_gen;
35547 case IX86_BUILTIN_GATHERDIV2DF:
35548 icode = CODE_FOR_avx2_gatherdiv2df;
35549 goto gather_gen;
35550 case IX86_BUILTIN_GATHERDIV4DF:
35551 icode = CODE_FOR_avx2_gatherdiv4df;
35552 goto gather_gen;
35553 case IX86_BUILTIN_GATHERSIV4SF:
35554 icode = CODE_FOR_avx2_gathersiv4sf;
35555 goto gather_gen;
35556 case IX86_BUILTIN_GATHERSIV8SF:
35557 icode = CODE_FOR_avx2_gathersiv8sf;
35558 goto gather_gen;
35559 case IX86_BUILTIN_GATHERDIV4SF:
35560 icode = CODE_FOR_avx2_gatherdiv4sf;
35561 goto gather_gen;
35562 case IX86_BUILTIN_GATHERDIV8SF:
35563 icode = CODE_FOR_avx2_gatherdiv8sf;
35564 goto gather_gen;
35565 case IX86_BUILTIN_GATHERSIV2DI:
35566 icode = CODE_FOR_avx2_gathersiv2di;
35567 goto gather_gen;
35568 case IX86_BUILTIN_GATHERSIV4DI:
35569 icode = CODE_FOR_avx2_gathersiv4di;
35570 goto gather_gen;
35571 case IX86_BUILTIN_GATHERDIV2DI:
35572 icode = CODE_FOR_avx2_gatherdiv2di;
35573 goto gather_gen;
35574 case IX86_BUILTIN_GATHERDIV4DI:
35575 icode = CODE_FOR_avx2_gatherdiv4di;
35576 goto gather_gen;
35577 case IX86_BUILTIN_GATHERSIV4SI:
35578 icode = CODE_FOR_avx2_gathersiv4si;
35579 goto gather_gen;
35580 case IX86_BUILTIN_GATHERSIV8SI:
35581 icode = CODE_FOR_avx2_gathersiv8si;
35582 goto gather_gen;
35583 case IX86_BUILTIN_GATHERDIV4SI:
35584 icode = CODE_FOR_avx2_gatherdiv4si;
35585 goto gather_gen;
35586 case IX86_BUILTIN_GATHERDIV8SI:
35587 icode = CODE_FOR_avx2_gatherdiv8si;
35588 goto gather_gen;
35589 case IX86_BUILTIN_GATHERALTSIV4DF:
35590 icode = CODE_FOR_avx2_gathersiv4df;
35591 goto gather_gen;
35592 case IX86_BUILTIN_GATHERALTDIV8SF:
35593 icode = CODE_FOR_avx2_gatherdiv8sf;
35594 goto gather_gen;
35595 case IX86_BUILTIN_GATHERALTSIV4DI:
35596 icode = CODE_FOR_avx2_gathersiv4di;
35597 goto gather_gen;
35598 case IX86_BUILTIN_GATHERALTDIV8SI:
35599 icode = CODE_FOR_avx2_gatherdiv8si;
35600 goto gather_gen;
35601 case IX86_BUILTIN_GATHER3SIV16SF:
35602 icode = CODE_FOR_avx512f_gathersiv16sf;
35603 goto gather_gen;
35604 case IX86_BUILTIN_GATHER3SIV8DF:
35605 icode = CODE_FOR_avx512f_gathersiv8df;
35606 goto gather_gen;
35607 case IX86_BUILTIN_GATHER3DIV16SF:
35608 icode = CODE_FOR_avx512f_gatherdiv16sf;
35609 goto gather_gen;
35610 case IX86_BUILTIN_GATHER3DIV8DF:
35611 icode = CODE_FOR_avx512f_gatherdiv8df;
35612 goto gather_gen;
35613 case IX86_BUILTIN_GATHER3SIV16SI:
35614 icode = CODE_FOR_avx512f_gathersiv16si;
35615 goto gather_gen;
35616 case IX86_BUILTIN_GATHER3SIV8DI:
35617 icode = CODE_FOR_avx512f_gathersiv8di;
35618 goto gather_gen;
35619 case IX86_BUILTIN_GATHER3DIV16SI:
35620 icode = CODE_FOR_avx512f_gatherdiv16si;
35621 goto gather_gen;
35622 case IX86_BUILTIN_GATHER3DIV8DI:
35623 icode = CODE_FOR_avx512f_gatherdiv8di;
35624 goto gather_gen;
35625 case IX86_BUILTIN_GATHER3ALTSIV8DF:
35626 icode = CODE_FOR_avx512f_gathersiv8df;
35627 goto gather_gen;
35628 case IX86_BUILTIN_GATHER3ALTDIV16SF:
35629 icode = CODE_FOR_avx512f_gatherdiv16sf;
35630 goto gather_gen;
35631 case IX86_BUILTIN_GATHER3ALTSIV8DI:
35632 icode = CODE_FOR_avx512f_gathersiv8di;
35633 goto gather_gen;
35634 case IX86_BUILTIN_GATHER3ALTDIV16SI:
35635 icode = CODE_FOR_avx512f_gatherdiv16si;
35636 goto gather_gen;
35637 case IX86_BUILTIN_SCATTERSIV16SF:
35638 icode = CODE_FOR_avx512f_scattersiv16sf;
35639 goto scatter_gen;
35640 case IX86_BUILTIN_SCATTERSIV8DF:
35641 icode = CODE_FOR_avx512f_scattersiv8df;
35642 goto scatter_gen;
35643 case IX86_BUILTIN_SCATTERDIV16SF:
35644 icode = CODE_FOR_avx512f_scatterdiv16sf;
35645 goto scatter_gen;
35646 case IX86_BUILTIN_SCATTERDIV8DF:
35647 icode = CODE_FOR_avx512f_scatterdiv8df;
35648 goto scatter_gen;
35649 case IX86_BUILTIN_SCATTERSIV16SI:
35650 icode = CODE_FOR_avx512f_scattersiv16si;
35651 goto scatter_gen;
35652 case IX86_BUILTIN_SCATTERSIV8DI:
35653 icode = CODE_FOR_avx512f_scattersiv8di;
35654 goto scatter_gen;
35655 case IX86_BUILTIN_SCATTERDIV16SI:
35656 icode = CODE_FOR_avx512f_scatterdiv16si;
35657 goto scatter_gen;
35658 case IX86_BUILTIN_SCATTERDIV8DI:
35659 icode = CODE_FOR_avx512f_scatterdiv8di;
35660 goto scatter_gen;
35661
35662 case IX86_BUILTIN_GATHERPFDPD:
35663 icode = CODE_FOR_avx512pf_gatherpfv8sidf;
35664 goto vec_prefetch_gen;
35665 case IX86_BUILTIN_GATHERPFDPS:
35666 icode = CODE_FOR_avx512pf_gatherpfv16sisf;
35667 goto vec_prefetch_gen;
35668 case IX86_BUILTIN_GATHERPFQPD:
35669 icode = CODE_FOR_avx512pf_gatherpfv8didf;
35670 goto vec_prefetch_gen;
35671 case IX86_BUILTIN_GATHERPFQPS:
35672 icode = CODE_FOR_avx512pf_gatherpfv8disf;
35673 goto vec_prefetch_gen;
35674 case IX86_BUILTIN_SCATTERPFDPD:
35675 icode = CODE_FOR_avx512pf_scatterpfv8sidf;
35676 goto vec_prefetch_gen;
35677 case IX86_BUILTIN_SCATTERPFDPS:
35678 icode = CODE_FOR_avx512pf_scatterpfv16sisf;
35679 goto vec_prefetch_gen;
35680 case IX86_BUILTIN_SCATTERPFQPD:
35681 icode = CODE_FOR_avx512pf_scatterpfv8didf;
35682 goto vec_prefetch_gen;
35683 case IX86_BUILTIN_SCATTERPFQPS:
35684 icode = CODE_FOR_avx512pf_scatterpfv8disf;
35685 goto vec_prefetch_gen;
35686
35687 gather_gen:
35688 rtx half;
35689 rtx (*gen) (rtx, rtx);
35690
35691 arg0 = CALL_EXPR_ARG (exp, 0);
35692 arg1 = CALL_EXPR_ARG (exp, 1);
35693 arg2 = CALL_EXPR_ARG (exp, 2);
35694 arg3 = CALL_EXPR_ARG (exp, 3);
35695 arg4 = CALL_EXPR_ARG (exp, 4);
35696 op0 = expand_normal (arg0);
35697 op1 = expand_normal (arg1);
35698 op2 = expand_normal (arg2);
35699 op3 = expand_normal (arg3);
35700 op4 = expand_normal (arg4);
35701 /* Note the arg order is different from the operand order. */
35702 mode0 = insn_data[icode].operand[1].mode;
35703 mode2 = insn_data[icode].operand[3].mode;
35704 mode3 = insn_data[icode].operand[4].mode;
35705 mode4 = insn_data[icode].operand[5].mode;
35706
35707 if (target == NULL_RTX
35708 || GET_MODE (target) != insn_data[icode].operand[0].mode
35709 || !insn_data[icode].operand[0].predicate (target,
35710 GET_MODE (target)))
35711 subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
35712 else
35713 subtarget = target;
35714
35715 switch (fcode)
35716 {
35717 case IX86_BUILTIN_GATHER3ALTSIV8DF:
35718 case IX86_BUILTIN_GATHER3ALTSIV8DI:
35719 half = gen_reg_rtx (V8SImode);
35720 if (!nonimmediate_operand (op2, V16SImode))
35721 op2 = copy_to_mode_reg (V16SImode, op2);
35722 emit_insn (gen_vec_extract_lo_v16si (half, op2));
35723 op2 = half;
35724 break;
35725 case IX86_BUILTIN_GATHERALTSIV4DF:
35726 case IX86_BUILTIN_GATHERALTSIV4DI:
35727 half = gen_reg_rtx (V4SImode);
35728 if (!nonimmediate_operand (op2, V8SImode))
35729 op2 = copy_to_mode_reg (V8SImode, op2);
35730 emit_insn (gen_vec_extract_lo_v8si (half, op2));
35731 op2 = half;
35732 break;
35733 case IX86_BUILTIN_GATHER3ALTDIV16SF:
35734 case IX86_BUILTIN_GATHER3ALTDIV16SI:
35735 half = gen_reg_rtx (mode0);
35736 if (mode0 == V8SFmode)
35737 gen = gen_vec_extract_lo_v16sf;
35738 else
35739 gen = gen_vec_extract_lo_v16si;
35740 if (!nonimmediate_operand (op0, GET_MODE (op0)))
35741 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
35742 emit_insn (gen (half, op0));
35743 op0 = half;
35744 if (GET_MODE (op3) != VOIDmode)
35745 {
35746 if (!nonimmediate_operand (op3, GET_MODE (op3)))
35747 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
35748 emit_insn (gen (half, op3));
35749 op3 = half;
35750 }
35751 break;
35752 case IX86_BUILTIN_GATHERALTDIV8SF:
35753 case IX86_BUILTIN_GATHERALTDIV8SI:
35754 half = gen_reg_rtx (mode0);
35755 if (mode0 == V4SFmode)
35756 gen = gen_vec_extract_lo_v8sf;
35757 else
35758 gen = gen_vec_extract_lo_v8si;
35759 if (!nonimmediate_operand (op0, GET_MODE (op0)))
35760 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
35761 emit_insn (gen (half, op0));
35762 op0 = half;
35763 if (GET_MODE (op3) != VOIDmode)
35764 {
35765 if (!nonimmediate_operand (op3, GET_MODE (op3)))
35766 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
35767 emit_insn (gen (half, op3));
35768 op3 = half;
35769 }
35770 break;
35771 default:
35772 break;
35773 }
35774
35775 /* Force the memory operand to use only a base register here. We
35776 don't want to do this for the memory operands of other builtin
35777 functions. */
35778 op1 = ix86_zero_extend_to_Pmode (op1);
35779
35780 if (!insn_data[icode].operand[1].predicate (op0, mode0))
35781 op0 = copy_to_mode_reg (mode0, op0);
35782 if (!insn_data[icode].operand[2].predicate (op1, Pmode))
35783 op1 = copy_to_mode_reg (Pmode, op1);
35784 if (!insn_data[icode].operand[3].predicate (op2, mode2))
35785 op2 = copy_to_mode_reg (mode2, op2);
35786 if (GET_MODE (op3) == mode3 || GET_MODE (op3) == VOIDmode)
35787 {
35788 if (!insn_data[icode].operand[4].predicate (op3, mode3))
35789 op3 = copy_to_mode_reg (mode3, op3);
35790 }
35791 else
35792 {
35793 op3 = copy_to_reg (op3);
35794 op3 = simplify_gen_subreg (mode3, op3, GET_MODE (op3), 0);
35795 }
35796 if (!insn_data[icode].operand[5].predicate (op4, mode4))
35797 {
35798 error ("the last argument must be scale 1, 2, 4, 8");
35799 return const0_rtx;
35800 }
35801
35802 /* Optimize. If mask is known to have all high bits set,
35803 replace op0 with pc_rtx to signal that the instruction
35804 overwrites the whole destination and doesn't use its
35805 previous contents. */
35806 if (optimize)
35807 {
35808 if (TREE_CODE (arg3) == INTEGER_CST)
35809 {
35810 if (integer_all_onesp (arg3))
35811 op0 = pc_rtx;
35812 }
35813 else if (TREE_CODE (arg3) == VECTOR_CST)
35814 {
35815 unsigned int negative = 0;
35816 for (i = 0; i < VECTOR_CST_NELTS (arg3); ++i)
35817 {
35818 tree cst = VECTOR_CST_ELT (arg3, i);
35819 if (TREE_CODE (cst) == INTEGER_CST
35820 && tree_int_cst_sign_bit (cst))
35821 negative++;
35822 else if (TREE_CODE (cst) == REAL_CST
35823 && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
35824 negative++;
35825 }
35826 if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
35827 op0 = pc_rtx;
35828 }
35829 else if (TREE_CODE (arg3) == SSA_NAME
35830 && TREE_CODE (TREE_TYPE (arg3)) == VECTOR_TYPE)
35831 {
35832 /* Also recognize when the mask is like:
35833 __v2df src = _mm_setzero_pd ();
35834 __v2df mask = _mm_cmpeq_pd (src, src);
35835 or
35836 __v8sf src = _mm256_setzero_ps ();
35837 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
35838 as that is a cheaper way to load all ones into
35839 a register than having to load a constant from
35840 memory. */
35841 gimple def_stmt = SSA_NAME_DEF_STMT (arg3);
35842 if (is_gimple_call (def_stmt))
35843 {
35844 tree fndecl = gimple_call_fndecl (def_stmt);
35845 if (fndecl
35846 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
35847 switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
35848 {
35849 case IX86_BUILTIN_CMPPD:
35850 case IX86_BUILTIN_CMPPS:
35851 case IX86_BUILTIN_CMPPD256:
35852 case IX86_BUILTIN_CMPPS256:
35853 if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
35854 break;
35855 /* FALLTHRU */
35856 case IX86_BUILTIN_CMPEQPD:
35857 case IX86_BUILTIN_CMPEQPS:
35858 if (initializer_zerop (gimple_call_arg (def_stmt, 0))
35859 && initializer_zerop (gimple_call_arg (def_stmt,
35860 1)))
35861 op0 = pc_rtx;
35862 break;
35863 default:
35864 break;
35865 }
35866 }
35867 }
35868 }
35869
35870 pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
35871 if (! pat)
35872 return const0_rtx;
35873 emit_insn (pat);
35874
35875 switch (fcode)
35876 {
35877 case IX86_BUILTIN_GATHER3DIV16SF:
35878 if (target == NULL_RTX)
35879 target = gen_reg_rtx (V8SFmode);
35880 emit_insn (gen_vec_extract_lo_v16sf (target, subtarget));
35881 break;
35882 case IX86_BUILTIN_GATHER3DIV16SI:
35883 if (target == NULL_RTX)
35884 target = gen_reg_rtx (V8SImode);
35885 emit_insn (gen_vec_extract_lo_v16si (target, subtarget));
35886 break;
35887 case IX86_BUILTIN_GATHERDIV8SF:
35888 if (target == NULL_RTX)
35889 target = gen_reg_rtx (V4SFmode);
35890 emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
35891 break;
35892 case IX86_BUILTIN_GATHERDIV8SI:
35893 if (target == NULL_RTX)
35894 target = gen_reg_rtx (V4SImode);
35895 emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
35896 break;
35897 default:
35898 target = subtarget;
35899 break;
35900 }
35901 return target;
35902
35903 scatter_gen:
35904 arg0 = CALL_EXPR_ARG (exp, 0);
35905 arg1 = CALL_EXPR_ARG (exp, 1);
35906 arg2 = CALL_EXPR_ARG (exp, 2);
35907 arg3 = CALL_EXPR_ARG (exp, 3);
35908 arg4 = CALL_EXPR_ARG (exp, 4);
35909 op0 = expand_normal (arg0);
35910 op1 = expand_normal (arg1);
35911 op2 = expand_normal (arg2);
35912 op3 = expand_normal (arg3);
35913 op4 = expand_normal (arg4);
35914 mode1 = insn_data[icode].operand[1].mode;
35915 mode2 = insn_data[icode].operand[2].mode;
35916 mode3 = insn_data[icode].operand[3].mode;
35917 mode4 = insn_data[icode].operand[4].mode;
35918
35919 /* Force the memory operand to use only a base register here. We
35920 don't want to do this for the memory operands of other builtin
35921 functions. */
35922 op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1));
35923
35924 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
35925 op0 = copy_to_mode_reg (Pmode, op0);
35926
35927 if (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
35928 {
35929 if (!insn_data[icode].operand[1].predicate (op1, mode1))
35930 op1 = copy_to_mode_reg (mode1, op1);
35931 }
35932 else
35933 {
35934 op1 = copy_to_reg (op1);
35935 op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0);
35936 }
35937
35938 if (!insn_data[icode].operand[2].predicate (op2, mode2))
35939 op2 = copy_to_mode_reg (mode2, op2);
35940
35941 if (!insn_data[icode].operand[3].predicate (op3, mode3))
35942 op3 = copy_to_mode_reg (mode3, op3);
35943
35944 if (!insn_data[icode].operand[4].predicate (op4, mode4))
35945 {
35946 error ("the last argument must be scale 1, 2, 4, 8");
35947 return const0_rtx;
35948 }
35949
35950 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
35951 if (! pat)
35952 return const0_rtx;
35953
35954 emit_insn (pat);
35955 return 0;
35956
35957 vec_prefetch_gen:
35958 arg0 = CALL_EXPR_ARG (exp, 0);
35959 arg1 = CALL_EXPR_ARG (exp, 1);
35960 arg2 = CALL_EXPR_ARG (exp, 2);
35961 arg3 = CALL_EXPR_ARG (exp, 3);
35962 arg4 = CALL_EXPR_ARG (exp, 4);
35963 op0 = expand_normal (arg0);
35964 op1 = expand_normal (arg1);
35965 op2 = expand_normal (arg2);
35966 op3 = expand_normal (arg3);
35967 op4 = expand_normal (arg4);
35968 mode0 = insn_data[icode].operand[0].mode;
35969 mode1 = insn_data[icode].operand[1].mode;
35970 mode3 = insn_data[icode].operand[3].mode;
35971 mode4 = insn_data[icode].operand[4].mode;
35972
35973 if (GET_MODE (op0) == mode0
35974 || (GET_MODE (op0) == VOIDmode && op0 != constm1_rtx))
35975 {
35976 if (!insn_data[icode].operand[0].predicate (op0, mode0))
35977 op0 = copy_to_mode_reg (mode0, op0);
35978 }
35979 else if (op0 != constm1_rtx)
35980 {
35981 op0 = copy_to_reg (op0);
35982 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
35983 }
35984
35985 if (!insn_data[icode].operand[1].predicate (op1, mode1))
35986 op1 = copy_to_mode_reg (mode1, op1);
35987
35988 /* Force the memory operand to use only a base register here; we
35989 don't want to do this for the memory operands of other builtin
35990 functions. */
35991 op2 = force_reg (Pmode, convert_to_mode (Pmode, op2, 1));
35992
35993 if (!insn_data[icode].operand[2].predicate (op2, Pmode))
35994 op2 = copy_to_mode_reg (Pmode, op2);
35995
35996 if (!insn_data[icode].operand[3].predicate (op3, mode3))
35997 {
35998 error ("the forth argument must be scale 1, 2, 4, 8");
35999 return const0_rtx;
36000 }
36001
36002 if (!insn_data[icode].operand[4].predicate (op4, mode4))
36003 {
36004 error ("the last argument must be hint 0 or 1");
36005 return const0_rtx;
36006 }
36007
36008 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
36009 if (! pat)
36010 return const0_rtx;
36011
36012 emit_insn (pat);
36013
36014 return 0;
36015
36016 case IX86_BUILTIN_XABORT:
36017 icode = CODE_FOR_xabort;
36018 arg0 = CALL_EXPR_ARG (exp, 0);
36019 op0 = expand_normal (arg0);
36020 mode0 = insn_data[icode].operand[0].mode;
36021 if (!insn_data[icode].operand[0].predicate (op0, mode0))
36022 {
36023 error ("the xabort's argument must be an 8-bit immediate");
36024 return const0_rtx;
36025 }
36026 emit_insn (gen_xabort (op0));
36027 return 0;
36028
36029 default:
36030 break;
36031 }
36032
36033 for (i = 0, d = bdesc_special_args;
36034 i < ARRAY_SIZE (bdesc_special_args);
36035 i++, d++)
36036 if (d->code == fcode)
36037 return ix86_expand_special_args_builtin (d, exp, target);
36038
36039 for (i = 0, d = bdesc_args;
36040 i < ARRAY_SIZE (bdesc_args);
36041 i++, d++)
36042 if (d->code == fcode)
36043 switch (fcode)
36044 {
36045 case IX86_BUILTIN_FABSQ:
36046 case IX86_BUILTIN_COPYSIGNQ:
36047 if (!TARGET_SSE)
36048 /* Emit a normal call if SSE isn't available. */
36049 return expand_call (exp, target, ignore);
36050 default:
36051 return ix86_expand_args_builtin (d, exp, target);
36052 }
36053
36054 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
36055 if (d->code == fcode)
36056 return ix86_expand_sse_comi (d, exp, target);
36057
36058 for (i = 0, d = bdesc_round_args; i < ARRAY_SIZE (bdesc_round_args); i++, d++)
36059 if (d->code == fcode)
36060 return ix86_expand_round_builtin (d, exp, target);
36061
36062 for (i = 0, d = bdesc_pcmpestr;
36063 i < ARRAY_SIZE (bdesc_pcmpestr);
36064 i++, d++)
36065 if (d->code == fcode)
36066 return ix86_expand_sse_pcmpestr (d, exp, target);
36067
36068 for (i = 0, d = bdesc_pcmpistr;
36069 i < ARRAY_SIZE (bdesc_pcmpistr);
36070 i++, d++)
36071 if (d->code == fcode)
36072 return ix86_expand_sse_pcmpistr (d, exp, target);
36073
36074 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
36075 if (d->code == fcode)
36076 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
36077 (enum ix86_builtin_func_type)
36078 d->flag, d->comparison);
36079
36080 gcc_unreachable ();
36081 }
36082
36083 /* This returns the target-specific builtin with code CODE if
36084 current_function_decl has visibility on this builtin, which is checked
36085 using isa flags. Returns NULL_TREE otherwise. */
36086
36087 static tree ix86_get_builtin (enum ix86_builtins code)
36088 {
36089 struct cl_target_option *opts;
36090 tree target_tree = NULL_TREE;
36091
36092 /* Determine the isa flags of current_function_decl. */
36093
36094 if (current_function_decl)
36095 target_tree = DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl);
36096
36097 if (target_tree == NULL)
36098 target_tree = target_option_default_node;
36099
36100 opts = TREE_TARGET_OPTION (target_tree);
36101
36102 if (ix86_builtins_isa[(int) code].isa & opts->x_ix86_isa_flags)
36103 return ix86_builtin_decl (code, true);
36104 else
36105 return NULL_TREE;
36106 }
36107
36108 /* Returns a function decl for a vectorized version of the builtin function
36109 FNDECL, with result vector type TYPE_OUT and argument vector type TYPE_IN,
36110 or NULL_TREE if it is not available. */
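/* For example, vectorizing sqrt with a V2DF result and V2DF argument
   maps to IX86_BUILTIN_SQRTPD below; the 256-bit and 512-bit variants
   are selected purely by the vector subpart counts.  */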
36111
36112 static tree
36113 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
36114 tree type_in)
36115 {
36116 enum machine_mode in_mode, out_mode;
36117 int in_n, out_n;
36118 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
36119
36120 if (TREE_CODE (type_out) != VECTOR_TYPE
36121 || TREE_CODE (type_in) != VECTOR_TYPE
36122 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
36123 return NULL_TREE;
36124
36125 out_mode = TYPE_MODE (TREE_TYPE (type_out));
36126 out_n = TYPE_VECTOR_SUBPARTS (type_out);
36127 in_mode = TYPE_MODE (TREE_TYPE (type_in));
36128 in_n = TYPE_VECTOR_SUBPARTS (type_in);
36129
36130 switch (fn)
36131 {
36132 case BUILT_IN_SQRT:
36133 if (out_mode == DFmode && in_mode == DFmode)
36134 {
36135 if (out_n == 2 && in_n == 2)
36136 return ix86_get_builtin (IX86_BUILTIN_SQRTPD);
36137 else if (out_n == 4 && in_n == 4)
36138 return ix86_get_builtin (IX86_BUILTIN_SQRTPD256);
36139 else if (out_n == 8 && in_n == 8)
36140 return ix86_get_builtin (IX86_BUILTIN_SQRTPD512);
36141 }
36142 break;
36143
36144 case BUILT_IN_EXP2F:
36145 if (out_mode == SFmode && in_mode == SFmode)
36146 {
36147 if (out_n == 16 && in_n == 16)
36148 return ix86_get_builtin (IX86_BUILTIN_EXP2PS);
36149 }
36150 break;
36151
36152 case BUILT_IN_SQRTF:
36153 if (out_mode == SFmode && in_mode == SFmode)
36154 {
36155 if (out_n == 4 && in_n == 4)
36156 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR);
36157 else if (out_n == 8 && in_n == 8)
36158 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR256);
36159 else if (out_n == 16 && in_n == 16)
36160 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR512);
36161 }
36162 break;
36163
36164 case BUILT_IN_IFLOOR:
36165 case BUILT_IN_LFLOOR:
36166 case BUILT_IN_LLFLOOR:
36167 /* The round insn does not trap on denormals. */
36168 if (flag_trapping_math || !TARGET_ROUND)
36169 break;
36170
36171 if (out_mode == SImode && in_mode == DFmode)
36172 {
36173 if (out_n == 4 && in_n == 2)
36174 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX);
36175 else if (out_n == 8 && in_n == 4)
36176 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256);
36177 else if (out_n == 16 && in_n == 8)
36178 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512);
36179 }
36180 break;
36181
36182 case BUILT_IN_IFLOORF:
36183 case BUILT_IN_LFLOORF:
36184 case BUILT_IN_LLFLOORF:
36185 /* The round insn does not trap on denormals. */
36186 if (flag_trapping_math || !TARGET_ROUND)
36187 break;
36188
36189 if (out_mode == SImode && in_mode == SFmode)
36190 {
36191 if (out_n == 4 && in_n == 4)
36192 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX);
36193 else if (out_n == 8 && in_n == 8)
36194 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX256);
36195 }
36196 break;
36197
36198 case BUILT_IN_ICEIL:
36199 case BUILT_IN_LCEIL:
36200 case BUILT_IN_LLCEIL:
36201 /* The round insn does not trap on denormals. */
36202 if (flag_trapping_math || !TARGET_ROUND)
36203 break;
36204
36205 if (out_mode == SImode && in_mode == DFmode)
36206 {
36207 if (out_n == 4 && in_n == 2)
36208 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX);
36209 else if (out_n == 8 && in_n == 4)
36210 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256);
36211 else if (out_n == 16 && in_n == 8)
36212 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512);
36213 }
36214 break;
36215
36216 case BUILT_IN_ICEILF:
36217 case BUILT_IN_LCEILF:
36218 case BUILT_IN_LLCEILF:
36219 /* The round insn does not trap on denormals. */
36220 if (flag_trapping_math || !TARGET_ROUND)
36221 break;
36222
36223 if (out_mode == SImode && in_mode == SFmode)
36224 {
36225 if (out_n == 4 && in_n == 4)
36226 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX);
36227 else if (out_n == 8 && in_n == 8)
36228 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX256);
36229 }
36230 break;
36231
36232 case BUILT_IN_IRINT:
36233 case BUILT_IN_LRINT:
36234 case BUILT_IN_LLRINT:
36235 if (out_mode == SImode && in_mode == DFmode)
36236 {
36237 if (out_n == 4 && in_n == 2)
36238 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX);
36239 else if (out_n == 8 && in_n == 4)
36240 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX256);
36241 }
36242 break;
36243
36244 case BUILT_IN_IRINTF:
36245 case BUILT_IN_LRINTF:
36246 case BUILT_IN_LLRINTF:
36247 if (out_mode == SImode && in_mode == SFmode)
36248 {
36249 if (out_n == 4 && in_n == 4)
36250 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ);
36251 else if (out_n == 8 && in_n == 8)
36252 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ256);
36253 }
36254 break;
36255
36256 case BUILT_IN_IROUND:
36257 case BUILT_IN_LROUND:
36258 case BUILT_IN_LLROUND:
36259 /* The round insn does not trap on denormals. */
36260 if (flag_trapping_math || !TARGET_ROUND)
36261 break;
36262
36263 if (out_mode == SImode && in_mode == DFmode)
36264 {
36265 if (out_n == 4 && in_n == 2)
36266 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX);
36267 else if (out_n == 8 && in_n == 4)
36268 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256);
36269 else if (out_n == 16 && in_n == 8)
36270 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512);
36271 }
36272 break;
36273
36274 case BUILT_IN_IROUNDF:
36275 case BUILT_IN_LROUNDF:
36276 case BUILT_IN_LLROUNDF:
36277 /* The round insn does not trap on denormals. */
36278 if (flag_trapping_math || !TARGET_ROUND)
36279 break;
36280
36281 if (out_mode == SImode && in_mode == SFmode)
36282 {
36283 if (out_n == 4 && in_n == 4)
36284 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX);
36285 else if (out_n == 8 && in_n == 8)
36286 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX256);
36287 }
36288 break;
36289
36290 case BUILT_IN_COPYSIGN:
36291 if (out_mode == DFmode && in_mode == DFmode)
36292 {
36293 if (out_n == 2 && in_n == 2)
36294 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD);
36295 else if (out_n == 4 && in_n == 4)
36296 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD256);
36297 else if (out_n == 8 && in_n == 8)
36298 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD512);
36299 }
36300 break;
36301
36302 case BUILT_IN_COPYSIGNF:
36303 if (out_mode == SFmode && in_mode == SFmode)
36304 {
36305 if (out_n == 4 && in_n == 4)
36306 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS);
36307 else if (out_n == 8 && in_n == 8)
36308 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS256);
36309 else if (out_n == 16 && in_n == 16)
36310 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS512);
36311 }
36312 break;
36313
36314 case BUILT_IN_FLOOR:
36315 /* The round insn does not trap on denormals. */
36316 if (flag_trapping_math || !TARGET_ROUND)
36317 break;
36318
36319 if (out_mode == DFmode && in_mode == DFmode)
36320 {
36321 if (out_n == 2 && in_n == 2)
36322 return ix86_get_builtin (IX86_BUILTIN_FLOORPD);
36323 else if (out_n == 4 && in_n == 4)
36324 return ix86_get_builtin (IX86_BUILTIN_FLOORPD256);
36325 }
36326 break;
36327
36328 case BUILT_IN_FLOORF:
36329 /* The round insn does not trap on denormals. */
36330 if (flag_trapping_math || !TARGET_ROUND)
36331 break;
36332
36333 if (out_mode == SFmode && in_mode == SFmode)
36334 {
36335 if (out_n == 4 && in_n == 4)
36336 return ix86_get_builtin (IX86_BUILTIN_FLOORPS);
36337 else if (out_n == 8 && in_n == 8)
36338 return ix86_get_builtin (IX86_BUILTIN_FLOORPS256);
36339 }
36340 break;
36341
36342 case BUILT_IN_CEIL:
36343 /* The round insn does not trap on denormals. */
36344 if (flag_trapping_math || !TARGET_ROUND)
36345 break;
36346
36347 if (out_mode == DFmode && in_mode == DFmode)
36348 {
36349 if (out_n == 2 && in_n == 2)
36350 return ix86_get_builtin (IX86_BUILTIN_CEILPD);
36351 else if (out_n == 4 && in_n == 4)
36352 return ix86_get_builtin (IX86_BUILTIN_CEILPD256);
36353 }
36354 break;
36355
36356 case BUILT_IN_CEILF:
36357 /* The round insn does not trap on denormals. */
36358 if (flag_trapping_math || !TARGET_ROUND)
36359 break;
36360
36361 if (out_mode == SFmode && in_mode == SFmode)
36362 {
36363 if (out_n == 4 && in_n == 4)
36364 return ix86_get_builtin (IX86_BUILTIN_CEILPS);
36365 else if (out_n == 8 && in_n == 8)
36366 return ix86_get_builtin (IX86_BUILTIN_CEILPS256);
36367 }
36368 break;
36369
36370 case BUILT_IN_TRUNC:
36371 /* The round insn does not trap on denormals. */
36372 if (flag_trapping_math || !TARGET_ROUND)
36373 break;
36374
36375 if (out_mode == DFmode && in_mode == DFmode)
36376 {
36377 if (out_n == 2 && in_n == 2)
36378 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD);
36379 else if (out_n == 4 && in_n == 4)
36380 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD256);
36381 }
36382 break;
36383
36384 case BUILT_IN_TRUNCF:
36385 /* The round insn does not trap on denormals. */
36386 if (flag_trapping_math || !TARGET_ROUND)
36387 break;
36388
36389 if (out_mode == SFmode && in_mode == SFmode)
36390 {
36391 if (out_n == 4 && in_n == 4)
36392 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS);
36393 else if (out_n == 8 && in_n == 8)
36394 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS256);
36395 }
36396 break;
36397
36398 case BUILT_IN_RINT:
36399 /* The round insn does not trap on denormals. */
36400 if (flag_trapping_math || !TARGET_ROUND)
36401 break;
36402
36403 if (out_mode == DFmode && in_mode == DFmode)
36404 {
36405 if (out_n == 2 && in_n == 2)
36406 return ix86_get_builtin (IX86_BUILTIN_RINTPD);
36407 else if (out_n == 4 && in_n == 4)
36408 return ix86_get_builtin (IX86_BUILTIN_RINTPD256);
36409 }
36410 break;
36411
36412 case BUILT_IN_RINTF:
36413 /* The round insn does not trap on denormals. */
36414 if (flag_trapping_math || !TARGET_ROUND)
36415 break;
36416
36417 if (out_mode == SFmode && in_mode == SFmode)
36418 {
36419 if (out_n == 4 && in_n == 4)
36420 return ix86_get_builtin (IX86_BUILTIN_RINTPS);
36421 else if (out_n == 8 && in_n == 8)
36422 return ix86_get_builtin (IX86_BUILTIN_RINTPS256);
36423 }
36424 break;
36425
36426 case BUILT_IN_ROUND:
36427 /* The round insn does not trap on denormals. */
36428 if (flag_trapping_math || !TARGET_ROUND)
36429 break;
36430
36431 if (out_mode == DFmode && in_mode == DFmode)
36432 {
36433 if (out_n == 2 && in_n == 2)
36434 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ);
36435 else if (out_n == 4 && in_n == 4)
36436 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ256);
36437 }
36438 break;
36439
36440 case BUILT_IN_ROUNDF:
36441 /* The round insn does not trap on denormals. */
36442 if (flag_trapping_math || !TARGET_ROUND)
36443 break;
36444
36445 if (out_mode == SFmode && in_mode == SFmode)
36446 {
36447 if (out_n == 4 && in_n == 4)
36448 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ);
36449 else if (out_n == 8 && in_n == 8)
36450 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ256);
36451 }
36452 break;
36453
36454 case BUILT_IN_FMA:
36455 if (out_mode == DFmode && in_mode == DFmode)
36456 {
36457 if (out_n == 2 && in_n == 2)
36458 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD);
36459 if (out_n == 4 && in_n == 4)
36460 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD256);
36461 }
36462 break;
36463
36464 case BUILT_IN_FMAF:
36465 if (out_mode == SFmode && in_mode == SFmode)
36466 {
36467 if (out_n == 4 && in_n == 4)
36468 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS);
36469 if (out_n == 8 && in_n == 8)
36470 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS256);
36471 }
36472 break;
36473
36474 default:
36475 break;
36476 }
36477
36478 /* Dispatch to a handler for a vectorization library. */
36479 if (ix86_veclib_handler)
36480 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
36481 type_in);
36482
36483 return NULL_TREE;
36484 }
36485
36486 /* Handler for an SVML-style interface to
36487 a library with vectorized intrinsics. */
36488
36489 static tree
36490 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
36491 {
36492 char name[20];
36493 tree fntype, new_fndecl, args;
36494 unsigned arity;
36495 const char *bname;
36496 enum machine_mode el_mode, in_mode;
36497 int n, in_n;
36498
36499 /* SVML is suitable only for unsafe math. */
36500 if (!flag_unsafe_math_optimizations)
36501 return NULL_TREE;
36502
36503 el_mode = TYPE_MODE (TREE_TYPE (type_out));
36504 n = TYPE_VECTOR_SUBPARTS (type_out);
36505 in_mode = TYPE_MODE (TREE_TYPE (type_in));
36506 in_n = TYPE_VECTOR_SUBPARTS (type_in);
36507 if (el_mode != in_mode
36508 || n != in_n)
36509 return NULL_TREE;
36510
36511 switch (fn)
36512 {
36513 case BUILT_IN_EXP:
36514 case BUILT_IN_LOG:
36515 case BUILT_IN_LOG10:
36516 case BUILT_IN_POW:
36517 case BUILT_IN_TANH:
36518 case BUILT_IN_TAN:
36519 case BUILT_IN_ATAN:
36520 case BUILT_IN_ATAN2:
36521 case BUILT_IN_ATANH:
36522 case BUILT_IN_CBRT:
36523 case BUILT_IN_SINH:
36524 case BUILT_IN_SIN:
36525 case BUILT_IN_ASINH:
36526 case BUILT_IN_ASIN:
36527 case BUILT_IN_COSH:
36528 case BUILT_IN_COS:
36529 case BUILT_IN_ACOSH:
36530 case BUILT_IN_ACOS:
36531 if (el_mode != DFmode || n != 2)
36532 return NULL_TREE;
36533 break;
36534
36535 case BUILT_IN_EXPF:
36536 case BUILT_IN_LOGF:
36537 case BUILT_IN_LOG10F:
36538 case BUILT_IN_POWF:
36539 case BUILT_IN_TANHF:
36540 case BUILT_IN_TANF:
36541 case BUILT_IN_ATANF:
36542 case BUILT_IN_ATAN2F:
36543 case BUILT_IN_ATANHF:
36544 case BUILT_IN_CBRTF:
36545 case BUILT_IN_SINHF:
36546 case BUILT_IN_SINF:
36547 case BUILT_IN_ASINHF:
36548 case BUILT_IN_ASINF:
36549 case BUILT_IN_COSHF:
36550 case BUILT_IN_COSF:
36551 case BUILT_IN_ACOSHF:
36552 case BUILT_IN_ACOSF:
36553 if (el_mode != SFmode || n != 4)
36554 return NULL_TREE;
36555 break;
36556
36557 default:
36558 return NULL_TREE;
36559 }
36560
36561 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
36562
36563 if (fn == BUILT_IN_LOGF)
36564 strcpy (name, "vmlsLn4");
36565 else if (fn == BUILT_IN_LOG)
36566 strcpy (name, "vmldLn2");
36567 else if (n == 4)
36568 {
36569 sprintf (name, "vmls%s", bname+10);
36570 name[strlen (name)-1] = '4';
36571 }
36572 else
36573 sprintf (name, "vmld%s2", bname+10);
36574
36575 /* Convert to uppercase. */
36576 name[4] &= ~0x20;
36577
36578 arity = 0;
36579 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
36580 args;
36581 args = TREE_CHAIN (args))
36582 arity++;
36583
36584 if (arity == 1)
36585 fntype = build_function_type_list (type_out, type_in, NULL);
36586 else
36587 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
36588
36589 /* Build a function declaration for the vectorized function. */
36590 new_fndecl = build_decl (BUILTINS_LOCATION,
36591 FUNCTION_DECL, get_identifier (name), fntype);
36592 TREE_PUBLIC (new_fndecl) = 1;
36593 DECL_EXTERNAL (new_fndecl) = 1;
36594 DECL_IS_NOVOPS (new_fndecl) = 1;
36595 TREE_READONLY (new_fndecl) = 1;
36596
36597 return new_fndecl;
36598 }
36599
36600 /* Handler for an ACML-style interface to
36601 a library with vectorized intrinsics. */
36602
36603 static tree
36604 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
36605 {
36606 char name[20] = "__vr.._";
36607 tree fntype, new_fndecl, args;
36608 unsigned arity;
36609 const char *bname;
36610 enum machine_mode el_mode, in_mode;
36611 int n, in_n;
36612
36613 /* The ACML is 64-bit only and suitable only for unsafe math, as
36614 it does not correctly support parts of IEEE arithmetic, such as
36615 denormals, with the required precision. */
36616 if (!TARGET_64BIT
36617 || !flag_unsafe_math_optimizations)
36618 return NULL_TREE;
36619
36620 el_mode = TYPE_MODE (TREE_TYPE (type_out));
36621 n = TYPE_VECTOR_SUBPARTS (type_out);
36622 in_mode = TYPE_MODE (TREE_TYPE (type_in));
36623 in_n = TYPE_VECTOR_SUBPARTS (type_in);
36624 if (el_mode != in_mode
36625 || n != in_n)
36626 return NULL_TREE;
36627
36628 switch (fn)
36629 {
36630 case BUILT_IN_SIN:
36631 case BUILT_IN_COS:
36632 case BUILT_IN_EXP:
36633 case BUILT_IN_LOG:
36634 case BUILT_IN_LOG2:
36635 case BUILT_IN_LOG10:
36636 name[4] = 'd';
36637 name[5] = '2';
36638 if (el_mode != DFmode
36639 || n != 2)
36640 return NULL_TREE;
36641 break;
36642
36643 case BUILT_IN_SINF:
36644 case BUILT_IN_COSF:
36645 case BUILT_IN_EXPF:
36646 case BUILT_IN_POWF:
36647 case BUILT_IN_LOGF:
36648 case BUILT_IN_LOG2F:
36649 case BUILT_IN_LOG10F:
36650 name[4] = 's';
36651 name[5] = '4';
36652 if (el_mode != SFmode
36653 || n != 4)
36654 return NULL_TREE;
36655 break;
36656
36657 default:
36658 return NULL_TREE;
36659 }
36660
36661 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
36662 sprintf (name + 7, "%s", bname+10);
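/* For example, BUILT_IN_SIN becomes "__vrd2_sin" and BUILT_IN_SINF
   becomes "__vrs4_sinf".  */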
36663
36664 arity = 0;
36665 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
36666 args;
36667 args = TREE_CHAIN (args))
36668 arity++;
36669
36670 if (arity == 1)
36671 fntype = build_function_type_list (type_out, type_in, NULL);
36672 else
36673 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
36674
36675 /* Build a function declaration for the vectorized function. */
36676 new_fndecl = build_decl (BUILTINS_LOCATION,
36677 FUNCTION_DECL, get_identifier (name), fntype);
36678 TREE_PUBLIC (new_fndecl) = 1;
36679 DECL_EXTERNAL (new_fndecl) = 1;
36680 DECL_IS_NOVOPS (new_fndecl) = 1;
36681 TREE_READONLY (new_fndecl) = 1;
36682
36683 return new_fndecl;
36684 }
36685
36686 /* Returns a decl of a function that implements a gather load with
36687 memory vector type MEM_VECTYPE, index type INDEX_TYPE and scale SCALE.
36688 Return NULL_TREE if it is not available. */
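/* For example, a V4SF gather with SImode indices selects
   IX86_BUILTIN_GATHERSIV4SF, while DImode indices select
   IX86_BUILTIN_GATHERDIV4SF; the 512-bit variants below additionally
   require TARGET_AVX512F.  */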
36689
36690 static tree
36691 ix86_vectorize_builtin_gather (const_tree mem_vectype,
36692 const_tree index_type, int scale)
36693 {
36694 bool si;
36695 enum ix86_builtins code;
36696
36697 if (! TARGET_AVX2)
36698 return NULL_TREE;
36699
36700 if ((TREE_CODE (index_type) != INTEGER_TYPE
36701 && !POINTER_TYPE_P (index_type))
36702 || (TYPE_MODE (index_type) != SImode
36703 && TYPE_MODE (index_type) != DImode))
36704 return NULL_TREE;
36705
36706 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
36707 return NULL_TREE;
36708
36709 /* The v*gather* insns sign-extend the index to pointer mode. */
36710 if (TYPE_PRECISION (index_type) < POINTER_SIZE
36711 && TYPE_UNSIGNED (index_type))
36712 return NULL_TREE;
36713
36714 if (scale <= 0
36715 || scale > 8
36716 || (scale & (scale - 1)) != 0)
36717 return NULL_TREE;
36718
36719 si = TYPE_MODE (index_type) == SImode;
36720 switch (TYPE_MODE (mem_vectype))
36721 {
36722 case V2DFmode:
36723 code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
36724 break;
36725 case V4DFmode:
36726 code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
36727 break;
36728 case V2DImode:
36729 code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
36730 break;
36731 case V4DImode:
36732 code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
36733 break;
36734 case V4SFmode:
36735 code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
36736 break;
36737 case V8SFmode:
36738 code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
36739 break;
36740 case V4SImode:
36741 code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
36742 break;
36743 case V8SImode:
36744 code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
36745 break;
36746 case V8DFmode:
36747 if (TARGET_AVX512F)
36748 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DF : IX86_BUILTIN_GATHER3DIV8DF;
36749 else
36750 return NULL_TREE;
36751 break;
36752 case V8DImode:
36753 if (TARGET_AVX512F)
36754 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DI : IX86_BUILTIN_GATHER3DIV8DI;
36755 else
36756 return NULL_TREE;
36757 break;
36758 case V16SFmode:
36759 if (TARGET_AVX512F)
36760 code = si ? IX86_BUILTIN_GATHER3SIV16SF : IX86_BUILTIN_GATHER3ALTDIV16SF;
36761 else
36762 return NULL_TREE;
36763 break;
36764 case V16SImode:
36765 if (TARGET_AVX512F)
36766 code = si ? IX86_BUILTIN_GATHER3SIV16SI : IX86_BUILTIN_GATHER3ALTDIV16SI;
36767 else
36768 return NULL_TREE;
36769 break;
36770 default:
36771 return NULL_TREE;
36772 }
36773
36774 return ix86_get_builtin (code);
36775 }
36776
36777 /* Returns a decl of a target-specific builtin that implements the
36778 reciprocal of the function FN, or NULL_TREE if not available. */
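/* For example, the vectorized IX86_BUILTIN_SQRTPS_NR is rewritten to
   IX86_BUILTIN_RSQRTPS_NR and scalar __builtin_sqrtf to
   IX86_BUILTIN_RSQRTF, but only under the unsafe-math conditions
   checked below.  */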
36779
36780 static tree
36781 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
36782 bool sqrt ATTRIBUTE_UNUSED)
36783 {
36784 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
36785 && flag_finite_math_only && !flag_trapping_math
36786 && flag_unsafe_math_optimizations))
36787 return NULL_TREE;
36788
36789 if (md_fn)
36790 /* Machine dependent builtins. */
36791 switch (fn)
36792 {
36793 /* Vectorized version of sqrt to rsqrt conversion. */
36794 case IX86_BUILTIN_SQRTPS_NR:
36795 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR);
36796
36797 case IX86_BUILTIN_SQRTPS_NR256:
36798 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR256);
36799
36800 default:
36801 return NULL_TREE;
36802 }
36803 else
36804 /* Normal builtins. */
36805 switch (fn)
36806 {
36807 /* Sqrt to rsqrt conversion. */
36808 case BUILT_IN_SQRTF:
36809 return ix86_get_builtin (IX86_BUILTIN_RSQRTF);
36810
36811 default:
36812 return NULL_TREE;
36813 }
36814 }
36815 \f
36816 /* Helper for avx_vpermilps256_operand et al. This is also used by
36817 the expansion functions to turn the parallel back into a mask.
36818 The return value is 0 for no match and the imm8+1 for a match. */
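/* For example, for V4SFmode and the parallel (3 2 1 0) each element
   index occupies two bits of the immediate, giving mask 0x1b, so the
   function returns 0x1c.  */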
36819
36820 int
36821 avx_vpermilp_parallel (rtx par, enum machine_mode mode)
36822 {
36823 unsigned i, nelt = GET_MODE_NUNITS (mode);
36824 unsigned mask = 0;
36825 unsigned char ipar[16] = {}; /* Silence -Wuninitialized warning. */
36826
36827 if (XVECLEN (par, 0) != (int) nelt)
36828 return 0;
36829
36830 /* Validate that all of the elements are constants, and not totally
36831 out of range. Copy the data into an integral array to make the
36832 subsequent checks easier. */
36833 for (i = 0; i < nelt; ++i)
36834 {
36835 rtx er = XVECEXP (par, 0, i);
36836 unsigned HOST_WIDE_INT ei;
36837
36838 if (!CONST_INT_P (er))
36839 return 0;
36840 ei = INTVAL (er);
36841 if (ei >= nelt)
36842 return 0;
36843 ipar[i] = ei;
36844 }
36845
36846 switch (mode)
36847 {
36848 case V8DFmode:
36849 /* In the 512-bit DFmode case, we can only move elements within
36850 a 128-bit lane. First fill the second part of the mask,
36851 then fallthru. */
36852 for (i = 4; i < 6; ++i)
36853 {
36854 if (ipar[i] < 4 || ipar[i] >= 6)
36855 return 0;
36856 mask |= (ipar[i] - 4) << i;
36857 }
36858 for (i = 6; i < 8; ++i)
36859 {
36860 if (ipar[i] < 6)
36861 return 0;
36862 mask |= (ipar[i] - 6) << i;
36863 }
36864 /* FALLTHRU */
36865
36866 case V4DFmode:
36867 /* In the 256-bit DFmode case, we can only move elements within
36868 a 128-bit lane. */
36869 for (i = 0; i < 2; ++i)
36870 {
36871 if (ipar[i] >= 2)
36872 return 0;
36873 mask |= ipar[i] << i;
36874 }
36875 for (i = 2; i < 4; ++i)
36876 {
36877 if (ipar[i] < 2)
36878 return 0;
36879 mask |= (ipar[i] - 2) << i;
36880 }
36881 break;
36882
36883 case V16SFmode:
36884 /* In the 512-bit SFmode case, the permutation in the upper 256 bits
36885 must mirror the permutation in the lower 256 bits. */
36886 for (i = 0; i < 8; ++i)
36887 if (ipar[i] + 8 != ipar[i + 8])
36888 return 0;
36889 /* FALLTHRU */
36890
36891 case V8SFmode:
36892 /* In 256 bit SFmode case, we have full freedom of
36893 movement within the low 128-bit lane, but the high 128-bit
36894 lane must mirror the exact same pattern. */
36895 for (i = 0; i < 4; ++i)
36896 if (ipar[i] + 4 != ipar[i + 4])
36897 return 0;
36898 nelt = 4;
36899 /* FALLTHRU */
36900
36901 case V2DFmode:
36902 case V4SFmode:
36903 /* In the 128-bit case, we have full freedom in the placement of
36904 the elements from the source operand. */
36905 for (i = 0; i < nelt; ++i)
36906 mask |= ipar[i] << (i * (nelt / 2));
36907 break;
36908
36909 default:
36910 gcc_unreachable ();
36911 }
36912
36913 /* Make sure success has a non-zero value by adding one. */
36914 return mask + 1;
36915 }
36916
36917 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
36918 the expansion functions to turn the parallel back into a mask.
36919 The return value is 0 for no match and the imm8+1 for a match. */
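/* For example, for V8SFmode and a parallel selecting elements
   8 9 10 11 0 1 2 3, the low result half comes from the low half of
   the second operand (selector 2) and the high result half from the
   low half of the first operand (selector 0), so the immediate is
   0x02 and the function returns 0x03.  */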
36920
36921 int
36922 avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
36923 {
36924 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
36925 unsigned mask = 0;
36926 unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */
36927
36928 if (XVECLEN (par, 0) != (int) nelt)
36929 return 0;
36930
36931 /* Validate that all of the elements are constants, and not totally
36932 out of range. Copy the data into an integral array to make the
36933 subsequent checks easier. */
36934 for (i = 0; i < nelt; ++i)
36935 {
36936 rtx er = XVECEXP (par, 0, i);
36937 unsigned HOST_WIDE_INT ei;
36938
36939 if (!CONST_INT_P (er))
36940 return 0;
36941 ei = INTVAL (er);
36942 if (ei >= 2 * nelt)
36943 return 0;
36944 ipar[i] = ei;
36945 }
36946
36947 /* Validate that each half of the permute selects consecutive elements. */
36948 for (i = 0; i < nelt2 - 1; ++i)
36949 if (ipar[i] + 1 != ipar[i + 1])
36950 return 0;
36951 for (i = nelt2; i < nelt - 1; ++i)
36952 if (ipar[i] + 1 != ipar[i + 1])
36953 return 0;
36954
36955 /* Reconstruct the mask. */
36956 for (i = 0; i < 2; ++i)
36957 {
36958 unsigned e = ipar[i * nelt2];
36959 if (e % nelt2)
36960 return 0;
36961 e /= nelt2;
36962 mask |= e << (i * 4);
36963 }
36964
36965 /* Make sure success has a non-zero value by adding one. */
36966 return mask + 1;
36967 }
36968 \f
36969 /* Store OPERAND to the memory after reload is completed. This means
36970 that we can't easily use assign_stack_local. */
36971 rtx
36972 ix86_force_to_memory (enum machine_mode mode, rtx operand)
36973 {
36974 rtx result;
36975
36976 gcc_assert (reload_completed);
36977 if (ix86_using_red_zone ())
36978 {
36979 result = gen_rtx_MEM (mode,
36980 gen_rtx_PLUS (Pmode,
36981 stack_pointer_rtx,
36982 GEN_INT (-RED_ZONE_SIZE)));
36983 emit_move_insn (result, operand);
36984 }
36985 else if (TARGET_64BIT)
36986 {
36987 switch (mode)
36988 {
36989 case HImode:
36990 case SImode:
36991 operand = gen_lowpart (DImode, operand);
36992 /* FALLTHRU */
36993 case DImode:
36994 emit_insn (
36995 gen_rtx_SET (VOIDmode,
36996 gen_rtx_MEM (DImode,
36997 gen_rtx_PRE_DEC (DImode,
36998 stack_pointer_rtx)),
36999 operand));
37000 break;
37001 default:
37002 gcc_unreachable ();
37003 }
37004 result = gen_rtx_MEM (mode, stack_pointer_rtx);
37005 }
37006 else
37007 {
37008 switch (mode)
37009 {
37010 case DImode:
37011 {
37012 rtx operands[2];
37013 split_double_mode (mode, &operand, 1, operands, operands + 1);
37014 emit_insn (
37015 gen_rtx_SET (VOIDmode,
37016 gen_rtx_MEM (SImode,
37017 gen_rtx_PRE_DEC (Pmode,
37018 stack_pointer_rtx)),
37019 operands[1]));
37020 emit_insn (
37021 gen_rtx_SET (VOIDmode,
37022 gen_rtx_MEM (SImode,
37023 gen_rtx_PRE_DEC (Pmode,
37024 stack_pointer_rtx)),
37025 operands[0]));
37026 }
37027 break;
37028 case HImode:
37029 /* Store HImodes as SImodes. */
37030 operand = gen_lowpart (SImode, operand);
37031 /* FALLTHRU */
37032 case SImode:
37033 emit_insn (
37034 gen_rtx_SET (VOIDmode,
37035 gen_rtx_MEM (GET_MODE (operand),
37036 gen_rtx_PRE_DEC (SImode,
37037 stack_pointer_rtx)),
37038 operand));
37039 break;
37040 default:
37041 gcc_unreachable ();
37042 }
37043 result = gen_rtx_MEM (mode, stack_pointer_rtx);
37044 }
37045 return result;
37046 }
37047
37048 /* Free operand from the memory. */
37049 void
37050 ix86_free_from_memory (enum machine_mode mode)
37051 {
37052 if (!ix86_using_red_zone ())
37053 {
37054 int size;
37055
37056 if (mode == DImode || TARGET_64BIT)
37057 size = 8;
37058 else
37059 size = 4;
37060 /* Use LEA to deallocate stack space. In peephole2 it will be converted
37061 to a pop or add instruction if registers are available. */
37062 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
37063 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
37064 GEN_INT (size))));
37065 }
37066 }
37067
37068 /* Return a register priority for hard reg REGNO. */
37069 static int
37070 ix86_register_priority (int hard_regno)
37071 {
37072 /* ebp and r13 as the base always want a displacement, and r12 as the
37073 base always wants an index. So discourage their use in an
37074 address. */
37075 if (hard_regno == R12_REG || hard_regno == R13_REG)
37076 return 0;
37077 if (hard_regno == BP_REG)
37078 return 1;
37079 /* New x86-64 int registers result in bigger code size. Discourage
37080 them. */
37081 if (FIRST_REX_INT_REG <= hard_regno && hard_regno <= LAST_REX_INT_REG)
37082 return 2;
37083 /* New x86-64 SSE registers result in bigger code size. Discourage
37084 them. */
37085 if (FIRST_REX_SSE_REG <= hard_regno && hard_regno <= LAST_REX_SSE_REG)
37086 return 2;
37087 /* Use of the AX register results in smaller code. Prefer it. */
37088 if (hard_regno == 0)
37089 return 4;
37090 return 3;
37091 }
37092
37093 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
37094
37095 Put float CONST_DOUBLE in the constant pool instead of fp regs.
37096 QImode must go into class Q_REGS.
37097 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
37098 movdf to do mem-to-mem moves through integer regs. */
37099
37100 static reg_class_t
37101 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
37102 {
37103 enum machine_mode mode = GET_MODE (x);
37104
37105 /* We're only allowed to return a subclass of CLASS. Many of the
37106 following checks fail for NO_REGS, so eliminate that early. */
37107 if (regclass == NO_REGS)
37108 return NO_REGS;
37109
37110 /* All classes can load zeros. */
37111 if (x == CONST0_RTX (mode))
37112 return regclass;
37113
37114 /* Force constants into memory if we are loading a (nonzero) constant into
37115 an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
37116 instructions to load from a constant. */
37117 if (CONSTANT_P (x)
37118 && (MAYBE_MMX_CLASS_P (regclass)
37119 || MAYBE_SSE_CLASS_P (regclass)
37120 || MAYBE_MASK_CLASS_P (regclass)))
37121 return NO_REGS;
37122
37123 /* Prefer SSE regs only, if we can use them for math. */
37124 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
37125 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
37126
37127 /* Floating-point constants need more complex checks. */
37128 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
37129 {
37130 /* General regs can load everything. */
37131 if (reg_class_subset_p (regclass, GENERAL_REGS))
37132 return regclass;
37133
37134 /* Floats can load 0 and 1 plus some others. Note that we eliminated
37135 zero above. We only want to wind up preferring 80387 registers if
37136 we plan on doing computation with them. */
37137 if (TARGET_80387
37138 && standard_80387_constant_p (x) > 0)
37139 {
37140 /* Limit class to non-sse. */
37141 if (regclass == FLOAT_SSE_REGS)
37142 return FLOAT_REGS;
37143 if (regclass == FP_TOP_SSE_REGS)
37144 return FP_TOP_REG;
37145 if (regclass == FP_SECOND_SSE_REGS)
37146 return FP_SECOND_REG;
37147 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
37148 return regclass;
37149 }
37150
37151 return NO_REGS;
37152 }
37153
37154 /* Generally when we see PLUS here, it's the function invariant
37155 (plus soft-fp const_int), which can only be computed into general
37156 regs. */
37157 if (GET_CODE (x) == PLUS)
37158 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
37159
37160 /* QImode constants are easy to load, but non-constant QImode data
37161 must go into Q_REGS. */
37162 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
37163 {
37164 if (reg_class_subset_p (regclass, Q_REGS))
37165 return regclass;
37166 if (reg_class_subset_p (Q_REGS, regclass))
37167 return Q_REGS;
37168 return NO_REGS;
37169 }
37170
37171 return regclass;
37172 }
37173
37174 /* Discourage putting floating-point values in SSE registers unless
37175 SSE math is being used, and likewise for the 387 registers. */
37176 static reg_class_t
37177 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
37178 {
37179 enum machine_mode mode = GET_MODE (x);
37180
37181 /* Restrict the output reload class to the register bank that we are doing
37182 math on. If we would like not to return a subset of CLASS, reject this
37183 alternative: if reload cannot do this, it will still use its choice. */
37184 mode = GET_MODE (x);
37185 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
37186 return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
37187
37188 if (X87_FLOAT_MODE_P (mode))
37189 {
37190 if (regclass == FP_TOP_SSE_REGS)
37191 return FP_TOP_REG;
37192 else if (regclass == FP_SECOND_SSE_REGS)
37193 return FP_SECOND_REG;
37194 else
37195 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
37196 }
37197
37198 return regclass;
37199 }
37200
37201 static reg_class_t
37202 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
37203 enum machine_mode mode, secondary_reload_info *sri)
37204 {
37205 /* Double-word spills from general registers to non-offsettable memory
37206 references (zero-extended addresses) require special handling. */
37207 if (TARGET_64BIT
37208 && MEM_P (x)
37209 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
37210 && INTEGER_CLASS_P (rclass)
37211 && !offsettable_memref_p (x))
37212 {
37213 sri->icode = (in_p
37214 ? CODE_FOR_reload_noff_load
37215 : CODE_FOR_reload_noff_store);
37216 /* Add the cost of moving address to a temporary. */
37217 sri->extra_cost = 1;
37218
37219 return NO_REGS;
37220 }
37221
37222 /* QImode spills from non-QI registers require an
37223 intermediate register on 32-bit targets. */
37224 if (mode == QImode
37225 && (MAYBE_MASK_CLASS_P (rclass)
37226 || (!TARGET_64BIT && !in_p
37227 && INTEGER_CLASS_P (rclass)
37228 && MAYBE_NON_Q_CLASS_P (rclass))))
37229 {
37230 int regno;
37231
37232 if (REG_P (x))
37233 regno = REGNO (x);
37234 else
37235 regno = -1;
37236
37237 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
37238 regno = true_regnum (x);
37239
37240 /* Return Q_REGS if the operand is in memory. */
37241 if (regno == -1)
37242 return Q_REGS;
37243 }
37244
37245 /* This condition handles corner case where an expression involving
37246 pointers gets vectorized. We're trying to use the address of a
37247 stack slot as a vector initializer.
37248
37249 (set (reg:V2DI 74 [ vect_cst_.2 ])
37250 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
37251
37252 Eventually frame gets turned into sp+offset like this:
37253
37254 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37255 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
37256 (const_int 392 [0x188]))))
37257
37258 That later gets turned into:
37259
37260 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37261 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
37262 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
37263
37264 We'll have the following reload recorded:
37265
37266 Reload 0: reload_in (DI) =
37267 (plus:DI (reg/f:DI 7 sp)
37268 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
37269 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37270 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
37271 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
37272 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37273 reload_reg_rtx: (reg:V2DI 22 xmm1)
37274
37275 Which isn't going to work since SSE instructions can't handle scalar
37276 additions. Returning GENERAL_REGS forces the addition into integer
37277 register and reload can handle subsequent reloads without problems. */
37278
37279 if (in_p && GET_CODE (x) == PLUS
37280 && SSE_CLASS_P (rclass)
37281 && SCALAR_INT_MODE_P (mode))
37282 return GENERAL_REGS;
37283
37284 return NO_REGS;
37285 }
37286
37287 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
37288
37289 static bool
37290 ix86_class_likely_spilled_p (reg_class_t rclass)
37291 {
37292 switch (rclass)
37293 {
37294 case AREG:
37295 case DREG:
37296 case CREG:
37297 case BREG:
37298 case AD_REGS:
37299 case SIREG:
37300 case DIREG:
37301 case SSE_FIRST_REG:
37302 case FP_TOP_REG:
37303 case FP_SECOND_REG:
37304 return true;
37305
37306 default:
37307 break;
37308 }
37309
37310 return false;
37311 }
37312
37313 /* If we are copying between general and FP registers, we need a memory
37314 location. The same is true for SSE and MMX registers.
37315
37316 To optimize register_move_cost performance, allow inline variant.
37317
37318 The macro can't work reliably when one of the CLASSES is a class containing
37319 registers from multiple units (SSE, MMX, integer). We avoid this by never
37320 combining those units in a single alternative in the machine description.
37321 Ensure that this constraint holds to avoid unexpected surprises.
37322
37323 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
37324 enforce these sanity checks. */
37325
37326 static inline bool
37327 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
37328 enum machine_mode mode, int strict)
37329 {
37330 if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
37331 return false;
37332 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
37333 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
37334 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
37335 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
37336 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
37337 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
37338 {
37339 gcc_assert (!strict || lra_in_progress);
37340 return true;
37341 }
37342
37343 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
37344 return true;
37345
37346 /* ??? This is a lie. We do have moves between mmx/general and between
37347 mmx/sse2. But by saying we need secondary memory we discourage the
37348 register allocator from using the mmx registers unless needed. */
37349 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
37350 return true;
37351
37352 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
37353 {
37354 /* SSE1 doesn't have any direct moves from other classes. */
37355 if (!TARGET_SSE2)
37356 return true;
37357
37358 /* If the target says that inter-unit moves are more expensive
37359 than moving through memory, then don't generate them. */
37360 if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
37361 || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
37362 return true;
37363
37364 /* Between SSE and general, we have moves no larger than word size. */
37365 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
37366 return true;
37367 }
37368
37369 return false;
37370 }
37371
37372 bool
37373 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
37374 enum machine_mode mode, int strict)
37375 {
37376 return inline_secondary_memory_needed (class1, class2, mode, strict);
37377 }
37378
37379 /* Implement the TARGET_CLASS_MAX_NREGS hook.
37380
37381 On the 80386, this is the size of MODE in words,
37382 except in the FP regs, where a single reg is always enough. */
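/* For example, XFmode needs three general registers on a 32-bit
   target (two on a 64-bit target) but only a single x87 register.  */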
37383
37384 static unsigned char
37385 ix86_class_max_nregs (reg_class_t rclass, enum machine_mode mode)
37386 {
37387 if (MAYBE_INTEGER_CLASS_P (rclass))
37388 {
37389 if (mode == XFmode)
37390 return (TARGET_64BIT ? 2 : 3);
37391 else if (mode == XCmode)
37392 return (TARGET_64BIT ? 4 : 6);
37393 else
37394 return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
37395 }
37396 else
37397 {
37398 if (COMPLEX_MODE_P (mode))
37399 return 2;
37400 else
37401 return 1;
37402 }
37403 }
37404
37405 /* Return true if the registers in CLASS cannot represent the change from
37406 modes FROM to TO. */
37407
37408 bool
37409 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
37410 enum reg_class regclass)
37411 {
37412 if (from == to)
37413 return false;
37414
37415 /* x87 registers can't do subreg at all, as all values are reformatted
37416 to extended precision. */
37417 if (MAYBE_FLOAT_CLASS_P (regclass))
37418 return true;
37419
37420 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
37421 {
37422 /* Vector registers do not support QI or HImode loads. If we don't
37423 disallow a change to these modes, reload will assume it's ok to
37424 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
37425 the vec_dupv4hi pattern. */
37426 if (GET_MODE_SIZE (from) < 4)
37427 return true;
37428
37429 /* Vector registers do not support subreg with nonzero offsets, which
37430 are otherwise valid for integer registers. Since we can't see
37431 whether we have a nonzero offset from here, prohibit all
37432 nonparadoxical subregs changing size. */
37433 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
37434 return true;
37435 }
37436
37437 return false;
37438 }
37439
37440 /* Return the cost of moving data of mode M between a
37441 register and memory. A value of 2 is the default; this cost is
37442 relative to those in `REGISTER_MOVE_COST'.
37443
37444 This function is used extensively by register_move_cost, which is used to
37445 build tables at startup, so make it inline in that case.
37446 When IN is 2, return the maximum of the in and out move costs.
37447
37448 If moving between registers and memory is more expensive than
37449 between two registers, you should define this macro to express the
37450 relative cost.
37451
37452 Also model the increased cost of moving QImode registers in
37453 non-Q_REGS classes.
37454 */
37455 static inline int
37456 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
37457 int in)
37458 {
37459 int cost;
37460 if (FLOAT_CLASS_P (regclass))
37461 {
37462 int index;
37463 switch (mode)
37464 {
37465 case SFmode:
37466 index = 0;
37467 break;
37468 case DFmode:
37469 index = 1;
37470 break;
37471 case XFmode:
37472 index = 2;
37473 break;
37474 default:
37475 return 100;
37476 }
37477 if (in == 2)
37478 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
37479 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
37480 }
37481 if (SSE_CLASS_P (regclass))
37482 {
37483 int index;
37484 switch (GET_MODE_SIZE (mode))
37485 {
37486 case 4:
37487 index = 0;
37488 break;
37489 case 8:
37490 index = 1;
37491 break;
37492 case 16:
37493 index = 2;
37494 break;
37495 default:
37496 return 100;
37497 }
37498 if (in == 2)
37499 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
37500 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
37501 }
37502 if (MMX_CLASS_P (regclass))
37503 {
37504 int index;
37505 switch (GET_MODE_SIZE (mode))
37506 {
37507 case 4:
37508 index = 0;
37509 break;
37510 case 8:
37511 index = 1;
37512 break;
37513 default:
37514 return 100;
37515 }
37516 if (in == 2)
37517 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
37518 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
37519 }
37520 switch (GET_MODE_SIZE (mode))
37521 {
37522 case 1:
37523 if (Q_CLASS_P (regclass) || TARGET_64BIT)
37524 {
37525 if (!in)
37526 return ix86_cost->int_store[0];
37527 if (TARGET_PARTIAL_REG_DEPENDENCY
37528 && optimize_function_for_speed_p (cfun))
37529 cost = ix86_cost->movzbl_load;
37530 else
37531 cost = ix86_cost->int_load[0];
37532 if (in == 2)
37533 return MAX (cost, ix86_cost->int_store[0]);
37534 return cost;
37535 }
37536 else
37537 {
37538 if (in == 2)
37539 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
37540 if (in)
37541 return ix86_cost->movzbl_load;
37542 else
37543 return ix86_cost->int_store[0] + 4;
37544 }
37545 break;
37546 case 2:
37547 if (in == 2)
37548 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
37549 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
37550 default:
37551 /* Compute the number of 32-bit moves needed. TFmode is moved as XFmode. */
37552 if (mode == TFmode)
37553 mode = XFmode;
37554 if (in == 2)
37555 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
37556 else if (in)
37557 cost = ix86_cost->int_load[2];
37558 else
37559 cost = ix86_cost->int_store[2];
37560 return (cost * (((int) GET_MODE_SIZE (mode)
37561 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
37562 }
37563 }
37564
37565 static int
37566 ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
37567 bool in)
37568 {
37569 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
37570 }
37571
37572
37573 /* Return the cost of moving data from a register in class CLASS1 to
37574 one in class CLASS2.
37575
37576 It is not required that the cost always equal 2 when FROM is the same as TO;
37577 on some machines it is expensive to move between registers if they are not
37578 general registers. */
37579
37580 static int
37581 ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
37582 reg_class_t class2_i)
37583 {
37584 enum reg_class class1 = (enum reg_class) class1_i;
37585 enum reg_class class2 = (enum reg_class) class2_i;
37586
37587 /* If we require secondary memory, compute the cost of the store followed
37588 by the load. To avoid bad register allocation choices, this needs
37589 to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
37590
37591 if (inline_secondary_memory_needed (class1, class2, mode, 0))
37592 {
37593 int cost = 1;
37594
37595 cost += inline_memory_move_cost (mode, class1, 2);
37596 cost += inline_memory_move_cost (mode, class2, 2);
37597
37598 /* When copying from a general purpose register we may emit multiple
37599 stores followed by a single load, causing a memory size mismatch stall.
37600 Count this as an arbitrarily high cost of 20. */
37601 if (targetm.class_max_nregs (class1, mode)
37602 > targetm.class_max_nregs (class2, mode))
37603 cost += 20;
37604
37605 /* In the case of FP/MMX moves, the registers actually overlap, and we
37606 have to switch modes in order to treat them differently. */
37607 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
37608 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
37609 cost += 20;
37610
37611 return cost;
37612 }
37613
37614 /* Moves between SSE/MMX and integer unit are expensive. */
37615 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
37616 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
37617
37618 /* ??? By keeping the returned value relatively high, we limit the number
37619 of moves between integer and MMX/SSE registers for all targets.
37620 Additionally, the high value prevents a problem with x86_modes_tieable_p(),
37621 where integer modes in MMX/SSE registers are not tieable
37622 because of missing QImode and HImode moves to, from or between
37623 MMX/SSE registers. */
37624 return MAX (8, ix86_cost->mmxsse_to_integer);
37625
37626 if (MAYBE_FLOAT_CLASS_P (class1))
37627 return ix86_cost->fp_move;
37628 if (MAYBE_SSE_CLASS_P (class1))
37629 return ix86_cost->sse_move;
37630 if (MAYBE_MMX_CLASS_P (class1))
37631 return ix86_cost->mmx_move;
37632 return 2;
37633 }
37634
37635 /* Return TRUE if hard register REGNO can hold a value of machine-mode
37636 MODE. */
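/* For example, the flags register accepts only CCmode, the mask
   registers accept only mask modes, and xmm16-xmm31 accept only modes
   valid for AVX-512.  */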
37637
37638 bool
37639 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
37640 {
37641 /* Flags, and only flags, can hold CCmode values - and they can hold nothing else. */
37642 if (CC_REGNO_P (regno))
37643 return GET_MODE_CLASS (mode) == MODE_CC;
37644 if (GET_MODE_CLASS (mode) == MODE_CC
37645 || GET_MODE_CLASS (mode) == MODE_RANDOM
37646 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
37647 return false;
37648 if (STACK_REGNO_P (regno))
37649 return VALID_FP_MODE_P (mode);
37650 if (MASK_REGNO_P (regno))
37651 return VALID_MASK_REG_MODE (mode);
37652 if (SSE_REGNO_P (regno))
37653 {
37654 /* We implement the move patterns for all vector modes into and
37655 out of SSE registers, even when no operation instructions
37656 are available. */
37657
37658 /* For AVX-512 we allow, regardless of regno:
37659 - XI mode
37660 - any of 512-bit wide vector mode
37661 - any scalar mode. */
37662 if (TARGET_AVX512F
37663 && (mode == XImode
37664 || VALID_AVX512F_REG_MODE (mode)
37665 || VALID_AVX512F_SCALAR_MODE (mode)))
37666 return true;
37667
37668 /* xmm16-xmm31 are only available for AVX-512. */
37669 if (EXT_REX_SSE_REGNO_P (regno))
37670 return false;
37671
37672 /* OImode and AVX modes are available only when AVX is enabled. */
37673 return ((TARGET_AVX
37674 && VALID_AVX256_REG_OR_OI_MODE (mode))
37675 || VALID_SSE_REG_MODE (mode)
37676 || VALID_SSE2_REG_MODE (mode)
37677 || VALID_MMX_REG_MODE (mode)
37678 || VALID_MMX_REG_MODE_3DNOW (mode));
37679 }
37680 if (MMX_REGNO_P (regno))
37681 {
37682 /* We implement the move patterns for 3DNOW modes even in MMX mode,
37683 so if the register is available at all, then we can move data of
37684 the given mode into or out of it. */
37685 return (VALID_MMX_REG_MODE (mode)
37686 || VALID_MMX_REG_MODE_3DNOW (mode));
37687 }
37688
37689 if (mode == QImode)
37690 {
37691 /* Take care with QImode values - they can live in non-QI regs,
37692 but then they cause partial register stalls. */
37693 if (ANY_QI_REGNO_P (regno))
37694 return true;
37695 if (!TARGET_PARTIAL_REG_STALL)
37696 return true;
37697 /* LRA checks if the hard register is OK for the given mode.
37698 QImode values can live in non-QI regs, so we allow all
37699 registers here. */
37700 if (lra_in_progress)
37701 return true;
37702 return !can_create_pseudo_p ();
37703 }
37704 /* We handle both integer and floats in the general purpose registers. */
37705 else if (VALID_INT_MODE_P (mode))
37706 return true;
37707 else if (VALID_FP_MODE_P (mode))
37708 return true;
37709 else if (VALID_DFP_MODE_P (mode))
37710 return true;
37711 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
37712 on to use that value in smaller contexts, this can easily force a
37713 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
37714 supporting DImode, allow it. */
37715 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
37716 return true;
37717
37718 return false;
37719 }
37720
37721 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
37722 tieable integer mode. */
37723
37724 static bool
37725 ix86_tieable_integer_mode_p (enum machine_mode mode)
37726 {
37727 switch (mode)
37728 {
37729 case HImode:
37730 case SImode:
37731 return true;
37732
37733 case QImode:
37734 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
37735
37736 case DImode:
37737 return TARGET_64BIT;
37738
37739 default:
37740 return false;
37741 }
37742 }
37743
37744 /* Return true if MODE1 is accessible in a register that can hold MODE2
37745 without copying. That is, all register classes that can hold MODE2
37746 can also hold MODE1. */
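/* For example, HImode and SImode always tie with each other, QImode
   ties with them unless partial register stalls matter on a 32-bit
   target, and DImode ties only on 64-bit targets.  */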
37747
37748 bool
37749 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
37750 {
37751 if (mode1 == mode2)
37752 return true;
37753
37754 if (ix86_tieable_integer_mode_p (mode1)
37755 && ix86_tieable_integer_mode_p (mode2))
37756 return true;
37757
37758 /* MODE2 being XFmode implies fp stack or general regs, which means we
37759 can tie any smaller floating point modes to it. Note that we do not
37760 tie this with TFmode. */
37761 if (mode2 == XFmode)
37762 return mode1 == SFmode || mode1 == DFmode;
37763
37764 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
37765 that we can tie it with SFmode. */
37766 if (mode2 == DFmode)
37767 return mode1 == SFmode;
37768
37769 /* If MODE2 is only appropriate for an SSE register, then tie with
37770 any other mode acceptable to SSE registers. */
37771 if (GET_MODE_SIZE (mode2) == 32
37772 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
37773 return (GET_MODE_SIZE (mode1) == 32
37774 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
37775 if (GET_MODE_SIZE (mode2) == 16
37776 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
37777 return (GET_MODE_SIZE (mode1) == 16
37778 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
37779
37780 /* If MODE2 is appropriate for an MMX register, then tie
37781 with any other mode acceptable to MMX registers. */
37782 if (GET_MODE_SIZE (mode2) == 8
37783 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
37784 return (GET_MODE_SIZE (mode1) == 8
37785 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
37786
37787 return false;
37788 }
37789
37790 /* Return the cost of moving between two registers of mode MODE. */
37791
37792 static int
37793 ix86_set_reg_reg_cost (enum machine_mode mode)
37794 {
37795 unsigned int units = UNITS_PER_WORD;
37796
37797 switch (GET_MODE_CLASS (mode))
37798 {
37799 default:
37800 break;
37801
37802 case MODE_CC:
37803 units = GET_MODE_SIZE (CCmode);
37804 break;
37805
37806 case MODE_FLOAT:
37807 if ((TARGET_SSE && mode == TFmode)
37808 || (TARGET_80387 && mode == XFmode)
37809 || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
37810 || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
37811 units = GET_MODE_SIZE (mode);
37812 break;
37813
37814 case MODE_COMPLEX_FLOAT:
37815 if ((TARGET_SSE && mode == TCmode)
37816 || (TARGET_80387 && mode == XCmode)
37817 || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
37818 || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
37819 units = GET_MODE_SIZE (mode);
37820 break;
37821
37822 case MODE_VECTOR_INT:
37823 case MODE_VECTOR_FLOAT:
37824 if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
37825 || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
37826 || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
37827 || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
37828 || (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
37829 units = GET_MODE_SIZE (mode);
37830 }
37831
37832 /* Return the cost of moving between two registers of mode MODE,
37833 assuming that the move will be in pieces of at most UNITS bytes. */
37834 return COSTS_N_INSNS ((GET_MODE_SIZE (mode) + units - 1) / units);
37835 }
37836
37837 /* Compute a (partial) cost for rtx X. Return true if the complete
37838 cost has been computed, and false if subexpressions should be
37839 scanned. In either case, *TOTAL contains the cost result. */
37840
37841 static bool
37842 ix86_rtx_costs (rtx x, int code_i, int outer_code_i, int opno, int *total,
37843 bool speed)
37844 {
37845 rtx mask;
37846 enum rtx_code code = (enum rtx_code) code_i;
37847 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
37848 enum machine_mode mode = GET_MODE (x);
37849 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
37850
37851 switch (code)
37852 {
37853 case SET:
37854 if (register_operand (SET_DEST (x), VOIDmode)
37855 && reg_or_0_operand (SET_SRC (x), VOIDmode))
37856 {
37857 *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
37858 return true;
37859 }
37860 return false;
37861
37862 case CONST_INT:
37863 case CONST:
37864 case LABEL_REF:
37865 case SYMBOL_REF:
37866 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
37867 *total = 3;
37868 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
37869 *total = 2;
37870 else if (flag_pic && SYMBOLIC_CONST (x)
37871 && (!TARGET_64BIT
37872 || (GET_CODE (x) != LABEL_REF
37873 && (GET_CODE (x) != SYMBOL_REF
37874 || !SYMBOL_REF_LOCAL_P (x)))))
37875 *total = 1;
37876 else
37877 *total = 0;
37878 return true;
37879
37880 case CONST_DOUBLE:
37881 if (mode == VOIDmode)
37882 {
37883 *total = 0;
37884 return true;
37885 }
37886 switch (standard_80387_constant_p (x))
37887 {
37888 case 1: /* 0.0 */
37889 *total = 1;
37890 return true;
37891 default: /* Other constants */
37892 *total = 2;
37893 return true;
37894 case 0:
37895 case -1:
37896 break;
37897 }
37898 if (SSE_FLOAT_MODE_P (mode))
37899 {
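	  /* Note: a CONST_VECTOR jumps directly to the case label below,
	     so SSE floating-point constants and vector constants share
	     the standard_sse_constant_p costing.  */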
37900 case CONST_VECTOR:
37901 switch (standard_sse_constant_p (x))
37902 {
37903 case 0:
37904 break;
37905 case 1: /* 0: xor eliminates false dependency */
37906 *total = 0;
37907 return true;
37908 default: /* -1: cmp contains false dependency */
37909 *total = 1;
37910 return true;
37911 }
37912 }
37913 /* Fall back to (MEM (SYMBOL_REF)), since that's where
37914 it'll probably end up. Add a penalty for size. */
37915 *total = (COSTS_N_INSNS (1)
37916 + (flag_pic != 0 && !TARGET_64BIT)
37917 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
37918 return true;
37919
37920 case ZERO_EXTEND:
37921 /* The zero extension is often completely free on x86_64, so make
37922 it as cheap as possible. */
37923 if (TARGET_64BIT && mode == DImode
37924 && GET_MODE (XEXP (x, 0)) == SImode)
37925 *total = 1;
37926 else if (TARGET_ZERO_EXTEND_WITH_AND)
37927 *total = cost->add;
37928 else
37929 *total = cost->movzx;
37930 return false;
37931
37932 case SIGN_EXTEND:
37933 *total = cost->movsx;
37934 return false;
37935
37936 case ASHIFT:
37937 if (SCALAR_INT_MODE_P (mode)
37938 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
37939 && CONST_INT_P (XEXP (x, 1)))
37940 {
37941 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
37942 if (value == 1)
37943 {
37944 *total = cost->add;
37945 return false;
37946 }
37947 if ((value == 2 || value == 3)
37948 && cost->lea <= cost->shift_const)
37949 {
37950 *total = cost->lea;
37951 return false;
37952 }
37953 }
37954 /* FALLTHRU */
37955
37956 case ROTATE:
37957 case ASHIFTRT:
37958 case LSHIFTRT:
37959 case ROTATERT:
37960 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
37961 {
37962 /* ??? Should be SSE vector operation cost. */
37963 /* At least for published AMD latencies, this really is the same
37964 as the latency for a simple fpu operation like fabs. */
37965 /* V*QImode is emulated with 1-11 insns. */
37966 if (mode == V16QImode || mode == V32QImode)
37967 {
37968 int count = 11;
37969 if (TARGET_XOP && mode == V16QImode)
37970 {
37971 /* For XOP we use vpshab, which requires a broadcast of the
37972 value to the variable shift insn. For constants this
37973 means a V16Q const in mem; even when we can perform the
37974 shift with one insn set the cost to prefer paddb. */
37975 if (CONSTANT_P (XEXP (x, 1)))
37976 {
37977 *total = (cost->fabs
37978 + rtx_cost (XEXP (x, 0), code, 0, speed)
37979 + (speed ? 2 : COSTS_N_BYTES (16)));
37980 return true;
37981 }
37982 count = 3;
37983 }
37984 else if (TARGET_SSSE3)
37985 count = 7;
37986 *total = cost->fabs * count;
37987 }
37988 else
37989 *total = cost->fabs;
37990 }
37991 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
37992 {
37993 if (CONST_INT_P (XEXP (x, 1)))
37994 {
37995 if (INTVAL (XEXP (x, 1)) > 32)
37996 *total = cost->shift_const + COSTS_N_INSNS (2);
37997 else
37998 *total = cost->shift_const * 2;
37999 }
38000 else
38001 {
38002 if (GET_CODE (XEXP (x, 1)) == AND)
38003 *total = cost->shift_var * 2;
38004 else
38005 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
38006 }
38007 }
38008 else
38009 {
38010 if (CONST_INT_P (XEXP (x, 1)))
38011 *total = cost->shift_const;
38012 else if (GET_CODE (XEXP (x, 1)) == SUBREG
38013 && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND)
38014 {
38015 /* Return the cost after shift-and truncation. */
38016 *total = cost->shift_var;
38017 return true;
38018 }
38019 else
38020 *total = cost->shift_var;
38021 }
38022 return false;
38023
38024 case FMA:
38025 {
38026 rtx sub;
38027
38028 gcc_assert (FLOAT_MODE_P (mode));
38029 gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
38030
38031 /* ??? SSE scalar/vector cost should be used here. */
38032 /* ??? Bald assumption that fma has the same cost as fmul. */
38033 *total = cost->fmul;
38034 *total += rtx_cost (XEXP (x, 1), FMA, 1, speed);
38035
38036 /* A negation in op0 or op2 is free: FMS, FNMA, FNMS. */
38037 sub = XEXP (x, 0);
38038 if (GET_CODE (sub) == NEG)
38039 sub = XEXP (sub, 0);
38040 *total += rtx_cost (sub, FMA, 0, speed);
38041
38042 sub = XEXP (x, 2);
38043 if (GET_CODE (sub) == NEG)
38044 sub = XEXP (sub, 0);
38045 *total += rtx_cost (sub, FMA, 2, speed);
38046 return true;
38047 }
38048
38049 case MULT:
38050 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38051 {
38052 /* ??? SSE scalar cost should be used here. */
38053 *total = cost->fmul;
38054 return false;
38055 }
38056 else if (X87_FLOAT_MODE_P (mode))
38057 {
38058 *total = cost->fmul;
38059 return false;
38060 }
38061 else if (FLOAT_MODE_P (mode))
38062 {
38063 /* ??? SSE vector cost should be used here. */
38064 *total = cost->fmul;
38065 return false;
38066 }
38067 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
38068 {
38069 /* V*QImode is emulated with 7-13 insns. */
38070 if (mode == V16QImode || mode == V32QImode)
38071 {
38072 int extra = 11;
38073 if (TARGET_XOP && mode == V16QImode)
38074 extra = 5;
38075 else if (TARGET_SSSE3)
38076 extra = 6;
38077 *total = cost->fmul * 2 + cost->fabs * extra;
38078 }
38079 /* V*DImode is emulated with 5-8 insns. */
38080 else if (mode == V2DImode || mode == V4DImode)
38081 {
38082 if (TARGET_XOP && mode == V2DImode)
38083 *total = cost->fmul * 2 + cost->fabs * 3;
38084 else
38085 *total = cost->fmul * 3 + cost->fabs * 5;
38086 }
38087 /* Without sse4.1, we don't have PMULLD; it's emulated with 7
38088 insns, including two PMULUDQ. */
38089 else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX))
38090 *total = cost->fmul * 2 + cost->fabs * 5;
38091 else
38092 *total = cost->fmul;
38093 return false;
38094 }
38095 else
38096 {
38097 rtx op0 = XEXP (x, 0);
38098 rtx op1 = XEXP (x, 1);
38099 int nbits;
38100 if (CONST_INT_P (XEXP (x, 1)))
38101 {
38102 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
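	      /* Count the set bits in the constant multiplier; each
		 iteration clears the lowest set bit (value &= value - 1).  */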
38103 for (nbits = 0; value != 0; value &= value - 1)
38104 nbits++;
38105 }
38106 else
38107 /* This is arbitrary. */
38108 nbits = 7;
38109
38110 /* Compute costs correctly for widening multiplication. */
38111 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
38112 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
38113 == GET_MODE_SIZE (mode))
38114 {
38115 int is_mulwiden = 0;
38116 enum machine_mode inner_mode = GET_MODE (op0);
38117
38118 if (GET_CODE (op0) == GET_CODE (op1))
38119 is_mulwiden = 1, op1 = XEXP (op1, 0);
38120 else if (CONST_INT_P (op1))
38121 {
38122 if (GET_CODE (op0) == SIGN_EXTEND)
38123 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
38124 == INTVAL (op1);
38125 else
38126 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
38127 }
38128
38129 if (is_mulwiden)
38130 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
38131 }
38132
38133 *total = (cost->mult_init[MODE_INDEX (mode)]
38134 + nbits * cost->mult_bit
38135 + rtx_cost (op0, outer_code, opno, speed)
38136 + rtx_cost (op1, outer_code, opno, speed));
38137
38138 return true;
38139 }
38140
38141 case DIV:
38142 case UDIV:
38143 case MOD:
38144 case UMOD:
38145 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38146 /* ??? SSE cost should be used here. */
38147 *total = cost->fdiv;
38148 else if (X87_FLOAT_MODE_P (mode))
38149 *total = cost->fdiv;
38150 else if (FLOAT_MODE_P (mode))
38151 /* ??? SSE vector cost should be used here. */
38152 *total = cost->fdiv;
38153 else
38154 *total = cost->divide[MODE_INDEX (mode)];
38155 return false;
38156
38157 case PLUS:
38158 if (GET_MODE_CLASS (mode) == MODE_INT
38159 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
38160 {
38161 if (GET_CODE (XEXP (x, 0)) == PLUS
38162 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
38163 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
38164 && CONSTANT_P (XEXP (x, 1)))
38165 {
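	      /* (plus (plus (mult X const) Y) Z) with const in {2,4,8}
		 matches the scaled-index addressing of a single lea.  */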
38166 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
38167 if (val == 2 || val == 4 || val == 8)
38168 {
38169 *total = cost->lea;
38170 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
38171 outer_code, opno, speed);
38172 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
38173 outer_code, opno, speed);
38174 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
38175 return true;
38176 }
38177 }
38178 else if (GET_CODE (XEXP (x, 0)) == MULT
38179 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
38180 {
38181 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
38182 if (val == 2 || val == 4 || val == 8)
38183 {
38184 *total = cost->lea;
38185 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
38186 outer_code, opno, speed);
38187 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
38188 return true;
38189 }
38190 }
38191 else if (GET_CODE (XEXP (x, 0)) == PLUS)
38192 {
38193 *total = cost->lea;
38194 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
38195 outer_code, opno, speed);
38196 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
38197 outer_code, opno, speed);
38198 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
38199 return true;
38200 }
38201 }
38202 /* FALLTHRU */
38203
38204 case MINUS:
38205 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38206 {
38207 /* ??? SSE cost should be used here. */
38208 *total = cost->fadd;
38209 return false;
38210 }
38211 else if (X87_FLOAT_MODE_P (mode))
38212 {
38213 *total = cost->fadd;
38214 return false;
38215 }
38216 else if (FLOAT_MODE_P (mode))
38217 {
38218 /* ??? SSE vector cost should be used here. */
38219 *total = cost->fadd;
38220 return false;
38221 }
38222 /* FALLTHRU */
38223
38224 case AND:
38225 case IOR:
38226 case XOR:
38227 if (GET_MODE_CLASS (mode) == MODE_INT
38228 && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
38229 {
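	  /* Doubleword operation: charge two word-sized adds, and double
	     an operand's cost when it is not already DImode.  */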
38230 *total = (cost->add * 2
38231 + (rtx_cost (XEXP (x, 0), outer_code, opno, speed)
38232 << (GET_MODE (XEXP (x, 0)) != DImode))
38233 + (rtx_cost (XEXP (x, 1), outer_code, opno, speed)
38234 << (GET_MODE (XEXP (x, 1)) != DImode)));
38235 return true;
38236 }
38237 /* FALLTHRU */
38238
38239 case NEG:
38240 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38241 {
38242 /* ??? SSE cost should be used here. */
38243 *total = cost->fchs;
38244 return false;
38245 }
38246 else if (X87_FLOAT_MODE_P (mode))
38247 {
38248 *total = cost->fchs;
38249 return false;
38250 }
38251 else if (FLOAT_MODE_P (mode))
38252 {
38253 /* ??? SSE vector cost should be used here. */
38254 *total = cost->fchs;
38255 return false;
38256 }
38257 /* FALLTHRU */
38258
38259 case NOT:
38260 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
38261 {
38262 /* ??? Should be SSE vector operation cost. */
38263 /* At least for published AMD latencies, this really is the same
38264 as the latency for a simple fpu operation like fabs. */
38265 *total = cost->fabs;
38266 }
38267 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
38268 *total = cost->add * 2;
38269 else
38270 *total = cost->add;
38271 return false;
38272
38273 case COMPARE:
38274 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
38275 && XEXP (XEXP (x, 0), 1) == const1_rtx
38276 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
38277 && XEXP (x, 1) == const0_rtx)
38278 {
38279 /* This kind of construct is implemented using test[bwl].
38280 Treat it as if we had an AND. */
38281 *total = (cost->add
38282 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, opno, speed)
38283 + rtx_cost (const1_rtx, outer_code, opno, speed));
38284 return true;
38285 }
38286 return false;
38287
38288 case FLOAT_EXTEND:
38289 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
38290 *total = 0;
38291 return false;
38292
38293 case ABS:
38294 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38295 /* ??? SSE cost should be used here. */
38296 *total = cost->fabs;
38297 else if (X87_FLOAT_MODE_P (mode))
38298 *total = cost->fabs;
38299 else if (FLOAT_MODE_P (mode))
38300 /* ??? SSE vector cost should be used here. */
38301 *total = cost->fabs;
38302 return false;
38303
38304 case SQRT:
38305 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38306 /* ??? SSE cost should be used here. */
38307 *total = cost->fsqrt;
38308 else if (X87_FLOAT_MODE_P (mode))
38309 *total = cost->fsqrt;
38310 else if (FLOAT_MODE_P (mode))
38311 /* ??? SSE vector cost should be used here. */
38312 *total = cost->fsqrt;
38313 return false;
38314
38315 case UNSPEC:
38316 if (XINT (x, 1) == UNSPEC_TP)
38317 *total = 0;
38318 return false;
38319
38320 case VEC_SELECT:
38321 case VEC_CONCAT:
38322 case VEC_DUPLICATE:
38323 /* ??? Assume all of these vector manipulation patterns are
38324 recognizable, in which case they all pretty much have the
38325 same cost. */
38326 *total = cost->fabs;
38327 return true;
38328 case VEC_MERGE:
38329 mask = XEXP (x, 2);
38330 /* This is a masked instruction; assume the same cost
38331 as the nonmasked variant. */
38332 if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
38333 *total = rtx_cost (XEXP (x, 0), outer_code, opno, speed);
38334 else
38335 *total = cost->fabs;
38336 return true;
38337
38338 default:
38339 return false;
38340 }
38341 }
38342
38343 #if TARGET_MACHO
38344
38345 static int current_machopic_label_num;
38346
38347 /* Given a symbol name and its associated stub, write out the
38348 definition of the stub. */
38349
38350 void
38351 machopic_output_stub (FILE *file, const char *symb, const char *stub)
38352 {
38353 unsigned int length;
38354 char *binder_name, *symbol_name, lazy_ptr_name[32];
38355 int label = ++current_machopic_label_num;
38356
38357 /* For 64-bit we shouldn't get here. */
38358 gcc_assert (!TARGET_64BIT);
38359
38360 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
38361 symb = targetm.strip_name_encoding (symb);
38362
38363 length = strlen (stub);
38364 binder_name = XALLOCAVEC (char, length + 32);
38365 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
38366
38367 length = strlen (symb);
38368 symbol_name = XALLOCAVEC (char, length + 32);
38369 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
38370
38371 sprintf (lazy_ptr_name, "L%d$lz", label);
38372
38373 if (MACHOPIC_ATT_STUB)
38374 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
38375 else if (MACHOPIC_PURE)
38376 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
38377 else
38378 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
38379
38380 fprintf (file, "%s:\n", stub);
38381 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
38382
38383 if (MACHOPIC_ATT_STUB)
38384 {
38385 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
38386 }
38387 else if (MACHOPIC_PURE)
38388 {
38389 /* PIC stub. */
38390 /* 25-byte PIC stub using "CALL get_pc_thunk". */
38391 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
38392 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
38393 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
38394 label, lazy_ptr_name, label);
38395 fprintf (file, "\tjmp\t*%%ecx\n");
38396 }
38397 else
38398 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
38399
38400 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
38401 it needs no stub-binding-helper. */
38402 if (MACHOPIC_ATT_STUB)
38403 return;
38404
38405 fprintf (file, "%s:\n", binder_name);
38406
38407 if (MACHOPIC_PURE)
38408 {
38409 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
38410 fprintf (file, "\tpushl\t%%ecx\n");
38411 }
38412 else
38413 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
38414
38415 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
38416
38417 /* N.B. Keep the correspondence of these
38418 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
38419 old-pic/new-pic/non-pic stubs; altering this will break
38420 compatibility with existing dylibs. */
38421 if (MACHOPIC_PURE)
38422 {
38423 /* 25-byte PIC stub using "CALL get_pc_thunk". */
38424 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
38425 }
38426 else
38427 /* 16-byte -mdynamic-no-pic stub. */
38428 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr3_section]);
38429
38430 fprintf (file, "%s:\n", lazy_ptr_name);
38431 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
38432 fprintf (file, ASM_LONG "%s\n", binder_name);
38433 }
38434 #endif /* TARGET_MACHO */
38435
38436 /* Order the registers for register allocator. */
38437
38438 void
38439 x86_order_regs_for_local_alloc (void)
38440 {
38441 int pos = 0;
38442 int i;
38443
38444 /* First allocate the local general purpose registers. */
38445 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
38446 if (GENERAL_REGNO_P (i) && call_used_regs[i])
38447 reg_alloc_order [pos++] = i;
38448
38449 /* Global general purpose registers. */
38450 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
38451 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
38452 reg_alloc_order [pos++] = i;
38453
38454 /* x87 registers come first in case we are doing FP math
38455 using them. */
38456 if (!TARGET_SSE_MATH)
38457 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
38458 reg_alloc_order [pos++] = i;
38459
38460 /* SSE registers. */
38461 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
38462 reg_alloc_order [pos++] = i;
38463 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
38464 reg_alloc_order [pos++] = i;
38465
38466 /* Extended REX SSE registers. */
38467 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
38468 reg_alloc_order [pos++] = i;
38469
38470 /* Mask registers. */
38471 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
38472 reg_alloc_order [pos++] = i;
38473
38474 /* x87 registers. */
38475 if (TARGET_SSE_MATH)
38476 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
38477 reg_alloc_order [pos++] = i;
38478
38479 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
38480 reg_alloc_order [pos++] = i;
38481
38482 /* Initialize the rest of the array, as we do not allocate some registers
38483 at all. */
38484 while (pos < FIRST_PSEUDO_REGISTER)
38485 reg_alloc_order [pos++] = 0;
38486 }
38487
38488 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
38489 in struct attribute_spec handler. */
38490 static tree
38491 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
38492 tree args,
38493 int flags ATTRIBUTE_UNUSED,
38494 bool *no_add_attrs)
38495 {
38496 if (TREE_CODE (*node) != FUNCTION_TYPE
38497 && TREE_CODE (*node) != METHOD_TYPE
38498 && TREE_CODE (*node) != FIELD_DECL
38499 && TREE_CODE (*node) != TYPE_DECL)
38500 {
38501 warning (OPT_Wattributes, "%qE attribute only applies to functions",
38502 name);
38503 *no_add_attrs = true;
38504 return NULL_TREE;
38505 }
38506 if (TARGET_64BIT)
38507 {
38508 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
38509 name);
38510 *no_add_attrs = true;
38511 return NULL_TREE;
38512 }
38513 if (is_attribute_p ("callee_pop_aggregate_return", name))
38514 {
38515 tree cst;
38516
38517 cst = TREE_VALUE (args);
38518 if (TREE_CODE (cst) != INTEGER_CST)
38519 {
38520 warning (OPT_Wattributes,
38521 "%qE attribute requires an integer constant argument",
38522 name);
38523 *no_add_attrs = true;
38524 }
38525 else if (compare_tree_int (cst, 0) != 0
38526 && compare_tree_int (cst, 1) != 0)
38527 {
38528 warning (OPT_Wattributes,
38529 "argument to %qE attribute is neither zero, nor one",
38530 name);
38531 *no_add_attrs = true;
38532 }
38533
38534 return NULL_TREE;
38535 }
38536
38537 return NULL_TREE;
38538 }
38539
38540 /* Handle a "ms_abi" or "sysv_abi" attribute; arguments as in
38541 struct attribute_spec.handler. */
38542 static tree
38543 ix86_handle_abi_attribute (tree *node, tree name,
38544 tree args ATTRIBUTE_UNUSED,
38545 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
38546 {
38547 if (TREE_CODE (*node) != FUNCTION_TYPE
38548 && TREE_CODE (*node) != METHOD_TYPE
38549 && TREE_CODE (*node) != FIELD_DECL
38550 && TREE_CODE (*node) != TYPE_DECL)
38551 {
38552 warning (OPT_Wattributes, "%qE attribute only applies to functions",
38553 name);
38554 *no_add_attrs = true;
38555 return NULL_TREE;
38556 }
38557
38558 /* The ms_abi and sysv_abi attributes are mutually exclusive on a type. */
38559 if (is_attribute_p ("ms_abi", name))
38560 {
38561 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
38562 {
38563 error ("ms_abi and sysv_abi attributes are not compatible");
38564 }
38565
38566 return NULL_TREE;
38567 }
38568 else if (is_attribute_p ("sysv_abi", name))
38569 {
38570 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
38571 {
38572 error ("ms_abi and sysv_abi attributes are not compatible");
38573 }
38574
38575 return NULL_TREE;
38576 }
38577
38578 return NULL_TREE;
38579 }
38580
38581 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
38582 struct attribute_spec.handler. */
38583 static tree
38584 ix86_handle_struct_attribute (tree *node, tree name,
38585 tree args ATTRIBUTE_UNUSED,
38586 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
38587 {
38588 tree *type = NULL;
38589 if (DECL_P (*node))
38590 {
38591 if (TREE_CODE (*node) == TYPE_DECL)
38592 type = &TREE_TYPE (*node);
38593 }
38594 else
38595 type = node;
38596
38597 if (!(type && RECORD_OR_UNION_TYPE_P (*type)))
38598 {
38599 warning (OPT_Wattributes, "%qE attribute ignored",
38600 name);
38601 *no_add_attrs = true;
38602 }
38603
38604 else if ((is_attribute_p ("ms_struct", name)
38605 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
38606 || ((is_attribute_p ("gcc_struct", name)
38607 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
38608 {
38609 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
38610 name);
38611 *no_add_attrs = true;
38612 }
38613
38614 return NULL_TREE;
38615 }
38616
38617 static tree
38618 ix86_handle_fndecl_attribute (tree *node, tree name,
38619 tree args ATTRIBUTE_UNUSED,
38620 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
38621 {
38622 if (TREE_CODE (*node) != FUNCTION_DECL)
38623 {
38624 warning (OPT_Wattributes, "%qE attribute only applies to functions",
38625 name);
38626 *no_add_attrs = true;
38627 }
38628 return NULL_TREE;
38629 }
38630
38631 static bool
38632 ix86_ms_bitfield_layout_p (const_tree record_type)
38633 {
38634 return ((TARGET_MS_BITFIELD_LAYOUT
38635 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
38636 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
38637 }
38638
38639 /* Returns an expression indicating where the this parameter is
38640 located on entry to the FUNCTION. */
38641
38642 static rtx
38643 x86_this_parameter (tree function)
38644 {
38645 tree type = TREE_TYPE (function);
38646 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
38647 int nregs;
38648
38649 if (TARGET_64BIT)
38650 {
38651 const int *parm_regs;
38652
38653 if (ix86_function_type_abi (type) == MS_ABI)
38654 parm_regs = x86_64_ms_abi_int_parameter_registers;
38655 else
38656 parm_regs = x86_64_int_parameter_registers;
38657 return gen_rtx_REG (Pmode, parm_regs[aggr]);
38658 }
38659
38660 nregs = ix86_function_regparm (type, function);
38661
38662 if (nregs > 0 && !stdarg_p (type))
38663 {
38664 int regno;
38665 unsigned int ccvt = ix86_get_callcvt (type);
38666
38667 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
38668 regno = aggr ? DX_REG : CX_REG;
38669 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
38670 {
38671 regno = CX_REG;
38672 if (aggr)
38673 return gen_rtx_MEM (SImode,
38674 plus_constant (Pmode, stack_pointer_rtx, 4));
38675 }
38676 else
38677 {
38678 regno = AX_REG;
38679 if (aggr)
38680 {
38681 regno = DX_REG;
38682 if (nregs == 1)
38683 return gen_rtx_MEM (SImode,
38684 plus_constant (Pmode,
38685 stack_pointer_rtx, 4));
38686 }
38687 }
38688 return gen_rtx_REG (SImode, regno);
38689 }
38690
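  /* Otherwise THIS lives on the stack: just past the return address,
     or past both the return address and the hidden aggregate-return
     pointer when the result is returned in memory.  */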
38691 return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
38692 aggr ? 8 : 4));
38693 }
38694
38695 /* Determine whether x86_output_mi_thunk can succeed. */
38696
38697 static bool
38698 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
38699 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
38700 HOST_WIDE_INT vcall_offset, const_tree function)
38701 {
38702 /* 64-bit can handle anything. */
38703 if (TARGET_64BIT)
38704 return true;
38705
38706 /* For 32-bit, everything's fine if we have one free register. */
38707 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
38708 return true;
38709
38710 /* Need a free register for vcall_offset. */
38711 if (vcall_offset)
38712 return false;
38713
38714 /* Need a free register for GOT references. */
38715 if (flag_pic && !targetm.binds_local_p (function))
38716 return false;
38717
38718 /* Otherwise ok. */
38719 return true;
38720 }
38721
38722 /* Output the assembler code for a thunk function. THUNK_DECL is the
38723 declaration for the thunk function itself, FUNCTION is the decl for
38724 the target function. DELTA is an immediate constant offset to be
38725 added to THIS. If VCALL_OFFSET is nonzero, the word at
38726 *(*this + vcall_offset) should be added to THIS. */
38727
38728 static void
38729 x86_output_mi_thunk (FILE *file,
38730 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
38731 HOST_WIDE_INT vcall_offset, tree function)
38732 {
38733 rtx this_param = x86_this_parameter (function);
38734 rtx this_reg, tmp, fnaddr;
38735 unsigned int tmp_regno;
38736
38737 if (TARGET_64BIT)
38738 tmp_regno = R10_REG;
38739 else
38740 {
38741 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
38742 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
38743 tmp_regno = AX_REG;
38744 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
38745 tmp_regno = DX_REG;
38746 else
38747 tmp_regno = CX_REG;
38748 }
38749
38750 emit_note (NOTE_INSN_PROLOGUE_END);
38751
38752 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
38753 pull it in now and let DELTA benefit. */
38754 if (REG_P (this_param))
38755 this_reg = this_param;
38756 else if (vcall_offset)
38757 {
38758 /* Put the this parameter into %eax. */
38759 this_reg = gen_rtx_REG (Pmode, AX_REG);
38760 emit_move_insn (this_reg, this_param);
38761 }
38762 else
38763 this_reg = NULL_RTX;
38764
38765 /* Adjust the this parameter by a fixed constant. */
38766 if (delta)
38767 {
38768 rtx delta_rtx = GEN_INT (delta);
38769 rtx delta_dst = this_reg ? this_reg : this_param;
38770
38771 if (TARGET_64BIT)
38772 {
38773 if (!x86_64_general_operand (delta_rtx, Pmode))
38774 {
38775 tmp = gen_rtx_REG (Pmode, tmp_regno);
38776 emit_move_insn (tmp, delta_rtx);
38777 delta_rtx = tmp;
38778 }
38779 }
38780
38781 ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
38782 }
38783
38784 /* Adjust the this parameter by a value stored in the vtable. */
38785 if (vcall_offset)
38786 {
38787 rtx vcall_addr, vcall_mem, this_mem;
38788
38789 tmp = gen_rtx_REG (Pmode, tmp_regno);
38790
38791 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
38792 if (Pmode != ptr_mode)
38793 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
38794 emit_move_insn (tmp, this_mem);
38795
38796 /* Adjust the this parameter. */
38797 vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
38798 if (TARGET_64BIT
38799 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
38800 {
38801 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
38802 emit_move_insn (tmp2, GEN_INT (vcall_offset));
38803 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
38804 }
38805
38806 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
38807 if (Pmode != ptr_mode)
38808 emit_insn (gen_addsi_1_zext (this_reg,
38809 gen_rtx_REG (ptr_mode,
38810 REGNO (this_reg)),
38811 vcall_mem));
38812 else
38813 ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
38814 }
38815
38816 /* If necessary, drop THIS back to its stack slot. */
38817 if (this_reg && this_reg != this_param)
38818 emit_move_insn (this_param, this_reg);
38819
38820 fnaddr = XEXP (DECL_RTL (function), 0);
38821 if (TARGET_64BIT)
38822 {
38823 if (!flag_pic || targetm.binds_local_p (function)
38824 || TARGET_PECOFF)
38825 ;
38826 else
38827 {
38828 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
38829 tmp = gen_rtx_CONST (Pmode, tmp);
38830 fnaddr = gen_rtx_MEM (Pmode, tmp);
38831 }
38832 }
38833 else
38834 {
38835 if (!flag_pic || targetm.binds_local_p (function))
38836 ;
38837 #if TARGET_MACHO
38838 else if (TARGET_MACHO)
38839 {
38840 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
38841 fnaddr = XEXP (fnaddr, 0);
38842 }
38843 #endif /* TARGET_MACHO */
38844 else
38845 {
38846 tmp = gen_rtx_REG (Pmode, CX_REG);
38847 output_set_got (tmp, NULL_RTX);
38848
38849 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
38850 fnaddr = gen_rtx_PLUS (Pmode, fnaddr, tmp);
38851 fnaddr = gen_rtx_MEM (Pmode, fnaddr);
38852 }
38853 }
38854
38855 /* Our sibling call patterns do not allow memories, because we have no
38856 predicate that can distinguish between frame and non-frame memory.
38857 For our purposes here, we can get away with (ab)using a jump pattern,
38858 because we're going to do no optimization. */
38859 if (MEM_P (fnaddr))
38860 emit_jump_insn (gen_indirect_jump (fnaddr));
38861 else
38862 {
38863 if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
38864 fnaddr = legitimize_pic_address (fnaddr,
38865 gen_rtx_REG (Pmode, tmp_regno));
38866
38867 if (!sibcall_insn_operand (fnaddr, word_mode))
38868 {
38869 tmp = gen_rtx_REG (word_mode, tmp_regno);
38870 if (GET_MODE (fnaddr) != word_mode)
38871 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
38872 emit_move_insn (tmp, fnaddr);
38873 fnaddr = tmp;
38874 }
38875
38876 tmp = gen_rtx_MEM (QImode, fnaddr);
38877 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
38878 tmp = emit_call_insn (tmp);
38879 SIBLING_CALL_P (tmp) = 1;
38880 }
38881 emit_barrier ();
38882
38883 /* Emit just enough of rest_of_compilation to get the insns emitted.
38884 Note that use_thunk calls assemble_start_function et al. */
38885 tmp = get_insns ();
38886 shorten_branches (tmp);
38887 final_start_function (tmp, file, 1);
38888 final (tmp, file, 1);
38889 final_end_function ();
38890 }
38891
38892 static void
38893 x86_file_start (void)
38894 {
38895 default_file_start ();
38896 if (TARGET_16BIT)
38897 fputs ("\t.code16gcc\n", asm_out_file);
38898 #if TARGET_MACHO
38899 darwin_file_start ();
38900 #endif
38901 if (X86_FILE_START_VERSION_DIRECTIVE)
38902 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
38903 if (X86_FILE_START_FLTUSED)
38904 fputs ("\t.global\t__fltused\n", asm_out_file);
38905 if (ix86_asm_dialect == ASM_INTEL)
38906 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
38907 }
38908
38909 int
38910 x86_field_alignment (tree field, int computed)
38911 {
38912 enum machine_mode mode;
38913 tree type = TREE_TYPE (field);
38914
38915 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
38916 return computed;
38917 mode = TYPE_MODE (strip_array_types (type));
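  /* Without -malign-double, the traditional 32-bit ABI aligns double,
     long long and their complex variants to only 4 bytes inside
     structures, so cap the field alignment at 32 bits.  */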
38918 if (mode == DFmode || mode == DCmode
38919 || GET_MODE_CLASS (mode) == MODE_INT
38920 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
38921 return MIN (32, computed);
38922 return computed;
38923 }
38924
38925 /* Output assembler code to FILE to increment profiler label # LABELNO
38926 for profiling a function entry. */
38927 void
38928 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
38929 {
38930 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
38931 : MCOUNT_NAME);
38932
38933 if (TARGET_64BIT)
38934 {
38935 #ifndef NO_PROFILE_COUNTERS
38936 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
38937 #endif
38938
38939 if (!TARGET_PECOFF && flag_pic)
38940 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
38941 else
38942 fprintf (file, "\tcall\t%s\n", mcount_name);
38943 }
38944 else if (flag_pic)
38945 {
38946 #ifndef NO_PROFILE_COUNTERS
38947 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
38948 LPREFIX, labelno);
38949 #endif
38950 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
38951 }
38952 else
38953 {
38954 #ifndef NO_PROFILE_COUNTERS
38955 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
38956 LPREFIX, labelno);
38957 #endif
38958 fprintf (file, "\tcall\t%s\n", mcount_name);
38959 }
38960 }
38961
38962 /* We don't have exact information about the insn sizes, but we may assume
38963 quite safely that we are informed about all 1 byte insns and memory
38964 address sizes. This is enough to eliminate unnecessary padding in
38965 99% of cases. */
38966
38967 static int
38968 min_insn_size (rtx insn)
38969 {
38970 int l = 0, len;
38971
38972 if (!INSN_P (insn) || !active_insn_p (insn))
38973 return 0;
38974
38975 /* Discard alignments we've emitted and jump instructions. */
38976 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
38977 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
38978 return 0;
38979
38980 /* Important case: calls are always 5 bytes.
38981 It is common to have many calls in a row. */
38982 if (CALL_P (insn)
38983 && symbolic_reference_mentioned_p (PATTERN (insn))
38984 && !SIBLING_CALL_P (insn))
38985 return 5;
38986 len = get_attr_length (insn);
38987 if (len <= 1)
38988 return 1;
38989
38990 /* For normal instructions we rely on get_attr_length being exact,
38991 with a few exceptions. */
38992 if (!JUMP_P (insn))
38993 {
38994 enum attr_type type = get_attr_type (insn);
38995
38996 switch (type)
38997 {
38998 case TYPE_MULTI:
38999 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
39000 || asm_noperands (PATTERN (insn)) >= 0)
39001 return 0;
39002 break;
39003 case TYPE_OTHER:
39004 case TYPE_FCMP:
39005 break;
39006 default:
39007 /* Otherwise trust get_attr_length. */
39008 return len;
39009 }
39010
39011 l = get_attr_length_address (insn);
39012 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
39013 l = 4;
39014 }
39015 if (l)
39016 return 1+l;
39017 else
39018 return 2;
39019 }
39020
39021 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
39022
39023 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
39024 window. */
39025
39026 static void
39027 ix86_avoid_jump_mispredicts (void)
39028 {
39029 rtx insn, start = get_insns ();
39030 int nbytes = 0, njumps = 0;
39031 int isjump = 0;
39032
39033 /* Look for all minimal intervals of instructions containing 4 jumps.
39034 The intervals are bounded by START and INSN. NBYTES is the total
39035 size of instructions in the interval including INSN and not including
39036 START. When the NBYTES is smaller than 16 bytes, it is possible
39037 that the end of START and INSN ends up in the same 16byte page.
39038
39039 The smallest offset in the page INSN can start is the case where START
39040 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
39041 We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).
39042
39043 Don't consider an asm goto as a jump: while it can contain a jump, it doesn't
39044 have to, since control transfer to its label(s) can be performed through other
39045 means; also, we estimate the minimum length of all asm stmts as 0. */
39046 for (insn = start; insn; insn = NEXT_INSN (insn))
39047 {
39048 int min_size;
39049
39050 if (LABEL_P (insn))
39051 {
39052 int align = label_to_alignment (insn);
39053 int max_skip = label_to_max_skip (insn);
39054
39055 if (max_skip > 15)
39056 max_skip = 15;
39057 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
39058 already in the current 16 byte page, because otherwise
39059 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
39060 bytes to reach 16 byte boundary. */
39061 if (align <= 0
39062 || (align <= 3 && max_skip != (1 << align) - 1))
39063 max_skip = 0;
39064 if (dump_file)
39065 fprintf (dump_file, "Label %i with max_skip %i\n",
39066 INSN_UID (insn), max_skip);
39067 if (max_skip)
39068 {
39069 while (nbytes + max_skip >= 16)
39070 {
39071 start = NEXT_INSN (start);
39072 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
39073 || CALL_P (start))
39074 njumps--, isjump = 1;
39075 else
39076 isjump = 0;
39077 nbytes -= min_insn_size (start);
39078 }
39079 }
39080 continue;
39081 }
39082
39083 min_size = min_insn_size (insn);
39084 nbytes += min_size;
39085 if (dump_file)
39086 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
39087 INSN_UID (insn), min_size);
39088 if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
39089 || CALL_P (insn))
39090 njumps++;
39091 else
39092 continue;
39093
39094 while (njumps > 3)
39095 {
39096 start = NEXT_INSN (start);
39097 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
39098 || CALL_P (start))
39099 njumps--, isjump = 1;
39100 else
39101 isjump = 0;
39102 nbytes -= min_insn_size (start);
39103 }
39104 gcc_assert (njumps >= 0);
39105 if (dump_file)
39106 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
39107 INSN_UID (start), INSN_UID (insn), nbytes);
39108
39109 if (njumps == 3 && isjump && nbytes < 16)
39110 {
39111 int padsize = 15 - nbytes + min_insn_size (insn);
39112
39113 if (dump_file)
39114 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
39115 INSN_UID (insn), padsize);
39116 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
39117 }
39118 }
39119 }
39120 #endif
39121
39122 /* AMD Athlon works faster
39123 when RET is not the destination of a conditional jump or directly preceded
39124 by another jump instruction. We avoid the penalty by inserting a NOP just
39125 before the RET instruction in such cases. */
39126 static void
39127 ix86_pad_returns (void)
39128 {
39129 edge e;
39130 edge_iterator ei;
39131
39132 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
39133 {
39134 basic_block bb = e->src;
39135 rtx ret = BB_END (bb);
39136 rtx prev;
39137 bool replace = false;
39138
39139 if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
39140 || optimize_bb_for_size_p (bb))
39141 continue;
39142 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
39143 if (active_insn_p (prev) || LABEL_P (prev))
39144 break;
39145 if (prev && LABEL_P (prev))
39146 {
39147 edge e;
39148 edge_iterator ei;
39149
39150 FOR_EACH_EDGE (e, ei, bb->preds)
39151 if (EDGE_FREQUENCY (e) && e->src->index >= 0
39152 && !(e->flags & EDGE_FALLTHRU))
39153 {
39154 replace = true;
39155 break;
39156 }
39157 }
39158 if (!replace)
39159 {
39160 prev = prev_active_insn (ret);
39161 if (prev
39162 && ((JUMP_P (prev) && any_condjump_p (prev))
39163 || CALL_P (prev)))
39164 replace = true;
39165 /* Empty functions get branch mispredict even when
39166 the jump destination is not visible to us. */
39167 if (!prev && !optimize_function_for_size_p (cfun))
39168 replace = true;
39169 }
39170 if (replace)
39171 {
39172 emit_jump_insn_before (gen_simple_return_internal_long (), ret);
39173 delete_insn (ret);
39174 }
39175 }
39176 }
39177
39178 /* Count the minimum number of instructions in BB. Return 4 if the
39179 number of instructions >= 4. */
39180
39181 static int
39182 ix86_count_insn_bb (basic_block bb)
39183 {
39184 rtx insn;
39185 int insn_count = 0;
39186
39187 /* Count number of instructions in this block. Return 4 if the number
39188 of instructions >= 4. */
39189 FOR_BB_INSNS (bb, insn)
39190 {
39191 /* A return only happens in exit blocks. */
39192 if (JUMP_P (insn)
39193 && ANY_RETURN_P (PATTERN (insn)))
39194 break;
39195
39196 if (NONDEBUG_INSN_P (insn)
39197 && GET_CODE (PATTERN (insn)) != USE
39198 && GET_CODE (PATTERN (insn)) != CLOBBER)
39199 {
39200 insn_count++;
39201 if (insn_count >= 4)
39202 return insn_count;
39203 }
39204 }
39205
39206 return insn_count;
39207 }
39208
39209
39210 /* Count the minimum number of instructions in code path in BB.
39211 Return 4 if the number of instructions >= 4. */
39212
39213 static int
39214 ix86_count_insn (basic_block bb)
39215 {
39216 edge e;
39217 edge_iterator ei;
39218 int min_prev_count;
39219
39220 /* Only bother counting instructions along paths with no
39221 more than 2 basic blocks between entry and exit. Given
39222 that BB has an edge to exit, determine if a predecessor
39223 of BB has an edge from entry. If so, compute the number
39224 of instructions in the predecessor block. If there
39225 happen to be multiple such blocks, compute the minimum. */
39226 min_prev_count = 4;
39227 FOR_EACH_EDGE (e, ei, bb->preds)
39228 {
39229 edge prev_e;
39230 edge_iterator prev_ei;
39231
39232 if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
39233 {
39234 min_prev_count = 0;
39235 break;
39236 }
39237 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
39238 {
39239 if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
39240 {
39241 int count = ix86_count_insn_bb (e->src);
39242 if (count < min_prev_count)
39243 min_prev_count = count;
39244 break;
39245 }
39246 }
39247 }
39248
39249 if (min_prev_count < 4)
39250 min_prev_count += ix86_count_insn_bb (bb);
39251
39252 return min_prev_count;
39253 }
39254
39255 /* Pad short function to 4 instructions. */
39256
39257 static void
39258 ix86_pad_short_function (void)
39259 {
39260 edge e;
39261 edge_iterator ei;
39262
39263 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
39264 {
39265 rtx ret = BB_END (e->src);
39266 if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
39267 {
39268 int insn_count = ix86_count_insn (e->src);
39269
39270 /* Pad short function. */
39271 if (insn_count < 4)
39272 {
39273 rtx insn = ret;
39274
39275 /* Find epilogue. */
39276 while (insn
39277 && (!NOTE_P (insn)
39278 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
39279 insn = PREV_INSN (insn);
39280
39281 if (!insn)
39282 insn = ret;
39283
39284 /* Two NOPs count as one instruction. */
39285 insn_count = 2 * (4 - insn_count);
39286 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
39287 }
39288 }
39289 }
39290 }
39291
39292 /* Fix up a Windows system unwinder issue. If an EH region falls through into
39293 the epilogue, the Windows system unwinder will apply epilogue logic and
39294 produce incorrect offsets. This can be avoided by adding a nop between
39295 the last insn that can throw and the first insn of the epilogue. */
39296
39297 static void
39298 ix86_seh_fixup_eh_fallthru (void)
39299 {
39300 edge e;
39301 edge_iterator ei;
39302
39303 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
39304 {
39305 rtx insn, next;
39306
39307 /* Find the beginning of the epilogue. */
39308 for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
39309 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
39310 break;
39311 if (insn == NULL)
39312 continue;
39313
39314 /* We only care about preceding insns that can throw. */
39315 insn = prev_active_insn (insn);
39316 if (insn == NULL || !can_throw_internal (insn))
39317 continue;
39318
39319 /* Do not separate calls from their debug information. */
39320 for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
39321 if (NOTE_P (next)
39322 && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
39323 || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION))
39324 insn = next;
39325 else
39326 break;
39327
39328 emit_insn_after (gen_nops (const1_rtx), insn);
39329 }
39330 }
39331
39332 /* Implement machine specific optimizations. We implement padding of returns
39333 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
39334 static void
39335 ix86_reorg (void)
39336 {
39337 /* We are freeing block_for_insn in the toplev to keep compatibility
39338 with old MDEP_REORGS that are not CFG based. Recompute it now. */
39339 compute_bb_for_insn ();
39340
39341 if (TARGET_SEH && current_function_has_exception_handlers ())
39342 ix86_seh_fixup_eh_fallthru ();
39343
39344 if (optimize && optimize_function_for_speed_p (cfun))
39345 {
39346 if (TARGET_PAD_SHORT_FUNCTION)
39347 ix86_pad_short_function ();
39348 else if (TARGET_PAD_RETURNS)
39349 ix86_pad_returns ();
39350 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
39351 if (TARGET_FOUR_JUMP_LIMIT)
39352 ix86_avoid_jump_mispredicts ();
39353 #endif
39354 }
39355 }
39356
39357 /* Return nonzero when a QImode register that must be represented via a REX
39358 prefix is used. */
39359 bool
39360 x86_extended_QIreg_mentioned_p (rtx insn)
39361 {
39362 int i;
39363 extract_insn_cached (insn);
39364 for (i = 0; i < recog_data.n_operands; i++)
39365 if (GENERAL_REG_P (recog_data.operand[i])
39366 && !QI_REGNO_P (REGNO (recog_data.operand[i])))
39367 return true;
39368 return false;
39369 }
39370
39371 /* Return nonzero when P points to a register encoded via a REX prefix.
39372 Called via for_each_rtx. */
39373 static int
39374 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
39375 {
39376 unsigned int regno;
39377 if (!REG_P (*p))
39378 return 0;
39379 regno = REGNO (*p);
39380 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
39381 }
39382
39383 /* Return true when INSN mentions register that must be encoded using REX
39384 prefix. */
39385 bool
39386 x86_extended_reg_mentioned_p (rtx insn)
39387 {
39388 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
39389 extended_reg_mentioned_1, NULL);
39390 }
39391
39392 /* If profitable, negate (without causing overflow) integer constant
39393 of mode MODE at location LOC. Return true in this case. */
39394 bool
39395 x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
39396 {
39397 HOST_WIDE_INT val;
39398
39399 if (!CONST_INT_P (*loc))
39400 return false;
39401
39402 switch (mode)
39403 {
39404 case DImode:
39405 /* DImode x86_64 constants must fit in 32 bits. */
39406 gcc_assert (x86_64_immediate_operand (*loc, mode));
39407
39408 mode = SImode;
39409 break;
39410
39411 case SImode:
39412 case HImode:
39413 case QImode:
39414 break;
39415
39416 default:
39417 gcc_unreachable ();
39418 }
39419
39420 /* Avoid overflows. */
39421 if (mode_signbit_p (mode, *loc))
39422 return false;
39423
39424 val = INTVAL (*loc);
39425
39426 /* Make things pretty: prefer `subl $4,%eax' over `addl $-4,%eax'.
39427 Exception: -128 encodes in fewer bytes than 128, so swap sign and op. */
39428 if ((val < 0 && val != -128)
39429 || val == 128)
39430 {
39431 *loc = GEN_INT (-val);
39432 return true;
39433 }
39434
39435 return false;
39436 }
39437
39438 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
39439 optabs would emit if we didn't have TFmode patterns. */
39440
39441 void
39442 x86_emit_floatuns (rtx operands[2])
39443 {
39444 rtx neglab, donelab, i0, i1, f0, in, out;
39445 enum machine_mode mode, inmode;
39446
39447 inmode = GET_MODE (operands[1]);
39448 gcc_assert (inmode == SImode || inmode == DImode);
39449
39450 out = operands[0];
39451 in = force_reg (inmode, operands[1]);
39452 mode = GET_MODE (out);
39453 neglab = gen_label_rtx ();
39454 donelab = gen_label_rtx ();
39455 f0 = gen_reg_rtx (mode);
39456
39457 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
39458
39459 expand_float (out, in, 0);
39460
39461 emit_jump_insn (gen_jump (donelab));
39462 emit_barrier ();
39463
39464 emit_label (neglab);
39465
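  /* The value has the sign bit set: compute (in >> 1) | (in & 1) so the
     halved value still rounds correctly, convert it, then double the
     result with a final addition.  */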
39466 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
39467 1, OPTAB_DIRECT);
39468 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
39469 1, OPTAB_DIRECT);
39470 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
39471
39472 expand_float (f0, i0, 0);
39473
39474 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
39475
39476 emit_label (donelab);
39477 }
39478 \f
39479 /* AVX512F supports 64-byte integer vector operations,
39480 so the longest vector we are faced with is V64QImode. */
39481 #define MAX_VECT_LEN 64
39482
39483 struct expand_vec_perm_d
39484 {
39485 rtx target, op0, op1;
39486 unsigned char perm[MAX_VECT_LEN];
39487 enum machine_mode vmode;
39488 unsigned char nelt;
39489 bool one_operand_p;
39490 bool testing_p;
39491 };
39492
39493 static bool canonicalize_perm (struct expand_vec_perm_d *d);
39494 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
39495 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
39496
39497 /* Get a vector mode of the same size as the original but with elements
39498 twice as wide. This is only guaranteed to apply to integral vectors. */
39499
39500 static inline enum machine_mode
39501 get_mode_wider_vector (enum machine_mode o)
39502 {
39503 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
39504 enum machine_mode n = GET_MODE_WIDER_MODE (o);
39505 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
39506 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
39507 return n;
39508 }
39509
39510 /* A subroutine of ix86_expand_vector_init_duplicate. Tries to
39511 fill target with val via vec_duplicate. */
39512
39513 static bool
39514 ix86_vector_duplicate_value (enum machine_mode mode, rtx target, rtx val)
39515 {
39516 bool ok;
39517 rtx insn, dup;
39518
39519 /* First attempt to recognize VAL as-is. */
39520 dup = gen_rtx_VEC_DUPLICATE (mode, val);
39521 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
39522 if (recog_memoized (insn) < 0)
39523 {
39524 rtx seq;
39525 /* If that fails, force VAL into a register. */
39526
39527 start_sequence ();
39528 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
39529 seq = get_insns ();
39530 end_sequence ();
39531 if (seq)
39532 emit_insn_before (seq, insn);
39533
39534 ok = recog_memoized (insn) >= 0;
39535 gcc_assert (ok);
39536 }
39537 return true;
39538 }
39539
39540 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
39541 with all elements equal to VAR. Return true if successful. */
39542
39543 static bool
39544 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
39545 rtx target, rtx val)
39546 {
39547 bool ok;
39548
39549 switch (mode)
39550 {
39551 case V2SImode:
39552 case V2SFmode:
39553 if (!mmx_ok)
39554 return false;
39555 /* FALLTHRU */
39556
39557 case V4DFmode:
39558 case V4DImode:
39559 case V8SFmode:
39560 case V8SImode:
39561 case V2DFmode:
39562 case V2DImode:
39563 case V4SFmode:
39564 case V4SImode:
39565 case V16SImode:
39566 case V8DImode:
39567 case V16SFmode:
39568 case V8DFmode:
39569 return ix86_vector_duplicate_value (mode, target, val);
39570
39571 case V4HImode:
39572 if (!mmx_ok)
39573 return false;
39574 if (TARGET_SSE || TARGET_3DNOW_A)
39575 {
39576 rtx x;
39577
39578 val = gen_lowpart (SImode, val);
39579 x = gen_rtx_TRUNCATE (HImode, val);
39580 x = gen_rtx_VEC_DUPLICATE (mode, x);
39581 emit_insn (gen_rtx_SET (VOIDmode, target, x));
39582 return true;
39583 }
39584 goto widen;
39585
39586 case V8QImode:
39587 if (!mmx_ok)
39588 return false;
39589 goto widen;
39590
39591 case V8HImode:
39592 if (TARGET_SSE2)
39593 {
39594 struct expand_vec_perm_d dperm;
39595 rtx tmp1, tmp2;
39596
39597 permute:
39598 memset (&dperm, 0, sizeof (dperm));
39599 dperm.target = target;
39600 dperm.vmode = mode;
39601 dperm.nelt = GET_MODE_NUNITS (mode);
39602 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
39603 dperm.one_operand_p = true;
39604
39605 /* Extend to SImode using a paradoxical SUBREG. */
39606 tmp1 = gen_reg_rtx (SImode);
39607 emit_move_insn (tmp1, gen_lowpart (SImode, val));
39608
39609 /* Insert the SImode value as low element of a V4SImode vector. */
39610 tmp2 = gen_reg_rtx (V4SImode);
39611 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
39612 emit_move_insn (dperm.op0, gen_lowpart (mode, tmp2));
39613
39614 ok = (expand_vec_perm_1 (&dperm)
39615 || expand_vec_perm_broadcast_1 (&dperm));
39616 gcc_assert (ok);
39617 return ok;
39618 }
39619 goto widen;
39620
39621 case V16QImode:
39622 if (TARGET_SSE2)
39623 goto permute;
39624 goto widen;
39625
39626 widen:
39627 /* Replicate the value once into the next wider mode and recurse. */
39628 {
39629 enum machine_mode smode, wsmode, wvmode;
39630 rtx x;
39631
39632 smode = GET_MODE_INNER (mode);
39633 wvmode = get_mode_wider_vector (mode);
39634 wsmode = GET_MODE_INNER (wvmode);
39635
39636 val = convert_modes (wsmode, smode, val, true);
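	/* Build a wider scalar holding two copies of VAL:
	   val |= val << GET_MODE_BITSIZE (smode).  */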
39637 x = expand_simple_binop (wsmode, ASHIFT, val,
39638 GEN_INT (GET_MODE_BITSIZE (smode)),
39639 NULL_RTX, 1, OPTAB_LIB_WIDEN);
39640 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
39641
39642 x = gen_reg_rtx (wvmode);
39643 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
39644 gcc_assert (ok);
39645 emit_move_insn (target, gen_lowpart (GET_MODE (target), x));
39646 return ok;
39647 }
39648
39649 case V16HImode:
39650 case V32QImode:
39651 {
39652 enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
39653 rtx x = gen_reg_rtx (hvmode);
39654
39655 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
39656 gcc_assert (ok);
39657
39658 x = gen_rtx_VEC_CONCAT (mode, x, x);
39659 emit_insn (gen_rtx_SET (VOIDmode, target, x));
39660 }
39661 return true;
39662
39663 default:
39664 return false;
39665 }
39666 }
39667
39668 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
39669 whose ONE_VAR element is VAR, and other elements are zero. Return true
39670 if successful. */
39671
39672 static bool
39673 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
39674 rtx target, rtx var, int one_var)
39675 {
39676 enum machine_mode vsimode;
39677 rtx new_target;
39678 rtx x, tmp;
39679 bool use_vector_set = false;
39680
39681 switch (mode)
39682 {
39683 case V2DImode:
39684 /* For SSE4.1, we normally use vector set. But if the second
39685 element is zero and inter-unit moves are OK, we use movq
39686 instead. */
39687 use_vector_set = (TARGET_64BIT && TARGET_SSE4_1
39688 && !(TARGET_INTER_UNIT_MOVES_TO_VEC
39689 && one_var == 0));
39690 break;
39691 case V16QImode:
39692 case V4SImode:
39693 case V4SFmode:
39694 use_vector_set = TARGET_SSE4_1;
39695 break;
39696 case V8HImode:
39697 use_vector_set = TARGET_SSE2;
39698 break;
39699 case V4HImode:
39700 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
39701 break;
39702 case V32QImode:
39703 case V16HImode:
39704 case V8SImode:
39705 case V8SFmode:
39706 case V4DFmode:
39707 use_vector_set = TARGET_AVX;
39708 break;
39709 case V4DImode:
39710 /* Use ix86_expand_vector_set in 64bit mode only. */
39711 use_vector_set = TARGET_AVX && TARGET_64BIT;
39712 break;
39713 default:
39714 break;
39715 }
39716
39717 if (use_vector_set)
39718 {
39719 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
39720 var = force_reg (GET_MODE_INNER (mode), var);
39721 ix86_expand_vector_set (mmx_ok, target, var, one_var);
39722 return true;
39723 }
39724
39725 switch (mode)
39726 {
39727 case V2SFmode:
39728 case V2SImode:
39729 if (!mmx_ok)
39730 return false;
39731 /* FALLTHRU */
39732
39733 case V2DFmode:
39734 case V2DImode:
39735 if (one_var != 0)
39736 return false;
39737 var = force_reg (GET_MODE_INNER (mode), var);
39738 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
39739 emit_insn (gen_rtx_SET (VOIDmode, target, x));
39740 return true;
39741
39742 case V4SFmode:
39743 case V4SImode:
39744 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
39745 new_target = gen_reg_rtx (mode);
39746 else
39747 new_target = target;
39748 var = force_reg (GET_MODE_INNER (mode), var);
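      /* Emit (vec_merge (vec_duplicate VAR) const0 1): element 0 becomes
	 VAR and the remaining elements are zeroed.  */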
39749 x = gen_rtx_VEC_DUPLICATE (mode, var);
39750 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
39751 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
39752 if (one_var != 0)
39753 {
39754 /* We need to shuffle the value to the correct position, so
39755 create a new pseudo to store the intermediate result. */
39756
39757 /* With SSE2, we can use the integer shuffle insns. */
39758 if (mode != V4SFmode && TARGET_SSE2)
39759 {
39760 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
39761 const1_rtx,
39762 GEN_INT (one_var == 1 ? 0 : 1),
39763 GEN_INT (one_var == 2 ? 0 : 1),
39764 GEN_INT (one_var == 3 ? 0 : 1)));
39765 if (target != new_target)
39766 emit_move_insn (target, new_target);
39767 return true;
39768 }
39769
39770 /* Otherwise convert the intermediate result to V4SFmode and
39771 use the SSE1 shuffle instructions. */
39772 if (mode != V4SFmode)
39773 {
39774 tmp = gen_reg_rtx (V4SFmode);
39775 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
39776 }
39777 else
39778 tmp = new_target;
39779
39780 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
39781 const1_rtx,
39782 GEN_INT (one_var == 1 ? 0 : 1),
39783 GEN_INT (one_var == 2 ? 0+4 : 1+4),
39784 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
39785
39786 if (mode != V4SFmode)
39787 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
39788 else if (tmp != target)
39789 emit_move_insn (target, tmp);
39790 }
39791 else if (target != new_target)
39792 emit_move_insn (target, new_target);
39793 return true;
39794
39795 case V8HImode:
39796 case V16QImode:
39797 vsimode = V4SImode;
39798 goto widen;
39799 case V4HImode:
39800 case V8QImode:
39801 if (!mmx_ok)
39802 return false;
39803 vsimode = V2SImode;
39804 goto widen;
39805 widen:
39806 if (one_var != 0)
39807 return false;
39808
39809 /* Zero extend the variable element to SImode and recurse. */
39810 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
39811
39812 x = gen_reg_rtx (vsimode);
39813 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
39814 var, one_var))
39815 gcc_unreachable ();
39816
39817 emit_move_insn (target, gen_lowpart (mode, x));
39818 return true;
39819
39820 default:
39821 return false;
39822 }
39823 }
39824
39825 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
39826 consisting of the values in VALS. It is known that all elements
39827 except ONE_VAR are constants. Return true if successful. */
39828
39829 static bool
39830 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
39831 rtx target, rtx vals, int one_var)
39832 {
39833 rtx var = XVECEXP (vals, 0, one_var);
39834 enum machine_mode wmode;
39835 rtx const_vec, x;
39836
39837 const_vec = copy_rtx (vals);
39838 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
39839 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
39840
39841 switch (mode)
39842 {
39843 case V2DFmode:
39844 case V2DImode:
39845 case V2SFmode:
39846 case V2SImode:
39847 /* For the two element vectors, it's just as easy to use
39848 the general case. */
39849 return false;
39850
39851 case V4DImode:
39852 /* Use ix86_expand_vector_set in 64bit mode only. */
39853 if (!TARGET_64BIT)
39854 return false;
39855 case V4DFmode:
39856 case V8SFmode:
39857 case V8SImode:
39858 case V16HImode:
39859 case V32QImode:
39860 case V4SFmode:
39861 case V4SImode:
39862 case V8HImode:
39863 case V4HImode:
39864 break;
39865
39866 case V16QImode:
39867 if (TARGET_SSE4_1)
39868 break;
39869 wmode = V8HImode;
39870 goto widen;
39871 case V8QImode:
39872 wmode = V4HImode;
39873 goto widen;
39874 widen:
39875 /* There's no way to set one QImode entry easily. Combine
39876 the variable value with its adjacent constant value, and
39877 promote to an HImode set. */
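      /* E.g. for a variable byte at index 5 of a V16QImode vector, bytes 4
	 and 5 are fused into one HImode value (the variable byte in the
	 high half, since the target is little-endian) and stored as
	 element 2 of the V8HImode view of the constant vector.  */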
39878 x = XVECEXP (vals, 0, one_var ^ 1);
39879 if (one_var & 1)
39880 {
39881 var = convert_modes (HImode, QImode, var, true);
39882 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
39883 NULL_RTX, 1, OPTAB_LIB_WIDEN);
39884 x = GEN_INT (INTVAL (x) & 0xff);
39885 }
39886 else
39887 {
39888 var = convert_modes (HImode, QImode, var, true);
39889 x = gen_int_mode (INTVAL (x) << 8, HImode);
39890 }
39891 if (x != const0_rtx)
39892 var = expand_simple_binop (HImode, IOR, var, x, var,
39893 1, OPTAB_LIB_WIDEN);
39894
39895 x = gen_reg_rtx (wmode);
39896 emit_move_insn (x, gen_lowpart (wmode, const_vec));
39897 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
39898
39899 emit_move_insn (target, gen_lowpart (mode, x));
39900 return true;
39901
39902 default:
39903 return false;
39904 }
39905
39906 emit_move_insn (target, const_vec);
39907 ix86_expand_vector_set (mmx_ok, target, var, one_var);
39908 return true;
39909 }
39910
39911 /* A subroutine of ix86_expand_vector_init_general. Use vector
39912 concatenate to handle the most general case: all values variable,
39913 and none identical. */
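/* The inputs are concatenated pairwise: N scalars become N/2 vectors of a
   mode twice as wide, and the step repeats on those intermediate vectors
   until a single full-width vector remains (e.g. 8 scalars -> 4 two-element
   vectors -> 2 four-element vectors -> TARGET).  */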
39914
39915 static void
39916 ix86_expand_vector_init_concat (enum machine_mode mode,
39917 rtx target, rtx *ops, int n)
39918 {
39919 enum machine_mode cmode, hmode = VOIDmode, gmode = VOIDmode;
39920 rtx first[16], second[8], third[4];
39921 rtvec v;
39922 int i, j;
39923
39924 switch (n)
39925 {
39926 case 2:
39927 switch (mode)
39928 {
39929 case V16SImode:
39930 cmode = V8SImode;
39931 break;
39932 case V16SFmode:
39933 cmode = V8SFmode;
39934 break;
39935 case V8DImode:
39936 cmode = V4DImode;
39937 break;
39938 case V8DFmode:
39939 cmode = V4DFmode;
39940 break;
39941 case V8SImode:
39942 cmode = V4SImode;
39943 break;
39944 case V8SFmode:
39945 cmode = V4SFmode;
39946 break;
39947 case V4DImode:
39948 cmode = V2DImode;
39949 break;
39950 case V4DFmode:
39951 cmode = V2DFmode;
39952 break;
39953 case V4SImode:
39954 cmode = V2SImode;
39955 break;
39956 case V4SFmode:
39957 cmode = V2SFmode;
39958 break;
39959 case V2DImode:
39960 cmode = DImode;
39961 break;
39962 case V2SImode:
39963 cmode = SImode;
39964 break;
39965 case V2DFmode:
39966 cmode = DFmode;
39967 break;
39968 case V2SFmode:
39969 cmode = SFmode;
39970 break;
39971 default:
39972 gcc_unreachable ();
39973 }
39974
39975 if (!register_operand (ops[1], cmode))
39976 ops[1] = force_reg (cmode, ops[1]);
39977 if (!register_operand (ops[0], cmode))
39978 ops[0] = force_reg (cmode, ops[0]);
39979 emit_insn (gen_rtx_SET (VOIDmode, target,
39980 gen_rtx_VEC_CONCAT (mode, ops[0],
39981 ops[1])));
39982 break;
39983
39984 case 4:
39985 switch (mode)
39986 {
39987 case V4DImode:
39988 cmode = V2DImode;
39989 break;
39990 case V4DFmode:
39991 cmode = V2DFmode;
39992 break;
39993 case V4SImode:
39994 cmode = V2SImode;
39995 break;
39996 case V4SFmode:
39997 cmode = V2SFmode;
39998 break;
39999 default:
40000 gcc_unreachable ();
40001 }
40002 goto half;
40003
40004 case 8:
40005 switch (mode)
40006 {
40007 case V8DImode:
40008 cmode = V2DImode;
40009 hmode = V4DImode;
40010 break;
40011 case V8DFmode:
40012 cmode = V2DFmode;
40013 hmode = V4DFmode;
40014 break;
40015 case V8SImode:
40016 cmode = V2SImode;
40017 hmode = V4SImode;
40018 break;
40019 case V8SFmode:
40020 cmode = V2SFmode;
40021 hmode = V4SFmode;
40022 break;
40023 default:
40024 gcc_unreachable ();
40025 }
40026 goto half;
40027
40028 case 16:
40029 switch (mode)
40030 {
40031 case V16SImode:
40032 cmode = V2SImode;
40033 hmode = V4SImode;
40034 gmode = V8SImode;
40035 break;
40036 case V16SFmode:
40037 cmode = V2SFmode;
40038 hmode = V4SFmode;
40039 gmode = V8SFmode;
40040 break;
40041 default:
40042 gcc_unreachable ();
40043 }
40044 goto half;
40045
40046 half:
40047 /* FIXME: We process inputs backward to help RA. PR 36222. */
40048 i = n - 1;
40049 j = (n >> 1) - 1;
40050 for (; i > 0; i -= 2, j--)
40051 {
40052 first[j] = gen_reg_rtx (cmode);
40053 v = gen_rtvec (2, ops[i - 1], ops[i]);
40054 ix86_expand_vector_init (false, first[j],
40055 gen_rtx_PARALLEL (cmode, v));
40056 }
40057
40058 n >>= 1;
40059 if (n > 4)
40060 {
40061 gcc_assert (hmode != VOIDmode);
40062 gcc_assert (gmode != VOIDmode);
40063 for (i = j = 0; i < n; i += 2, j++)
40064 {
40065 second[j] = gen_reg_rtx (hmode);
40066 ix86_expand_vector_init_concat (hmode, second [j],
40067 &first [i], 2);
40068 }
40069 n >>= 1;
40070 for (i = j = 0; i < n; i += 2, j++)
40071 {
40072 third[j] = gen_reg_rtx (gmode);
40073 ix86_expand_vector_init_concat (gmode, third[j],
40074 &second[i], 2);
40075 }
40076 n >>= 1;
40077 ix86_expand_vector_init_concat (mode, target, third, n);
40078 }
40079 else if (n > 2)
40080 {
40081 gcc_assert (hmode != VOIDmode);
40082 for (i = j = 0; i < n; i += 2, j++)
40083 {
40084 second[j] = gen_reg_rtx (hmode);
40085 ix86_expand_vector_init_concat (hmode, second [j],
40086 &first [i], 2);
40087 }
40088 n >>= 1;
40089 ix86_expand_vector_init_concat (mode, target, second, n);
40090 }
40091 else
40092 ix86_expand_vector_init_concat (mode, target, first, n);
40093 break;
40094
40095 default:
40096 gcc_unreachable ();
40097 }
40098 }
40099
40100 /* A subroutine of ix86_expand_vector_init_general. Use vector
40101 interleave to handle the most general case: all values variable,
40102 and none identical. */
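/* Each pair of input elements is first packed into the low part of one
   vector: the even-indexed element is moved in as the low SImode word and
   the odd-indexed element is inserted at position 1.  Those partial vectors
   are then combined with interleave-low operations of progressively wider
   element sizes until a single full vector remains.  */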
40103
40104 static void
40105 ix86_expand_vector_init_interleave (enum machine_mode mode,
40106 rtx target, rtx *ops, int n)
40107 {
40108 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
40109 int i, j;
40110 rtx op0, op1;
40111 rtx (*gen_load_even) (rtx, rtx, rtx);
40112 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
40113 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
40114
40115 switch (mode)
40116 {
40117 case V8HImode:
40118 gen_load_even = gen_vec_setv8hi;
40119 gen_interleave_first_low = gen_vec_interleave_lowv4si;
40120 gen_interleave_second_low = gen_vec_interleave_lowv2di;
40121 inner_mode = HImode;
40122 first_imode = V4SImode;
40123 second_imode = V2DImode;
40124 third_imode = VOIDmode;
40125 break;
40126 case V16QImode:
40127 gen_load_even = gen_vec_setv16qi;
40128 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
40129 gen_interleave_second_low = gen_vec_interleave_lowv4si;
40130 inner_mode = QImode;
40131 first_imode = V8HImode;
40132 second_imode = V4SImode;
40133 third_imode = V2DImode;
40134 break;
40135 default:
40136 gcc_unreachable ();
40137 }
40138
40139 for (i = 0; i < n; i++)
40140 {
40141 	      /* Extend the odd element to SImode using a paradoxical SUBREG. */
40142 op0 = gen_reg_rtx (SImode);
40143 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
40144
40145 /* Insert the SImode value as low element of V4SImode vector. */
40146 op1 = gen_reg_rtx (V4SImode);
40147 op0 = gen_rtx_VEC_MERGE (V4SImode,
40148 gen_rtx_VEC_DUPLICATE (V4SImode,
40149 op0),
40150 CONST0_RTX (V4SImode),
40151 const1_rtx);
40152 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
40153
40154 	      /* Cast the V4SImode vector back to a vector in the original mode. */
40155 op0 = gen_reg_rtx (mode);
40156 emit_move_insn (op0, gen_lowpart (mode, op1));
40157
40158 /* Load even elements into the second position. */
40159 emit_insn (gen_load_even (op0,
40160 force_reg (inner_mode,
40161 ops [i + i + 1]),
40162 const1_rtx));
40163
40164 /* Cast vector to FIRST_IMODE vector. */
40165 ops[i] = gen_reg_rtx (first_imode);
40166 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
40167 }
40168
40169 /* Interleave low FIRST_IMODE vectors. */
40170 for (i = j = 0; i < n; i += 2, j++)
40171 {
40172 op0 = gen_reg_rtx (first_imode);
40173 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
40174
40175 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
40176 ops[j] = gen_reg_rtx (second_imode);
40177 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
40178 }
40179
40180 /* Interleave low SECOND_IMODE vectors. */
40181 switch (second_imode)
40182 {
40183 case V4SImode:
40184 for (i = j = 0; i < n / 2; i += 2, j++)
40185 {
40186 op0 = gen_reg_rtx (second_imode);
40187 emit_insn (gen_interleave_second_low (op0, ops[i],
40188 ops[i + 1]));
40189
40190 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
40191 vector. */
40192 ops[j] = gen_reg_rtx (third_imode);
40193 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
40194 }
40195 second_imode = V2DImode;
40196 gen_interleave_second_low = gen_vec_interleave_lowv2di;
40197 /* FALLTHRU */
40198
40199 case V2DImode:
40200 op0 = gen_reg_rtx (second_imode);
40201 emit_insn (gen_interleave_second_low (op0, ops[0],
40202 ops[1]));
40203
40204 	      /* Cast the SECOND_IMODE vector back to a vector in the original
40205 		 mode. */
40206 emit_insn (gen_rtx_SET (VOIDmode, target,
40207 gen_lowpart (mode, op0)));
40208 break;
40209
40210 default:
40211 gcc_unreachable ();
40212 }
40213 }
40214
40215 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
40216 all values variable, and none identical. */
40217
40218 static void
40219 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
40220 rtx target, rtx vals)
40221 {
40222 rtx ops[64], op0, op1;
40223 enum machine_mode half_mode = VOIDmode;
40224 int n, i;
40225
40226 switch (mode)
40227 {
40228 case V2SFmode:
40229 case V2SImode:
40230 if (!mmx_ok && !TARGET_SSE)
40231 break;
40232 /* FALLTHRU */
40233
40234 case V16SImode:
40235 case V16SFmode:
40236 case V8DFmode:
40237 case V8DImode:
40238 case V8SFmode:
40239 case V8SImode:
40240 case V4DFmode:
40241 case V4DImode:
40242 case V4SFmode:
40243 case V4SImode:
40244 case V2DFmode:
40245 case V2DImode:
40246 n = GET_MODE_NUNITS (mode);
40247 for (i = 0; i < n; i++)
40248 ops[i] = XVECEXP (vals, 0, i);
40249 ix86_expand_vector_init_concat (mode, target, ops, n);
40250 return;
40251
40252 case V32QImode:
40253 half_mode = V16QImode;
40254 goto half;
40255
40256 case V16HImode:
40257 half_mode = V8HImode;
40258 goto half;
40259
40260 half:
40261 n = GET_MODE_NUNITS (mode);
40262 for (i = 0; i < n; i++)
40263 ops[i] = XVECEXP (vals, 0, i);
40264 op0 = gen_reg_rtx (half_mode);
40265 op1 = gen_reg_rtx (half_mode);
40266 ix86_expand_vector_init_interleave (half_mode, op0, ops,
40267 n >> 2);
40268 ix86_expand_vector_init_interleave (half_mode, op1,
40269 &ops [n >> 1], n >> 2);
40270 emit_insn (gen_rtx_SET (VOIDmode, target,
40271 gen_rtx_VEC_CONCAT (mode, op0, op1)));
40272 return;
40273
40274 case V16QImode:
40275 if (!TARGET_SSE4_1)
40276 break;
40277 /* FALLTHRU */
40278
40279 case V8HImode:
40280 if (!TARGET_SSE2)
40281 break;
40282
40283 /* Don't use ix86_expand_vector_init_interleave if we can't
40284 move from GPR to SSE register directly. */
40285 if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
40286 break;
40287
40288 n = GET_MODE_NUNITS (mode);
40289 for (i = 0; i < n; i++)
40290 ops[i] = XVECEXP (vals, 0, i);
40291 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
40292 return;
40293
40294 case V4HImode:
40295 case V8QImode:
40296 break;
40297
40298 default:
40299 gcc_unreachable ();
40300 }
40301
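  /* Fallback when no suitable element-insert instruction is available:
     pack the vector elements into word_mode integers with shifts and IORs,
     then assemble TARGET from those one, two or four words.  */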
40302 {
40303 int i, j, n_elts, n_words, n_elt_per_word;
40304 enum machine_mode inner_mode;
40305 rtx words[4], shift;
40306
40307 inner_mode = GET_MODE_INNER (mode);
40308 n_elts = GET_MODE_NUNITS (mode);
40309 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
40310 n_elt_per_word = n_elts / n_words;
40311 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
40312
40313 for (i = 0; i < n_words; ++i)
40314 {
40315 rtx word = NULL_RTX;
40316
40317 for (j = 0; j < n_elt_per_word; ++j)
40318 {
40319 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
40320 elt = convert_modes (word_mode, inner_mode, elt, true);
40321
40322 if (j == 0)
40323 word = elt;
40324 else
40325 {
40326 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
40327 word, 1, OPTAB_LIB_WIDEN);
40328 word = expand_simple_binop (word_mode, IOR, word, elt,
40329 word, 1, OPTAB_LIB_WIDEN);
40330 }
40331 }
40332
40333 words[i] = word;
40334 }
40335
40336 if (n_words == 1)
40337 emit_move_insn (target, gen_lowpart (mode, words[0]));
40338 else if (n_words == 2)
40339 {
40340 rtx tmp = gen_reg_rtx (mode);
40341 emit_clobber (tmp);
40342 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
40343 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
40344 emit_move_insn (target, tmp);
40345 }
40346 else if (n_words == 4)
40347 {
40348 rtx tmp = gen_reg_rtx (V4SImode);
40349 gcc_assert (word_mode == SImode);
40350 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
40351 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
40352 emit_move_insn (target, gen_lowpart (mode, tmp));
40353 }
40354 else
40355 gcc_unreachable ();
40356 }
40357 }
40358
40359 /* Initialize vector TARGET via VALS. Suppress the use of MMX
40360 instructions unless MMX_OK is true. */
40361
40362 void
40363 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
40364 {
40365 enum machine_mode mode = GET_MODE (target);
40366 enum machine_mode inner_mode = GET_MODE_INNER (mode);
40367 int n_elts = GET_MODE_NUNITS (mode);
40368 int n_var = 0, one_var = -1;
40369 bool all_same = true, all_const_zero = true;
40370 int i;
40371 rtx x;
40372
40373 for (i = 0; i < n_elts; ++i)
40374 {
40375 x = XVECEXP (vals, 0, i);
40376 if (!(CONST_INT_P (x)
40377 || GET_CODE (x) == CONST_DOUBLE
40378 || GET_CODE (x) == CONST_FIXED))
40379 n_var++, one_var = i;
40380 else if (x != CONST0_RTX (inner_mode))
40381 all_const_zero = false;
40382 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
40383 all_same = false;
40384 }
40385
40386 /* Constants are best loaded from the constant pool. */
40387 if (n_var == 0)
40388 {
40389 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
40390 return;
40391 }
40392
40393 /* If all values are identical, broadcast the value. */
40394 if (all_same
40395 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
40396 XVECEXP (vals, 0, 0)))
40397 return;
40398
40399 /* Values where only one field is non-constant are best loaded from
40400 the pool and overwritten via move later. */
40401 if (n_var == 1)
40402 {
40403 if (all_const_zero
40404 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
40405 XVECEXP (vals, 0, one_var),
40406 one_var))
40407 return;
40408
40409 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
40410 return;
40411 }
40412
40413 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
40414 }
40415
40416 void
40417 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
40418 {
40419 enum machine_mode mode = GET_MODE (target);
40420 enum machine_mode inner_mode = GET_MODE_INNER (mode);
40421 enum machine_mode half_mode;
40422 bool use_vec_merge = false;
40423 rtx tmp;
40424 static rtx (*gen_extract[6][2]) (rtx, rtx)
40425 = {
40426 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
40427 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
40428 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
40429 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
40430 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
40431 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
40432 };
40433 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
40434 = {
40435 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
40436 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
40437 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
40438 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
40439 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
40440 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
40441 };
40442 int i, j, n;
40443
40444 switch (mode)
40445 {
40446 case V2SFmode:
40447 case V2SImode:
40448 if (mmx_ok)
40449 {
40450 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
40451 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
40452 if (elt == 0)
40453 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
40454 else
40455 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
40456 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40457 return;
40458 }
40459 break;
40460
40461 case V2DImode:
40462 use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
40463 if (use_vec_merge)
40464 break;
40465
40466 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
40467 ix86_expand_vector_extract (false, tmp, target, 1 - elt);
40468 if (elt == 0)
40469 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
40470 else
40471 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
40472 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40473 return;
40474
40475 case V2DFmode:
40476 {
40477 rtx op0, op1;
40478
40479 /* For the two element vectors, we implement a VEC_CONCAT with
40480 the extraction of the other element. */
40481
40482 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
40483 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
40484
40485 if (elt == 0)
40486 op0 = val, op1 = tmp;
40487 else
40488 op0 = tmp, op1 = val;
40489
40490 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
40491 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40492 }
40493 return;
40494
40495 case V4SFmode:
40496 use_vec_merge = TARGET_SSE4_1;
40497 if (use_vec_merge)
40498 break;
40499
40500 switch (elt)
40501 {
40502 case 0:
40503 use_vec_merge = true;
40504 break;
40505
40506 case 1:
40507 /* tmp = target = A B C D */
40508 tmp = copy_to_reg (target);
40509 /* target = A A B B */
40510 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
40511 /* target = X A B B */
40512 ix86_expand_vector_set (false, target, val, 0);
40513 /* target = A X C D */
40514 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
40515 const1_rtx, const0_rtx,
40516 GEN_INT (2+4), GEN_INT (3+4)));
40517 return;
40518
40519 case 2:
40520 /* tmp = target = A B C D */
40521 tmp = copy_to_reg (target);
40522 /* tmp = X B C D */
40523 ix86_expand_vector_set (false, tmp, val, 0);
40524 /* target = A B X D */
40525 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
40526 const0_rtx, const1_rtx,
40527 GEN_INT (0+4), GEN_INT (3+4)));
40528 return;
40529
40530 case 3:
40531 /* tmp = target = A B C D */
40532 tmp = copy_to_reg (target);
40533 /* tmp = X B C D */
40534 ix86_expand_vector_set (false, tmp, val, 0);
40535 	  /* target = A B C X */
40536 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
40537 const0_rtx, const1_rtx,
40538 GEN_INT (2+4), GEN_INT (0+4)));
40539 return;
40540
40541 default:
40542 gcc_unreachable ();
40543 }
40544 break;
40545
40546 case V4SImode:
40547 use_vec_merge = TARGET_SSE4_1;
40548 if (use_vec_merge)
40549 break;
40550
40551 /* Element 0 handled by vec_merge below. */
40552 if (elt == 0)
40553 {
40554 use_vec_merge = true;
40555 break;
40556 }
40557
40558 if (TARGET_SSE2)
40559 {
40560 /* With SSE2, use integer shuffles to swap element 0 and ELT,
40561 store into element 0, then shuffle them back. */
40562
40563 rtx order[4];
40564
40565 order[0] = GEN_INT (elt);
40566 order[1] = const1_rtx;
40567 order[2] = const2_rtx;
40568 order[3] = GEN_INT (3);
40569 order[elt] = const0_rtx;
40570
40571 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
40572 order[1], order[2], order[3]));
40573
40574 ix86_expand_vector_set (false, target, val, 0);
40575
40576 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
40577 order[1], order[2], order[3]));
40578 }
40579 else
40580 {
40581 /* For SSE1, we have to reuse the V4SF code. */
40582 rtx t = gen_reg_rtx (V4SFmode);
40583 ix86_expand_vector_set (false, t, gen_lowpart (SFmode, val), elt);
40584 emit_move_insn (target, gen_lowpart (mode, t));
40585 }
40586 return;
40587
40588 case V8HImode:
40589 use_vec_merge = TARGET_SSE2;
40590 break;
40591 case V4HImode:
40592 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
40593 break;
40594
40595 case V16QImode:
40596 use_vec_merge = TARGET_SSE4_1;
40597 break;
40598
40599 case V8QImode:
40600 break;
40601
40602 case V32QImode:
40603 half_mode = V16QImode;
40604 j = 0;
40605 n = 16;
40606 goto half;
40607
40608 case V16HImode:
40609 half_mode = V8HImode;
40610 j = 1;
40611 n = 8;
40612 goto half;
40613
40614 case V8SImode:
40615 half_mode = V4SImode;
40616 j = 2;
40617 n = 4;
40618 goto half;
40619
40620 case V4DImode:
40621 half_mode = V2DImode;
40622 j = 3;
40623 n = 2;
40624 goto half;
40625
40626 case V8SFmode:
40627 half_mode = V4SFmode;
40628 j = 4;
40629 n = 4;
40630 goto half;
40631
40632 case V4DFmode:
40633 half_mode = V2DFmode;
40634 j = 5;
40635 n = 2;
40636 goto half;
40637
40638 half:
40639 /* Compute offset. */
40640 i = elt / n;
40641 elt %= n;
40642
40643 gcc_assert (i <= 1);
40644
40645 /* Extract the half. */
40646 tmp = gen_reg_rtx (half_mode);
40647 emit_insn (gen_extract[j][i] (tmp, target));
40648
40649 /* Put val in tmp at elt. */
40650 ix86_expand_vector_set (false, tmp, val, elt);
40651
40652 /* Put it back. */
40653 emit_insn (gen_insert[j][i] (target, target, tmp));
40654 return;
40655
40656 default:
40657 break;
40658 }
40659
40660 if (use_vec_merge)
40661 {
40662 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
40663 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
40664 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40665 }
40666 else
40667 {
40668 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
40669
40670 emit_move_insn (mem, target);
40671
40672 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
40673 emit_move_insn (tmp, val);
40674
40675 emit_move_insn (target, mem);
40676 }
40677 }
40678
40679 void
40680 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
40681 {
40682 enum machine_mode mode = GET_MODE (vec);
40683 enum machine_mode inner_mode = GET_MODE_INNER (mode);
40684 bool use_vec_extr = false;
40685 rtx tmp;
40686
40687 switch (mode)
40688 {
40689 case V2SImode:
40690 case V2SFmode:
40691 if (!mmx_ok)
40692 break;
40693 /* FALLTHRU */
40694
40695 case V2DFmode:
40696 case V2DImode:
40697 use_vec_extr = true;
40698 break;
40699
40700 case V4SFmode:
40701 use_vec_extr = TARGET_SSE4_1;
40702 if (use_vec_extr)
40703 break;
40704
40705 switch (elt)
40706 {
40707 case 0:
40708 tmp = vec;
40709 break;
40710
40711 case 1:
40712 case 3:
40713 tmp = gen_reg_rtx (mode);
40714 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
40715 GEN_INT (elt), GEN_INT (elt),
40716 GEN_INT (elt+4), GEN_INT (elt+4)));
40717 break;
40718
40719 case 2:
40720 tmp = gen_reg_rtx (mode);
40721 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
40722 break;
40723
40724 default:
40725 gcc_unreachable ();
40726 }
40727 vec = tmp;
40728 use_vec_extr = true;
40729 elt = 0;
40730 break;
40731
40732 case V4SImode:
40733 use_vec_extr = TARGET_SSE4_1;
40734 if (use_vec_extr)
40735 break;
40736
40737 if (TARGET_SSE2)
40738 {
40739 switch (elt)
40740 {
40741 case 0:
40742 tmp = vec;
40743 break;
40744
40745 case 1:
40746 case 3:
40747 tmp = gen_reg_rtx (mode);
40748 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
40749 GEN_INT (elt), GEN_INT (elt),
40750 GEN_INT (elt), GEN_INT (elt)));
40751 break;
40752
40753 case 2:
40754 tmp = gen_reg_rtx (mode);
40755 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
40756 break;
40757
40758 default:
40759 gcc_unreachable ();
40760 }
40761 vec = tmp;
40762 use_vec_extr = true;
40763 elt = 0;
40764 }
40765 else
40766 {
40767 /* For SSE1, we have to reuse the V4SF code. */
40768 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
40769 gen_lowpart (V4SFmode, vec), elt);
40770 return;
40771 }
40772 break;
40773
40774 case V8HImode:
40775 use_vec_extr = TARGET_SSE2;
40776 break;
40777 case V4HImode:
40778 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
40779 break;
40780
40781 case V16QImode:
40782 use_vec_extr = TARGET_SSE4_1;
40783 break;
40784
40785 case V8SFmode:
40786 if (TARGET_AVX)
40787 {
40788 tmp = gen_reg_rtx (V4SFmode);
40789 if (elt < 4)
40790 emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
40791 else
40792 emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
40793 ix86_expand_vector_extract (false, target, tmp, elt & 3);
40794 return;
40795 }
40796 break;
40797
40798 case V4DFmode:
40799 if (TARGET_AVX)
40800 {
40801 tmp = gen_reg_rtx (V2DFmode);
40802 if (elt < 2)
40803 emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
40804 else
40805 emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
40806 ix86_expand_vector_extract (false, target, tmp, elt & 1);
40807 return;
40808 }
40809 break;
40810
40811 case V32QImode:
40812 if (TARGET_AVX)
40813 {
40814 tmp = gen_reg_rtx (V16QImode);
40815 if (elt < 16)
40816 emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
40817 else
40818 emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
40819 ix86_expand_vector_extract (false, target, tmp, elt & 15);
40820 return;
40821 }
40822 break;
40823
40824 case V16HImode:
40825 if (TARGET_AVX)
40826 {
40827 tmp = gen_reg_rtx (V8HImode);
40828 if (elt < 8)
40829 emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
40830 else
40831 emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
40832 ix86_expand_vector_extract (false, target, tmp, elt & 7);
40833 return;
40834 }
40835 break;
40836
40837 case V8SImode:
40838 if (TARGET_AVX)
40839 {
40840 tmp = gen_reg_rtx (V4SImode);
40841 if (elt < 4)
40842 emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
40843 else
40844 emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
40845 ix86_expand_vector_extract (false, target, tmp, elt & 3);
40846 return;
40847 }
40848 break;
40849
40850 case V4DImode:
40851 if (TARGET_AVX)
40852 {
40853 tmp = gen_reg_rtx (V2DImode);
40854 if (elt < 2)
40855 emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
40856 else
40857 emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
40858 ix86_expand_vector_extract (false, target, tmp, elt & 1);
40859 return;
40860 }
40861 break;
40862
40863 case V16SFmode:
40864 tmp = gen_reg_rtx (V8SFmode);
40865 if (elt < 8)
40866 emit_insn (gen_vec_extract_lo_v16sf (tmp, vec));
40867 else
40868 emit_insn (gen_vec_extract_hi_v16sf (tmp, vec));
40869 ix86_expand_vector_extract (false, target, tmp, elt & 7);
40870 return;
40871
40872 case V8DFmode:
40873 tmp = gen_reg_rtx (V4DFmode);
40874 if (elt < 4)
40875 emit_insn (gen_vec_extract_lo_v8df (tmp, vec));
40876 else
40877 emit_insn (gen_vec_extract_hi_v8df (tmp, vec));
40878 ix86_expand_vector_extract (false, target, tmp, elt & 3);
40879 return;
40880
40881 case V16SImode:
40882 tmp = gen_reg_rtx (V8SImode);
40883 if (elt < 8)
40884 emit_insn (gen_vec_extract_lo_v16si (tmp, vec));
40885 else
40886 emit_insn (gen_vec_extract_hi_v16si (tmp, vec));
40887 ix86_expand_vector_extract (false, target, tmp, elt & 7);
40888 return;
40889
40890 case V8DImode:
40891 tmp = gen_reg_rtx (V4DImode);
40892 if (elt < 4)
40893 emit_insn (gen_vec_extract_lo_v8di (tmp, vec));
40894 else
40895 emit_insn (gen_vec_extract_hi_v8di (tmp, vec));
40896 ix86_expand_vector_extract (false, target, tmp, elt & 3);
40897 return;
40898
40899 case V8QImode:
40900 /* ??? Could extract the appropriate HImode element and shift. */
40901 default:
40902 break;
40903 }
40904
40905 if (use_vec_extr)
40906 {
40907 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
40908 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
40909
40910 /* Let the rtl optimizers know about the zero extension performed. */
40911 if (inner_mode == QImode || inner_mode == HImode)
40912 {
40913 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
40914 target = gen_lowpart (SImode, target);
40915 }
40916
40917 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40918 }
40919 else
40920 {
40921 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
40922
40923 emit_move_insn (mem, vec);
40924
40925 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
40926 emit_move_insn (target, tmp);
40927 }
40928 }
40929
40930 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
40931 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
40932 The upper bits of DEST are undefined, though they shouldn't cause
40933 exceptions (some bits from src or all zeros are ok). */
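/* Depending on the mode this is done with an element shuffle or interleave
   (movhlps / shufps / unpckhpd), a full-width or per-lane byte shift
   (psrldq) on a V1TImode or V2TImode view, or a 128-bit lane permute for
   the wider AVX and AVX-512 modes.  */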
40934
40935 static void
40936 emit_reduc_half (rtx dest, rtx src, int i)
40937 {
40938 rtx tem, d = dest;
40939 switch (GET_MODE (src))
40940 {
40941 case V4SFmode:
40942 if (i == 128)
40943 tem = gen_sse_movhlps (dest, src, src);
40944 else
40945 tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx,
40946 GEN_INT (1 + 4), GEN_INT (1 + 4));
40947 break;
40948 case V2DFmode:
40949 tem = gen_vec_interleave_highv2df (dest, src, src);
40950 break;
40951 case V16QImode:
40952 case V8HImode:
40953 case V4SImode:
40954 case V2DImode:
40955 d = gen_reg_rtx (V1TImode);
40956 tem = gen_sse2_lshrv1ti3 (d, gen_lowpart (V1TImode, src),
40957 GEN_INT (i / 2));
40958 break;
40959 case V8SFmode:
40960 if (i == 256)
40961 tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx);
40962 else
40963 tem = gen_avx_shufps256 (dest, src, src,
40964 GEN_INT (i == 128 ? 2 + (3 << 2) : 1));
40965 break;
40966 case V4DFmode:
40967 if (i == 256)
40968 tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx);
40969 else
40970 tem = gen_avx_shufpd256 (dest, src, src, const1_rtx);
40971 break;
40972 case V32QImode:
40973 case V16HImode:
40974 case V8SImode:
40975 case V4DImode:
40976 if (i == 256)
40977 {
40978 if (GET_MODE (dest) != V4DImode)
40979 d = gen_reg_rtx (V4DImode);
40980 tem = gen_avx2_permv2ti (d, gen_lowpart (V4DImode, src),
40981 gen_lowpart (V4DImode, src),
40982 const1_rtx);
40983 }
40984 else
40985 {
40986 d = gen_reg_rtx (V2TImode);
40987 tem = gen_avx2_lshrv2ti3 (d, gen_lowpart (V2TImode, src),
40988 GEN_INT (i / 2));
40989 }
40990 break;
40991 case V16SImode:
40992 case V16SFmode:
40993 case V8DImode:
40994 case V8DFmode:
40995 if (i > 128)
40996 tem = gen_avx512f_shuf_i32x4_1 (gen_lowpart (V16SImode, dest),
40997 gen_lowpart (V16SImode, src),
40998 gen_lowpart (V16SImode, src),
40999 GEN_INT (0x4 + (i == 512 ? 4 : 0)),
41000 GEN_INT (0x5 + (i == 512 ? 4 : 0)),
41001 GEN_INT (0x6 + (i == 512 ? 4 : 0)),
41002 GEN_INT (0x7 + (i == 512 ? 4 : 0)),
41003 GEN_INT (0xC), GEN_INT (0xD),
41004 GEN_INT (0xE), GEN_INT (0xF),
41005 GEN_INT (0x10), GEN_INT (0x11),
41006 GEN_INT (0x12), GEN_INT (0x13),
41007 GEN_INT (0x14), GEN_INT (0x15),
41008 GEN_INT (0x16), GEN_INT (0x17));
41009 else
41010 tem = gen_avx512f_pshufd_1 (gen_lowpart (V16SImode, dest),
41011 gen_lowpart (V16SImode, src),
41012 GEN_INT (i == 128 ? 0x2 : 0x1),
41013 GEN_INT (0x3),
41014 GEN_INT (0x3),
41015 GEN_INT (0x3),
41016 GEN_INT (i == 128 ? 0x6 : 0x5),
41017 GEN_INT (0x7),
41018 GEN_INT (0x7),
41019 GEN_INT (0x7),
41020 GEN_INT (i == 128 ? 0xA : 0x9),
41021 GEN_INT (0xB),
41022 GEN_INT (0xB),
41023 GEN_INT (0xB),
41024 GEN_INT (i == 128 ? 0xE : 0xD),
41025 GEN_INT (0xF),
41026 GEN_INT (0xF),
41027 GEN_INT (0xF));
41028 break;
41029 default:
41030 gcc_unreachable ();
41031 }
41032 emit_insn (tem);
41033 if (d != dest)
41034 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
41035 }
41036
41037 /* Expand a vector reduction. FN is the binary pattern to reduce;
41038 DEST is the destination; IN is the input vector. */
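/* The reduction takes log2 (NUNITS) steps: each step folds the upper half
   of the still-active elements onto the lower half with emit_reduc_half and
   combines the two with FN, so only the lowest element of DEST holds the
   final reduction result.  */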
41039
41040 void
41041 ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
41042 {
41043 rtx half, dst, vec = in;
41044 enum machine_mode mode = GET_MODE (in);
41045 int i;
41046
41047   /* SSE4.1 has a special instruction for V8HImode UMIN reduction. */
41048 if (TARGET_SSE4_1
41049 && mode == V8HImode
41050 && fn == gen_uminv8hi3)
41051 {
41052 emit_insn (gen_sse4_1_phminposuw (dest, in));
41053 return;
41054 }
41055
41056 for (i = GET_MODE_BITSIZE (mode);
41057 i > GET_MODE_BITSIZE (GET_MODE_INNER (mode));
41058 i >>= 1)
41059 {
41060 half = gen_reg_rtx (mode);
41061 emit_reduc_half (half, vec, i);
41062 if (i == GET_MODE_BITSIZE (GET_MODE_INNER (mode)) * 2)
41063 dst = dest;
41064 else
41065 dst = gen_reg_rtx (mode);
41066 emit_insn (fn (dst, half, vec));
41067 vec = dst;
41068 }
41069 }
41070 \f
41071 /* Target hook for scalar_mode_supported_p. */
41072 static bool
41073 ix86_scalar_mode_supported_p (enum machine_mode mode)
41074 {
41075 if (DECIMAL_FLOAT_MODE_P (mode))
41076 return default_decimal_float_supported_p ();
41077 else if (mode == TFmode)
41078 return true;
41079 else
41080 return default_scalar_mode_supported_p (mode);
41081 }
41082
41083 /* Implements target hook vector_mode_supported_p. */
41084 static bool
41085 ix86_vector_mode_supported_p (enum machine_mode mode)
41086 {
41087 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
41088 return true;
41089 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
41090 return true;
41091 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
41092 return true;
41093 if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
41094 return true;
41095 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
41096 return true;
41097 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
41098 return true;
41099 return false;
41100 }
41101
41102 /* Target hook for c_mode_for_suffix. */
41103 static enum machine_mode
41104 ix86_c_mode_for_suffix (char suffix)
41105 {
41106 if (suffix == 'q')
41107 return TFmode;
41108 if (suffix == 'w')
41109 return XFmode;
41110
41111 return VOIDmode;
41112 }
41113
41114 /* Worker function for TARGET_MD_ASM_CLOBBERS.
41115
41116 We do this in the new i386 backend to maintain source compatibility
41117 with the old cc0-based compiler. */
41118
41119 static tree
41120 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
41121 tree inputs ATTRIBUTE_UNUSED,
41122 tree clobbers)
41123 {
41124 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
41125 clobbers);
41126 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
41127 clobbers);
41128 return clobbers;
41129 }
41130
41131 /* Implements the target hook targetm.asm.encode_section_info. */
41132
41133 static void ATTRIBUTE_UNUSED
41134 ix86_encode_section_info (tree decl, rtx rtl, int first)
41135 {
41136 default_encode_section_info (decl, rtl, first);
41137
41138 if (TREE_CODE (decl) == VAR_DECL
41139 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
41140 && ix86_in_large_data_p (decl))
41141 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
41142 }
41143
41144 /* Worker function for REVERSE_CONDITION. */
41145
41146 enum rtx_code
41147 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
41148 {
41149 return (mode != CCFPmode && mode != CCFPUmode
41150 ? reverse_condition (code)
41151 : reverse_condition_maybe_unordered (code));
41152 }
41153
41154 /* Output code to perform an x87 FP register move, from OPERANDS[1]
41155 to OPERANDS[0]. */
41156
41157 const char *
41158 output_387_reg_move (rtx insn, rtx *operands)
41159 {
41160 if (REG_P (operands[0]))
41161 {
41162 if (REG_P (operands[1])
41163 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
41164 {
41165 if (REGNO (operands[0]) == FIRST_STACK_REG)
41166 return output_387_ffreep (operands, 0);
41167 return "fstp\t%y0";
41168 }
41169 if (STACK_TOP_P (operands[0]))
41170 return "fld%Z1\t%y1";
41171 return "fst\t%y0";
41172 }
41173 else if (MEM_P (operands[0]))
41174 {
41175 gcc_assert (REG_P (operands[1]));
41176 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
41177 return "fstp%Z0\t%y0";
41178 else
41179 {
41180 /* There is no non-popping store to memory for XFmode.
41181 So if we need one, follow the store with a load. */
41182 if (GET_MODE (operands[0]) == XFmode)
41183 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
41184 else
41185 return "fst%Z0\t%y0";
41186 }
41187 }
41188 else
41189     gcc_unreachable ();
41190 }
41191
41192 /* Output code to perform a conditional jump to LABEL, if C2 flag in
41193 FP status register is set. */
41194
41195 void
41196 ix86_emit_fp_unordered_jump (rtx label)
41197 {
41198 rtx reg = gen_reg_rtx (HImode);
41199 rtx temp;
41200
41201 emit_insn (gen_x86_fnstsw_1 (reg));
41202
41203 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
41204 {
41205 emit_insn (gen_x86_sahf_1 (reg));
41206
41207 temp = gen_rtx_REG (CCmode, FLAGS_REG);
41208 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
41209 }
41210 else
41211 {
41212 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
41213
41214 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
41215 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
41216 }
41217
41218 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
41219 gen_rtx_LABEL_REF (VOIDmode, label),
41220 pc_rtx);
41221 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
41222
41223 emit_jump_insn (temp);
41224 predict_jump (REG_BR_PROB_BASE * 10 / 100);
41225 }
41226
41227 /* Output code to perform a log1p XFmode calculation. */
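/* The i387 fyl2xp1 instruction used for small arguments is only specified
   for |x| < 1 - sqrt (2) / 2 (about 0.2928932, the constant tested below);
   for larger magnitudes the code falls back to computing fyl2x on 1 + x.  */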
41228
41229 void ix86_emit_i387_log1p (rtx op0, rtx op1)
41230 {
41231 rtx label1 = gen_label_rtx ();
41232 rtx label2 = gen_label_rtx ();
41233
41234 rtx tmp = gen_reg_rtx (XFmode);
41235 rtx tmp2 = gen_reg_rtx (XFmode);
41236 rtx test;
41237
41238 emit_insn (gen_absxf2 (tmp, op1));
41239 test = gen_rtx_GE (VOIDmode, tmp,
41240 CONST_DOUBLE_FROM_REAL_VALUE (
41241 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
41242 XFmode));
41243 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
41244
41245 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
41246 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
41247 emit_jump (label2);
41248
41249 emit_label (label1);
41250 emit_move_insn (tmp, CONST1_RTX (XFmode));
41251 emit_insn (gen_addxf3 (tmp, op1, tmp));
41252 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
41253 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
41254
41255 emit_label (label2);
41256 }
41257
41258 /* Emit code for round calculation. */
41259 void ix86_emit_i387_round (rtx op0, rtx op1)
41260 {
41261 enum machine_mode inmode = GET_MODE (op1);
41262 enum machine_mode outmode = GET_MODE (op0);
41263 rtx e1, e2, res, tmp, tmp1, half;
41264 rtx scratch = gen_reg_rtx (HImode);
41265 rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
41266 rtx jump_label = gen_label_rtx ();
41267 rtx insn;
41268 rtx (*gen_abs) (rtx, rtx);
41269 rtx (*gen_neg) (rtx, rtx);
41270
41271 switch (inmode)
41272 {
41273 case SFmode:
41274 gen_abs = gen_abssf2;
41275 break;
41276 case DFmode:
41277 gen_abs = gen_absdf2;
41278 break;
41279 case XFmode:
41280 gen_abs = gen_absxf2;
41281 break;
41282 default:
41283 gcc_unreachable ();
41284 }
41285
41286 switch (outmode)
41287 {
41288 case SFmode:
41289 gen_neg = gen_negsf2;
41290 break;
41291 case DFmode:
41292 gen_neg = gen_negdf2;
41293 break;
41294 case XFmode:
41295 gen_neg = gen_negxf2;
41296 break;
41297 case HImode:
41298 gen_neg = gen_neghi2;
41299 break;
41300 case SImode:
41301 gen_neg = gen_negsi2;
41302 break;
41303 case DImode:
41304 gen_neg = gen_negdi2;
41305 break;
41306 default:
41307 gcc_unreachable ();
41308 }
41309
41310 e1 = gen_reg_rtx (inmode);
41311 e2 = gen_reg_rtx (inmode);
41312 res = gen_reg_rtx (outmode);
41313
41314 half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, inmode);
41315
41316 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
41317
41318 /* scratch = fxam(op1) */
41319 emit_insn (gen_rtx_SET (VOIDmode, scratch,
41320 gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
41321 UNSPEC_FXAM)));
41322 /* e1 = fabs(op1) */
41323 emit_insn (gen_abs (e1, op1));
41324
41325 /* e2 = e1 + 0.5 */
41326 half = force_reg (inmode, half);
41327 emit_insn (gen_rtx_SET (VOIDmode, e2,
41328 gen_rtx_PLUS (inmode, e1, half)));
41329
41330 /* res = floor(e2) */
41331 if (inmode != XFmode)
41332 {
41333 tmp1 = gen_reg_rtx (XFmode);
41334
41335 emit_insn (gen_rtx_SET (VOIDmode, tmp1,
41336 gen_rtx_FLOAT_EXTEND (XFmode, e2)));
41337 }
41338 else
41339 tmp1 = e2;
41340
41341 switch (outmode)
41342 {
41343 case SFmode:
41344 case DFmode:
41345 {
41346 rtx tmp0 = gen_reg_rtx (XFmode);
41347
41348 emit_insn (gen_frndintxf2_floor (tmp0, tmp1));
41349
41350 emit_insn (gen_rtx_SET (VOIDmode, res,
41351 gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
41352 UNSPEC_TRUNC_NOOP)));
41353 }
41354 break;
41355 case XFmode:
41356 emit_insn (gen_frndintxf2_floor (res, tmp1));
41357 break;
41358 case HImode:
41359 emit_insn (gen_lfloorxfhi2 (res, tmp1));
41360 break;
41361 case SImode:
41362 emit_insn (gen_lfloorxfsi2 (res, tmp1));
41363 break;
41364 case DImode:
41365 emit_insn (gen_lfloorxfdi2 (res, tmp1));
41366 break;
41367 default:
41368 gcc_unreachable ();
41369 }
41370
41371 /* flags = signbit(a) */
41372 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02)));
41373
41374 /* if (flags) then res = -res */
41375 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
41376 gen_rtx_EQ (VOIDmode, flags, const0_rtx),
41377 gen_rtx_LABEL_REF (VOIDmode, jump_label),
41378 pc_rtx);
41379 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
41380 predict_jump (REG_BR_PROB_BASE * 50 / 100);
41381 JUMP_LABEL (insn) = jump_label;
41382
41383 emit_insn (gen_neg (res, res));
41384
41385 emit_label (jump_label);
41386 LABEL_NUSES (jump_label) = 1;
41387
41388 emit_move_insn (op0, res);
41389 }
41390
41391 /* Output code to perform a Newton-Raphson approximation of a single precision
41392    floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
41393
41394 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
41395 {
41396 rtx x0, x1, e0, e1;
41397
41398 x0 = gen_reg_rtx (mode);
41399 e0 = gen_reg_rtx (mode);
41400 e1 = gen_reg_rtx (mode);
41401 x1 = gen_reg_rtx (mode);
41402
41403   /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp(b))) */
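  /* With x0 = rcp(b) as the starting estimate, one Newton-Raphson step for
     1/b is x1 = x0 * (2 - b * x0) = (x0 + x0) - b * x0 * x0, which roughly
     doubles the number of correct bits of the hardware estimate; the
     quotient is then approximated as a * x1.  */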
41404
41405 b = force_reg (mode, b);
41406
41407 /* x0 = rcp(b) estimate */
41408 if (mode == V16SFmode || mode == V8DFmode)
41409 emit_insn (gen_rtx_SET (VOIDmode, x0,
41410 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
41411 UNSPEC_RCP14)));
41412 else
41413 emit_insn (gen_rtx_SET (VOIDmode, x0,
41414 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
41415 UNSPEC_RCP)));
41416
41417 /* e0 = x0 * b */
41418 emit_insn (gen_rtx_SET (VOIDmode, e0,
41419 gen_rtx_MULT (mode, x0, b)));
41420
41421 /* e0 = x0 * e0 */
41422 emit_insn (gen_rtx_SET (VOIDmode, e0,
41423 gen_rtx_MULT (mode, x0, e0)));
41424
41425 /* e1 = x0 + x0 */
41426 emit_insn (gen_rtx_SET (VOIDmode, e1,
41427 gen_rtx_PLUS (mode, x0, x0)));
41428
41429 /* x1 = e1 - e0 */
41430 emit_insn (gen_rtx_SET (VOIDmode, x1,
41431 gen_rtx_MINUS (mode, e1, e0)));
41432
41433 /* res = a * x1 */
41434 emit_insn (gen_rtx_SET (VOIDmode, res,
41435 gen_rtx_MULT (mode, a, x1)));
41436 }
41437
41438 /* Output code to perform a Newton-Raphson approximation of a
41439    single precision floating point [reciprocal] square root. */
41440
41441 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
41442 bool recip)
41443 {
41444 rtx x0, e0, e1, e2, e3, mthree, mhalf;
41445 REAL_VALUE_TYPE r;
41446 int unspec;
41447
41448 x0 = gen_reg_rtx (mode);
41449 e0 = gen_reg_rtx (mode);
41450 e1 = gen_reg_rtx (mode);
41451 e2 = gen_reg_rtx (mode);
41452 e3 = gen_reg_rtx (mode);
41453
41454 real_from_integer (&r, VOIDmode, -3, -1, 0);
41455 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
41456
41457 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
41458 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
41459 unspec = UNSPEC_RSQRT;
41460
41461 if (VECTOR_MODE_P (mode))
41462 {
41463 mthree = ix86_build_const_vector (mode, true, mthree);
41464 mhalf = ix86_build_const_vector (mode, true, mhalf);
41465 /* There is no 512-bit rsqrt. There is however rsqrt14. */
41466 if (GET_MODE_SIZE (mode) == 64)
41467 unspec = UNSPEC_RSQRT14;
41468 }
41469
41470 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
41471 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
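  /* One Newton-Raphson step for 1/sqrt(a) is
	x1 = 0.5 * x0 * (3 - a * x0 * x0) = -0.5 * x0 * (a * x0 * x0 - 3),
     which is the rsqrt formula above; multiplying the final product by a
     instead (i.e. using e0 = a * x0 in place of x0) gives the sqrt formula,
     since sqrt(a) = a / sqrt(a).  */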
41472
41473 a = force_reg (mode, a);
41474
41475 /* x0 = rsqrt(a) estimate */
41476 emit_insn (gen_rtx_SET (VOIDmode, x0,
41477 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
41478 unspec)));
41479
41480   /* If a == 0.0, filter out the infinite rsqrt estimate to prevent NaN for sqrt (0.0). */
41481 if (!recip)
41482 {
41483 rtx zero, mask;
41484
41485 zero = gen_reg_rtx (mode);
41486 mask = gen_reg_rtx (mode);
41487
41488 zero = force_reg (mode, CONST0_RTX(mode));
41489
41490 /* Handle masked compare. */
41491 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
41492 {
41493 mask = gen_reg_rtx (HImode);
41494 /* Imm value 0x4 corresponds to not-equal comparison. */
41495 emit_insn (gen_avx512f_cmpv16sf3 (mask, zero, a, GEN_INT (0x4)));
41496 emit_insn (gen_avx512f_blendmv16sf (x0, zero, x0, mask));
41497 }
41498 else
41499 {
41500 emit_insn (gen_rtx_SET (VOIDmode, mask,
41501 gen_rtx_NE (mode, zero, a)));
41502
41503 emit_insn (gen_rtx_SET (VOIDmode, x0,
41504 gen_rtx_AND (mode, x0, mask)));
41505 }
41506 }
41507
41508 /* e0 = x0 * a */
41509 emit_insn (gen_rtx_SET (VOIDmode, e0,
41510 gen_rtx_MULT (mode, x0, a)));
41511 /* e1 = e0 * x0 */
41512 emit_insn (gen_rtx_SET (VOIDmode, e1,
41513 gen_rtx_MULT (mode, e0, x0)));
41514
41515 /* e2 = e1 - 3. */
41516 mthree = force_reg (mode, mthree);
41517 emit_insn (gen_rtx_SET (VOIDmode, e2,
41518 gen_rtx_PLUS (mode, e1, mthree)));
41519
41520 mhalf = force_reg (mode, mhalf);
41521 if (recip)
41522 /* e3 = -.5 * x0 */
41523 emit_insn (gen_rtx_SET (VOIDmode, e3,
41524 gen_rtx_MULT (mode, x0, mhalf)));
41525 else
41526 /* e3 = -.5 * e0 */
41527 emit_insn (gen_rtx_SET (VOIDmode, e3,
41528 gen_rtx_MULT (mode, e0, mhalf)));
41529 /* ret = e2 * e3 */
41530 emit_insn (gen_rtx_SET (VOIDmode, res,
41531 gen_rtx_MULT (mode, e2, e3)));
41532 }
41533
41534 #ifdef TARGET_SOLARIS
41535 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
41536
41537 static void
41538 i386_solaris_elf_named_section (const char *name, unsigned int flags,
41539 tree decl)
41540 {
41541 /* With Binutils 2.15, the "@unwind" marker must be specified on
41542 every occurrence of the ".eh_frame" section, not just the first
41543 one. */
41544 if (TARGET_64BIT
41545 && strcmp (name, ".eh_frame") == 0)
41546 {
41547 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
41548 flags & SECTION_WRITE ? "aw" : "a");
41549 return;
41550 }
41551
41552 #ifndef USE_GAS
41553 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
41554 {
41555 solaris_elf_asm_comdat_section (name, flags, decl);
41556 return;
41557 }
41558 #endif
41559
41560 default_elf_asm_named_section (name, flags, decl);
41561 }
41562 #endif /* TARGET_SOLARIS */
41563
41564 /* Return the mangling of TYPE if it is an extended fundamental type. */
41565
41566 static const char *
41567 ix86_mangle_type (const_tree type)
41568 {
41569 type = TYPE_MAIN_VARIANT (type);
41570
41571 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
41572 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
41573 return NULL;
41574
41575 switch (TYPE_MODE (type))
41576 {
41577 case TFmode:
41578 /* __float128 is "g". */
41579 return "g";
41580 case XFmode:
41581 /* "long double" or __float80 is "e". */
41582 return "e";
41583 default:
41584 return NULL;
41585 }
41586 }
41587
41588 /* For 32-bit code we can save PIC register setup by using
41589 __stack_chk_fail_local hidden function instead of calling
41590 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
41591 register, so it is better to call __stack_chk_fail directly. */
41592
41593 static tree ATTRIBUTE_UNUSED
41594 ix86_stack_protect_fail (void)
41595 {
41596 return TARGET_64BIT
41597 ? default_external_stack_protect_fail ()
41598 : default_hidden_stack_protect_fail ();
41599 }
41600
41601 /* Select a format to encode pointers in exception handling data. CODE
41602 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
41603 true if the symbol may be affected by dynamic relocations.
41604
41605 ??? All x86 object file formats are capable of representing this.
41606 After all, the relocation needed is the same as for the call insn.
41607 Whether or not a particular assembler allows us to enter such, I
41608 guess we'll have to see. */
41609 int
41610 asm_preferred_eh_data_format (int code, int global)
41611 {
41612 if (flag_pic)
41613 {
41614 int type = DW_EH_PE_sdata8;
41615 if (!TARGET_64BIT
41616 || ix86_cmodel == CM_SMALL_PIC
41617 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
41618 type = DW_EH_PE_sdata4;
41619 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
41620 }
41621 if (ix86_cmodel == CM_SMALL
41622 || (ix86_cmodel == CM_MEDIUM && code))
41623 return DW_EH_PE_udata4;
41624 return DW_EH_PE_absptr;
41625 }
41626 \f
41627 /* Expand copysign: copy the sign of SIGN onto the positive value ABS_VALUE,
41628    storing the result in RESULT.  If MASK is non-null, it shall be a mask
41629    that masks out the sign-bit. */
41630 static void
41631 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
41632 {
41633 enum machine_mode mode = GET_MODE (sign);
41634 rtx sgn = gen_reg_rtx (mode);
41635 if (mask == NULL_RTX)
41636 {
41637 enum machine_mode vmode;
41638
41639 if (mode == SFmode)
41640 vmode = V4SFmode;
41641 else if (mode == DFmode)
41642 vmode = V2DFmode;
41643 else
41644 vmode = mode;
41645
41646 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
41647 if (!VECTOR_MODE_P (mode))
41648 {
41649 /* We need to generate a scalar mode mask in this case. */
41650 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
41651 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
41652 mask = gen_reg_rtx (mode);
41653 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
41654 }
41655 }
41656 else
41657 mask = gen_rtx_NOT (mode, mask);
41658 emit_insn (gen_rtx_SET (VOIDmode, sgn,
41659 gen_rtx_AND (mode, mask, sign)));
41660 emit_insn (gen_rtx_SET (VOIDmode, result,
41661 gen_rtx_IOR (mode, abs_value, sgn)));
41662 }
41663
41664 /* Expand fabs (OP0) and return a new rtx that holds the result. The
41665 mask for masking out the sign-bit is stored in *SMASK, if that is
41666 non-null. */
41667 static rtx
41668 ix86_expand_sse_fabs (rtx op0, rtx *smask)
41669 {
41670 enum machine_mode vmode, mode = GET_MODE (op0);
41671 rtx xa, mask;
41672
41673 xa = gen_reg_rtx (mode);
41674 if (mode == SFmode)
41675 vmode = V4SFmode;
41676 else if (mode == DFmode)
41677 vmode = V2DFmode;
41678 else
41679 vmode = mode;
41680 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
41681 if (!VECTOR_MODE_P (mode))
41682 {
41683 /* We need to generate a scalar mode mask in this case. */
41684 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
41685 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
41686 mask = gen_reg_rtx (mode);
41687 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
41688 }
41689 emit_insn (gen_rtx_SET (VOIDmode, xa,
41690 gen_rtx_AND (mode, op0, mask)));
41691
41692 if (smask)
41693 *smask = mask;
41694
41695 return xa;
41696 }
41697
41698 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
41699 swapping the operands if SWAP_OPERANDS is true. The expanded
41700 code is a forward jump to a newly created label in case the
41701 comparison is true. The generated label rtx is returned. */
41702 static rtx
41703 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
41704 bool swap_operands)
41705 {
41706 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
41707 rtx label, tmp;
41708
41709 if (swap_operands)
41710 {
41711 tmp = op0;
41712 op0 = op1;
41713 op1 = tmp;
41714 }
41715
41716 label = gen_label_rtx ();
41717 tmp = gen_rtx_REG (fpcmp_mode, FLAGS_REG);
41718 emit_insn (gen_rtx_SET (VOIDmode, tmp,
41719 gen_rtx_COMPARE (fpcmp_mode, op0, op1)));
41720 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
41721 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
41722 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
41723 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
41724 JUMP_LABEL (tmp) = label;
41725
41726 return label;
41727 }
41728
41729 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
41730 using comparison code CODE. Operands are swapped for the comparison if
41731 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
41732 static rtx
41733 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
41734 bool swap_operands)
41735 {
41736 rtx (*insn)(rtx, rtx, rtx, rtx);
41737 enum machine_mode mode = GET_MODE (op0);
41738 rtx mask = gen_reg_rtx (mode);
41739
41740 if (swap_operands)
41741 {
41742 rtx tmp = op0;
41743 op0 = op1;
41744 op1 = tmp;
41745 }
41746
41747 insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
41748
41749 emit_insn (insn (mask, op0, op1,
41750 gen_rtx_fmt_ee (code, mode, op0, op1)));
41751 return mask;
41752 }
41753
41754 /* Generate and return a rtx of mode MODE for 2**n where n is the number
41755 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
41756 static rtx
41757 ix86_gen_TWO52 (enum machine_mode mode)
41758 {
41759 REAL_VALUE_TYPE TWO52r;
41760 rtx TWO52;
41761
41762 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
41763 TWO52 = const_double_from_real_value (TWO52r, mode);
41764 TWO52 = force_reg (mode, TWO52);
41765
41766 return TWO52;
41767 }
41768
41769 /* Expand SSE sequence for computing lround from OP1 storing
41770 into OP0. */
41771 void
41772 ix86_expand_lround (rtx op0, rtx op1)
41773 {
41774 /* C code for the stuff we're doing below:
41775 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
41776 return (long)tmp;
41777 */
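  /* The predecessor of 0.5 is used instead of 0.5 itself so that the
     addition cannot round OP1 up across an integer boundary when OP1 is
     strictly below the halfway point; e.g. for the largest double smaller
     than 0.5 the result must still be 0.  */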
41778 enum machine_mode mode = GET_MODE (op1);
41779 const struct real_format *fmt;
41780 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
41781 rtx adj;
41782
41783 /* load nextafter (0.5, 0.0) */
41784 fmt = REAL_MODE_FORMAT (mode);
41785 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
41786 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
41787
41788 /* adj = copysign (0.5, op1) */
41789 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
41790 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
41791
41792 /* adj = op1 + adj */
41793 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
41794
41795 /* op0 = (imode)adj */
41796 expand_fix (op0, adj, 0);
41797 }
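
/* Worked example (editorial, illustrative only): why nextafter (0.5, 0.0)
   rather than 0.5 is added above.  The largest double below 0.5 is
   0.49999999999999994 (0.5 - 2**-54).  Adding exactly 0.5 to it yields
   1.0 - 2**-54, which is halfway between two representable doubles and
   rounds to 1.0 under round-to-nearest-even, so the truncating conversion
   would wrongly return 1.  Adding pred_half = 0.5 - 2**-54 instead yields
   1.0 - 2**-53, which is exactly representable, and the conversion
   correctly returns 0.  A direct scalar transcription of the pseudocode
   above (assumes <math.h>; the helper name is made up):

     static long
     lround_model (double x)
     {
       double adj = copysign (nextafter (0.5, 0.0), x);
       return (long) (x + adj);
     }
*/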
41798
41799 /* Expand SSE2 sequence for computing lfloor or lceil from OP1
41800    storing into OP0, depending on DO_FLOOR. */
41801 void
41802 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
41803 {
41804 /* C code for the stuff we're doing below (for do_floor):
41805 xi = (long)op1;
41806 xi -= (double)xi > op1 ? 1 : 0;
41807 return xi;
41808 */
41809 enum machine_mode fmode = GET_MODE (op1);
41810 enum machine_mode imode = GET_MODE (op0);
41811 rtx ireg, freg, label, tmp;
41812
41813 /* reg = (long)op1 */
41814 ireg = gen_reg_rtx (imode);
41815 expand_fix (ireg, op1, 0);
41816
41817 /* freg = (double)reg */
41818 freg = gen_reg_rtx (fmode);
41819 expand_float (freg, ireg, 0);
41820
41821 /* ireg = (freg > op1) ? ireg - 1 : ireg */
41822 label = ix86_expand_sse_compare_and_jump (UNLE,
41823 freg, op1, !do_floor);
41824 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
41825 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
41826 emit_move_insn (ireg, tmp);
41827
41828 emit_label (label);
41829 LABEL_NUSES (label) = 1;
41830
41831 emit_move_insn (op0, ireg);
41832 }
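
/* Worked example (editorial, illustrative only) of the compensation
   above.  The truncating conversion rounds toward zero, so for the floor
   case a negative non-integer comes out one too high, and for the ceil
   case a positive non-integer comes out one too low.  A scalar model of
   the floor case (helper name made up):

     static long
     lfloor_model (double x)
     {
       long xi = (long) x;
       if ((double) xi > x)
         xi -= 1;
       return xi;
     }

   For example, lfloor_model (-2.5) first truncates to -2, sees that
   -2.0 > -2.5, and returns -3.  The ceil case mirrors this with the
   comparison and adjustment reversed (xi += 1 when (double) xi < x), so
   that e.g. 2.5 becomes 3.  */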
41833
41834 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
41835 result in OPERAND0. */
41836 void
41837 ix86_expand_rint (rtx operand0, rtx operand1)
41838 {
41839 /* C code for the stuff we're doing below:
41840 xa = fabs (operand1);
41841 if (!isless (xa, 2**52))
41842 return operand1;
41843 xa = xa + 2**52 - 2**52;
41844 return copysign (xa, operand1);
41845 */
41846 enum machine_mode mode = GET_MODE (operand0);
41847 rtx res, xa, label, TWO52, mask;
41848
41849 res = gen_reg_rtx (mode);
41850 emit_move_insn (res, operand1);
41851
41852 /* xa = abs (operand1) */
41853 xa = ix86_expand_sse_fabs (res, &mask);
41854
41855 /* if (!isless (xa, TWO52)) goto label; */
41856 TWO52 = ix86_gen_TWO52 (mode);
41857 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
41858
41859 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
41860 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
41861
41862 ix86_sse_copysign_to_positive (res, xa, res, mask);
41863
41864 emit_label (label);
41865 LABEL_NUSES (label) = 1;
41866
41867 emit_move_insn (operand0, res);
41868 }
41869
41870 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
41871    into OPERAND0, without relying on DImode truncation (usable on 32-bit targets). */
41872 void
41873 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
41874 {
41875 /* C code for the stuff we expand below.
41876 double xa = fabs (x), x2;
41877 if (!isless (xa, TWO52))
41878 return x;
41879 xa = xa + TWO52 - TWO52;
41880 x2 = copysign (xa, x);
41881 Compensate. Floor:
41882 if (x2 > x)
41883 x2 -= 1;
41884 Compensate. Ceil:
41885 if (x2 < x)
41886 x2 -= -1;
41887 return x2;
41888 */
41889 enum machine_mode mode = GET_MODE (operand0);
41890 rtx xa, TWO52, tmp, label, one, res, mask;
41891
41892 TWO52 = ix86_gen_TWO52 (mode);
41893
41894 /* Temporary for holding the result, initialized to the input
41895 operand to ease control flow. */
41896 res = gen_reg_rtx (mode);
41897 emit_move_insn (res, operand1);
41898
41899 /* xa = abs (operand1) */
41900 xa = ix86_expand_sse_fabs (res, &mask);
41901
41902 /* if (!isless (xa, TWO52)) goto label; */
41903 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
41904
41905 /* xa = xa + TWO52 - TWO52; */
41906 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
41907 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
41908
41909 /* xa = copysign (xa, operand1) */
41910 ix86_sse_copysign_to_positive (xa, xa, res, mask);
41911
41912 /* generate 1.0 or -1.0 */
41913 one = force_reg (mode,
41914 const_double_from_real_value (do_floor
41915 ? dconst1 : dconstm1, mode));
41916
41917 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
41918 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
41919 emit_insn (gen_rtx_SET (VOIDmode, tmp,
41920 gen_rtx_AND (mode, one, tmp)));
41921 /* We always need to subtract here to preserve signed zero. */
41922 tmp = expand_simple_binop (mode, MINUS,
41923 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
41924 emit_move_insn (res, tmp);
41925
41926 emit_label (label);
41927 LABEL_NUSES (label) = 1;
41928
41929 emit_move_insn (operand0, res);
41930 }
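
/* Editorial note (illustrative): why the compensation above is always a
   subtraction.  For ceil the constant loaded into ONE is -1.0, so
   subtracting it adds 1.  When no adjustment is needed, the compare mask
   is all zeroes and the AND yields +0.0; x2 - (+0.0) leaves a -0.0
   result intact, whereas x2 + (+0.0) would turn -0.0 into +0.0 and lose
   the sign that copysign just restored.  Example: x = -0.0 gives
   xa = 0.0, x2 = copysign (0.0, -0.0) = -0.0, an all-zero mask, and
   -0.0 - 0.0 = -0.0 as required, while -0.0 + 0.0 would be +0.0.  */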
41931
41932 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
41933 into OPERAND0. */
41934 void
41935 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
41936 {
41937 /* C code for the stuff we expand below.
41938 double xa = fabs (x), x2;
41939 if (!isless (xa, TWO52))
41940 return x;
41941 x2 = (double)(long)x;
41942 Compensate. Floor:
41943 if (x2 > x)
41944 x2 -= 1;
41945 Compensate. Ceil:
41946 if (x2 < x)
41947 x2 += 1;
41948 if (HONOR_SIGNED_ZEROS (mode))
41949 return copysign (x2, x);
41950 return x2;
41951 */
41952 enum machine_mode mode = GET_MODE (operand0);
41953 rtx xa, xi, TWO52, tmp, label, one, res, mask;
41954
41955 TWO52 = ix86_gen_TWO52 (mode);
41956
41957 /* Temporary for holding the result, initialized to the input
41958 operand to ease control flow. */
41959 res = gen_reg_rtx (mode);
41960 emit_move_insn (res, operand1);
41961
41962 /* xa = abs (operand1) */
41963 xa = ix86_expand_sse_fabs (res, &mask);
41964
41965 /* if (!isless (xa, TWO52)) goto label; */
41966 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
41967
41968 /* xa = (double)(long)x */
41969 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
41970 expand_fix (xi, res, 0);
41971 expand_float (xa, xi, 0);
41972
41973 /* generate 1.0 */
41974 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
41975
41976 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
41977 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
41978 emit_insn (gen_rtx_SET (VOIDmode, tmp,
41979 gen_rtx_AND (mode, one, tmp)));
41980 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
41981 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
41982 emit_move_insn (res, tmp);
41983
41984 if (HONOR_SIGNED_ZEROS (mode))
41985 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
41986
41987 emit_label (label);
41988 LABEL_NUSES (label) = 1;
41989
41990 emit_move_insn (operand0, res);
41991 }
41992
41993 /* Expand SSE sequence for computing round from OPERAND1 storing
41994    into OPERAND0. This sequence works without relying on DImode truncation
41995    via cvttsd2siq, which is only available on 64-bit targets. */
41996 void
41997 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
41998 {
41999 /* C code for the stuff we expand below.
42000 double xa = fabs (x), xa2, x2;
42001 if (!isless (xa, TWO52))
42002 return x;
42003 Using the absolute value and copying back sign makes
42004 -0.0 -> -0.0 correct.
42005 xa2 = xa + TWO52 - TWO52;
42006 Compensate.
42007 dxa = xa2 - xa;
42008 if (dxa <= -0.5)
42009 xa2 += 1;
42010 else if (dxa > 0.5)
42011 xa2 -= 1;
42012 x2 = copysign (xa2, x);
42013 return x2;
42014 */
42015 enum machine_mode mode = GET_MODE (operand0);
42016 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
42017
42018 TWO52 = ix86_gen_TWO52 (mode);
42019
42020 /* Temporary for holding the result, initialized to the input
42021 operand to ease control flow. */
42022 res = gen_reg_rtx (mode);
42023 emit_move_insn (res, operand1);
42024
42025 /* xa = abs (operand1) */
42026 xa = ix86_expand_sse_fabs (res, &mask);
42027
42028 /* if (!isless (xa, TWO52)) goto label; */
42029 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42030
42031 /* xa2 = xa + TWO52 - TWO52; */
42032 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
42033 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
42034
42035 /* dxa = xa2 - xa; */
42036 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
42037
42038 /* generate 0.5, 1.0 and -0.5 */
42039 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
42040 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
42041 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
42042 0, OPTAB_DIRECT);
42043
42044 /* Compensate. */
42045 tmp = gen_reg_rtx (mode);
42046 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
42047 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
42048 emit_insn (gen_rtx_SET (VOIDmode, tmp,
42049 gen_rtx_AND (mode, one, tmp)));
42050 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
42051 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
42052 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
42053 emit_insn (gen_rtx_SET (VOIDmode, tmp,
42054 gen_rtx_AND (mode, one, tmp)));
42055 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
42056
42057 /* res = copysign (xa2, operand1) */
42058 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
42059
42060 emit_label (label);
42061 LABEL_NUSES (label) = 1;
42062
42063 emit_move_insn (operand0, res);
42064 }
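
/* Worked example (editorial, illustrative only) of the compensation
   above.  Under the default round-to-nearest-even mode the TWO52 step
   rounds half-way magnitudes to even, while round () must round them
   away from zero, so dxa = xa2 - xa measures how the first step rounded
   and the two masked additions correct it:

     xa = 2.5  ->  xa2 = 2.0 (ties to even), dxa = -0.5,
                   dxa <= -0.5 fires, xa2 += 1  ->  3.0   (round (2.5) = 3)
     xa = 3.5  ->  xa2 = 4.0 (ties to even), dxa =  0.5,
                   neither test fires           ->  4.0   (round (3.5) = 4)
     xa = 2.4  ->  xa2 = 2.0, dxa = -0.4, no adjustment  ->  2.0

   The sign is copied back from the original operand afterwards, so e.g.
   -2.5 becomes -3.0.  */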
42065
42066 /* Expand SSE sequence for computing trunc from OPERAND1 storing
42067 into OPERAND0. */
42068 void
42069 ix86_expand_trunc (rtx operand0, rtx operand1)
42070 {
42071 /* C code for SSE variant we expand below.
42072 double xa = fabs (x), x2;
42073 if (!isless (xa, TWO52))
42074 return x;
42075 x2 = (double)(long)x;
42076 if (HONOR_SIGNED_ZEROS (mode))
42077 return copysign (x2, x);
42078 return x2;
42079 */
42080 enum machine_mode mode = GET_MODE (operand0);
42081 rtx xa, xi, TWO52, label, res, mask;
42082
42083 TWO52 = ix86_gen_TWO52 (mode);
42084
42085 /* Temporary for holding the result, initialized to the input
42086 operand to ease control flow. */
42087 res = gen_reg_rtx (mode);
42088 emit_move_insn (res, operand1);
42089
42090 /* xa = abs (operand1) */
42091 xa = ix86_expand_sse_fabs (res, &mask);
42092
42093 /* if (!isless (xa, TWO52)) goto label; */
42094 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42095
42096 /* x = (double)(long)x */
42097 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
42098 expand_fix (xi, res, 0);
42099 expand_float (res, xi, 0);
42100
42101 if (HONOR_SIGNED_ZEROS (mode))
42102 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
42103
42104 emit_label (label);
42105 LABEL_NUSES (label) = 1;
42106
42107 emit_move_insn (operand0, res);
42108 }
42109
42110 /* Expand SSE sequence for computing trunc from OPERAND1 storing
42111    into OPERAND0, without relying on DImode truncation (usable on 32-bit targets). */
42112 void
42113 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
42114 {
42115 enum machine_mode mode = GET_MODE (operand0);
42116 rtx xa, mask, TWO52, label, one, res, smask, tmp;
42117
42118 /* C code for SSE variant we expand below.
42119 double xa = fabs (x), xa2, x2;
42120 if (!isless (xa, TWO52))
42121 return x;
42122 xa2 = xa + TWO52 - TWO52;
42123 Compensate:
42124 if (xa2 > xa)
42125 xa2 -= 1.0;
42126 x2 = copysign (xa2, x);
42127 return x2;
42128 */
42129
42130 TWO52 = ix86_gen_TWO52 (mode);
42131
42132 /* Temporary for holding the result, initialized to the input
42133 operand to ease control flow. */
42134 res = gen_reg_rtx (mode);
42135 emit_move_insn (res, operand1);
42136
42137 /* xa = abs (operand1) */
42138 xa = ix86_expand_sse_fabs (res, &smask);
42139
42140 /* if (!isless (xa, TWO52)) goto label; */
42141 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42142
42143 /* res = xa + TWO52 - TWO52; */
42144 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
42145 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
42146 emit_move_insn (res, tmp);
42147
42148 /* generate 1.0 */
42149 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
42150
42151 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
42152 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
42153 emit_insn (gen_rtx_SET (VOIDmode, mask,
42154 gen_rtx_AND (mode, mask, one)));
42155 tmp = expand_simple_binop (mode, MINUS,
42156 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
42157 emit_move_insn (res, tmp);
42158
42159 /* res = copysign (res, operand1) */
42160 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
42161
42162 emit_label (label);
42163 LABEL_NUSES (label) = 1;
42164
42165 emit_move_insn (operand0, res);
42166 }
42167
42168 /* Expand SSE sequence for computing round from OPERAND1 storing
42169 into OPERAND0. */
42170 void
42171 ix86_expand_round (rtx operand0, rtx operand1)
42172 {
42173 /* C code for the stuff we're doing below:
42174 double xa = fabs (x);
42175 if (!isless (xa, TWO52))
42176 return x;
42177 xa = (double)(long)(xa + nextafter (0.5, 0.0));
42178 return copysign (xa, x);
42179 */
42180 enum machine_mode mode = GET_MODE (operand0);
42181 rtx res, TWO52, xa, label, xi, half, mask;
42182 const struct real_format *fmt;
42183 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
42184
42185 /* Temporary for holding the result, initialized to the input
42186 operand to ease control flow. */
42187 res = gen_reg_rtx (mode);
42188 emit_move_insn (res, operand1);
42189
42190 TWO52 = ix86_gen_TWO52 (mode);
42191 xa = ix86_expand_sse_fabs (res, &mask);
42192 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42193
42194 /* load nextafter (0.5, 0.0) */
42195 fmt = REAL_MODE_FORMAT (mode);
42196 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
42197 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
42198
42199 /* xa = xa + nextafter (0.5, 0.0) */
42200 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
42201 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
42202
42203 /* xa = (double)(int64_t)xa */
42204 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
42205 expand_fix (xi, xa, 0);
42206 expand_float (xa, xi, 0);
42207
42208 /* res = copysign (xa, operand1) */
42209 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
42210
42211 emit_label (label);
42212 LABEL_NUSES (label) = 1;
42213
42214 emit_move_insn (operand0, res);
42215 }
42216
42217 /* Expand SSE sequence for computing round
42218 from OP1 storing into OP0 using sse4 round insn. */
42219 void
42220 ix86_expand_round_sse4 (rtx op0, rtx op1)
42221 {
42222 enum machine_mode mode = GET_MODE (op0);
42223 rtx e1, e2, res, half;
42224 const struct real_format *fmt;
42225 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
42226 rtx (*gen_copysign) (rtx, rtx, rtx);
42227 rtx (*gen_round) (rtx, rtx, rtx);
42228
42229 switch (mode)
42230 {
42231 case SFmode:
42232 gen_copysign = gen_copysignsf3;
42233 gen_round = gen_sse4_1_roundsf2;
42234 break;
42235 case DFmode:
42236 gen_copysign = gen_copysigndf3;
42237 gen_round = gen_sse4_1_rounddf2;
42238 break;
42239 default:
42240 gcc_unreachable ();
42241 }
42242
42243 /* round (a) = trunc (a + copysign (0.5, a)) */
42244
42245 /* load nextafter (0.5, 0.0) */
42246 fmt = REAL_MODE_FORMAT (mode);
42247 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
42248 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
42249 half = const_double_from_real_value (pred_half, mode);
42250
42251 /* e1 = copysign (0.5, op1) */
42252 e1 = gen_reg_rtx (mode);
42253 emit_insn (gen_copysign (e1, half, op1));
42254
42255 /* e2 = op1 + e1 */
42256 e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);
42257
42258 /* res = trunc (e2) */
42259 res = gen_reg_rtx (mode);
42260 emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));
42261
42262 emit_move_insn (op0, res);
42263 }
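
/* Editorial note (illustrative): the identity used above,
   round (a) = trunc (a + copysign (0.5, a)), rounds half-way cases away
   from zero as round () requires: for a = -2.5 the adjusted value rounds
   to -3.0 and the truncation (the sse4_1 round insn with the ROUND_TRUNC
   immediate) gives -3; for a = 2.5 it gives 3.  As in ix86_expand_lround,
   the constant actually added is nextafter (0.5, 0.0), so values just
   below 0.5 in magnitude are not pushed across the next integer by the
   rounding of the addition itself.  A scalar sketch (assumes <math.h>;
   the helper name is made up):

     static double
     round_model (double a)
     {
       double adj = copysign (nextafter (0.5, 0.0), a);
       return trunc (a + adj);
     }
*/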
42264 \f
42265
42266 /* Table of valid machine attributes. */
42267 static const struct attribute_spec ix86_attribute_table[] =
42268 {
42269 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
42270 affects_type_identity } */
42271 /* Stdcall attribute says callee is responsible for popping arguments
42272 if they are not variable. */
42273 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42274 true },
42275 /* Fastcall attribute says callee is responsible for popping arguments
42276 if they are not variable. */
42277 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42278 true },
42279 /* Thiscall attribute says callee is responsible for popping arguments
42280 if they are not variable. */
42281 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42282 true },
42283 /* Cdecl attribute says the callee is a normal C declaration. */
42284 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42285 true },
42286 /* Regparm attribute specifies how many integer arguments are to be
42287 passed in registers. */
42288 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute,
42289 true },
42290 /* Sseregparm attribute says we are using x86_64 calling conventions
42291 for FP arguments. */
42292 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42293 true },
42294 /* The transactional memory builtins are implicitly regparm or fastcall
42295 depending on the ABI. Override the generic do-nothing attribute that
42296 these builtins were declared with. */
42297 { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute,
42298 true },
42299 /* force_align_arg_pointer says this function realigns the stack at entry. */
42300 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
42301 false, true, true, ix86_handle_cconv_attribute, false },
42302 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
42303 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
42304 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
42305 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute,
42306 false },
42307 #endif
42308 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
42309 false },
42310 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
42311 false },
42312 #ifdef SUBTARGET_ATTRIBUTE_TABLE
42313 SUBTARGET_ATTRIBUTE_TABLE,
42314 #endif
42315 /* ms_abi and sysv_abi calling convention function attributes. */
42316 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
42317 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
42318 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
42319 false },
42320 { "callee_pop_aggregate_return", 1, 1, false, true, true,
42321 ix86_handle_callee_pop_aggregate_return, true },
42322 /* End element. */
42323 { NULL, 0, 0, false, false, false, NULL, false }
42324 };
42325
42326 /* Implement targetm.vectorize.builtin_vectorization_cost. */
42327 static int
42328 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
42329 tree vectype,
42330 int misalign ATTRIBUTE_UNUSED)
42331 {
42332 unsigned elements;
42333
42334 switch (type_of_cost)
42335 {
42336 case scalar_stmt:
42337 return ix86_cost->scalar_stmt_cost;
42338
42339 case scalar_load:
42340 return ix86_cost->scalar_load_cost;
42341
42342 case scalar_store:
42343 return ix86_cost->scalar_store_cost;
42344
42345 case vector_stmt:
42346 return ix86_cost->vec_stmt_cost;
42347
42348 case vector_load:
42349 return ix86_cost->vec_align_load_cost;
42350
42351 case vector_store:
42352 return ix86_cost->vec_store_cost;
42353
42354 case vec_to_scalar:
42355 return ix86_cost->vec_to_scalar_cost;
42356
42357 case scalar_to_vec:
42358 return ix86_cost->scalar_to_vec_cost;
42359
42360 case unaligned_load:
42361 case unaligned_store:
42362 return ix86_cost->vec_unalign_load_cost;
42363
42364 case cond_branch_taken:
42365 return ix86_cost->cond_taken_branch_cost;
42366
42367 case cond_branch_not_taken:
42368 return ix86_cost->cond_not_taken_branch_cost;
42369
42370 case vec_perm:
42371 case vec_promote_demote:
42372 return ix86_cost->vec_stmt_cost;
42373
42374 case vec_construct:
42375 elements = TYPE_VECTOR_SUBPARTS (vectype);
42376 return elements / 2 + 1;
42377
42378 default:
42379 gcc_unreachable ();
42380 }
42381 }
42382
42383 /* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
42384 insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
42385 insn every time. */
42386
42387 static GTY(()) rtx vselect_insn;
42388
42389 /* Initialize vselect_insn. */
42390
42391 static void
42392 init_vselect_insn (void)
42393 {
42394 unsigned i;
42395 rtx x;
42396
42397 x = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (MAX_VECT_LEN));
42398 for (i = 0; i < MAX_VECT_LEN; ++i)
42399 XVECEXP (x, 0, i) = const0_rtx;
42400 x = gen_rtx_VEC_SELECT (V2DFmode, gen_rtx_VEC_CONCAT (V4DFmode, const0_rtx,
42401 const0_rtx), x);
42402 x = gen_rtx_SET (VOIDmode, const0_rtx, x);
42403 start_sequence ();
42404 vselect_insn = emit_insn (x);
42405 end_sequence ();
42406 }
42407
42408 /* Construct (set target (vec_select op0 (parallel perm))) and
42409 return true if that's a valid instruction in the active ISA. */
42410
42411 static bool
42412 expand_vselect (rtx target, rtx op0, const unsigned char *perm,
42413 unsigned nelt, bool testing_p)
42414 {
42415 unsigned int i;
42416 rtx x, save_vconcat;
42417 int icode;
42418
42419 if (vselect_insn == NULL_RTX)
42420 init_vselect_insn ();
42421
42422 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 1);
42423 PUT_NUM_ELEM (XVEC (x, 0), nelt);
42424 for (i = 0; i < nelt; ++i)
42425 XVECEXP (x, 0, i) = GEN_INT (perm[i]);
42426 save_vconcat = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
42427 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = op0;
42428 PUT_MODE (SET_SRC (PATTERN (vselect_insn)), GET_MODE (target));
42429 SET_DEST (PATTERN (vselect_insn)) = target;
42430 icode = recog_memoized (vselect_insn);
42431
42432 if (icode >= 0 && !testing_p)
42433 emit_insn (copy_rtx (PATTERN (vselect_insn)));
42434
42435 SET_DEST (PATTERN (vselect_insn)) = const0_rtx;
42436 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = save_vconcat;
42437 INSN_CODE (vselect_insn) = -1;
42438
42439 return icode >= 0;
42440 }
42441
42442 /* Similar, but generate a vec_concat from op0 and op1 as well. */
42443
42444 static bool
42445 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
42446 const unsigned char *perm, unsigned nelt,
42447 bool testing_p)
42448 {
42449 enum machine_mode v2mode;
42450 rtx x;
42451 bool ok;
42452
42453 if (vselect_insn == NULL_RTX)
42454 init_vselect_insn ();
42455
42456 v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
42457 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
42458 PUT_MODE (x, v2mode);
42459 XEXP (x, 0) = op0;
42460 XEXP (x, 1) = op1;
42461 ok = expand_vselect (target, x, perm, nelt, testing_p);
42462 XEXP (x, 0) = const0_rtx;
42463 XEXP (x, 1) = const0_rtx;
42464 return ok;
42465 }
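
/* Illustrative example (editorial): with a vec_concat of two V4SFmode
   operands, element indices 0-3 select from OP0 and 4-7 select from OP1,
   so the permutation { 0, 4, 1, 5 } describes unpcklps and
   { 2, 6, 3, 7 } describes unpckhps.  expand_vselect_vconcat simply
   rewrites the cached insn into
   (vec_select:V4SF (vec_concat:V8SF op0 op1) (parallel ...)) and lets
   recog_memoized decide whether the active ISA has a pattern for it.  */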
42466
42467 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
42468 in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
42469
42470 static bool
42471 expand_vec_perm_blend (struct expand_vec_perm_d *d)
42472 {
42473 enum machine_mode vmode = d->vmode;
42474 unsigned i, mask, nelt = d->nelt;
42475 rtx target, op0, op1, x;
42476 rtx rperm[32], vperm;
42477
42478 if (d->one_operand_p)
42479 return false;
42480 if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
42481 ;
42482 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
42483 ;
42484 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
42485 ;
42486 else
42487 return false;
42488
42489 /* This is a blend, not a permute. Elements must stay in their
42490 respective lanes. */
42491 for (i = 0; i < nelt; ++i)
42492 {
42493 unsigned e = d->perm[i];
42494 if (!(e == i || e == i + nelt))
42495 return false;
42496 }
42497
42498 if (d->testing_p)
42499 return true;
42500
42501 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
42502 decision should be extracted elsewhere, so that we only try that
42503 sequence once all budget==3 options have been tried. */
42504 target = d->target;
42505 op0 = d->op0;
42506 op1 = d->op1;
42507 mask = 0;
42508
42509 switch (vmode)
42510 {
42511 case V4DFmode:
42512 case V8SFmode:
42513 case V2DFmode:
42514 case V4SFmode:
42515 case V8HImode:
42516 case V8SImode:
42517 for (i = 0; i < nelt; ++i)
42518 mask |= (d->perm[i] >= nelt) << i;
42519 break;
42520
42521 case V2DImode:
42522 for (i = 0; i < 2; ++i)
42523 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
42524 vmode = V8HImode;
42525 goto do_subreg;
42526
42527 case V4SImode:
42528 for (i = 0; i < 4; ++i)
42529 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
42530 vmode = V8HImode;
42531 goto do_subreg;
42532
42533 case V16QImode:
42534 /* See if bytes move in pairs so we can use pblendw with
42535 an immediate argument, rather than pblendvb with a vector
42536 argument. */
42537 for (i = 0; i < 16; i += 2)
42538 if (d->perm[i] + 1 != d->perm[i + 1])
42539 {
42540 use_pblendvb:
42541 for (i = 0; i < nelt; ++i)
42542 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
42543
42544 finish_pblendvb:
42545 vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
42546 vperm = force_reg (vmode, vperm);
42547
42548 if (GET_MODE_SIZE (vmode) == 16)
42549 emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
42550 else
42551 emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
42552 if (target != d->target)
42553 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
42554 return true;
42555 }
42556
42557 for (i = 0; i < 8; ++i)
42558 mask |= (d->perm[i * 2] >= 16) << i;
42559 vmode = V8HImode;
42560 /* FALLTHRU */
42561
42562 do_subreg:
42563 target = gen_reg_rtx (vmode);
42564 op0 = gen_lowpart (vmode, op0);
42565 op1 = gen_lowpart (vmode, op1);
42566 break;
42567
42568 case V32QImode:
42569 /* See if bytes move in pairs. If not, vpblendvb must be used. */
42570 for (i = 0; i < 32; i += 2)
42571 if (d->perm[i] + 1 != d->perm[i + 1])
42572 goto use_pblendvb;
42573 /* See if bytes move in quadruplets. If yes, vpblendd
42574 with immediate can be used. */
42575 for (i = 0; i < 32; i += 4)
42576 if (d->perm[i] + 2 != d->perm[i + 2])
42577 break;
42578 if (i < 32)
42579 {
42580 /* See if bytes move the same in both lanes. If yes,
42581 vpblendw with immediate can be used. */
42582 for (i = 0; i < 16; i += 2)
42583 if (d->perm[i] + 16 != d->perm[i + 16])
42584 goto use_pblendvb;
42585
42586 /* Use vpblendw. */
42587 for (i = 0; i < 16; ++i)
42588 mask |= (d->perm[i * 2] >= 32) << i;
42589 vmode = V16HImode;
42590 goto do_subreg;
42591 }
42592
42593 /* Use vpblendd. */
42594 for (i = 0; i < 8; ++i)
42595 mask |= (d->perm[i * 4] >= 32) << i;
42596 vmode = V8SImode;
42597 goto do_subreg;
42598
42599 case V16HImode:
42600 /* See if words move in pairs. If yes, vpblendd can be used. */
42601 for (i = 0; i < 16; i += 2)
42602 if (d->perm[i] + 1 != d->perm[i + 1])
42603 break;
42604 if (i < 16)
42605 {
42606 /* See if words move the same in both lanes. If not,
42607 vpblendvb must be used. */
42608 for (i = 0; i < 8; i++)
42609 if (d->perm[i] + 8 != d->perm[i + 8])
42610 {
42611 /* Use vpblendvb. */
42612 for (i = 0; i < 32; ++i)
42613 rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx);
42614
42615 vmode = V32QImode;
42616 nelt = 32;
42617 target = gen_reg_rtx (vmode);
42618 op0 = gen_lowpart (vmode, op0);
42619 op1 = gen_lowpart (vmode, op1);
42620 goto finish_pblendvb;
42621 }
42622
42623 /* Use vpblendw. */
42624 for (i = 0; i < 16; ++i)
42625 mask |= (d->perm[i] >= 16) << i;
42626 break;
42627 }
42628
42629 /* Use vpblendd. */
42630 for (i = 0; i < 8; ++i)
42631 mask |= (d->perm[i * 2] >= 16) << i;
42632 vmode = V8SImode;
42633 goto do_subreg;
42634
42635 case V4DImode:
42636 /* Use vpblendd. */
42637 for (i = 0; i < 4; ++i)
42638 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
42639 vmode = V8SImode;
42640 goto do_subreg;
42641
42642 default:
42643 gcc_unreachable ();
42644 }
42645
42646 /* This matches five different patterns with the different modes. */
42647 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
42648 x = gen_rtx_SET (VOIDmode, target, x);
42649 emit_insn (x);
42650 if (target != d->target)
42651 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
42652
42653 return true;
42654 }
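
/* Worked example (editorial, illustrative only) of the immediate
   construction above.  For the V8HImode permutation
   { 0, 9, 2, 11, 4, 13, 6, 15 } every element stays in its lane and the
   odd positions come from the second operand, so bit i of MASK is set
   exactly for i = 1, 3, 5, 7, giving mask = 0xaa; the vec_merge emitted
   at the end then matches the pblendw pattern with that immediate.  For
   V4SImode and V2DImode the blend is instead performed in V8HImode, with
   two or four mask bits per original element, because there is no
   integer dword/qword blend with an immediate before AVX2's vpblendd.  */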
42655
42656 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
42657 in terms of the variable form of vpermilps.
42658
42659 Note that we will have already failed the immediate input vpermilps,
42660 which requires that the high and low part shuffle be identical; the
42661 variable form doesn't require that. */
42662
42663 static bool
42664 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
42665 {
42666 rtx rperm[8], vperm;
42667 unsigned i;
42668
42669 if (!TARGET_AVX || d->vmode != V8SFmode || !d->one_operand_p)
42670 return false;
42671
42672 /* We can only permute within the 128-bit lane. */
42673 for (i = 0; i < 8; ++i)
42674 {
42675 unsigned e = d->perm[i];
42676 if (i < 4 ? e >= 4 : e < 4)
42677 return false;
42678 }
42679
42680 if (d->testing_p)
42681 return true;
42682
42683 for (i = 0; i < 8; ++i)
42684 {
42685 unsigned e = d->perm[i];
42686
42687 /* Within each 128-bit lane, the elements of op0 are numbered
42688 from 0 and the elements of op1 are numbered from 4. */
42689 if (e >= 8 + 4)
42690 e -= 8;
42691 else if (e >= 4)
42692 e -= 4;
42693
42694 rperm[i] = GEN_INT (e);
42695 }
42696
42697 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
42698 vperm = force_reg (V8SImode, vperm);
42699 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
42700
42701 return true;
42702 }
42703
42704 /* Return true if permutation D can be performed as a VMODE permutation
42705    instead. */
42706
42707 static bool
42708 valid_perm_using_mode_p (enum machine_mode vmode, struct expand_vec_perm_d *d)
42709 {
42710 unsigned int i, j, chunk;
42711
42712 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
42713 || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
42714 || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))
42715 return false;
42716
42717 if (GET_MODE_NUNITS (vmode) >= d->nelt)
42718 return true;
42719
42720 chunk = d->nelt / GET_MODE_NUNITS (vmode);
42721 for (i = 0; i < d->nelt; i += chunk)
42722 if (d->perm[i] & (chunk - 1))
42723 return false;
42724 else
42725 for (j = 1; j < chunk; ++j)
42726 if (d->perm[i] + j != d->perm[i + j])
42727 return false;
42728
42729 return true;
42730 }
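
/* Illustrative example (editorial): the V16QImode permutation
   { 4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11 } moves bytes
   in aligned groups of chunk = 16 / 4 = 4, each group starting on a
   multiple of 4 and staying consecutive, so it can be performed as the
   V4SImode permutation { 1, 0, 3, 2 } instead.  A permutation such as
   { 1, 2, 3, 4, ... } fails the (d->perm[i] & (chunk - 1)) test and must
   stay in the narrower mode.  */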
42731
42732 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
42733 in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128. */
42734
42735 static bool
42736 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
42737 {
42738 unsigned i, nelt, eltsz, mask;
42739 unsigned char perm[32];
42740 enum machine_mode vmode = V16QImode;
42741 rtx rperm[32], vperm, target, op0, op1;
42742
42743 nelt = d->nelt;
42744
42745 if (!d->one_operand_p)
42746 {
42747 if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
42748 {
42749 if (TARGET_AVX2
42750 && valid_perm_using_mode_p (V2TImode, d))
42751 {
42752 if (d->testing_p)
42753 return true;
42754
42755 /* Use vperm2i128 insn. The pattern uses
42756 V4DImode instead of V2TImode. */
42757 target = d->target;
42758 if (d->vmode != V4DImode)
42759 target = gen_reg_rtx (V4DImode);
42760 op0 = gen_lowpart (V4DImode, d->op0);
42761 op1 = gen_lowpart (V4DImode, d->op1);
42762 rperm[0]
42763 = GEN_INT ((d->perm[0] / (nelt / 2))
42764 | ((d->perm[nelt / 2] / (nelt / 2)) * 16));
42765 emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
42766 if (target != d->target)
42767 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
42768 return true;
42769 }
42770 return false;
42771 }
42772 }
42773 else
42774 {
42775 if (GET_MODE_SIZE (d->vmode) == 16)
42776 {
42777 if (!TARGET_SSSE3)
42778 return false;
42779 }
42780 else if (GET_MODE_SIZE (d->vmode) == 32)
42781 {
42782 if (!TARGET_AVX2)
42783 return false;
42784
42785 /* V4DImode should be already handled through
42786 expand_vselect by vpermq instruction. */
42787 gcc_assert (d->vmode != V4DImode);
42788
42789 vmode = V32QImode;
42790 if (d->vmode == V8SImode
42791 || d->vmode == V16HImode
42792 || d->vmode == V32QImode)
42793 {
42794 /* First see if vpermq can be used for
42795 V8SImode/V16HImode/V32QImode. */
42796 if (valid_perm_using_mode_p (V4DImode, d))
42797 {
42798 for (i = 0; i < 4; i++)
42799 perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
42800 if (d->testing_p)
42801 return true;
42802 target = gen_reg_rtx (V4DImode);
42803 if (expand_vselect (target, gen_lowpart (V4DImode, d->op0),
42804 perm, 4, false))
42805 {
42806 emit_move_insn (d->target,
42807 gen_lowpart (d->vmode, target));
42808 return true;
42809 }
42810 return false;
42811 }
42812
42813 /* Next see if vpermd can be used. */
42814 if (valid_perm_using_mode_p (V8SImode, d))
42815 vmode = V8SImode;
42816 }
42817 /* Or if vpermps can be used. */
42818 else if (d->vmode == V8SFmode)
42819 vmode = V8SImode;
42820
42821 if (vmode == V32QImode)
42822 {
42823 /* vpshufb only works within 128-bit lanes; it is not
42824 possible to shuffle bytes between the lanes. */
42825 for (i = 0; i < nelt; ++i)
42826 if ((d->perm[i] ^ i) & (nelt / 2))
42827 return false;
42828 }
42829 }
42830 else
42831 return false;
42832 }
42833
42834 if (d->testing_p)
42835 return true;
42836
42837 if (vmode == V8SImode)
42838 for (i = 0; i < 8; ++i)
42839 rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
42840 else
42841 {
42842 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
42843 if (!d->one_operand_p)
42844 mask = 2 * nelt - 1;
42845 else if (vmode == V16QImode)
42846 mask = nelt - 1;
42847 else
42848 mask = nelt / 2 - 1;
42849
42850 for (i = 0; i < nelt; ++i)
42851 {
42852 unsigned j, e = d->perm[i] & mask;
42853 for (j = 0; j < eltsz; ++j)
42854 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
42855 }
42856 }
42857
42858 vperm = gen_rtx_CONST_VECTOR (vmode,
42859 gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
42860 vperm = force_reg (vmode, vperm);
42861
42862 target = d->target;
42863 if (d->vmode != vmode)
42864 target = gen_reg_rtx (vmode);
42865 op0 = gen_lowpart (vmode, d->op0);
42866 if (d->one_operand_p)
42867 {
42868 if (vmode == V16QImode)
42869 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
42870 else if (vmode == V32QImode)
42871 emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
42872 else if (vmode == V8SFmode)
42873 emit_insn (gen_avx2_permvarv8sf (target, op0, vperm));
42874 else
42875 emit_insn (gen_avx2_permvarv8si (target, op0, vperm));
42876 }
42877 else
42878 {
42879 op1 = gen_lowpart (vmode, d->op1);
42880 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
42881 }
42882 if (target != d->target)
42883 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
42884
42885 return true;
42886 }
42887
42888 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
42889 in a single instruction. */
42890
42891 static bool
42892 expand_vec_perm_1 (struct expand_vec_perm_d *d)
42893 {
42894 unsigned i, nelt = d->nelt;
42895 unsigned char perm2[MAX_VECT_LEN];
42896
42897 /* Check plain VEC_SELECT first, because AVX has instructions that could
42898 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
42899 input where SEL+CONCAT may not. */
42900 if (d->one_operand_p)
42901 {
42902 int mask = nelt - 1;
42903 bool identity_perm = true;
42904 bool broadcast_perm = true;
42905
42906 for (i = 0; i < nelt; i++)
42907 {
42908 perm2[i] = d->perm[i] & mask;
42909 if (perm2[i] != i)
42910 identity_perm = false;
42911 if (perm2[i])
42912 broadcast_perm = false;
42913 }
42914
42915 if (identity_perm)
42916 {
42917 if (!d->testing_p)
42918 emit_move_insn (d->target, d->op0);
42919 return true;
42920 }
42921 else if (broadcast_perm && TARGET_AVX2)
42922 {
42923 /* Use vpbroadcast{b,w,d}. */
42924 rtx (*gen) (rtx, rtx) = NULL;
42925 switch (d->vmode)
42926 {
42927 case V32QImode:
42928 gen = gen_avx2_pbroadcastv32qi_1;
42929 break;
42930 case V16HImode:
42931 gen = gen_avx2_pbroadcastv16hi_1;
42932 break;
42933 case V8SImode:
42934 gen = gen_avx2_pbroadcastv8si_1;
42935 break;
42936 case V16QImode:
42937 gen = gen_avx2_pbroadcastv16qi;
42938 break;
42939 case V8HImode:
42940 gen = gen_avx2_pbroadcastv8hi;
42941 break;
42942 case V8SFmode:
42943 gen = gen_avx2_vec_dupv8sf_1;
42944 break;
42945 /* For other modes prefer other shuffles this function creates. */
42946 default: break;
42947 }
42948 if (gen != NULL)
42949 {
42950 if (!d->testing_p)
42951 emit_insn (gen (d->target, d->op0));
42952 return true;
42953 }
42954 }
42955
42956 if (expand_vselect (d->target, d->op0, perm2, nelt, d->testing_p))
42957 return true;
42958
42959 /* There are plenty of patterns in sse.md that are written for
42960 SEL+CONCAT and are not replicated for a single op. Perhaps
42961 that should be changed, to avoid the nastiness here. */
42962
42963 /* Recognize interleave style patterns, which means incrementing
42964 every other permutation operand. */
42965 for (i = 0; i < nelt; i += 2)
42966 {
42967 perm2[i] = d->perm[i] & mask;
42968 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
42969 }
42970 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
42971 d->testing_p))
42972 return true;
42973
42974 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
42975 if (nelt >= 4)
42976 {
42977 for (i = 0; i < nelt; i += 4)
42978 {
42979 perm2[i + 0] = d->perm[i + 0] & mask;
42980 perm2[i + 1] = d->perm[i + 1] & mask;
42981 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
42982 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
42983 }
42984
42985 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
42986 d->testing_p))
42987 return true;
42988 }
42989 }
42990
42991 /* Finally, try the fully general two operand permute. */
42992 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt,
42993 d->testing_p))
42994 return true;
42995
42996 /* Recognize interleave style patterns with reversed operands. */
42997 if (!d->one_operand_p)
42998 {
42999 for (i = 0; i < nelt; ++i)
43000 {
43001 unsigned e = d->perm[i];
43002 if (e >= nelt)
43003 e -= nelt;
43004 else
43005 e += nelt;
43006 perm2[i] = e;
43007 }
43008
43009 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt,
43010 d->testing_p))
43011 return true;
43012 }
43013
43014 /* Try the SSE4.1 blend variable merge instructions. */
43015 if (expand_vec_perm_blend (d))
43016 return true;
43017
43018 /* Try one of the AVX vpermil variable permutations. */
43019 if (expand_vec_perm_vpermil (d))
43020 return true;
43021
43022 /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
43023 vpshufb, vpermd, vpermps or vpermq variable permutation. */
43024 if (expand_vec_perm_pshufb (d))
43025 return true;
43026
43027 /* Try the AVX512F vpermi2 instructions. */
43028 rtx vec[64];
43029 enum machine_mode mode = d->vmode;
43030 if (mode == V8DFmode)
43031 mode = V8DImode;
43032 else if (mode == V16SFmode)
43033 mode = V16SImode;
43034 for (i = 0; i < nelt; ++i)
43035 vec[i] = GEN_INT (d->perm[i]);
43036 rtx mask = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt, vec));
43037 if (ix86_expand_vec_perm_vpermi2 (d->target, d->op0, mask, d->op1))
43038 return true;
43039
43040 return false;
43041 }
43042
43043 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
43044 in terms of a pair of pshuflw + pshufhw instructions. */
43045
43046 static bool
43047 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
43048 {
43049 unsigned char perm2[MAX_VECT_LEN];
43050 unsigned i;
43051 bool ok;
43052
43053 if (d->vmode != V8HImode || !d->one_operand_p)
43054 return false;
43055
43056 /* The two permutations only operate in 64-bit lanes. */
43057 for (i = 0; i < 4; ++i)
43058 if (d->perm[i] >= 4)
43059 return false;
43060 for (i = 4; i < 8; ++i)
43061 if (d->perm[i] < 4)
43062 return false;
43063
43064 if (d->testing_p)
43065 return true;
43066
43067 /* Emit the pshuflw. */
43068 memcpy (perm2, d->perm, 4);
43069 for (i = 4; i < 8; ++i)
43070 perm2[i] = i;
43071 ok = expand_vselect (d->target, d->op0, perm2, 8, d->testing_p);
43072 gcc_assert (ok);
43073
43074 /* Emit the pshufhw. */
43075 memcpy (perm2 + 4, d->perm + 4, 4);
43076 for (i = 0; i < 4; ++i)
43077 perm2[i] = i;
43078 ok = expand_vselect (d->target, d->target, perm2, 8, d->testing_p);
43079 gcc_assert (ok);
43080
43081 return true;
43082 }
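
/* Illustrative example (editorial): the V8HImode one-operand permutation
   { 2, 1, 3, 0, 5, 7, 6, 4 } keeps the low four indices below 4 and the
   high four at or above 4, so it is expanded as pshuflw with
   { 2, 1, 3, 0, 4, 5, 6, 7 } (high half left alone) followed by pshufhw
   with { 0, 1, 2, 3, 5, 7, 6, 4 } (low half left alone) applied to the
   result.  */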
43083
43084 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
43085 the permutation using the SSSE3 palignr instruction. This succeeds
43086 when all of the elements in PERM fit within one vector and we merely
43087 need to shift them down so that a single vector permutation has a
43088 chance to succeed. */
43089
43090 static bool
43091 expand_vec_perm_palignr (struct expand_vec_perm_d *d)
43092 {
43093 unsigned i, nelt = d->nelt;
43094 unsigned min, max;
43095 bool in_order, ok;
43096 rtx shift, target;
43097 struct expand_vec_perm_d dcopy;
43098
43099 /* Even with AVX, palignr only operates on 128-bit vectors. */
43100 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
43101 return false;
43102
43103 min = nelt, max = 0;
43104 for (i = 0; i < nelt; ++i)
43105 {
43106 unsigned e = d->perm[i];
43107 if (e < min)
43108 min = e;
43109 if (e > max)
43110 max = e;
43111 }
43112 if (min == 0 || max - min >= nelt)
43113 return false;
43114
43115 /* Given that we have SSSE3, we know we'll be able to implement the
43116 single operand permutation after the palignr with pshufb. */
43117 if (d->testing_p)
43118 return true;
43119
43120 dcopy = *d;
43121 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
43122 target = gen_reg_rtx (TImode);
43123 emit_insn (gen_ssse3_palignrti (target, gen_lowpart (TImode, d->op1),
43124 gen_lowpart (TImode, d->op0), shift));
43125
43126 dcopy.op0 = dcopy.op1 = gen_lowpart (d->vmode, target);
43127 dcopy.one_operand_p = true;
43128
43129 in_order = true;
43130 for (i = 0; i < nelt; ++i)
43131 {
43132 unsigned e = dcopy.perm[i] - min;
43133 if (e != i)
43134 in_order = false;
43135 dcopy.perm[i] = e;
43136 }
43137
43138 /* Test for the degenerate case where the alignment by itself
43139 produces the desired permutation. */
43140 if (in_order)
43141 {
43142 emit_move_insn (d->target, dcopy.op0);
43143 return true;
43144 }
43145
43146 ok = expand_vec_perm_1 (&dcopy);
43147 gcc_assert (ok);
43148
43149 return ok;
43150 }
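
/* Illustrative example (editorial): for a V16QImode two-operand
   permutation that selects the consecutive bytes { 3, 4, ..., 18 } we
   have min = 3 and max - min = 15 < nelt, so a single palignr of op1:op0
   by 3 bytes already produces the desired vector; dcopy.perm becomes the
   identity and the in_order fast path above just copies the shifted
   result.  For a non-contiguous selection within such a window, the
   shift is still by min elements and the remaining one-operand
   permutation is handed back to expand_vec_perm_1, where pshufb is
   guaranteed to be available because this routine requires SSSE3.  */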
43151
43152 static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);
43153
43154 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
43155 a two vector permutation into a single vector permutation by using
43156 an interleave operation to merge the vectors. */
43157
43158 static bool
43159 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
43160 {
43161 struct expand_vec_perm_d dremap, dfinal;
43162 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
43163 unsigned HOST_WIDE_INT contents;
43164 unsigned char remap[2 * MAX_VECT_LEN];
43165 rtx seq;
43166 bool ok, same_halves = false;
43167
43168 if (GET_MODE_SIZE (d->vmode) == 16)
43169 {
43170 if (d->one_operand_p)
43171 return false;
43172 }
43173 else if (GET_MODE_SIZE (d->vmode) == 32)
43174 {
43175 if (!TARGET_AVX)
43176 return false;
43177 /* For 32-byte modes allow even d->one_operand_p.
43178 The lack of cross-lane shuffling in some instructions
43179 might prevent a single insn shuffle. */
43180 dfinal = *d;
43181 dfinal.testing_p = true;
43182 /* If expand_vec_perm_interleave3 can expand this into
43183 a 3 insn sequence, give up and let it be expanded as
43184 a 3 insn sequence. While that is one insn longer,
43185 it doesn't need a memory operand, and in the common
43186 case where both the interleave low and interleave high
43187 permutations with the same operands are adjacent, the
43188 pair needs only 4 insns after CSE. */
43189 if (expand_vec_perm_interleave3 (&dfinal))
43190 return false;
43191 }
43192 else
43193 return false;
43194
43195 /* Examine from whence the elements come. */
43196 contents = 0;
43197 for (i = 0; i < nelt; ++i)
43198 contents |= ((unsigned HOST_WIDE_INT) 1) << d->perm[i];
43199
43200 memset (remap, 0xff, sizeof (remap));
43201 dremap = *d;
43202
43203 if (GET_MODE_SIZE (d->vmode) == 16)
43204 {
43205 unsigned HOST_WIDE_INT h1, h2, h3, h4;
43206
43207 /* Split the two input vectors into 4 halves. */
43208 h1 = (((unsigned HOST_WIDE_INT) 1) << nelt2) - 1;
43209 h2 = h1 << nelt2;
43210 h3 = h2 << nelt2;
43211 h4 = h3 << nelt2;
43212
43213 /* If the elements all come from the low halves, use interleave low;
43214 similarly, use interleave high for the high halves. If the elements are
43215 from mis-matched halves, we can use shufps for V4SF/V4SI or do a DImode shuffle. */
43216 if ((contents & (h1 | h3)) == contents)
43217 {
43218 /* punpckl* */
43219 for (i = 0; i < nelt2; ++i)
43220 {
43221 remap[i] = i * 2;
43222 remap[i + nelt] = i * 2 + 1;
43223 dremap.perm[i * 2] = i;
43224 dremap.perm[i * 2 + 1] = i + nelt;
43225 }
43226 if (!TARGET_SSE2 && d->vmode == V4SImode)
43227 dremap.vmode = V4SFmode;
43228 }
43229 else if ((contents & (h2 | h4)) == contents)
43230 {
43231 /* punpckh* */
43232 for (i = 0; i < nelt2; ++i)
43233 {
43234 remap[i + nelt2] = i * 2;
43235 remap[i + nelt + nelt2] = i * 2 + 1;
43236 dremap.perm[i * 2] = i + nelt2;
43237 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
43238 }
43239 if (!TARGET_SSE2 && d->vmode == V4SImode)
43240 dremap.vmode = V4SFmode;
43241 }
43242 else if ((contents & (h1 | h4)) == contents)
43243 {
43244 /* shufps */
43245 for (i = 0; i < nelt2; ++i)
43246 {
43247 remap[i] = i;
43248 remap[i + nelt + nelt2] = i + nelt2;
43249 dremap.perm[i] = i;
43250 dremap.perm[i + nelt2] = i + nelt + nelt2;
43251 }
43252 if (nelt != 4)
43253 {
43254 /* shufpd */
43255 dremap.vmode = V2DImode;
43256 dremap.nelt = 2;
43257 dremap.perm[0] = 0;
43258 dremap.perm[1] = 3;
43259 }
43260 }
43261 else if ((contents & (h2 | h3)) == contents)
43262 {
43263 /* shufps */
43264 for (i = 0; i < nelt2; ++i)
43265 {
43266 remap[i + nelt2] = i;
43267 remap[i + nelt] = i + nelt2;
43268 dremap.perm[i] = i + nelt2;
43269 dremap.perm[i + nelt2] = i + nelt;
43270 }
43271 if (nelt != 4)
43272 {
43273 /* shufpd */
43274 dremap.vmode = V2DImode;
43275 dremap.nelt = 2;
43276 dremap.perm[0] = 1;
43277 dremap.perm[1] = 2;
43278 }
43279 }
43280 else
43281 return false;
43282 }
43283 else
43284 {
43285 unsigned int nelt4 = nelt / 4, nzcnt = 0;
43286 unsigned HOST_WIDE_INT q[8];
43287 unsigned int nonzero_halves[4];
43288
43289 /* Split the two input vectors into 8 quarters. */
43290 q[0] = (((unsigned HOST_WIDE_INT) 1) << nelt4) - 1;
43291 for (i = 1; i < 8; ++i)
43292 q[i] = q[0] << (nelt4 * i);
43293 for (i = 0; i < 4; ++i)
43294 if (((q[2 * i] | q[2 * i + 1]) & contents) != 0)
43295 {
43296 nonzero_halves[nzcnt] = i;
43297 ++nzcnt;
43298 }
43299
43300 if (nzcnt == 1)
43301 {
43302 gcc_assert (d->one_operand_p);
43303 nonzero_halves[1] = nonzero_halves[0];
43304 same_halves = true;
43305 }
43306 else if (d->one_operand_p)
43307 {
43308 gcc_assert (nonzero_halves[0] == 0);
43309 gcc_assert (nonzero_halves[1] == 1);
43310 }
43311
43312 if (nzcnt <= 2)
43313 {
43314 if (d->perm[0] / nelt2 == nonzero_halves[1])
43315 {
43316 /* Attempt to increase the likelihood that dfinal
43317 shuffle will be intra-lane. */
43318 char tmph = nonzero_halves[0];
43319 nonzero_halves[0] = nonzero_halves[1];
43320 nonzero_halves[1] = tmph;
43321 }
43322
43323 /* vperm2f128 or vperm2i128. */
43324 for (i = 0; i < nelt2; ++i)
43325 {
43326 remap[i + nonzero_halves[1] * nelt2] = i + nelt2;
43327 remap[i + nonzero_halves[0] * nelt2] = i;
43328 dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2;
43329 dremap.perm[i] = i + nonzero_halves[0] * nelt2;
43330 }
43331
43332 if (d->vmode != V8SFmode
43333 && d->vmode != V4DFmode
43334 && d->vmode != V8SImode)
43335 {
43336 dremap.vmode = V8SImode;
43337 dremap.nelt = 8;
43338 for (i = 0; i < 4; ++i)
43339 {
43340 dremap.perm[i] = i + nonzero_halves[0] * 4;
43341 dremap.perm[i + 4] = i + nonzero_halves[1] * 4;
43342 }
43343 }
43344 }
43345 else if (d->one_operand_p)
43346 return false;
43347 else if (TARGET_AVX2
43348 && (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
43349 {
43350 /* vpunpckl* */
43351 for (i = 0; i < nelt4; ++i)
43352 {
43353 remap[i] = i * 2;
43354 remap[i + nelt] = i * 2 + 1;
43355 remap[i + nelt2] = i * 2 + nelt2;
43356 remap[i + nelt + nelt2] = i * 2 + nelt2 + 1;
43357 dremap.perm[i * 2] = i;
43358 dremap.perm[i * 2 + 1] = i + nelt;
43359 dremap.perm[i * 2 + nelt2] = i + nelt2;
43360 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2;
43361 }
43362 }
43363 else if (TARGET_AVX2
43364 && (contents & (q[1] | q[3] | q[5] | q[7])) == contents)
43365 {
43366 /* vpunpckh* */
43367 for (i = 0; i < nelt4; ++i)
43368 {
43369 remap[i + nelt4] = i * 2;
43370 remap[i + nelt + nelt4] = i * 2 + 1;
43371 remap[i + nelt2 + nelt4] = i * 2 + nelt2;
43372 remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1;
43373 dremap.perm[i * 2] = i + nelt4;
43374 dremap.perm[i * 2 + 1] = i + nelt + nelt4;
43375 dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4;
43376 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4;
43377 }
43378 }
43379 else
43380 return false;
43381 }
43382
43383 /* Use the remapping array set up above to move the elements from their
43384 swizzled locations into their final destinations. */
43385 dfinal = *d;
43386 for (i = 0; i < nelt; ++i)
43387 {
43388 unsigned e = remap[d->perm[i]];
43389 gcc_assert (e < nelt);
43390 /* If same_halves is true, both halves of the remapped vector are the
43391 same. Avoid cross-lane accesses if possible. */
43392 if (same_halves && i >= nelt2)
43393 {
43394 gcc_assert (e < nelt2);
43395 dfinal.perm[i] = e + nelt2;
43396 }
43397 else
43398 dfinal.perm[i] = e;
43399 }
43400 dremap.target = gen_reg_rtx (dremap.vmode);
43401 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
43402 dfinal.op1 = dfinal.op0;
43403 dfinal.one_operand_p = true;
43404
43405 /* Test if the final remap can be done with a single insn. For V4SFmode or
43406 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
43407 start_sequence ();
43408 ok = expand_vec_perm_1 (&dfinal);
43409 seq = get_insns ();
43410 end_sequence ();
43411
43412 if (!ok)
43413 return false;
43414
43415 if (d->testing_p)
43416 return true;
43417
43418 if (dremap.vmode != dfinal.vmode)
43419 {
43420 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
43421 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
43422 }
43423
43424 ok = expand_vec_perm_1 (&dremap);
43425 gcc_assert (ok);
43426
43427 emit_insn (seq);
43428 return true;
43429 }
43430
43431 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
43432 a single vector cross-lane permutation into vpermq followed
43433 by any of the single insn permutations. */
43434
43435 static bool
43436 expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
43437 {
43438 struct expand_vec_perm_d dremap, dfinal;
43439 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4;
43440 unsigned contents[2];
43441 bool ok;
43442
43443 if (!(TARGET_AVX2
43444 && (d->vmode == V32QImode || d->vmode == V16HImode)
43445 && d->one_operand_p))
43446 return false;
43447
43448 contents[0] = 0;
43449 contents[1] = 0;
43450 for (i = 0; i < nelt2; ++i)
43451 {
43452 contents[0] |= 1u << (d->perm[i] / nelt4);
43453 contents[1] |= 1u << (d->perm[i + nelt2] / nelt4);
43454 }
43455
43456 for (i = 0; i < 2; ++i)
43457 {
43458 unsigned int cnt = 0;
43459 for (j = 0; j < 4; ++j)
43460 if ((contents[i] & (1u << j)) != 0 && ++cnt > 2)
43461 return false;
43462 }
43463
43464 if (d->testing_p)
43465 return true;
43466
43467 dremap = *d;
43468 dremap.vmode = V4DImode;
43469 dremap.nelt = 4;
43470 dremap.target = gen_reg_rtx (V4DImode);
43471 dremap.op0 = gen_lowpart (V4DImode, d->op0);
43472 dremap.op1 = dremap.op0;
43473 dremap.one_operand_p = true;
43474 for (i = 0; i < 2; ++i)
43475 {
43476 unsigned int cnt = 0;
43477 for (j = 0; j < 4; ++j)
43478 if ((contents[i] & (1u << j)) != 0)
43479 dremap.perm[2 * i + cnt++] = j;
43480 for (; cnt < 2; ++cnt)
43481 dremap.perm[2 * i + cnt] = 0;
43482 }
43483
43484 dfinal = *d;
43485 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
43486 dfinal.op1 = dfinal.op0;
43487 dfinal.one_operand_p = true;
43488 for (i = 0, j = 0; i < nelt; ++i)
43489 {
43490 if (i == nelt2)
43491 j = 2;
43492 dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0);
43493 if ((d->perm[i] / nelt4) == dremap.perm[j])
43494 ;
43495 else if ((d->perm[i] / nelt4) == dremap.perm[j + 1])
43496 dfinal.perm[i] |= nelt4;
43497 else
43498 gcc_unreachable ();
43499 }
43500
43501 ok = expand_vec_perm_1 (&dremap);
43502 gcc_assert (ok);
43503
43504 ok = expand_vec_perm_1 (&dfinal);
43505 gcc_assert (ok);
43506
43507 return true;
43508 }
43509
43510 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to expand
43511 a vector permutation using two instructions: vperm2f128 (or
43512 vperm2i128) followed by any single in-lane permutation. */
43513
43514 static bool
43515 expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d)
43516 {
43517 struct expand_vec_perm_d dfirst, dsecond;
43518 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, perm;
43519 bool ok;
43520
43521 if (!TARGET_AVX
43522 || GET_MODE_SIZE (d->vmode) != 32
43523 || (d->vmode != V8SFmode && d->vmode != V4DFmode && !TARGET_AVX2))
43524 return false;
43525
43526 dsecond = *d;
43527 dsecond.one_operand_p = false;
43528 dsecond.testing_p = true;
43529
43530 /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
43531 immediate. For perm < 16 the second permutation uses
43532 d->op0 as first operand, for perm >= 16 it uses d->op1
43533 as first operand. The second operand is the result of
43534 vperm2[fi]128. */
43535 for (perm = 0; perm < 32; perm++)
43536 {
43537 /* Ignore permutations which do not move anything cross-lane. */
43538 if (perm < 16)
43539 {
43540 /* The second shuffle for e.g. V4DFmode has
43541 0123 and ABCD operands.
43542 Ignore AB23, as 23 is already in the second lane
43543 of the first operand. */
43544 if ((perm & 0xc) == (1 << 2)) continue;
43545 /* And 01CD, as 01 is in the first lane of the first
43546 operand. */
43547 if ((perm & 3) == 0) continue;
43548 /* And 4567, as then the vperm2[fi]128 doesn't change
43549 anything on the original 4567 second operand. */
43550 if ((perm & 0xf) == ((3 << 2) | 2)) continue;
43551 }
43552 else
43553 {
43554 /* The second shuffle for e.g. V4DFmode has
43555 4567 and ABCD operands.
43556 Ignore AB67, as 67 is already in the second lane
43557 of the first operand. */
43558 if ((perm & 0xc) == (3 << 2)) continue;
43559 /* And 45CD, as 45 is in the first lane of the first
43560 operand. */
43561 if ((perm & 3) == 2) continue;
43562 /* And 0123, as then the vperm2[fi]128 doesn't change
43563 anything on the original 0123 first operand. */
43564 if ((perm & 0xf) == (1 << 2)) continue;
43565 }
43566
43567 for (i = 0; i < nelt; i++)
43568 {
43569 j = d->perm[i] / nelt2;
43570 if (j == ((perm >> (2 * (i >= nelt2))) & 3))
43571 dsecond.perm[i] = nelt + (i & nelt2) + (d->perm[i] & (nelt2 - 1));
43572 else if (j == (unsigned) (i >= nelt2) + 2 * (perm >= 16))
43573 dsecond.perm[i] = d->perm[i] & (nelt - 1);
43574 else
43575 break;
43576 }
43577
43578 if (i == nelt)
43579 {
43580 start_sequence ();
43581 ok = expand_vec_perm_1 (&dsecond);
43582 end_sequence ();
43583 }
43584 else
43585 ok = false;
43586
43587 if (ok)
43588 {
43589 if (d->testing_p)
43590 return true;
43591
43592 /* Found a usable second shuffle. dfirst will be
43593 vperm2f128 on d->op0 and d->op1. */
43594 dsecond.testing_p = false;
43595 dfirst = *d;
43596 dfirst.target = gen_reg_rtx (d->vmode);
43597 for (i = 0; i < nelt; i++)
43598 dfirst.perm[i] = (i & (nelt2 - 1))
43599 + ((perm >> (2 * (i >= nelt2))) & 3) * nelt2;
43600
43601 ok = expand_vec_perm_1 (&dfirst);
43602 gcc_assert (ok);
43603
43604 /* And dsecond is some single insn shuffle, taking
43605 d->op0 and result of vperm2f128 (if perm < 16) or
43606 d->op1 and result of vperm2f128 (otherwise). */
43607 dsecond.op1 = dfirst.target;
43608 if (perm >= 16)
43609 dsecond.op0 = dfirst.op1;
43610
43611 ok = expand_vec_perm_1 (&dsecond);
43612 gcc_assert (ok);
43613
43614 return true;
43615 }
43616
43617 /* For one operand, the only useful vperm2f128 permutation is 0x10. */
43618 if (d->one_operand_p)
43619 return false;
43620 }
43621
43622 return false;
43623 }
43624
43625 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
43626 a two vector permutation using 2 intra-lane interleave insns
43627 and cross-lane shuffle for 32-byte vectors. */
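/* The selector has to interleave consecutive elements of the two
   operands starting either at 0 or at nelt/2; e.g. for V4DImode the
   matched patterns are { 0, 4, 1, 5 } (interleave low) and
   { 2, 6, 3, 7 } (interleave high), which the loop below verifies
   pair by pair.  */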
43628
43629 static bool
43630 expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
43631 {
43632 unsigned i, nelt;
43633 rtx (*gen) (rtx, rtx, rtx);
43634
43635 if (d->one_operand_p)
43636 return false;
43637 if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
43638 ;
43639 else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
43640 ;
43641 else
43642 return false;
43643
43644 nelt = d->nelt;
43645 if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
43646 return false;
43647 for (i = 0; i < nelt; i += 2)
43648 if (d->perm[i] != d->perm[0] + i / 2
43649 || d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
43650 return false;
43651
43652 if (d->testing_p)
43653 return true;
43654
43655 switch (d->vmode)
43656 {
43657 case V32QImode:
43658 if (d->perm[0])
43659 gen = gen_vec_interleave_highv32qi;
43660 else
43661 gen = gen_vec_interleave_lowv32qi;
43662 break;
43663 case V16HImode:
43664 if (d->perm[0])
43665 gen = gen_vec_interleave_highv16hi;
43666 else
43667 gen = gen_vec_interleave_lowv16hi;
43668 break;
43669 case V8SImode:
43670 if (d->perm[0])
43671 gen = gen_vec_interleave_highv8si;
43672 else
43673 gen = gen_vec_interleave_lowv8si;
43674 break;
43675 case V4DImode:
43676 if (d->perm[0])
43677 gen = gen_vec_interleave_highv4di;
43678 else
43679 gen = gen_vec_interleave_lowv4di;
43680 break;
43681 case V8SFmode:
43682 if (d->perm[0])
43683 gen = gen_vec_interleave_highv8sf;
43684 else
43685 gen = gen_vec_interleave_lowv8sf;
43686 break;
43687 case V4DFmode:
43688 if (d->perm[0])
43689 gen = gen_vec_interleave_highv4df;
43690 else
43691 gen = gen_vec_interleave_lowv4df;
43692 break;
43693 default:
43694 gcc_unreachable ();
43695 }
43696
43697 emit_insn (gen (d->target, d->op0, d->op1));
43698 return true;
43699 }
43700
43701 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement
43702 a single vector permutation using a single intra-lane vector
43703 permutation, vperm2f128 swapping the lanes and vblend* insn blending
43704 the non-swapped and swapped vectors together. */
43705
43706 static bool
43707 expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)
43708 {
43709 struct expand_vec_perm_d dfirst, dsecond;
43710 unsigned i, j, msk, nelt = d->nelt, nelt2 = nelt / 2;
43711 rtx seq;
43712 bool ok;
43713 rtx (*blend) (rtx, rtx, rtx, rtx) = NULL;
43714
43715 if (!TARGET_AVX
43716 || TARGET_AVX2
43717 || (d->vmode != V8SFmode && d->vmode != V4DFmode)
43718 || !d->one_operand_p)
43719 return false;
43720
43721 dfirst = *d;
43722 for (i = 0; i < nelt; i++)
43723 dfirst.perm[i] = 0xff;
43724 for (i = 0, msk = 0; i < nelt; i++)
43725 {
43726 j = (d->perm[i] & nelt2) ? i | nelt2 : i & ~nelt2;
43727 if (dfirst.perm[j] != 0xff && dfirst.perm[j] != d->perm[i])
43728 return false;
43729 dfirst.perm[j] = d->perm[i];
43730 if (j != i)
43731 msk |= (1 << i);
43732 }
43733 for (i = 0; i < nelt; i++)
43734 if (dfirst.perm[i] == 0xff)
43735 dfirst.perm[i] = i;
43736
43737 if (!d->testing_p)
43738 dfirst.target = gen_reg_rtx (dfirst.vmode);
43739
43740 start_sequence ();
43741 ok = expand_vec_perm_1 (&dfirst);
43742 seq = get_insns ();
43743 end_sequence ();
43744
43745 if (!ok)
43746 return false;
43747
43748 if (d->testing_p)
43749 return true;
43750
43751 emit_insn (seq);
43752
43753 dsecond = *d;
43754 dsecond.op0 = dfirst.target;
43755 dsecond.op1 = dfirst.target;
43756 dsecond.one_operand_p = true;
43757 dsecond.target = gen_reg_rtx (dsecond.vmode);
43758 for (i = 0; i < nelt; i++)
43759 dsecond.perm[i] = i ^ nelt2;
43760
43761 ok = expand_vec_perm_1 (&dsecond);
43762 gcc_assert (ok);
43763
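/* Each bit set in MSK above marks an element that lives in the
   lane-swapped copy, so it has to be taken from DSECOND's result;
   that matches the vblendp[sd] immediate, where a set bit I selects
   element I from the second source operand.  */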
43764 blend = d->vmode == V8SFmode ? gen_avx_blendps256 : gen_avx_blendpd256;
43765 emit_insn (blend (d->target, dfirst.target, dsecond.target, GEN_INT (msk)));
43766 return true;
43767 }
43768
43769 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement a V4DF
43770 permutation using two vperm2f128, followed by a vshufpd insn blending
43771 the two vectors together. */
43772
43773 static bool
43774 expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d *d)
43775 {
43776 struct expand_vec_perm_d dfirst, dsecond, dthird;
43777 bool ok;
43778
43779 if (!TARGET_AVX || (d->vmode != V4DFmode))
43780 return false;
43781
43782 if (d->testing_p)
43783 return true;
43784
43785 dfirst = *d;
43786 dsecond = *d;
43787 dthird = *d;
43788
43789 dfirst.perm[0] = (d->perm[0] & ~1);
43790 dfirst.perm[1] = (d->perm[0] & ~1) + 1;
43791 dfirst.perm[2] = (d->perm[2] & ~1);
43792 dfirst.perm[3] = (d->perm[2] & ~1) + 1;
43793 dsecond.perm[0] = (d->perm[1] & ~1);
43794 dsecond.perm[1] = (d->perm[1] & ~1) + 1;
43795 dsecond.perm[2] = (d->perm[3] & ~1);
43796 dsecond.perm[3] = (d->perm[3] & ~1) + 1;
43797 dthird.perm[0] = (d->perm[0] % 2);
43798 dthird.perm[1] = (d->perm[1] % 2) + 4;
43799 dthird.perm[2] = (d->perm[2] % 2) + 2;
43800 dthird.perm[3] = (d->perm[3] % 2) + 6;
43801
43802 dfirst.target = gen_reg_rtx (dfirst.vmode);
43803 dsecond.target = gen_reg_rtx (dsecond.vmode);
43804 dthird.op0 = dfirst.target;
43805 dthird.op1 = dsecond.target;
43806 dthird.one_operand_p = false;
43807
43808 canonicalize_perm (&dfirst);
43809 canonicalize_perm (&dsecond);
43810
43811 ok = expand_vec_perm_1 (&dfirst)
43812 && expand_vec_perm_1 (&dsecond)
43813 && expand_vec_perm_1 (&dthird);
43814
43815 gcc_assert (ok);
43816
43817 return true;
43818 }
43819
43820 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
43821 permutation with two pshufb insns and an ior. We should have already
43822 failed all two instruction sequences. */
43823
43824 static bool
43825 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
43826 {
43827 rtx rperm[2][16], vperm, l, h, op, m128;
43828 unsigned int i, nelt, eltsz;
43829
43830 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
43831 return false;
43832 gcc_assert (!d->one_operand_p);
43833
43834 nelt = d->nelt;
43835 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
43836
43837 /* Generate two permutation masks. If the required element is within
43838 the given vector it is shuffled into the proper lane. If the required
43839 element is in the other vector, force a zero into the lane by setting
43840 bit 7 in the permutation mask. */
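/* For example, a V8HImode even-element extract { 0 2 4 6 8 10 12 14 }
   with eltsz == 2 produces the byte masks
   { 0,1, 4,5, 8,9, 12,13, -128 x 8 } for the first operand and
   { -128 x 8, 0,1, 4,5, 8,9, 12,13 } for the second, so the two
   pshufb results can simply be ior'ed together below.  */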
43841 m128 = GEN_INT (-128);
43842 for (i = 0; i < nelt; ++i)
43843 {
43844 unsigned j, e = d->perm[i];
43845 unsigned which = (e >= nelt);
43846 if (e >= nelt)
43847 e -= nelt;
43848
43849 for (j = 0; j < eltsz; ++j)
43850 {
43851 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
43852 rperm[1-which][i*eltsz + j] = m128;
43853 }
43854 }
43855
43856 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
43857 vperm = force_reg (V16QImode, vperm);
43858
43859 l = gen_reg_rtx (V16QImode);
43860 op = gen_lowpart (V16QImode, d->op0);
43861 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
43862
43863 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
43864 vperm = force_reg (V16QImode, vperm);
43865
43866 h = gen_reg_rtx (V16QImode);
43867 op = gen_lowpart (V16QImode, d->op1);
43868 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
43869
43870 op = d->target;
43871 if (d->vmode != V16QImode)
43872 op = gen_reg_rtx (V16QImode);
43873 emit_insn (gen_iorv16qi3 (op, l, h));
43874 if (op != d->target)
43875 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
43876
43877 return true;
43878 }
43879
43880 /* Implement arbitrary permutation of one V32QImode or V16HImode operand
43881 with two vpshufb insns, vpermq and vpor. We should have already failed
43882 all two or three instruction sequences. */
43883
43884 static bool
43885 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
43886 {
43887 rtx rperm[2][32], vperm, l, h, hp, op, m128;
43888 unsigned int i, nelt, eltsz;
43889
43890 if (!TARGET_AVX2
43891 || !d->one_operand_p
43892 || (d->vmode != V32QImode && d->vmode != V16HImode))
43893 return false;
43894
43895 if (d->testing_p)
43896 return true;
43897
43898 nelt = d->nelt;
43899 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
43900
43901 /* Generate two permutation masks. If the required element is within
43902 the same lane, it is shuffled in. If the required element is in the
43903 other lane, force a zero by setting bit 7 in the permutation mask.
43904 The other mask has non-negative elements where an element is
43905 requested from the other lane; those bytes are also moved to the
43906 other lane, so that the result of vpshufb can have the two
43907 V2TImode halves swapped. */
43908 m128 = GEN_INT (-128);
43909 for (i = 0; i < nelt; ++i)
43910 {
43911 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
43912 unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
43913
43914 for (j = 0; j < eltsz; ++j)
43915 {
43916 rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j);
43917 rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128;
43918 }
43919 }
43920
43921 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
43922 vperm = force_reg (V32QImode, vperm);
43923
43924 h = gen_reg_rtx (V32QImode);
43925 op = gen_lowpart (V32QImode, d->op0);
43926 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
43927
43928 /* Swap the 128-bit lanes of h into hp. */
43929 hp = gen_reg_rtx (V4DImode);
43930 op = gen_lowpart (V4DImode, h);
43931 emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx,
43932 const1_rtx));
43933
43934 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
43935 vperm = force_reg (V32QImode, vperm);
43936
43937 l = gen_reg_rtx (V32QImode);
43938 op = gen_lowpart (V32QImode, d->op0);
43939 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
43940
43941 op = d->target;
43942 if (d->vmode != V32QImode)
43943 op = gen_reg_rtx (V32QImode);
43944 emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));
43945 if (op != d->target)
43946 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
43947
43948 return true;
43949 }
43950
43951 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
43952 and extract-odd permutations of two V32QImode or V16HImode operands
43953 with two vpshufb insns, vpor and vpermq. We should have already
43954 failed all two or three instruction sequences. */
43955
43956 static bool
43957 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
43958 {
43959 rtx rperm[2][32], vperm, l, h, ior, op, m128;
43960 unsigned int i, nelt, eltsz;
43961
43962 if (!TARGET_AVX2
43963 || d->one_operand_p
43964 || (d->vmode != V32QImode && d->vmode != V16HImode))
43965 return false;
43966
43967 for (i = 0; i < d->nelt; ++i)
43968 if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2))
43969 return false;
43970
43971 if (d->testing_p)
43972 return true;
43973
43974 nelt = d->nelt;
43975 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
43976
43977 /* Generate two permutation masks. In the first permutation mask
43978 the first quarter will contain indexes for the first half
43979 of the op0, the second quarter will contain bit 7 set, third quarter
43980 will contain indexes for the second half of the op0 and the
43981 last quarter bit 7 set. In the second permutation mask
43982 the first quarter will contain bit 7 set, the second quarter
43983 indexes for the first half of the op1, the third quarter bit 7 set
43984 and last quarter indexes for the second half of the op1.
43985 I.e. the first mask e.g. for V32QImode extract even will be:
43986 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
43987 (all values masked with 0xf except for -128) and second mask
43988 for extract even will be
43989 -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */
43990 m128 = GEN_INT (-128);
43991 for (i = 0; i < nelt; ++i)
43992 {
43993 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
43994 unsigned which = d->perm[i] >= nelt;
43995 unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0;
43996
43997 for (j = 0; j < eltsz; ++j)
43998 {
43999 rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j);
44000 rperm[1 - which][(i * eltsz + j) ^ xorv] = m128;
44001 }
44002 }
44003
44004 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
44005 vperm = force_reg (V32QImode, vperm);
44006
44007 l = gen_reg_rtx (V32QImode);
44008 op = gen_lowpart (V32QImode, d->op0);
44009 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
44010
44011 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
44012 vperm = force_reg (V32QImode, vperm);
44013
44014 h = gen_reg_rtx (V32QImode);
44015 op = gen_lowpart (V32QImode, d->op1);
44016 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
44017
44018 ior = gen_reg_rtx (V32QImode);
44019 emit_insn (gen_iorv32qi3 (ior, l, h));
44020
44021 /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation. */
44022 op = gen_reg_rtx (V4DImode);
44023 ior = gen_lowpart (V4DImode, ior);
44024 emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
44025 const1_rtx, GEN_INT (3)));
44026 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
44027
44028 return true;
44029 }
44030
44031 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
44032 and extract-odd permutations. */
44033
44034 static bool
44035 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
44036 {
44037 rtx t1, t2, t3, t4, t5;
44038
44039 switch (d->vmode)
44040 {
44041 case V4DFmode:
44042 t1 = gen_reg_rtx (V4DFmode);
44043 t2 = gen_reg_rtx (V4DFmode);
44044
44045 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
44046 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
44047 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
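/* In the vperm2f128 immediate, bits 0-1 pick the 128-bit lane written
   to the low half and bits 4-5 the lane written to the high half
   (0/1 = lanes of the first source, 2/3 = lanes of the second), so
   0x20 yields { op0.lo, op1.lo } and 0x31 yields { op0.hi, op1.hi }.  */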
44048
44049 /* Now an unpck[lh]pd will produce the result required. */
44050 if (odd)
44051 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
44052 else
44053 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
44054 emit_insn (t3);
44055 break;
44056
44057 case V8SFmode:
44058 {
44059 int mask = odd ? 0xdd : 0x88;
44060
44061 t1 = gen_reg_rtx (V8SFmode);
44062 t2 = gen_reg_rtx (V8SFmode);
44063 t3 = gen_reg_rtx (V8SFmode);
44064
44065 /* Shuffle within the 128-bit lanes to produce:
44066 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
44067 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
44068 GEN_INT (mask)));
44069
44070 /* Shuffle the lanes around to produce:
44071 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
44072 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
44073 GEN_INT (0x3)));
44074
44075 /* Shuffle within the 128-bit lanes to produce:
44076 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
44077 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
44078
44079 /* Shuffle within the 128-bit lanes to produce:
44080 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
44081 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
44082
44083 /* Shuffle the lanes around to produce:
44084 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
44085 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
44086 GEN_INT (0x20)));
44087 }
44088 break;
44089
44090 case V2DFmode:
44091 case V4SFmode:
44092 case V2DImode:
44093 case V4SImode:
44094 /* These are always directly implementable by expand_vec_perm_1. */
44095 gcc_unreachable ();
44096
44097 case V8HImode:
44098 if (TARGET_SSSE3)
44099 return expand_vec_perm_pshufb2 (d);
44100 else
44101 {
44102 /* We need 2*log2(N)-1 operations to achieve odd/even
44103 with interleave. */
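/* For V8HImode N == 8, i.e. the five interleave insns below.  */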
44104 t1 = gen_reg_rtx (V8HImode);
44105 t2 = gen_reg_rtx (V8HImode);
44106 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
44107 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
44108 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
44109 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
44110 if (odd)
44111 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
44112 else
44113 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
44114 emit_insn (t3);
44115 }
44116 break;
44117
44118 case V16QImode:
44119 if (TARGET_SSSE3)
44120 return expand_vec_perm_pshufb2 (d);
44121 else
44122 {
44123 t1 = gen_reg_rtx (V16QImode);
44124 t2 = gen_reg_rtx (V16QImode);
44125 t3 = gen_reg_rtx (V16QImode);
44126 emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
44127 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
44128 emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
44129 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
44130 emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
44131 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
44132 if (odd)
44133 t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
44134 else
44135 t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
44136 emit_insn (t3);
44137 }
44138 break;
44139
44140 case V16HImode:
44141 case V32QImode:
44142 return expand_vec_perm_vpshufb2_vpermq_even_odd (d);
44143
44144 case V4DImode:
44145 if (!TARGET_AVX2)
44146 {
44147 struct expand_vec_perm_d d_copy = *d;
44148 d_copy.vmode = V4DFmode;
44149 d_copy.target = gen_reg_rtx (V4DFmode);
44150 d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
44151 d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
44152 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
44153 {
44154 if (!d->testing_p)
44155 emit_move_insn (d->target,
44156 gen_lowpart (V4DImode, d_copy.target));
44157 return true;
44158 }
44159 return false;
44160 }
44161
44162 t1 = gen_reg_rtx (V4DImode);
44163 t2 = gen_reg_rtx (V4DImode);
44164
44165 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
44166 emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20)));
44167 emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31)));
44168
44169 /* Now a vpunpck[lh]qdq will produce the result required. */
44170 if (odd)
44171 t3 = gen_avx2_interleave_highv4di (d->target, t1, t2);
44172 else
44173 t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2);
44174 emit_insn (t3);
44175 break;
44176
44177 case V8SImode:
44178 if (!TARGET_AVX2)
44179 {
44180 struct expand_vec_perm_d d_copy = *d;
44181 d_copy.vmode = V8SFmode;
44182 d_copy.target = gen_reg_rtx (V8SFmode);
44183 d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
44184 d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
44185 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
44186 {
44187 if (!d->testing_p)
44188 emit_move_insn (d->target,
44189 gen_lowpart (V8SImode, d_copy.target));
44190 return true;
44191 }
44192 return false;
44193 }
44194
44195 t1 = gen_reg_rtx (V8SImode);
44196 t2 = gen_reg_rtx (V8SImode);
44197 t3 = gen_reg_rtx (V4DImode);
44198 t4 = gen_reg_rtx (V4DImode);
44199 t5 = gen_reg_rtx (V4DImode);
44200
44201 /* Shuffle the lanes around into
44202 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. */
44203 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, d->op0),
44204 gen_lowpart (V4DImode, d->op1),
44205 GEN_INT (0x20)));
44206 emit_insn (gen_avx2_permv2ti (t4, gen_lowpart (V4DImode, d->op0),
44207 gen_lowpart (V4DImode, d->op1),
44208 GEN_INT (0x31)));
44209
44210 /* Swap the 2nd and 3rd position in each lane into
44211 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
44212 emit_insn (gen_avx2_pshufdv3 (t1, gen_lowpart (V8SImode, t3),
44213 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
44214 emit_insn (gen_avx2_pshufdv3 (t2, gen_lowpart (V8SImode, t4),
44215 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
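/* 2 * 4 + 1 * 16 + 3 * 64 == 0xd8 encodes the pshufd selector
   { 0, 2, 1, 3 } (two bits per destination element), i.e. the swap
   of the middle two elements within each lane described above.  */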
44216
44217 /* Now a vpunpck[lh]qdq will produce
44218 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }. */
44219 if (odd)
44220 t3 = gen_avx2_interleave_highv4di (t5, gen_lowpart (V4DImode, t1),
44221 gen_lowpart (V4DImode, t2));
44222 else
44223 t3 = gen_avx2_interleave_lowv4di (t5, gen_lowpart (V4DImode, t1),
44224 gen_lowpart (V4DImode, t2));
44225 emit_insn (t3);
44226 emit_move_insn (d->target, gen_lowpart (V8SImode, t5));
44227 break;
44228
44229 default:
44230 gcc_unreachable ();
44231 }
44232
44233 return true;
44234 }
44235
44236 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
44237 extract-even and extract-odd permutations. */
44238
44239 static bool
44240 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
44241 {
44242 unsigned i, odd, nelt = d->nelt;
44243
44244 odd = d->perm[0];
44245 if (odd != 0 && odd != 1)
44246 return false;
44247
44248 for (i = 1; i < nelt; ++i)
44249 if (d->perm[i] != 2 * i + odd)
44250 return false;
44251
44252 return expand_vec_perm_even_odd_1 (d, odd);
44253 }
44254
44255 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
44256 permutations. We assume that expand_vec_perm_1 has already failed. */
44257
44258 static bool
44259 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
44260 {
44261 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
44262 enum machine_mode vmode = d->vmode;
44263 unsigned char perm2[4];
44264 rtx op0 = d->op0, dest;
44265 bool ok;
44266
44267 switch (vmode)
44268 {
44269 case V4DFmode:
44270 case V8SFmode:
44271 /* These are special-cased in sse.md so that we can optionally
44272 use the vbroadcast instruction. They expand to two insns
44273 if the input happens to be in a register. */
44274 gcc_unreachable ();
44275
44276 case V2DFmode:
44277 case V2DImode:
44278 case V4SFmode:
44279 case V4SImode:
44280 /* These are always implementable using standard shuffle patterns. */
44281 gcc_unreachable ();
44282
44283 case V8HImode:
44284 case V16QImode:
44285 /* These can be implemented via interleave. We save one insn by
44286 stopping once we have promoted to V4SImode and then use pshufd. */
44287 do
44288 {
44289 rtx dest;
44290 rtx (*gen) (rtx, rtx, rtx)
44291 = vmode == V16QImode ? gen_vec_interleave_lowv16qi
44292 : gen_vec_interleave_lowv8hi;
44293
44294 if (elt >= nelt2)
44295 {
44296 gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
44297 : gen_vec_interleave_highv8hi;
44298 elt -= nelt2;
44299 }
44300 nelt2 /= 2;
44301
44302 dest = gen_reg_rtx (vmode);
44303 emit_insn (gen (dest, op0, op0));
44304 vmode = get_mode_wider_vector (vmode);
44305 op0 = gen_lowpart (vmode, dest);
44306 }
44307 while (vmode != V4SImode);
44308
44309 memset (perm2, elt, 4);
44310 dest = gen_reg_rtx (V4SImode);
44311 ok = expand_vselect (dest, op0, perm2, 4, d->testing_p);
44312 gcc_assert (ok);
44313 if (!d->testing_p)
44314 emit_move_insn (d->target, gen_lowpart (d->vmode, dest));
44315 return true;
44316
44317 case V32QImode:
44318 case V16HImode:
44319 case V8SImode:
44320 case V4DImode:
44321 /* For AVX2 broadcasts of the first element vpbroadcast* or
44322 vpermq should be used by expand_vec_perm_1. */
44323 gcc_assert (!TARGET_AVX2 || d->perm[0]);
44324 return false;
44325
44326 default:
44327 gcc_unreachable ();
44328 }
44329 }
44330
44331 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
44332 broadcast permutations. */
44333
44334 static bool
44335 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
44336 {
44337 unsigned i, elt, nelt = d->nelt;
44338
44339 if (!d->one_operand_p)
44340 return false;
44341
44342 elt = d->perm[0];
44343 for (i = 1; i < nelt; ++i)
44344 if (d->perm[i] != elt)
44345 return false;
44346
44347 return expand_vec_perm_broadcast_1 (d);
44348 }
44349
44350 /* Implement arbitrary permutation of two V32QImode or V16HImode operands
44351 with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed
44352 all the shorter instruction sequences. */
44353
44354 static bool
44355 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
44356 {
44357 rtx rperm[4][32], vperm, l[2], h[2], op, m128;
44358 unsigned int i, nelt, eltsz;
44359 bool used[4];
44360
44361 if (!TARGET_AVX2
44362 || d->one_operand_p
44363 || (d->vmode != V32QImode && d->vmode != V16HImode))
44364 return false;
44365
44366 if (d->testing_p)
44367 return true;
44368
44369 nelt = d->nelt;
44370 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
44371
44372 /* Generate 4 permutation masks. If the required element is within
44373 the same lane, it is shuffled in. If the required element is in the
44374 other lane, force a zero by setting bit 7 in the permutation mask.
44375 The other mask of each pair has non-negative elements where an
44376 element is requested from the other lane; those bytes are also moved
44377 to the other lane, so that the result of vpshufb can have the two
44378 V2TImode halves swapped. */
44379 m128 = GEN_INT (-128);
44380 for (i = 0; i < 32; ++i)
44381 {
44382 rperm[0][i] = m128;
44383 rperm[1][i] = m128;
44384 rperm[2][i] = m128;
44385 rperm[3][i] = m128;
44386 }
44387 used[0] = false;
44388 used[1] = false;
44389 used[2] = false;
44390 used[3] = false;
44391 for (i = 0; i < nelt; ++i)
44392 {
44393 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
44394 unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
44395 unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0);
44396
44397 for (j = 0; j < eltsz; ++j)
44398 rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j);
44399 used[which] = true;
44400 }
44401
44402 for (i = 0; i < 2; ++i)
44403 {
44404 if (!used[2 * i + 1])
44405 {
44406 h[i] = NULL_RTX;
44407 continue;
44408 }
44409 vperm = gen_rtx_CONST_VECTOR (V32QImode,
44410 gen_rtvec_v (32, rperm[2 * i + 1]));
44411 vperm = force_reg (V32QImode, vperm);
44412 h[i] = gen_reg_rtx (V32QImode);
44413 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
44414 emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm));
44415 }
44416
44417 /* Swap the 128-bit lanes of h[X]. */
44418 for (i = 0; i < 2; ++i)
44419 {
44420 if (h[i] == NULL_RTX)
44421 continue;
44422 op = gen_reg_rtx (V4DImode);
44423 emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]),
44424 const2_rtx, GEN_INT (3), const0_rtx,
44425 const1_rtx));
44426 h[i] = gen_lowpart (V32QImode, op);
44427 }
44428
44429 for (i = 0; i < 2; ++i)
44430 {
44431 if (!used[2 * i])
44432 {
44433 l[i] = NULL_RTX;
44434 continue;
44435 }
44436 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i]));
44437 vperm = force_reg (V32QImode, vperm);
44438 l[i] = gen_reg_rtx (V32QImode);
44439 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
44440 emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm));
44441 }
44442
44443 for (i = 0; i < 2; ++i)
44444 {
44445 if (h[i] && l[i])
44446 {
44447 op = gen_reg_rtx (V32QImode);
44448 emit_insn (gen_iorv32qi3 (op, l[i], h[i]));
44449 l[i] = op;
44450 }
44451 else if (h[i])
44452 l[i] = h[i];
44453 }
44454
44455 gcc_assert (l[0] && l[1]);
44456 op = d->target;
44457 if (d->vmode != V32QImode)
44458 op = gen_reg_rtx (V32QImode);
44459 emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
44460 if (op != d->target)
44461 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
44462 return true;
44463 }
44464
44465 /* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
44466 With all of the interface bits taken care of, perform the expansion
44467 in D and return true on success. */
44468
44469 static bool
44470 ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
44471 {
44472 /* Try a single instruction expansion. */
44473 if (expand_vec_perm_1 (d))
44474 return true;
44475
44476 /* Try sequences of two instructions. */
44477
44478 if (expand_vec_perm_pshuflw_pshufhw (d))
44479 return true;
44480
44481 if (expand_vec_perm_palignr (d))
44482 return true;
44483
44484 if (expand_vec_perm_interleave2 (d))
44485 return true;
44486
44487 if (expand_vec_perm_broadcast (d))
44488 return true;
44489
44490 if (expand_vec_perm_vpermq_perm_1 (d))
44491 return true;
44492
44493 if (expand_vec_perm_vperm2f128 (d))
44494 return true;
44495
44496 /* Try sequences of three instructions. */
44497
44498 if (expand_vec_perm_2vperm2f128_vshuf (d))
44499 return true;
44500
44501 if (expand_vec_perm_pshufb2 (d))
44502 return true;
44503
44504 if (expand_vec_perm_interleave3 (d))
44505 return true;
44506
44507 if (expand_vec_perm_vperm2f128_vblend (d))
44508 return true;
44509
44510 /* Try sequences of four instructions. */
44511
44512 if (expand_vec_perm_vpshufb2_vpermq (d))
44513 return true;
44514
44515 if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
44516 return true;
44517
44518 /* ??? Look for narrow permutations whose element orderings would
44519 allow the promotion to a wider mode. */
44520
44521 /* ??? Look for sequences of interleave or a wider permute that place
44522 the data into the correct lanes for a half-vector shuffle like
44523 pshuf[lh]w or vpermilps. */
44524
44525 /* ??? Look for sequences of interleave that produce the desired results.
44526 The combinatorics of punpck[lh] get pretty ugly... */
44527
44528 if (expand_vec_perm_even_odd (d))
44529 return true;
44530
44531 /* Even longer sequences. */
44532 if (expand_vec_perm_vpshufb4_vpermq2 (d))
44533 return true;
44534
44535 return false;
44536 }
44537
44538 /* If a permutation only uses one operand, make it clear. Returns true
44539 if the permutation references both operands. */
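/* WHICH below is a two-bit summary of the selector: bit 0 is set if
   any element is taken from op0, bit 1 if any element is taken from
   op1, so 1 and 2 mean only a single operand is referenced and 3
   means both are.  */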
44540
44541 static bool
44542 canonicalize_perm (struct expand_vec_perm_d *d)
44543 {
44544 int i, which, nelt = d->nelt;
44545
44546 for (i = which = 0; i < nelt; ++i)
44547 which |= (d->perm[i] < nelt ? 1 : 2);
44548
44549 d->one_operand_p = true;
44550 switch (which)
44551 {
44552 default:
44553 gcc_unreachable();
44554
44555 case 3:
44556 if (!rtx_equal_p (d->op0, d->op1))
44557 {
44558 d->one_operand_p = false;
44559 break;
44560 }
44561 /* The elements of PERM do not suggest that only the first operand
44562 is used, but both operands are identical. Allow easier matching
44563 of the permutation by folding the permutation into the single
44564 input vector. */
44565 /* FALLTHRU */
44566
44567 case 2:
44568 for (i = 0; i < nelt; ++i)
44569 d->perm[i] &= nelt - 1;
44570 d->op0 = d->op1;
44571 break;
44572
44573 case 1:
44574 d->op1 = d->op0;
44575 break;
44576 }
44577
44578 return (which == 3);
44579 }
44580
44581 bool
44582 ix86_expand_vec_perm_const (rtx operands[4])
44583 {
44584 struct expand_vec_perm_d d;
44585 unsigned char perm[MAX_VECT_LEN];
44586 int i, nelt;
44587 bool two_args;
44588 rtx sel;
44589
44590 d.target = operands[0];
44591 d.op0 = operands[1];
44592 d.op1 = operands[2];
44593 sel = operands[3];
44594
44595 d.vmode = GET_MODE (d.target);
44596 gcc_assert (VECTOR_MODE_P (d.vmode));
44597 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
44598 d.testing_p = false;
44599
44600 gcc_assert (GET_CODE (sel) == CONST_VECTOR);
44601 gcc_assert (XVECLEN (sel, 0) == nelt);
44602 gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
44603
44604 for (i = 0; i < nelt; ++i)
44605 {
44606 rtx e = XVECEXP (sel, 0, i);
44607 int ei = INTVAL (e) & (2 * nelt - 1);
44608 d.perm[i] = ei;
44609 perm[i] = ei;
44610 }
44611
44612 two_args = canonicalize_perm (&d);
44613
44614 if (ix86_expand_vec_perm_const_1 (&d))
44615 return true;
44616
44617 /* If the selector says both arguments are needed, but the operands are the
44618 same, the above tried to expand with one_operand_p set and a flattened
44619 selector. If that didn't work, retry without one_operand_p; that is the
44620 form we succeeded with during the testing_p check. */
44621 if (two_args && d.one_operand_p)
44622 {
44623 d.one_operand_p = false;
44624 memcpy (d.perm, perm, sizeof (perm));
44625 return ix86_expand_vec_perm_const_1 (&d);
44626 }
44627
44628 return false;
44629 }
44630
44631 /* Implement targetm.vectorize.vec_perm_const_ok. */
44632
44633 static bool
44634 ix86_vectorize_vec_perm_const_ok (enum machine_mode vmode,
44635 const unsigned char *sel)
44636 {
44637 struct expand_vec_perm_d d;
44638 unsigned int i, nelt, which;
44639 bool ret;
44640
44641 d.vmode = vmode;
44642 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
44643 d.testing_p = true;
44644
44645 /* Given sufficient ISA support we can just return true here
44646 for selected vector modes. */
44647 if (d.vmode == V16SImode || d.vmode == V16SFmode
44648 || d.vmode == V8DFmode || d.vmode == V8DImode)
44649 /* All implementable with a single vpermi2 insn. */
44650 return true;
44651 if (GET_MODE_SIZE (d.vmode) == 16)
44652 {
44653 /* All implementable with a single vpperm insn. */
44654 if (TARGET_XOP)
44655 return true;
44656 /* All implementable with 2 pshufb + 1 ior. */
44657 if (TARGET_SSSE3)
44658 return true;
44659 /* All implementable with shufpd or unpck[lh]pd. */
44660 if (d.nelt == 2)
44661 return true;
44662 }
44663
44664 /* Extract the values from the vector CST into the permutation
44665 array in D. */
44666 memcpy (d.perm, sel, nelt);
44667 for (i = which = 0; i < nelt; ++i)
44668 {
44669 unsigned char e = d.perm[i];
44670 gcc_assert (e < 2 * nelt);
44671 which |= (e < nelt ? 1 : 2);
44672 }
44673
44674 /* For all elements from second vector, fold the elements to first. */
44675 if (which == 2)
44676 for (i = 0; i < nelt; ++i)
44677 d.perm[i] -= nelt;
44678
44679 /* Check whether the mask can be applied to the vector type. */
44680 d.one_operand_p = (which != 3);
44681
44682 /* Implementable with shufps or pshufd. */
44683 if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode))
44684 return true;
44685
44686 /* Otherwise we have to go through the motions and see if we can
44687 figure out how to generate the requested permutation. */
44688 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
44689 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
44690 if (!d.one_operand_p)
44691 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
44692
44693 start_sequence ();
44694 ret = ix86_expand_vec_perm_const_1 (&d);
44695 end_sequence ();
44696
44697 return ret;
44698 }
44699
44700 void
44701 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
44702 {
44703 struct expand_vec_perm_d d;
44704 unsigned i, nelt;
44705
44706 d.target = targ;
44707 d.op0 = op0;
44708 d.op1 = op1;
44709 d.vmode = GET_MODE (targ);
44710 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
44711 d.one_operand_p = false;
44712 d.testing_p = false;
44713
44714 for (i = 0; i < nelt; ++i)
44715 d.perm[i] = i * 2 + odd;
44716
44717 /* We'll either be able to implement the permutation directly... */
44718 if (expand_vec_perm_1 (&d))
44719 return;
44720
44721 /* ... or we use the special-case patterns. */
44722 expand_vec_perm_even_odd_1 (&d, odd);
44723 }
44724
44725 static void
44726 ix86_expand_vec_interleave (rtx targ, rtx op0, rtx op1, bool high_p)
44727 {
44728 struct expand_vec_perm_d d;
44729 unsigned i, nelt, base;
44730 bool ok;
44731
44732 d.target = targ;
44733 d.op0 = op0;
44734 d.op1 = op1;
44735 d.vmode = GET_MODE (targ);
44736 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
44737 d.one_operand_p = false;
44738 d.testing_p = false;
44739
44740 base = high_p ? nelt / 2 : 0;
44741 for (i = 0; i < nelt / 2; ++i)
44742 {
44743 d.perm[i * 2] = i + base;
44744 d.perm[i * 2 + 1] = i + base + nelt;
44745 }
44746
44747 /* Note that for AVX this isn't one instruction. */
44748 ok = ix86_expand_vec_perm_const_1 (&d);
44749 gcc_assert (ok);
44750 }
44751
44752
44753 /* Expand a vector operation CODE for a V*QImode in terms of the
44754 same operation on V*HImode. */
44755
44756 void
44757 ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2)
44758 {
44759 enum machine_mode qimode = GET_MODE (dest);
44760 enum machine_mode himode;
44761 rtx (*gen_il) (rtx, rtx, rtx);
44762 rtx (*gen_ih) (rtx, rtx, rtx);
44763 rtx op1_l, op1_h, op2_l, op2_h, res_l, res_h;
44764 struct expand_vec_perm_d d;
44765 bool ok, full_interleave;
44766 bool uns_p = false;
44767 int i;
44768
44769 switch (qimode)
44770 {
44771 case V16QImode:
44772 himode = V8HImode;
44773 gen_il = gen_vec_interleave_lowv16qi;
44774 gen_ih = gen_vec_interleave_highv16qi;
44775 break;
44776 case V32QImode:
44777 himode = V16HImode;
44778 gen_il = gen_avx2_interleave_lowv32qi;
44779 gen_ih = gen_avx2_interleave_highv32qi;
44780 break;
44781 default:
44782 gcc_unreachable ();
44783 }
44784
44785 op2_l = op2_h = op2;
44786 switch (code)
44787 {
44788 case MULT:
44789 /* Unpack data such that we've got a source byte in each low byte of
44790 each word. We don't care what goes into the high byte of each word.
44791 Rather than trying to get zero in there, it is most convenient to let
44792 it be a copy of the low byte. */
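/* Interleaving a V16QImode operand with itself, e.g. { a0 a1 ... a15 },
   yields words { a0 a0 }, { a1 a1 }, ... whose low bytes are a0..a7
   (low interleave) resp. a8..a15 (high interleave); the copies in the
   high bytes only affect the high byte of each 16-bit product, which
   is discarded when the results are merged below.  */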
44793 op2_l = gen_reg_rtx (qimode);
44794 op2_h = gen_reg_rtx (qimode);
44795 emit_insn (gen_il (op2_l, op2, op2));
44796 emit_insn (gen_ih (op2_h, op2, op2));
44797 /* FALLTHRU */
44798
44799 op1_l = gen_reg_rtx (qimode);
44800 op1_h = gen_reg_rtx (qimode);
44801 emit_insn (gen_il (op1_l, op1, op1));
44802 emit_insn (gen_ih (op1_h, op1, op1));
44803 full_interleave = qimode == V16QImode;
44804 break;
44805
44806 case ASHIFT:
44807 case LSHIFTRT:
44808 uns_p = true;
44809 /* FALLTHRU */
44810 case ASHIFTRT:
44811 op1_l = gen_reg_rtx (himode);
44812 op1_h = gen_reg_rtx (himode);
44813 ix86_expand_sse_unpack (op1_l, op1, uns_p, false);
44814 ix86_expand_sse_unpack (op1_h, op1, uns_p, true);
44815 full_interleave = true;
44816 break;
44817 default:
44818 gcc_unreachable ();
44819 }
44820
44821 /* Perform the operation. */
44822 res_l = expand_simple_binop (himode, code, op1_l, op2_l, NULL_RTX,
44823 1, OPTAB_DIRECT);
44824 res_h = expand_simple_binop (himode, code, op1_h, op2_h, NULL_RTX,
44825 1, OPTAB_DIRECT);
44826 gcc_assert (res_l && res_h);
44827
44828 /* Merge the data back into the right place. */
44829 d.target = dest;
44830 d.op0 = gen_lowpart (qimode, res_l);
44831 d.op1 = gen_lowpart (qimode, res_h);
44832 d.vmode = qimode;
44833 d.nelt = GET_MODE_NUNITS (qimode);
44834 d.one_operand_p = false;
44835 d.testing_p = false;
44836
44837 if (full_interleave)
44838 {
44839 /* For SSE2, we used a full interleave, so the desired
44840 results are in the even elements. */
44841 for (i = 0; i < 32; ++i)
44842 d.perm[i] = i * 2;
44843 }
44844 else
44845 {
44846 /* For AVX, the interleave used above was not cross-lane. So the
44847 extraction is evens, but with the second and third quarters swapped.
44848 Happily, that is even one insn shorter than plain even extraction. */
44849 for (i = 0; i < 32; ++i)
44850 d.perm[i] = i * 2 + ((i & 24) == 8 ? 16 : (i & 24) == 16 ? -16 : 0);
44851 }
44852
44853 ok = ix86_expand_vec_perm_const_1 (&d);
44854 gcc_assert (ok);
44855
44856 set_unique_reg_note (get_last_insn (), REG_EQUAL,
44857 gen_rtx_fmt_ee (code, qimode, op1, op2));
44858 }
44859
44860 /* Helper function of ix86_expand_mul_widen_evenodd. Return true
44861 if op is CONST_VECTOR with all odd elements equal to their
44862 preceding element. */
44863
44864 static bool
44865 const_vector_equal_evenodd_p (rtx op)
44866 {
44867 enum machine_mode mode = GET_MODE (op);
44868 int i, nunits = GET_MODE_NUNITS (mode);
44869 if (GET_CODE (op) != CONST_VECTOR
44870 || nunits != CONST_VECTOR_NUNITS (op))
44871 return false;
44872 for (i = 0; i < nunits; i += 2)
44873 if (CONST_VECTOR_ELT (op, i) != CONST_VECTOR_ELT (op, i + 1))
44874 return false;
44875 return true;
44876 }
44877
44878 void
44879 ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
44880 bool uns_p, bool odd_p)
44881 {
44882 enum machine_mode mode = GET_MODE (op1);
44883 enum machine_mode wmode = GET_MODE (dest);
44884 rtx x;
44885 rtx orig_op1 = op1, orig_op2 = op2;
44886
44887 if (!nonimmediate_operand (op1, mode))
44888 op1 = force_reg (mode, op1);
44889 if (!nonimmediate_operand (op2, mode))
44890 op2 = force_reg (mode, op2);
44891
44892 /* We only play even/odd games with vectors of SImode. */
44893 gcc_assert (mode == V4SImode || mode == V8SImode || mode == V16SImode);
44894
44895 /* If we're looking for the odd results, shift those members down to
44896 the even slots. For some cpus this is faster than a PSHUFD. */
44897 if (odd_p)
44898 {
44899 /* For XOP use vpmacsdqh, but only for smult, as it is only
44900 signed. */
44901 if (TARGET_XOP && mode == V4SImode && !uns_p)
44902 {
44903 x = force_reg (wmode, CONST0_RTX (wmode));
44904 emit_insn (gen_xop_pmacsdqh (dest, op1, op2, x));
44905 return;
44906 }
44907
44908 x = GEN_INT (GET_MODE_UNIT_BITSIZE (mode));
44909 if (!const_vector_equal_evenodd_p (orig_op1))
44910 op1 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op1),
44911 x, NULL, 1, OPTAB_DIRECT);
44912 if (!const_vector_equal_evenodd_p (orig_op2))
44913 op2 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op2),
44914 x, NULL, 1, OPTAB_DIRECT);
44915 op1 = gen_lowpart (mode, op1);
44916 op2 = gen_lowpart (mode, op2);
44917 }
44918
44919 if (mode == V16SImode)
44920 {
44921 if (uns_p)
44922 x = gen_vec_widen_umult_even_v16si (dest, op1, op2);
44923 else
44924 x = gen_vec_widen_smult_even_v16si (dest, op1, op2);
44925 }
44926 else if (mode == V8SImode)
44927 {
44928 if (uns_p)
44929 x = gen_vec_widen_umult_even_v8si (dest, op1, op2);
44930 else
44931 x = gen_vec_widen_smult_even_v8si (dest, op1, op2);
44932 }
44933 else if (uns_p)
44934 x = gen_vec_widen_umult_even_v4si (dest, op1, op2);
44935 else if (TARGET_SSE4_1)
44936 x = gen_sse4_1_mulv2siv2di3 (dest, op1, op2);
44937 else
44938 {
44939 rtx s1, s2, t0, t1, t2;
44940
44941 /* The easiest way to implement this without PMULDQ is to go through
44942 the motions as if we are performing a full 64-bit multiply, except
44943 that we need to do less shuffling of the elements. */
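/* Concretely, viewing each signed 32-bit element A as the 64-bit value
   LO(A) + (HI(A) << 32) with HI(A) its sign extension (0 or -1,
   computed as S1/S2 below), the product modulo 2^64 is
   LO(A)*LO(B) + ((HI(A)*LO(B) + HI(B)*LO(A)) << 32), which is exactly
   the sequence of unsigned even multiplies, add and shift below.  */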
44944
44945 /* Compute the sign-extension, aka highparts, of the two operands. */
44946 s1 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
44947 op1, pc_rtx, pc_rtx);
44948 s2 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
44949 op2, pc_rtx, pc_rtx);
44950
44951 /* Multiply LO(A) * HI(B), and vice-versa. */
44952 t1 = gen_reg_rtx (wmode);
44953 t2 = gen_reg_rtx (wmode);
44954 emit_insn (gen_vec_widen_umult_even_v4si (t1, s1, op2));
44955 emit_insn (gen_vec_widen_umult_even_v4si (t2, s2, op1));
44956
44957 /* Multiply LO(A) * LO(B). */
44958 t0 = gen_reg_rtx (wmode);
44959 emit_insn (gen_vec_widen_umult_even_v4si (t0, op1, op2));
44960
44961 /* Combine and shift the highparts into place. */
44962 t1 = expand_binop (wmode, add_optab, t1, t2, t1, 1, OPTAB_DIRECT);
44963 t1 = expand_binop (wmode, ashl_optab, t1, GEN_INT (32), t1,
44964 1, OPTAB_DIRECT);
44965
44966 /* Combine high and low parts. */
44967 force_expand_binop (wmode, add_optab, t0, t1, dest, 1, OPTAB_DIRECT);
44968 return;
44969 }
44970 emit_insn (x);
44971 }
44972
44973 void
44974 ix86_expand_mul_widen_hilo (rtx dest, rtx op1, rtx op2,
44975 bool uns_p, bool high_p)
44976 {
44977 enum machine_mode wmode = GET_MODE (dest);
44978 enum machine_mode mode = GET_MODE (op1);
44979 rtx t1, t2, t3, t4, mask;
44980
44981 switch (mode)
44982 {
44983 case V4SImode:
44984 t1 = gen_reg_rtx (mode);
44985 t2 = gen_reg_rtx (mode);
44986 if (TARGET_XOP && !uns_p)
44987 {
44988 /* With XOP, we have pmacsdqh, aka mul_widen_odd. In this case,
44989 shuffle the elements once so that all elements are in the right
44990 place for immediate use: { A C B D }. */
44991 emit_insn (gen_sse2_pshufd_1 (t1, op1, const0_rtx, const2_rtx,
44992 const1_rtx, GEN_INT (3)));
44993 emit_insn (gen_sse2_pshufd_1 (t2, op2, const0_rtx, const2_rtx,
44994 const1_rtx, GEN_INT (3)));
44995 }
44996 else
44997 {
44998 /* Put the elements into place for the multiply. */
44999 ix86_expand_vec_interleave (t1, op1, op1, high_p);
45000 ix86_expand_vec_interleave (t2, op2, op2, high_p);
45001 high_p = false;
45002 }
45003 ix86_expand_mul_widen_evenodd (dest, t1, t2, uns_p, high_p);
45004 break;
45005
45006 case V8SImode:
45007 /* Shuffle the elements between the lanes. After this we
45008 have { A B E F | C D G H } for each operand. */
45009 t1 = gen_reg_rtx (V4DImode);
45010 t2 = gen_reg_rtx (V4DImode);
45011 emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, op1),
45012 const0_rtx, const2_rtx,
45013 const1_rtx, GEN_INT (3)));
45014 emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, op2),
45015 const0_rtx, const2_rtx,
45016 const1_rtx, GEN_INT (3)));
45017
45018 /* Shuffle the elements within the lanes. After this we
45019 have { A A B B | C C D D } or { E E F F | G G H H }. */
45020 t3 = gen_reg_rtx (V8SImode);
45021 t4 = gen_reg_rtx (V8SImode);
45022 mask = GEN_INT (high_p
45023 ? 2 + (2 << 2) + (3 << 4) + (3 << 6)
45024 : 0 + (0 << 2) + (1 << 4) + (1 << 6));
45025 emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1), mask));
45026 emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2), mask));
45027
45028 ix86_expand_mul_widen_evenodd (dest, t3, t4, uns_p, false);
45029 break;
45030
45031 case V8HImode:
45032 case V16HImode:
45033 t1 = expand_binop (mode, smul_optab, op1, op2, NULL_RTX,
45034 uns_p, OPTAB_DIRECT);
45035 t2 = expand_binop (mode,
45036 uns_p ? umul_highpart_optab : smul_highpart_optab,
45037 op1, op2, NULL_RTX, uns_p, OPTAB_DIRECT);
45038 gcc_assert (t1 && t2);
45039
45040 t3 = gen_reg_rtx (mode);
45041 ix86_expand_vec_interleave (t3, t1, t2, high_p);
45042 emit_move_insn (dest, gen_lowpart (wmode, t3));
45043 break;
45044
45045 case V16QImode:
45046 case V32QImode:
45047 t1 = gen_reg_rtx (wmode);
45048 t2 = gen_reg_rtx (wmode);
45049 ix86_expand_sse_unpack (t1, op1, uns_p, high_p);
45050 ix86_expand_sse_unpack (t2, op2, uns_p, high_p);
45051
45052 emit_insn (gen_rtx_SET (VOIDmode, dest, gen_rtx_MULT (wmode, t1, t2)));
45053 break;
45054
45055 default:
45056 gcc_unreachable ();
45057 }
45058 }
45059
45060 void
45061 ix86_expand_sse2_mulv4si3 (rtx op0, rtx op1, rtx op2)
45062 {
45063 rtx res_1, res_2, res_3, res_4;
45064
45065 res_1 = gen_reg_rtx (V4SImode);
45066 res_2 = gen_reg_rtx (V4SImode);
45067 res_3 = gen_reg_rtx (V2DImode);
45068 res_4 = gen_reg_rtx (V2DImode);
45069 ix86_expand_mul_widen_evenodd (res_3, op1, op2, true, false);
45070 ix86_expand_mul_widen_evenodd (res_4, op1, op2, true, true);
45071
45072 /* Move the results in element 2 down to element 1; we don't care
45073 what goes in elements 2 and 3. Then we can merge the parts
45074 back together with an interleave.
45075
45076 Note that two other sequences were tried:
45077 (1) Use interleaves at the start instead of psrldq, which allows
45078 us to use a single shufps to merge things back at the end.
45079 (2) Use shufps here to combine the two vectors, then pshufd to
45080 put the elements in the correct order.
45081 In both cases the cost of the reformatting stall was too high
45082 and the overall sequence slower. */
45083
45084 emit_insn (gen_sse2_pshufd_1 (res_1, gen_lowpart (V4SImode, res_3),
45085 const0_rtx, const2_rtx,
45086 const0_rtx, const0_rtx));
45087 emit_insn (gen_sse2_pshufd_1 (res_2, gen_lowpart (V4SImode, res_4),
45088 const0_rtx, const2_rtx,
45089 const0_rtx, const0_rtx));
45090 res_1 = emit_insn (gen_vec_interleave_lowv4si (op0, res_1, res_2));
45091
45092 set_unique_reg_note (res_1, REG_EQUAL, gen_rtx_MULT (V4SImode, op1, op2));
45093 }
45094
45095 void
45096 ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
45097 {
45098 enum machine_mode mode = GET_MODE (op0);
45099 rtx t1, t2, t3, t4, t5, t6;
45100
45101 if (TARGET_XOP && mode == V2DImode)
45102 {
45103 /* op1: A,B,C,D, op2: E,F,G,H */
45104 op1 = gen_lowpart (V4SImode, op1);
45105 op2 = gen_lowpart (V4SImode, op2);
45106
45107 t1 = gen_reg_rtx (V4SImode);
45108 t2 = gen_reg_rtx (V4SImode);
45109 t3 = gen_reg_rtx (V2DImode);
45110 t4 = gen_reg_rtx (V2DImode);
45111
45112 /* t1: B,A,D,C */
45113 emit_insn (gen_sse2_pshufd_1 (t1, op1,
45114 GEN_INT (1),
45115 GEN_INT (0),
45116 GEN_INT (3),
45117 GEN_INT (2)));
45118
45119 /* t2: (B*E),(A*F),(D*G),(C*H) */
45120 emit_insn (gen_mulv4si3 (t2, t1, op2));
45121
45122 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
45123 emit_insn (gen_xop_phadddq (t3, t2));
45124
45125 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
45126 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
45127
45128 /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */
45129 emit_insn (gen_xop_pmacsdql (op0, op1, op2, t4));
45130 }
45131 else
45132 {
45133 enum machine_mode nmode;
45134 rtx (*umul) (rtx, rtx, rtx);
45135
45136 if (mode == V2DImode)
45137 {
45138 umul = gen_vec_widen_umult_even_v4si;
45139 nmode = V4SImode;
45140 }
45141 else if (mode == V4DImode)
45142 {
45143 umul = gen_vec_widen_umult_even_v8si;
45144 nmode = V8SImode;
45145 }
45146 else if (mode == V8DImode)
45147 {
45148 umul = gen_vec_widen_umult_even_v16si;
45149 nmode = V16SImode;
45150 }
45151 else
45152 gcc_unreachable ();
45153
45154
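/* This is the usual schoolbook decomposition:
   A*B mod 2^64 == LO(A)*LO(B) + ((HI(A)*LO(B) + HI(B)*LO(A)) << 32),
   with the HI parts obtained by the 32-bit right shifts below.  */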
45155 /* Multiply low parts. */
45156 t1 = gen_reg_rtx (mode);
45157 emit_insn (umul (t1, gen_lowpart (nmode, op1), gen_lowpart (nmode, op2)));
45158
45159 /* Shift input vectors right 32 bits so we can multiply high parts. */
45160 t6 = GEN_INT (32);
45161 t2 = expand_binop (mode, lshr_optab, op1, t6, NULL, 1, OPTAB_DIRECT);
45162 t3 = expand_binop (mode, lshr_optab, op2, t6, NULL, 1, OPTAB_DIRECT);
45163
45164 /* Multiply high parts by low parts. */
45165 t4 = gen_reg_rtx (mode);
45166 t5 = gen_reg_rtx (mode);
45167 emit_insn (umul (t4, gen_lowpart (nmode, t2), gen_lowpart (nmode, op2)));
45168 emit_insn (umul (t5, gen_lowpart (nmode, t3), gen_lowpart (nmode, op1)));
45169
45170 /* Combine and shift the highparts back. */
45171 t4 = expand_binop (mode, add_optab, t4, t5, t4, 1, OPTAB_DIRECT);
45172 t4 = expand_binop (mode, ashl_optab, t4, t6, t4, 1, OPTAB_DIRECT);
45173
45174 /* Combine high and low parts. */
45175 force_expand_binop (mode, add_optab, t1, t4, op0, 1, OPTAB_DIRECT);
45176 }
45177
45178 set_unique_reg_note (get_last_insn (), REG_EQUAL,
45179 gen_rtx_MULT (mode, op1, op2));
45180 }
45181
45182 /* Calculate integer abs() using only SSE2 instructions. */
45183
45184 void
45185 ix86_expand_sse2_abs (rtx target, rtx input)
45186 {
45187 enum machine_mode mode = GET_MODE (target);
45188 rtx tmp0, tmp1, x;
45189
45190 switch (mode)
45191 {
45192 /* For 32-bit signed integer X, the best way to calculate the absolute
45193 value of X is (((signed) X >> (W-1)) ^ X) - ((signed) X >> (W-1)). */
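/* E.g. for X == -5: X >> 31 == -1, so (X ^ -1) - (-1) == ~X + 1 == 5;
   for X >= 0 the shift is 0 and the expression reduces to X.  */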
45194 case V4SImode:
45195 tmp0 = expand_simple_binop (mode, ASHIFTRT, input,
45196 GEN_INT (GET_MODE_BITSIZE
45197 (GET_MODE_INNER (mode)) - 1),
45198 NULL, 0, OPTAB_DIRECT);
45199 tmp1 = expand_simple_binop (mode, XOR, tmp0, input,
45200 NULL, 0, OPTAB_DIRECT);
45201 x = expand_simple_binop (mode, MINUS, tmp1, tmp0,
45202 target, 0, OPTAB_DIRECT);
45203 break;
45204
45205 /* For 16-bit signed integer X, the best way to calculate the absolute
45206 value of X is max (X, -X), as SSE2 provides the PMAXSW insn. */
45207 case V8HImode:
45208 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
45209
45210 x = expand_simple_binop (mode, SMAX, tmp0, input,
45211 target, 0, OPTAB_DIRECT);
45212 break;
45213
45214 /* For 8-bit signed integer X, the best way to calculate the absolute
45215 value of X is min ((unsigned char) X, (unsigned char) (-X)),
45216 as SSE2 provides the PMINUB insn. */
45217 case V16QImode:
45218 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
45219
45220 x = expand_simple_binop (V16QImode, UMIN, tmp0, input,
45221 target, 0, OPTAB_DIRECT);
45222 break;
45223
45224 default:
45225 gcc_unreachable ();
45226 }
45227
45228 if (x != target)
45229 emit_move_insn (target, x);
45230 }
45231
45232 /* Expand an insert into a vector register through pinsr insn.
45233 Return true if successful. */
45234
45235 bool
45236 ix86_expand_pinsr (rtx *operands)
45237 {
45238 rtx dst = operands[0];
45239 rtx src = operands[3];
45240
45241 unsigned int size = INTVAL (operands[1]);
45242 unsigned int pos = INTVAL (operands[2]);
45243
45244 if (GET_CODE (dst) == SUBREG)
45245 {
45246 pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
45247 dst = SUBREG_REG (dst);
45248 }
45249
45250 if (GET_CODE (src) == SUBREG)
45251 src = SUBREG_REG (src);
45252
45253 switch (GET_MODE (dst))
45254 {
45255 case V16QImode:
45256 case V8HImode:
45257 case V4SImode:
45258 case V2DImode:
45259 {
45260 enum machine_mode srcmode, dstmode;
45261 rtx (*pinsr)(rtx, rtx, rtx, rtx);
45262
45263 srcmode = mode_for_size (size, MODE_INT, 0);
45264
45265 switch (srcmode)
45266 {
45267 case QImode:
45268 if (!TARGET_SSE4_1)
45269 return false;
45270 dstmode = V16QImode;
45271 pinsr = gen_sse4_1_pinsrb;
45272 break;
45273
45274 case HImode:
45275 if (!TARGET_SSE2)
45276 return false;
45277 dstmode = V8HImode;
45278 pinsr = gen_sse2_pinsrw;
45279 break;
45280
45281 case SImode:
45282 if (!TARGET_SSE4_1)
45283 return false;
45284 dstmode = V4SImode;
45285 pinsr = gen_sse4_1_pinsrd;
45286 break;
45287
45288 case DImode:
45289 gcc_assert (TARGET_64BIT);
45290 if (!TARGET_SSE4_1)
45291 return false;
45292 dstmode = V2DImode;
45293 pinsr = gen_sse4_1_pinsrq;
45294 break;
45295
45296 default:
45297 return false;
45298 }
45299
45300 rtx d = dst;
45301 if (GET_MODE (dst) != dstmode)
45302 d = gen_reg_rtx (dstmode);
45303 src = gen_lowpart (srcmode, src);
45304
45305 pos /= size;
45306
45307 emit_insn (pinsr (d, gen_lowpart (dstmode, dst), src,
45308 GEN_INT (1 << pos)));
45309 if (d != dst)
45310 emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
45311 return true;
45312 }
45313
45314 default:
45315 return false;
45316 }
45317 }
45318 \f
45319 /* This function returns the calling-ABI-specific va_list type node,
45320 i.e. the va_list type appropriate for FNDECL. */
45321
45322 static tree
45323 ix86_fn_abi_va_list (tree fndecl)
45324 {
45325 if (!TARGET_64BIT)
45326 return va_list_type_node;
45327 gcc_assert (fndecl != NULL_TREE);
45328
45329 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
45330 return ms_va_list_type_node;
45331 else
45332 return sysv_va_list_type_node;
45333 }
45334
45335 /* Returns the canonical va_list type specified by TYPE. If there
45336 is no valid TYPE provided, it returns NULL_TREE. */
45337
45338 static tree
45339 ix86_canonical_va_list_type (tree type)
45340 {
45341 tree wtype, htype;
45342
45343 /* Resolve references and pointers to va_list type. */
45344 if (TREE_CODE (type) == MEM_REF)
45345 type = TREE_TYPE (type);
45346 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
45347 type = TREE_TYPE (type);
45348 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
45349 type = TREE_TYPE (type);
45350
45351 if (TARGET_64BIT && va_list_type_node != NULL_TREE)
45352 {
45353 wtype = va_list_type_node;
45354 gcc_assert (wtype != NULL_TREE);
45355 htype = type;
45356 if (TREE_CODE (wtype) == ARRAY_TYPE)
45357 {
45358 /* If va_list is an array type, the argument may have decayed
45359 to a pointer type, e.g. by being passed to another function.
45360 In that case, unwrap both types so that we can compare the
45361 underlying records. */
45362 if (TREE_CODE (htype) == ARRAY_TYPE
45363 || POINTER_TYPE_P (htype))
45364 {
45365 wtype = TREE_TYPE (wtype);
45366 htype = TREE_TYPE (htype);
45367 }
45368 }
45369 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
45370 return va_list_type_node;
45371 wtype = sysv_va_list_type_node;
45372 gcc_assert (wtype != NULL_TREE);
45373 htype = type;
45374 if (TREE_CODE (wtype) == ARRAY_TYPE)
45375 {
45376 /* If va_list is an array type, the argument may have decayed
45377 to a pointer type, e.g. by being passed to another function.
45378 In that case, unwrap both types so that we can compare the
45379 underlying records. */
45380 if (TREE_CODE (htype) == ARRAY_TYPE
45381 || POINTER_TYPE_P (htype))
45382 {
45383 wtype = TREE_TYPE (wtype);
45384 htype = TREE_TYPE (htype);
45385 }
45386 }
45387 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
45388 return sysv_va_list_type_node;
45389 wtype = ms_va_list_type_node;
45390 gcc_assert (wtype != NULL_TREE);
45391 htype = type;
45392 if (TREE_CODE (wtype) == ARRAY_TYPE)
45393 {
45394 /* If va_list is an array type, the argument may have decayed
45395 to a pointer type, e.g. by being passed to another function.
45396 In that case, unwrap both types so that we can compare the
45397 underlying records. */
45398 if (TREE_CODE (htype) == ARRAY_TYPE
45399 || POINTER_TYPE_P (htype))
45400 {
45401 wtype = TREE_TYPE (wtype);
45402 htype = TREE_TYPE (htype);
45403 }
45404 }
45405 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
45406 return ms_va_list_type_node;
45407 return NULL_TREE;
45408 }
45409 return std_canonical_va_list_type (type);
45410 }
45411
45412 /* Iterate through the target-specific builtin types for va_list.
45413 IDX denotes the iterator, *PTREE is set to the result type of
45414 the va_list builtin, and *PNAME to its internal type.
45415 Returns zero if there is no element for this index, otherwise
45416 IDX should be increased upon the next call.
45417 Note, do not iterate a base builtin's name like __builtin_va_list.
45418 Used from c_common_nodes_and_builtins. */
45419
45420 static int
45421 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
45422 {
45423 if (TARGET_64BIT)
45424 {
45425 switch (idx)
45426 {
45427 default:
45428 break;
45429
45430 case 0:
45431 *ptree = ms_va_list_type_node;
45432 *pname = "__builtin_ms_va_list";
45433 return 1;
45434
45435 case 1:
45436 *ptree = sysv_va_list_type_node;
45437 *pname = "__builtin_sysv_va_list";
45438 return 1;
45439 }
45440 }
45441
45442 return 0;
45443 }
45444
45445 #undef TARGET_SCHED_DISPATCH
45446 #define TARGET_SCHED_DISPATCH has_dispatch
45447 #undef TARGET_SCHED_DISPATCH_DO
45448 #define TARGET_SCHED_DISPATCH_DO do_dispatch
45449 #undef TARGET_SCHED_REASSOCIATION_WIDTH
45450 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
45451 #undef TARGET_SCHED_REORDER
45452 #define TARGET_SCHED_REORDER ix86_sched_reorder
45453 #undef TARGET_SCHED_ADJUST_PRIORITY
45454 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
45455 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
45456 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
45457 ix86_dependencies_evaluation_hook
45458
45459 /* The size of the dispatch window is the total number of bytes of
45460 object code allowed in a window. */
45461 #define DISPATCH_WINDOW_SIZE 16
45462
45463 /* Number of dispatch windows considered for scheduling. */
45464 #define MAX_DISPATCH_WINDOWS 3
45465
45466 /* Maximum number of instructions in a window. */
45467 #define MAX_INSN 4
45468
45469 /* Maximum number of immediate operands in a window. */
45470 #define MAX_IMM 4
45471
45472 /* Maximum number of immediate bits allowed in a window. */
45473 #define MAX_IMM_SIZE 128
45474
45475 /* Maximum number of 32 bit immediates allowed in a window. */
45476 #define MAX_IMM_32 4
45477
45478 /* Maximum number of 64 bit immediates allowed in a window. */
45479 #define MAX_IMM_64 2
45480
45481 /* Maximum total of loads or prefetches allowed in a window. */
45482 #define MAX_LOAD 2
45483
45484 /* Maximum total of stores allowed in a window. */
45485 #define MAX_STORE 1
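
/* As an example of how these limits combine: a single window accepts at
   most MAX_INSN = 4 instructions carrying at most MAX_IMM = 4 immediates,
   of which at most MAX_IMM_32 = 4 may be 32 bit and at most MAX_IMM_64 = 2
   may be 64 bit, together with at most MAX_LOAD = 2 loads or prefetches
   and MAX_STORE = 1 store.  MAX_DISPATCH_WINDOWS = 3 windows of
   DISPATCH_WINDOW_SIZE = 16 bytes give the 48 byte total that
   process_end_window checks below.  */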
45486
45487 #undef BIG
45488 #define BIG 100
45489
45490
45491 /* Dispatch groups. Instructions that affect the mix in a dispatch window. */
45492 enum dispatch_group {
45493 disp_no_group = 0,
45494 disp_load,
45495 disp_store,
45496 disp_load_store,
45497 disp_prefetch,
45498 disp_imm,
45499 disp_imm_32,
45500 disp_imm_64,
45501 disp_branch,
45502 disp_cmp,
45503 disp_jcc,
45504 disp_last
45505 };
45506
45507 /* Number of allowable groups in a dispatch window. It is an array
45508 indexed by the dispatch_group enum. 100 is used as a big number
45509 because the number of this kind of operation does not have any
45510 effect in a dispatch window, but we still need an entry for it in
45511 the table. */
45512 static unsigned int num_allowable_groups[disp_last] = {
45513 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
45514 };
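
/* For instance, num_allowable_groups[disp_load] is 2 and
   num_allowable_groups[disp_store] is 1, mirroring MAX_LOAD and MAX_STORE
   above, while disp_cmp and disp_jcc get BIG because they never limit a
   window by themselves.  */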
45515
45516 char group_name[disp_last + 1][16] = {
45517 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
45518 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
45519 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
45520 };
45521
45522 /* Instruction path. */
45523 enum insn_path {
45524 no_path = 0,
45525 path_single, /* Single micro op. */
45526 path_double, /* Double micro op. */
45527 path_multi, /* Instructions with more than 2 micro ops. */
45528 last_path
45529 };
45530
45531 /* sched_insn_info defines a window to the instructions scheduled in
45532 the basic block. It contains a pointer to the insn_info table and
45533 the instruction scheduled.
45534
45535 Windows are allocated for each basic block and are linked
45536 together. */
45537 typedef struct sched_insn_info_s {
45538 rtx insn;
45539 enum dispatch_group group;
45540 enum insn_path path;
45541 int byte_len;
45542 int imm_bytes;
45543 } sched_insn_info;
45544
45545 /* Linked list of dispatch windows. This is a two-way list of
45546 dispatch windows of a basic block. It contains information about
45547 the number of uops in the window and the total number of
45548 instructions and of bytes in the object code for this dispatch
45549 window. */
45550 typedef struct dispatch_windows_s {
45551 int num_insn; /* Number of insn in the window. */
45552 int num_uops; /* Number of uops in the window. */
45553 int window_size; /* Number of bytes in the window. */
45554 int window_num; /* Window number, either 0 or 1. */
45555 int num_imm; /* Number of immediates in the window. */
45556 int num_imm_32; /* Number of 32 bit immediates in the window. */
45557 int num_imm_64; /* Number of 64 bit immediates in the window. */
45558 int imm_size; /* Total bytes of immediates in the window. */
45559 int num_loads; /* Total memory loads in the window. */
45560 int num_stores; /* Total memory stores in the window. */
45561 int violation; /* Violation exists in window. */
45562 sched_insn_info *window; /* Pointer to the window. */
45563 struct dispatch_windows_s *next;
45564 struct dispatch_windows_s *prev;
45565 } dispatch_windows;
45566
45567 /* Immediate values used in an insn. */
45568 typedef struct imm_info_s
45569 {
45570 int imm;
45571 int imm32;
45572 int imm64;
45573 } imm_info;
45574
45575 static dispatch_windows *dispatch_window_list;
45576 static dispatch_windows *dispatch_window_list1;
45577
45578 /* Get the memory dispatch group of insn. */
45579
45580 static enum dispatch_group
45581 get_mem_group (rtx insn)
45582 {
45583 enum attr_memory memory;
45584
45585 if (INSN_CODE (insn) < 0)
45586 return disp_no_group;
45587 memory = get_attr_memory (insn);
45588 if (memory == MEMORY_STORE)
45589 return disp_store;
45590
45591 if (memory == MEMORY_LOAD)
45592 return disp_load;
45593
45594 if (memory == MEMORY_BOTH)
45595 return disp_load_store;
45596
45597 return disp_no_group;
45598 }
45599
45600 /* Return true if insn is a compare instruction. */
45601
45602 static bool
45603 is_cmp (rtx insn)
45604 {
45605 enum attr_type type;
45606
45607 type = get_attr_type (insn);
45608 return (type == TYPE_TEST
45609 || type == TYPE_ICMP
45610 || type == TYPE_FCMP
45611 || GET_CODE (PATTERN (insn)) == COMPARE);
45612 }
45613
45614 /* Return true if a dispatch violation was encountered. */
45615
45616 static bool
45617 dispatch_violation (void)
45618 {
45619 if (dispatch_window_list->next)
45620 return dispatch_window_list->next->violation;
45621 return dispatch_window_list->violation;
45622 }
45623
45624 /* Return true if insn is a branch instruction. */
45625
45626 static bool
45627 is_branch (rtx insn)
45628 {
45629 return (CALL_P (insn) || JUMP_P (insn));
45630 }
45631
45632 /* Return true if insn is a prefetch instruction. */
45633
45634 static bool
45635 is_prefetch (rtx insn)
45636 {
45637 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
45638 }
45639
45640 /* This function initializes a dispatch window and the list container holding a
45641 pointer to the window. */
45642
45643 static void
45644 init_window (int window_num)
45645 {
45646 int i;
45647 dispatch_windows *new_list;
45648
45649 if (window_num == 0)
45650 new_list = dispatch_window_list;
45651 else
45652 new_list = dispatch_window_list1;
45653
45654 new_list->num_insn = 0;
45655 new_list->num_uops = 0;
45656 new_list->window_size = 0;
45657 new_list->next = NULL;
45658 new_list->prev = NULL;
45659 new_list->window_num = window_num;
45660 new_list->num_imm = 0;
45661 new_list->num_imm_32 = 0;
45662 new_list->num_imm_64 = 0;
45663 new_list->imm_size = 0;
45664 new_list->num_loads = 0;
45665 new_list->num_stores = 0;
45666 new_list->violation = false;
45667
45668 for (i = 0; i < MAX_INSN; i++)
45669 {
45670 new_list->window[i].insn = NULL;
45671 new_list->window[i].group = disp_no_group;
45672 new_list->window[i].path = no_path;
45673 new_list->window[i].byte_len = 0;
45674 new_list->window[i].imm_bytes = 0;
45675 }
45676 return;
45677 }
45678
45679 /* This function allocates and initializes a dispatch window and the
45680 list container holding a pointer to the window. */
45681
45682 static dispatch_windows *
45683 allocate_window (void)
45684 {
45685 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
45686 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
45687
45688 return new_list;
45689 }
45690
45691 /* This routine initializes the dispatch scheduling information. It
45692 initiates building dispatch scheduler tables and constructs the
45693 first dispatch window. */
45694
45695 static void
45696 init_dispatch_sched (void)
45697 {
45698 /* Allocate a dispatch list and a window. */
45699 dispatch_window_list = allocate_window ();
45700 dispatch_window_list1 = allocate_window ();
45701 init_window (0);
45702 init_window (1);
45703 }
45704
45705 /* This function returns true if a branch is detected. End of a basic block
45706 does not have to be a branch, but here we assume only branches end a
45707 window. */
45708
45709 static bool
45710 is_end_basic_block (enum dispatch_group group)
45711 {
45712 return group == disp_branch;
45713 }
45714
45715 /* This function is called when the end of a window processing is reached. */
45716
45717 static void
45718 process_end_window (void)
45719 {
45720 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
45721 if (dispatch_window_list->next)
45722 {
45723 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
45724 gcc_assert (dispatch_window_list->window_size
45725 + dispatch_window_list1->window_size <= 48);
45726 init_window (1);
45727 }
45728 init_window (0);
45729 }
45730
45731 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
45732 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
45733 for 48 bytes of instructions. Note that these windows are not the
45734 dispatch windows whose sizes are DISPATCH_WINDOW_SIZE. */
45735
45736 static dispatch_windows *
45737 allocate_next_window (int window_num)
45738 {
45739 if (window_num == 0)
45740 {
45741 if (dispatch_window_list->next)
45742 init_window (1);
45743 init_window (0);
45744 return dispatch_window_list;
45745 }
45746
45747 dispatch_window_list->next = dispatch_window_list1;
45748 dispatch_window_list1->prev = dispatch_window_list;
45749
45750 return dispatch_window_list1;
45751 }
45752
45753 /* Count an immediate operand found in *IN_RTX, recording it in IMM_VALUES. */
45754
45755 static int
45756 find_constant_1 (rtx *in_rtx, imm_info *imm_values)
45757 {
45758 if (*in_rtx == 0)
45759 return 0;
45760
45761 switch (GET_CODE (*in_rtx))
45762 {
45763 case CONST:
45764 case SYMBOL_REF:
45765 case CONST_INT:
45766 (imm_values->imm)++;
45767 if (x86_64_immediate_operand (*in_rtx, SImode))
45768 (imm_values->imm32)++;
45769 else
45770 (imm_values->imm64)++;
45771 break;
45772
45773 case CONST_DOUBLE:
45774 (imm_values->imm)++;
45775 (imm_values->imm64)++;
45776 break;
45777
45778 case CODE_LABEL:
45779 if (LABEL_KIND (*in_rtx) == LABEL_NORMAL)
45780 {
45781 (imm_values->imm)++;
45782 (imm_values->imm32)++;
45783 }
45784 break;
45785
45786 default:
45787 break;
45788 }
45789
45790 return 0;
45791 }
45792
45793 /* Compute number of immediate operands of an instruction. */
45794
45795 static void
45796 find_constant (rtx in_rtx, imm_info *imm_values)
45797 {
45798 for_each_rtx (INSN_P (in_rtx) ? &PATTERN (in_rtx) : &in_rtx,
45799 (rtx_function) find_constant_1, (void *) imm_values);
45800 }
45801
45802 /* Return total size of immediate operands of an instruction along with number
45803 of corresponding immediate operands. It initializes its parameters to zero
45804 before calling FIND_CONSTANT.
45805 INSN is the input instruction. IMM is the total of immediates.
45806 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
45807 bit immediates. */
45808
45809 static int
45810 get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
45811 {
45812 imm_info imm_values = {0, 0, 0};
45813
45814 find_constant (insn, &imm_values);
45815 *imm = imm_values.imm;
45816 *imm32 = imm_values.imm32;
45817 *imm64 = imm_values.imm64;
45818 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
45819 }
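
/* For example, an insn with one 32 bit and one 64 bit immediate yields
   *IMM = 2, *IMM32 = 1, *IMM64 = 1 and a return value of
   1 * 4 + 1 * 8 = 12 bytes.  */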
45820
45821 /* This function indicates whether instruction INSN has any
45822 immediate operand. */
45823
45824 static bool
45825 has_immediate (rtx insn)
45826 {
45827 int num_imm_operand;
45828 int num_imm32_operand;
45829 int num_imm64_operand;
45830
45831 if (insn)
45832 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
45833 &num_imm64_operand);
45834 return false;
45835 }
45836
45837 /* Return single or double path for instructions. */
45838
45839 static enum insn_path
45840 get_insn_path (rtx insn)
45841 {
45842 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
45843
45844 if ((int)path == 0)
45845 return path_single;
45846
45847 if ((int)path == 1)
45848 return path_double;
45849
45850 return path_multi;
45851 }
45852
45853 /* Return insn dispatch group. */
45854
45855 static enum dispatch_group
45856 get_insn_group (rtx insn)
45857 {
45858 enum dispatch_group group = get_mem_group (insn);
45859 if (group)
45860 return group;
45861
45862 if (is_branch (insn))
45863 return disp_branch;
45864
45865 if (is_cmp (insn))
45866 return disp_cmp;
45867
45868 if (has_immediate (insn))
45869 return disp_imm;
45870
45871 if (is_prefetch (insn))
45872 return disp_prefetch;
45873
45874 return disp_no_group;
45875 }
45876
45877 /* Count the number of GROUP restricted instructions in a dispatch
45878 window WINDOW_LIST. */
45879
45880 static int
45881 count_num_restricted (rtx insn, dispatch_windows *window_list)
45882 {
45883 enum dispatch_group group = get_insn_group (insn);
45884 int imm_size;
45885 int num_imm_operand;
45886 int num_imm32_operand;
45887 int num_imm64_operand;
45888
45889 if (group == disp_no_group)
45890 return 0;
45891
45892 if (group == disp_imm)
45893 {
45894 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
45895 &num_imm64_operand);
45896 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
45897 || num_imm_operand + window_list->num_imm > MAX_IMM
45898 || (num_imm32_operand > 0
45899 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
45900 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
45901 || (num_imm64_operand > 0
45902 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
45903 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
45904 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
45905 && num_imm64_operand > 0
45906 && ((window_list->num_imm_64 > 0
45907 && window_list->num_insn >= 2)
45908 || window_list->num_insn >= 3)))
45909 return BIG;
45910
45911 return 1;
45912 }
45913
45914 if ((group == disp_load_store
45915 && (window_list->num_loads >= MAX_LOAD
45916 || window_list->num_stores >= MAX_STORE))
45917 || ((group == disp_load
45918 || group == disp_prefetch)
45919 && window_list->num_loads >= MAX_LOAD)
45920 || (group == disp_store
45921 && window_list->num_stores >= MAX_STORE))
45922 return BIG;
45923
45924 return 1;
45925 }
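
/* For example, a store added to a window that already holds
   MAX_STORE = 1 stores yields BIG, which exceeds
   num_allowable_groups[disp_store] = 1, so fits_dispatch_window below
   rejects the insn.  */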
45926
45927 /* This function returns true if insn satisfies dispatch rules on the
45928 last window scheduled. */
45929
45930 static bool
45931 fits_dispatch_window (rtx insn)
45932 {
45933 dispatch_windows *window_list = dispatch_window_list;
45934 dispatch_windows *window_list_next = dispatch_window_list->next;
45935 unsigned int num_restrict;
45936 enum dispatch_group group = get_insn_group (insn);
45937 enum insn_path path = get_insn_path (insn);
45938 int sum;
45939
45940 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
45941 instructions should be given the lowest priority in the
45942 scheduling process in the Haifa scheduler to make sure they will be
45943 scheduled in the same dispatch window as the reference to them. */
45944 if (group == disp_jcc || group == disp_cmp)
45945 return false;
45946
45947 /* Check nonrestricted. */
45948 if (group == disp_no_group || group == disp_branch)
45949 return true;
45950
45951 /* Get last dispatch window. */
45952 if (window_list_next)
45953 window_list = window_list_next;
45954
45955 if (window_list->window_num == 1)
45956 {
45957 sum = window_list->prev->window_size + window_list->window_size;
45958
45959 if (sum == 32
45960 || (min_insn_size (insn) + sum) >= 48)
45961 /* Window 1 is full. Go for next window. */
45962 return true;
45963 }
45964
45965 num_restrict = count_num_restricted (insn, window_list);
45966
45967 if (num_restrict > num_allowable_groups[group])
45968 return false;
45969
45970 /* See if it fits in the first window. */
45971 if (window_list->window_num == 0)
45972 {
45973 /* The first window should have only single and double path
45974 uops. */
45975 if (path == path_double
45976 && (window_list->num_uops + 2) > MAX_INSN)
45977 return false;
45978 else if (path != path_single)
45979 return false;
45980 }
45981 return true;
45982 }
45983
45984 /* Add an instruction INSN with NUM_UOPS micro-operations to the
45985 dispatch window WINDOW_LIST. */
45986
45987 static void
45988 add_insn_window (rtx insn, dispatch_windows *window_list, int num_uops)
45989 {
45990 int byte_len = min_insn_size (insn);
45991 int num_insn = window_list->num_insn;
45992 int imm_size;
45993 sched_insn_info *window = window_list->window;
45994 enum dispatch_group group = get_insn_group (insn);
45995 enum insn_path path = get_insn_path (insn);
45996 int num_imm_operand;
45997 int num_imm32_operand;
45998 int num_imm64_operand;
45999
46000 if (!window_list->violation && group != disp_cmp
46001 && !fits_dispatch_window (insn))
46002 window_list->violation = true;
46003
46004 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
46005 &num_imm64_operand);
46006
46007 /* Initialize window with new instruction. */
46008 window[num_insn].insn = insn;
46009 window[num_insn].byte_len = byte_len;
46010 window[num_insn].group = group;
46011 window[num_insn].path = path;
46012 window[num_insn].imm_bytes = imm_size;
46013
46014 window_list->window_size += byte_len;
46015 window_list->num_insn = num_insn + 1;
46016 window_list->num_uops = window_list->num_uops + num_uops;
46017 window_list->imm_size += imm_size;
46018 window_list->num_imm += num_imm_operand;
46019 window_list->num_imm_32 += num_imm32_operand;
46020 window_list->num_imm_64 += num_imm64_operand;
46021
46022 if (group == disp_store)
46023 window_list->num_stores += 1;
46024 else if (group == disp_load
46025 || group == disp_prefetch)
46026 window_list->num_loads += 1;
46027 else if (group == disp_load_store)
46028 {
46029 window_list->num_stores += 1;
46030 window_list->num_loads += 1;
46031 }
46032 }
46033
46034 /* Adds a scheduled instruction, INSN, to the current dispatch window.
46035 If the total bytes of instructions or the number of instructions in
46036 the window exceeds the allowable limit, a new window is allocated. */
46037
46038 static void
46039 add_to_dispatch_window (rtx insn)
46040 {
46041 int byte_len;
46042 dispatch_windows *window_list;
46043 dispatch_windows *next_list;
46044 dispatch_windows *window0_list;
46045 enum insn_path path;
46046 enum dispatch_group insn_group;
46047 bool insn_fits;
46048 int num_insn;
46049 int num_uops;
46050 int window_num;
46051 int insn_num_uops;
46052 int sum;
46053
46054 if (INSN_CODE (insn) < 0)
46055 return;
46056
46057 byte_len = min_insn_size (insn);
46058 window_list = dispatch_window_list;
46059 next_list = window_list->next;
46060 path = get_insn_path (insn);
46061 insn_group = get_insn_group (insn);
46062
46063 /* Get the last dispatch window. */
46064 if (next_list)
46065 window_list = dispatch_window_list->next;
46066
46067 if (path == path_single)
46068 insn_num_uops = 1;
46069 else if (path == path_double)
46070 insn_num_uops = 2;
46071 else
46072 insn_num_uops = (int) path;
46073
46074 /* If the current window is full, get a new window.
46075 Window number zero is full if MAX_INSN uops are scheduled in it.
46076 Window number one is full if window zero's bytes plus window
46077 one's bytes equal 32, or if adding the bytes of the new instruction
46078 to that total makes it reach 48 or more, or if it already has MAX_INSN
46079 instructions in it. */
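/* For example, if window zero already holds 20 bytes and window one
   holds 12, the sum is 32 and window one is treated as full; likewise a
   6 byte insn on top of a 43 byte total pushes the sum past 48.  */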
46080 num_insn = window_list->num_insn;
46081 num_uops = window_list->num_uops;
46082 window_num = window_list->window_num;
46083 insn_fits = fits_dispatch_window (insn);
46084
46085 if (num_insn >= MAX_INSN
46086 || num_uops + insn_num_uops > MAX_INSN
46087 || !(insn_fits))
46088 {
46089 window_num = ~window_num & 1;
46090 window_list = allocate_next_window (window_num);
46091 }
46092
46093 if (window_num == 0)
46094 {
46095 add_insn_window (insn, window_list, insn_num_uops);
46096 if (window_list->num_insn >= MAX_INSN
46097 && insn_group == disp_branch)
46098 {
46099 process_end_window ();
46100 return;
46101 }
46102 }
46103 else if (window_num == 1)
46104 {
46105 window0_list = window_list->prev;
46106 sum = window0_list->window_size + window_list->window_size;
46107 if (sum == 32
46108 || (byte_len + sum) >= 48)
46109 {
46110 process_end_window ();
46111 window_list = dispatch_window_list;
46112 }
46113
46114 add_insn_window (insn, window_list, insn_num_uops);
46115 }
46116 else
46117 gcc_unreachable ();
46118
46119 if (is_end_basic_block (insn_group))
46120 {
46121 /* End of basic block is reached; do end-basic-block processing. */
46122 process_end_window ();
46123 return;
46124 }
46125 }
46126
46127 /* Print the dispatch window, WINDOW_NUM, to FILE. */
46128
46129 DEBUG_FUNCTION static void
46130 debug_dispatch_window_file (FILE *file, int window_num)
46131 {
46132 dispatch_windows *list;
46133 int i;
46134
46135 if (window_num == 0)
46136 list = dispatch_window_list;
46137 else
46138 list = dispatch_window_list1;
46139
46140 fprintf (file, "Window #%d:\n", list->window_num);
46141 fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
46142 list->num_insn, list->num_uops, list->window_size);
46143 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
46144 list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
46145
46146 fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
46147 list->num_stores);
46148 fprintf (file, " insn info:\n");
46149
46150 for (i = 0; i < MAX_INSN; i++)
46151 {
46152 if (!list->window[i].insn)
46153 break;
46154 fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
46155 i, group_name[list->window[i].group],
46156 i, (void *)list->window[i].insn,
46157 i, list->window[i].path,
46158 i, list->window[i].byte_len,
46159 i, list->window[i].imm_bytes);
46160 }
46161 }
46162
46163 /* Print to stdout a dispatch window. */
46164
46165 DEBUG_FUNCTION void
46166 debug_dispatch_window (int window_num)
46167 {
46168 debug_dispatch_window_file (stdout, window_num);
46169 }
46170
46171 /* Print INSN dispatch information to FILE. */
46172
46173 DEBUG_FUNCTION static void
46174 debug_insn_dispatch_info_file (FILE *file, rtx insn)
46175 {
46176 int byte_len;
46177 enum insn_path path;
46178 enum dispatch_group group;
46179 int imm_size;
46180 int num_imm_operand;
46181 int num_imm32_operand;
46182 int num_imm64_operand;
46183
46184 if (INSN_CODE (insn) < 0)
46185 return;
46186
46187 byte_len = min_insn_size (insn);
46188 path = get_insn_path (insn);
46189 group = get_insn_group (insn);
46190 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
46191 &num_imm64_operand);
46192
46193 fprintf (file, " insn info:\n");
46194 fprintf (file, " group = %s, path = %d, byte_len = %d\n",
46195 group_name[group], path, byte_len);
46196 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
46197 num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
46198 }
46199
46200 /* Print to stdout the status of the ready list with respect to
46201 dispatch windows. */
46202
46203 DEBUG_FUNCTION void
46204 debug_ready_dispatch (void)
46205 {
46206 int i;
46207 int no_ready = number_in_ready ();
46208
46209 fprintf (stdout, "Number of ready: %d\n", no_ready);
46210
46211 for (i = 0; i < no_ready; i++)
46212 debug_insn_dispatch_info_file (stdout, get_ready_element (i));
46213 }
46214
46215 /* This routine is the driver of the dispatch scheduler. */
46216
46217 static void
46218 do_dispatch (rtx insn, int mode)
46219 {
46220 if (mode == DISPATCH_INIT)
46221 init_dispatch_sched ();
46222 else if (mode == ADD_TO_DISPATCH_WINDOW)
46223 add_to_dispatch_window (insn);
46224 }
46225
46226 /* Return TRUE if Dispatch Scheduling is supported. */
46227
46228 static bool
46229 has_dispatch (rtx insn, int action)
46230 {
46231 if ((TARGET_BDVER1 || TARGET_BDVER2 || TARGET_BDVER3 || TARGET_BDVER4)
46232 && flag_dispatch_scheduler)
46233 switch (action)
46234 {
46235 default:
46236 return false;
46237
46238 case IS_DISPATCH_ON:
46239 return true;
46240 break;
46241
46242 case IS_CMP:
46243 return is_cmp (insn);
46244
46245 case DISPATCH_VIOLATION:
46246 return dispatch_violation ();
46247
46248 case FITS_DISPATCH_WINDOW:
46249 return fits_dispatch_window (insn);
46250 }
46251
46252 return false;
46253 }
46254
46255 /* Implementation of reassociation_width target hook used by
46256 reassoc phase to identify parallelism level in reassociated
46257 tree. Statements tree_code is passed in OPC. Arguments type
46258 is passed in MODE.
46259
46260 Currently parallel reassociation is enabled for Atom
46261 processors only and we set reassociation width to be 2
46262 because Atom may issue up to 2 instructions per cycle.
46263
46264 Return value should be fixed if parallel reassociation is
46265 enabled for other processors. */
46266
46267 static int
46268 ix86_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
46269 enum machine_mode mode)
46270 {
46271 int res = 1;
46272
46273 if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL)
46274 res = 2;
46275 else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL)
46276 res = 2;
46277
46278 return res;
46279 }
46280
46281 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
46282 place emms and femms instructions. */
46283
46284 static enum machine_mode
46285 ix86_preferred_simd_mode (enum machine_mode mode)
46286 {
46287 if (!TARGET_SSE)
46288 return word_mode;
46289
46290 switch (mode)
46291 {
46292 case QImode:
46293 return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V32QImode : V16QImode;
46294 case HImode:
46295 return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V16HImode : V8HImode;
46296 case SImode:
46297 return TARGET_AVX512F ? V16SImode :
46298 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode;
46299 case DImode:
46300 return TARGET_AVX512F ? V8DImode :
46301 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V4DImode : V2DImode;
46302
46303 case SFmode:
46304 if (TARGET_AVX512F)
46305 return V16SFmode;
46306 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
46307 return V8SFmode;
46308 else
46309 return V4SFmode;
46310
46311 case DFmode:
46312 if (!TARGET_VECTORIZE_DOUBLE)
46313 return word_mode;
46314 else if (TARGET_AVX512F)
46315 return V8DFmode;
46316 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
46317 return V4DFmode;
46318 else if (TARGET_SSE2)
46319 return V2DFmode;
46320 /* FALLTHRU */
46321
46322 default:
46323 return word_mode;
46324 }
46325 }
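
/* For example, with AVX enabled and 128 bit vectors not preferred, SImode
   elements are vectorized in V8SImode (eight 32 bit ints per 256 bit
   vector), while AVX512F widens this to V16SImode.  */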
46326
46327 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
46328 vectors. If AVX512F is enabled then try vectorizing with 512bit,
46329 256bit and 128bit vectors. */
46330
46331 static unsigned int
46332 ix86_autovectorize_vector_sizes (void)
46333 {
46334 return TARGET_AVX512F ? 64 | 32 | 16 :
46335 (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
46336 }
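
/* The returned value is a bitmask of vector sizes in bytes; e.g. with
   AVX512F it is 64 | 32 | 16, so the vectorizer tries 512 bit, 256 bit
   and 128 bit vectors in turn.  */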
46337
46338 \f
46339
46340 /* Return class of registers which could be used for pseudo of MODE
46341 and of class RCLASS for spilling instead of memory. Return NO_REGS
46342 if it is not possible or not profitable. */
46343 static reg_class_t
46344 ix86_spill_class (reg_class_t rclass, enum machine_mode mode)
46345 {
46346 if (TARGET_SSE && TARGET_GENERAL_REGS_SSE_SPILL && ! TARGET_MMX
46347 && (mode == SImode || (TARGET_64BIT && mode == DImode))
46348 && INTEGER_CLASS_P (rclass))
46349 return ALL_SSE_REGS;
46350 return NO_REGS;
46351 }
46352
46353 /* Implement targetm.vectorize.init_cost. */
46354
46355 static void *
46356 ix86_init_cost (struct loop *loop_info ATTRIBUTE_UNUSED)
46357 {
46358 unsigned *cost = XNEWVEC (unsigned, 3);
46359 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
46360 return cost;
46361 }
46362
46363 /* Implement targetm.vectorize.add_stmt_cost. */
46364
46365 static unsigned
46366 ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
46367 struct _stmt_vec_info *stmt_info, int misalign,
46368 enum vect_cost_model_location where)
46369 {
46370 unsigned *cost = (unsigned *) data;
46371 unsigned retval = 0;
46372
46373 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
46374 int stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
46375
46376 /* Statements in an inner loop relative to the loop being
46377 vectorized are weighted more heavily. The value here is
46378 arbitrary and could potentially be improved with analysis. */
46379 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
46380 count *= 50; /* FIXME. */
46381
46382 retval = (unsigned) (count * stmt_cost);
46383 cost[where] += retval;
46384
46385 return retval;
46386 }
46387
46388 /* Implement targetm.vectorize.finish_cost. */
46389
46390 static void
46391 ix86_finish_cost (void *data, unsigned *prologue_cost,
46392 unsigned *body_cost, unsigned *epilogue_cost)
46393 {
46394 unsigned *cost = (unsigned *) data;
46395 *prologue_cost = cost[vect_prologue];
46396 *body_cost = cost[vect_body];
46397 *epilogue_cost = cost[vect_epilogue];
46398 }
46399
46400 /* Implement targetm.vectorize.destroy_cost_data. */
46401
46402 static void
46403 ix86_destroy_cost_data (void *data)
46404 {
46405 free (data);
46406 }
46407
46408 /* Validate target specific memory model bits in VAL. */
46409
46410 static unsigned HOST_WIDE_INT
46411 ix86_memmodel_check (unsigned HOST_WIDE_INT val)
46412 {
46413 unsigned HOST_WIDE_INT model = val & MEMMODEL_MASK;
46414 bool strong;
46415
46416 if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
46417 |MEMMODEL_MASK)
46418 || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
46419 {
46420 warning (OPT_Winvalid_memory_model,
46421 "Unknown architecture specific memory model");
46422 return MEMMODEL_SEQ_CST;
46423 }
46424 strong = (model == MEMMODEL_ACQ_REL || model == MEMMODEL_SEQ_CST);
46425 if (val & IX86_HLE_ACQUIRE && !(model == MEMMODEL_ACQUIRE || strong))
46426 {
46427 warning (OPT_Winvalid_memory_model,
46428 "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
46429 return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
46430 }
46431 if (val & IX86_HLE_RELEASE && !(model == MEMMODEL_RELEASE || strong))
46432 {
46433 warning (OPT_Winvalid_memory_model,
46434 "HLE_RELEASE not used with RELEASE or stronger memory model");
46435 return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
46436 }
46437 return val;
46438 }
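
/* For example, combining IX86_HLE_ACQUIRE with MEMMODEL_RELAXED is
   rejected: the HLE_ACQUIRE warning above fires and the model falls back
   to MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE.  */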
46439
46440 /* Set CLONEI->vecsize_mangle, CLONEI->vecsize_int,
46441 CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
46442 CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted,
46443 or number of vecsize_mangle variants that should be emitted. */
46444
46445 static int
46446 ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
46447 struct cgraph_simd_clone *clonei,
46448 tree base_type, int num)
46449 {
46450 int ret = 1;
46451
46452 if (clonei->simdlen
46453 && (clonei->simdlen < 2
46454 || clonei->simdlen > 16
46455 || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
46456 {
46457 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
46458 "unsupported simdlen %d", clonei->simdlen);
46459 return 0;
46460 }
46461
46462 tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
46463 if (TREE_CODE (ret_type) != VOID_TYPE)
46464 switch (TYPE_MODE (ret_type))
46465 {
46466 case QImode:
46467 case HImode:
46468 case SImode:
46469 case DImode:
46470 case SFmode:
46471 case DFmode:
46472 /* case SCmode: */
46473 /* case DCmode: */
46474 break;
46475 default:
46476 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
46477 "unsupported return type %qT for simd\n", ret_type);
46478 return 0;
46479 }
46480
46481 tree t;
46482 int i;
46483
46484 for (t = DECL_ARGUMENTS (node->decl), i = 0; t; t = DECL_CHAIN (t), i++)
46485 /* FIXME: Shouldn't we allow such arguments if they are uniform? */
46486 switch (TYPE_MODE (TREE_TYPE (t)))
46487 {
46488 case QImode:
46489 case HImode:
46490 case SImode:
46491 case DImode:
46492 case SFmode:
46493 case DFmode:
46494 /* case SCmode: */
46495 /* case DCmode: */
46496 break;
46497 default:
46498 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
46499 "unsupported argument type %qT for simd\n", TREE_TYPE (t));
46500 return 0;
46501 }
46502
46503 if (clonei->cilk_elemental)
46504 {
46505 /* Parse the processor clause here. If not present, default to 'b'. */
46506 clonei->vecsize_mangle = 'b';
46507 }
46508 else if (!TREE_PUBLIC (node->decl))
46509 {
46510 /* If the function isn't exported, we can pick up just one ISA
46511 for the clones. */
46512 if (TARGET_AVX2)
46513 clonei->vecsize_mangle = 'd';
46514 else if (TARGET_AVX)
46515 clonei->vecsize_mangle = 'c';
46516 else
46517 clonei->vecsize_mangle = 'b';
46518 ret = 1;
46519 }
46520 else
46521 {
46522 clonei->vecsize_mangle = "bcd"[num];
46523 ret = 3;
46524 }
46525 switch (clonei->vecsize_mangle)
46526 {
46527 case 'b':
46528 clonei->vecsize_int = 128;
46529 clonei->vecsize_float = 128;
46530 break;
46531 case 'c':
46532 clonei->vecsize_int = 128;
46533 clonei->vecsize_float = 256;
46534 break;
46535 case 'd':
46536 clonei->vecsize_int = 256;
46537 clonei->vecsize_float = 256;
46538 break;
46539 }
46540 if (clonei->simdlen == 0)
46541 {
46542 if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
46543 clonei->simdlen = clonei->vecsize_int;
46544 else
46545 clonei->simdlen = clonei->vecsize_float;
46546 clonei->simdlen /= GET_MODE_BITSIZE (TYPE_MODE (base_type));
46547 if (clonei->simdlen > 16)
46548 clonei->simdlen = 16;
46549 }
46550 return ret;
46551 }
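
/* For example, a non-exported clone compiled with AVX2 gets
   vecsize_mangle 'd', so vecsize_int = vecsize_float = 256; if simdlen
   was not given and the base type is double, simdlen becomes
   256 / 64 = 4 lanes.  */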
46552
46553 /* Add target attribute to SIMD clone NODE if needed. */
46554
46555 static void
46556 ix86_simd_clone_adjust (struct cgraph_node *node)
46557 {
46558 const char *str = NULL;
46559 gcc_assert (node->decl == cfun->decl);
46560 switch (node->simdclone->vecsize_mangle)
46561 {
46562 case 'b':
46563 if (!TARGET_SSE2)
46564 str = "sse2";
46565 break;
46566 case 'c':
46567 if (!TARGET_AVX)
46568 str = "avx";
46569 break;
46570 case 'd':
46571 if (!TARGET_AVX2)
46572 str = "avx2";
46573 break;
46574 default:
46575 gcc_unreachable ();
46576 }
46577 if (str == NULL)
46578 return;
46579 push_cfun (NULL);
46580 tree args = build_tree_list (NULL_TREE, build_string (strlen (str), str));
46581 bool ok = ix86_valid_target_attribute_p (node->decl, NULL, args, 0);
46582 gcc_assert (ok);
46583 pop_cfun ();
46584 ix86_previous_fndecl = NULL_TREE;
46585 ix86_set_current_function (node->decl);
46586 }
46587
46588 /* If SIMD clone NODE can't be used in a vectorized loop
46589 in the current function, return -1; otherwise return the badness of using it
46590 (0 if it is most desirable from the vecsize_mangle point of view, 1
46591 slightly less desirable, etc.). */
46592
46593 static int
46594 ix86_simd_clone_usable (struct cgraph_node *node)
46595 {
46596 switch (node->simdclone->vecsize_mangle)
46597 {
46598 case 'b':
46599 if (!TARGET_SSE2)
46600 return -1;
46601 if (!TARGET_AVX)
46602 return 0;
46603 return TARGET_AVX2 ? 2 : 1;
46604 case 'c':
46605 if (!TARGET_AVX)
46606 return -1;
46607 return TARGET_AVX2 ? 1 : 0;
46608 break;
46609 case 'd':
46610 if (!TARGET_AVX2)
46611 return -1;
46612 return 0;
46613 default:
46614 gcc_unreachable ();
46615 }
46616 }
46617
46618 /* This function counts the number of memory references in *X,
46619 accumulating into MEM_COUNT. This value determines the unrolling
46620 factor for bdver3 and bdver4 architectures. */
46621
46622 static int
46623 ix86_loop_memcount (rtx *x, unsigned *mem_count)
46624 {
46625 if (*x != NULL_RTX && MEM_P (*x))
46626 {
46627 enum machine_mode mode;
46628 unsigned int n_words;
46629
46630 mode = GET_MODE (*x);
46631 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
46632
46633 if (n_words > 4)
46634 (*mem_count)+=2;
46635 else
46636 (*mem_count)+=1;
46637 }
46638 return 0;
46639 }
46640
46641 /* This function adjusts the unroll factor based on
46642 the hardware capabilities. For example, bdver3 has
46643 a loop buffer which makes unrolling of smaller
46644 loops less important. This function decides the
46645 unroll factor using the number of memory references
46646 (the value 32 is used) as a heuristic. */
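/* For instance, a loop body containing 8 counted memory references is
   unrolled 32 / 8 = 4 times; loops with no references, or with more than
   32, keep the unroll factor the caller requested.  */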
46647
46648 static unsigned
46649 ix86_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
46650 {
46651 basic_block *bbs;
46652 rtx insn;
46653 unsigned i;
46654 unsigned mem_count = 0;
46655
46656 if (!TARGET_ADJUST_UNROLL)
46657 return nunroll;
46658
46659 /* Count the number of memory references within the loop body. */
46660 bbs = get_loop_body (loop);
46661 for (i = 0; i < loop->num_nodes; i++)
46662 {
46663 for (insn = BB_HEAD (bbs[i]); insn != BB_END (bbs[i]); insn = NEXT_INSN (insn))
46664 if (NONDEBUG_INSN_P (insn))
46665 for_each_rtx (&insn, (rtx_function) ix86_loop_memcount, &mem_count);
46666 }
46667 free (bbs);
46668
46669 if (mem_count && mem_count <= 32)
46670 return 32 / mem_count;
46671
46672 return nunroll;
46673 }
46674
46675
46676 /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
46677
46678 static bool
46679 ix86_float_exceptions_rounding_supported_p (void)
46680 {
46681 /* For x87 floating point with standard excess precision handling,
46682 there is no adddf3 pattern (since x87 floating point only has
46683 XFmode operations) so the default hook implementation gets this
46684 wrong. */
46685 return TARGET_80387 || TARGET_SSE_MATH;
46686 }
46687
46688 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
46689
46690 static void
46691 ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
46692 {
46693 if (!TARGET_80387 && !TARGET_SSE_MATH)
46694 return;
46695 tree exceptions_var = create_tmp_var (integer_type_node, NULL);
46696 if (TARGET_80387)
46697 {
46698 tree fenv_index_type = build_index_type (size_int (6));
46699 tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
46700 tree fenv_var = create_tmp_var (fenv_type, NULL);
46701 mark_addressable (fenv_var);
46702 tree fenv_ptr = build_pointer_type (fenv_type);
46703 tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
46704 fenv_addr = fold_convert (ptr_type_node, fenv_addr);
46705 tree fnstenv = ix86_builtins[IX86_BUILTIN_FNSTENV];
46706 tree fldenv = ix86_builtins[IX86_BUILTIN_FLDENV];
46707 tree fnstsw = ix86_builtins[IX86_BUILTIN_FNSTSW];
46708 tree fnclex = ix86_builtins[IX86_BUILTIN_FNCLEX];
46709 tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
46710 tree hold_fnclex = build_call_expr (fnclex, 0);
46711 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_fnstenv,
46712 hold_fnclex);
46713 *clear = build_call_expr (fnclex, 0);
46714 tree sw_var = create_tmp_var (short_unsigned_type_node, NULL);
46715 mark_addressable (sw_var);
46716 tree su_ptr = build_pointer_type (short_unsigned_type_node);
46717 tree sw_addr = build1 (ADDR_EXPR, su_ptr, sw_var);
46718 tree fnstsw_call = build_call_expr (fnstsw, 1, sw_addr);
46719 tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
46720 tree update_mod = build2 (MODIFY_EXPR, integer_type_node,
46721 exceptions_var, exceptions_x87);
46722 *update = build2 (COMPOUND_EXPR, integer_type_node,
46723 fnstsw_call, update_mod);
46724 tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
46725 *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
46726 }
46727 if (TARGET_SSE_MATH)
46728 {
46729 tree mxcsr_orig_var = create_tmp_var (unsigned_type_node, NULL);
46730 tree mxcsr_mod_var = create_tmp_var (unsigned_type_node, NULL);
46731 tree stmxcsr = ix86_builtins[IX86_BUILTIN_STMXCSR];
46732 tree ldmxcsr = ix86_builtins[IX86_BUILTIN_LDMXCSR];
46733 tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
46734 tree hold_assign_orig = build2 (MODIFY_EXPR, unsigned_type_node,
46735 mxcsr_orig_var, stmxcsr_hold_call);
46736 tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
46737 mxcsr_orig_var,
46738 build_int_cst (unsigned_type_node, 0x1f80));
46739 hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
46740 build_int_cst (unsigned_type_node, 0xffffffc0));
46741 tree hold_assign_mod = build2 (MODIFY_EXPR, unsigned_type_node,
46742 mxcsr_mod_var, hold_mod_val);
46743 tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
46744 tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
46745 hold_assign_orig, hold_assign_mod);
46746 hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
46747 ldmxcsr_hold_call);
46748 if (*hold)
46749 *hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
46750 else
46751 *hold = hold_all;
46752 tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
46753 if (*clear)
46754 *clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
46755 ldmxcsr_clear_call);
46756 else
46757 *clear = ldmxcsr_clear_call;
46758 tree stmxcsr_update_call = build_call_expr (stmxcsr, 0);
46759 tree exceptions_sse = fold_convert (integer_type_node,
46760 stmxcsr_update_call);
46761 if (*update)
46762 {
46763 tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
46764 exceptions_var, exceptions_sse);
46765 tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
46766 exceptions_var, exceptions_mod);
46767 *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
46768 exceptions_assign);
46769 }
46770 else
46771 *update = build2 (MODIFY_EXPR, integer_type_node,
46772 exceptions_var, exceptions_sse);
46773 tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
46774 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
46775 ldmxcsr_update_call);
46776 }
46777 tree atomic_feraiseexcept
46778 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
46779 tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
46780 1, exceptions_var);
46781 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
46782 atomic_feraiseexcept_call);
46783 }
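
/* For SSE math the sequence built above behaves roughly like:
     hold:   mxcsr_orig = __builtin_ia32_stmxcsr ();
             mxcsr_mod = (mxcsr_orig | 0x1f80) & 0xffffffc0;
             __builtin_ia32_ldmxcsr (mxcsr_mod);
     clear:  __builtin_ia32_ldmxcsr (mxcsr_mod);
     update: exceptions |= __builtin_ia32_stmxcsr ();
             __builtin_ia32_ldmxcsr (mxcsr_orig);
             __atomic_feraiseexcept (exceptions);
   i.e. all exceptions are masked and the sticky status flags cleared
   while the compound assignment runs, then any newly raised flags are
   re-raised in the restored environment.  */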
46784
46785 /* Initialize the GCC target structure. */
46786 #undef TARGET_RETURN_IN_MEMORY
46787 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
46788
46789 #undef TARGET_LEGITIMIZE_ADDRESS
46790 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
46791
46792 #undef TARGET_ATTRIBUTE_TABLE
46793 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
46794 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
46795 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
46796 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
46797 # undef TARGET_MERGE_DECL_ATTRIBUTES
46798 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
46799 #endif
46800
46801 #undef TARGET_COMP_TYPE_ATTRIBUTES
46802 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
46803
46804 #undef TARGET_INIT_BUILTINS
46805 #define TARGET_INIT_BUILTINS ix86_init_builtins
46806 #undef TARGET_BUILTIN_DECL
46807 #define TARGET_BUILTIN_DECL ix86_builtin_decl
46808 #undef TARGET_EXPAND_BUILTIN
46809 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
46810
46811 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
46812 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
46813 ix86_builtin_vectorized_function
46814
46815 #undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
46816 #define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load
46817
46818 #undef TARGET_VECTORIZE_BUILTIN_TM_STORE
46819 #define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store
46820
46821 #undef TARGET_VECTORIZE_BUILTIN_GATHER
46822 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
46823
46824 #undef TARGET_BUILTIN_RECIPROCAL
46825 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
46826
46827 #undef TARGET_ASM_FUNCTION_EPILOGUE
46828 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
46829
46830 #undef TARGET_ENCODE_SECTION_INFO
46831 #ifndef SUBTARGET_ENCODE_SECTION_INFO
46832 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
46833 #else
46834 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
46835 #endif
46836
46837 #undef TARGET_ASM_OPEN_PAREN
46838 #define TARGET_ASM_OPEN_PAREN ""
46839 #undef TARGET_ASM_CLOSE_PAREN
46840 #define TARGET_ASM_CLOSE_PAREN ""
46841
46842 #undef TARGET_ASM_BYTE_OP
46843 #define TARGET_ASM_BYTE_OP ASM_BYTE
46844
46845 #undef TARGET_ASM_ALIGNED_HI_OP
46846 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
46847 #undef TARGET_ASM_ALIGNED_SI_OP
46848 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
46849 #ifdef ASM_QUAD
46850 #undef TARGET_ASM_ALIGNED_DI_OP
46851 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
46852 #endif
46853
46854 #undef TARGET_PROFILE_BEFORE_PROLOGUE
46855 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
46856
46857 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
46858 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
46859
46860 #undef TARGET_ASM_UNALIGNED_HI_OP
46861 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
46862 #undef TARGET_ASM_UNALIGNED_SI_OP
46863 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
46864 #undef TARGET_ASM_UNALIGNED_DI_OP
46865 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
46866
46867 #undef TARGET_PRINT_OPERAND
46868 #define TARGET_PRINT_OPERAND ix86_print_operand
46869 #undef TARGET_PRINT_OPERAND_ADDRESS
46870 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
46871 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
46872 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
46873 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
46874 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
46875
46876 #undef TARGET_SCHED_INIT_GLOBAL
46877 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
46878 #undef TARGET_SCHED_ADJUST_COST
46879 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
46880 #undef TARGET_SCHED_ISSUE_RATE
46881 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
46882 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
46883 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
46884 ia32_multipass_dfa_lookahead
46885 #undef TARGET_SCHED_MACRO_FUSION_P
46886 #define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
46887 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
46888 #define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
46889
46890 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
46891 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
46892
46893 #undef TARGET_MEMMODEL_CHECK
46894 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
46895
46896 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
46897 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv
46898
46899 #ifdef HAVE_AS_TLS
46900 #undef TARGET_HAVE_TLS
46901 #define TARGET_HAVE_TLS true
46902 #endif
46903 #undef TARGET_CANNOT_FORCE_CONST_MEM
46904 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
46905 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
46906 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
46907
46908 #undef TARGET_DELEGITIMIZE_ADDRESS
46909 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
46910
46911 #undef TARGET_MS_BITFIELD_LAYOUT_P
46912 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
46913
46914 #if TARGET_MACHO
46915 #undef TARGET_BINDS_LOCAL_P
46916 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
46917 #endif
46918 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
46919 #undef TARGET_BINDS_LOCAL_P
46920 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
46921 #endif
46922
46923 #undef TARGET_ASM_OUTPUT_MI_THUNK
46924 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
46925 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
46926 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
46927
46928 #undef TARGET_ASM_FILE_START
46929 #define TARGET_ASM_FILE_START x86_file_start
46930
46931 #undef TARGET_OPTION_OVERRIDE
46932 #define TARGET_OPTION_OVERRIDE ix86_option_override
46933
46934 #undef TARGET_REGISTER_MOVE_COST
46935 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
46936 #undef TARGET_MEMORY_MOVE_COST
46937 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
46938 #undef TARGET_RTX_COSTS
46939 #define TARGET_RTX_COSTS ix86_rtx_costs
46940 #undef TARGET_ADDRESS_COST
46941 #define TARGET_ADDRESS_COST ix86_address_cost
46942
46943 #undef TARGET_FIXED_CONDITION_CODE_REGS
46944 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
46945 #undef TARGET_CC_MODES_COMPATIBLE
46946 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
46947
46948 #undef TARGET_MACHINE_DEPENDENT_REORG
46949 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
46950
46951 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
46952 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
46953
46954 #undef TARGET_BUILD_BUILTIN_VA_LIST
46955 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
46956
46957 #undef TARGET_FOLD_BUILTIN
46958 #define TARGET_FOLD_BUILTIN ix86_fold_builtin
46959
46960 #undef TARGET_COMPARE_VERSION_PRIORITY
46961 #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
46962
46963 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
46964 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
46965 ix86_generate_version_dispatcher_body
46966
46967 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
46968 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
46969 ix86_get_function_versions_dispatcher
46970
46971 #undef TARGET_ENUM_VA_LIST_P
46972 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
46973
46974 #undef TARGET_FN_ABI_VA_LIST
46975 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
46976
46977 #undef TARGET_CANONICAL_VA_LIST_TYPE
46978 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
46979
46980 #undef TARGET_EXPAND_BUILTIN_VA_START
46981 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
46982
46983 #undef TARGET_MD_ASM_CLOBBERS
46984 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
46985
46986 #undef TARGET_PROMOTE_PROTOTYPES
46987 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
46988 #undef TARGET_SETUP_INCOMING_VARARGS
46989 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
46990 #undef TARGET_MUST_PASS_IN_STACK
46991 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
46992 #undef TARGET_FUNCTION_ARG_ADVANCE
46993 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
46994 #undef TARGET_FUNCTION_ARG
46995 #define TARGET_FUNCTION_ARG ix86_function_arg
46996 #undef TARGET_FUNCTION_ARG_BOUNDARY
46997 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
46998 #undef TARGET_PASS_BY_REFERENCE
46999 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
47000 #undef TARGET_INTERNAL_ARG_POINTER
47001 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
47002 #undef TARGET_UPDATE_STACK_BOUNDARY
47003 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
47004 #undef TARGET_GET_DRAP_RTX
47005 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
47006 #undef TARGET_STRICT_ARGUMENT_NAMING
47007 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
47008 #undef TARGET_STATIC_CHAIN
47009 #define TARGET_STATIC_CHAIN ix86_static_chain
47010 #undef TARGET_TRAMPOLINE_INIT
47011 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
47012 #undef TARGET_RETURN_POPS_ARGS
47013 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
47014
47015 #undef TARGET_LEGITIMATE_COMBINED_INSN
47016 #define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn
47017
47018 #undef TARGET_ASAN_SHADOW_OFFSET
47019 #define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset
47020
47021 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
47022 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
47023
47024 #undef TARGET_SCALAR_MODE_SUPPORTED_P
47025 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
47026
47027 #undef TARGET_VECTOR_MODE_SUPPORTED_P
47028 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
47029
47030 #undef TARGET_C_MODE_FOR_SUFFIX
47031 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
47032
47033 #ifdef HAVE_AS_TLS
47034 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
47035 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
47036 #endif
47037
47038 #ifdef SUBTARGET_INSERT_ATTRIBUTES
47039 #undef TARGET_INSERT_ATTRIBUTES
47040 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
47041 #endif
47042
47043 #undef TARGET_MANGLE_TYPE
47044 #define TARGET_MANGLE_TYPE ix86_mangle_type
47045
47046 #if !TARGET_MACHO
47047 #undef TARGET_STACK_PROTECT_FAIL
47048 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
47049 #endif
47050
47051 #undef TARGET_FUNCTION_VALUE
47052 #define TARGET_FUNCTION_VALUE ix86_function_value
47053
47054 #undef TARGET_FUNCTION_VALUE_REGNO_P
47055 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
47056
47057 #undef TARGET_PROMOTE_FUNCTION_MODE
47058 #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
47059
47060 #undef TARGET_MEMBER_TYPE_FORCES_BLK
47061 #define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk
47062
47063 #undef TARGET_INSTANTIATE_DECLS
47064 #define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls
47065
47066 #undef TARGET_SECONDARY_RELOAD
47067 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
47068
47069 #undef TARGET_CLASS_MAX_NREGS
47070 #define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
47071
47072 #undef TARGET_PREFERRED_RELOAD_CLASS
47073 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
47074 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
47075 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
47076 #undef TARGET_CLASS_LIKELY_SPILLED_P
47077 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
47078
47079 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
47080 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
47081 ix86_builtin_vectorization_cost
47082 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
47083 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
47084 ix86_vectorize_vec_perm_const_ok
47085 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
47086 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
47087 ix86_preferred_simd_mode
47088 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
47089 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
47090 ix86_autovectorize_vector_sizes
47091 #undef TARGET_VECTORIZE_INIT_COST
47092 #define TARGET_VECTORIZE_INIT_COST ix86_init_cost
47093 #undef TARGET_VECTORIZE_ADD_STMT_COST
47094 #define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
47095 #undef TARGET_VECTORIZE_FINISH_COST
47096 #define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
47097 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
47098 #define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data
47099
47100 #undef TARGET_SET_CURRENT_FUNCTION
47101 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
47102
47103 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
47104 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
47105
47106 #undef TARGET_OPTION_SAVE
47107 #define TARGET_OPTION_SAVE ix86_function_specific_save
47108
47109 #undef TARGET_OPTION_RESTORE
47110 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
47111
47112 #undef TARGET_OPTION_PRINT
47113 #define TARGET_OPTION_PRINT ix86_function_specific_print
47114
47115 #undef TARGET_OPTION_FUNCTION_VERSIONS
47116 #define TARGET_OPTION_FUNCTION_VERSIONS ix86_function_versions
47117
47118 #undef TARGET_CAN_INLINE_P
47119 #define TARGET_CAN_INLINE_P ix86_can_inline_p
47120
47121 #undef TARGET_EXPAND_TO_RTL_HOOK
47122 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
47123
47124 #undef TARGET_LEGITIMATE_ADDRESS_P
47125 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
47126
47127 #undef TARGET_LRA_P
47128 #define TARGET_LRA_P hook_bool_void_true
47129
47130 #undef TARGET_REGISTER_PRIORITY
47131 #define TARGET_REGISTER_PRIORITY ix86_register_priority
47132
47133 #undef TARGET_REGISTER_USAGE_LEVELING_P
47134 #define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true
47135
47136 #undef TARGET_LEGITIMATE_CONSTANT_P
47137 #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
47138
47139 #undef TARGET_FRAME_POINTER_REQUIRED
47140 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
47141
47142 #undef TARGET_CAN_ELIMINATE
47143 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
47144
47145 #undef TARGET_EXTRA_LIVE_ON_ENTRY
47146 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
47147
47148 #undef TARGET_ASM_CODE_END
47149 #define TARGET_ASM_CODE_END ix86_code_end
47150
47151 #undef TARGET_CONDITIONAL_REGISTER_USAGE
47152 #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
47153
47154 #if TARGET_MACHO
47155 #undef TARGET_INIT_LIBFUNCS
47156 #define TARGET_INIT_LIBFUNCS darwin_rename_builtins
47157 #endif
47158
47159 #undef TARGET_LOOP_UNROLL_ADJUST
47160 #define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust
47161
47162 #undef TARGET_SPILL_CLASS
47163 #define TARGET_SPILL_CLASS ix86_spill_class
47164
47165 #undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
47166 #define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
47167 ix86_simd_clone_compute_vecsize_and_simdlen
47168
47169 #undef TARGET_SIMD_CLONE_ADJUST
47170 #define TARGET_SIMD_CLONE_ADJUST \
47171 ix86_simd_clone_adjust
47172
47173 #undef TARGET_SIMD_CLONE_USABLE
47174 #define TARGET_SIMD_CLONE_USABLE \
47175 ix86_simd_clone_usable
47176
47177 #undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
47178 #define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
47179 ix86_float_exceptions_rounding_supported_p
47180
47181 struct gcc_target targetm = TARGET_INITIALIZER;
47182 \f
47183 #include "gt-i386.h"