Add MOVBE and RDRND for AMD bdver4
gcc/config/i386/i386.c
1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988-2014 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
10
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "tm.h"
24 #include "rtl.h"
25 #include "tree.h"
26 #include "stringpool.h"
27 #include "attribs.h"
28 #include "calls.h"
29 #include "stor-layout.h"
30 #include "varasm.h"
31 #include "tm_p.h"
32 #include "regs.h"
33 #include "hard-reg-set.h"
34 #include "insn-config.h"
35 #include "conditions.h"
36 #include "output.h"
37 #include "insn-codes.h"
38 #include "insn-attr.h"
39 #include "flags.h"
40 #include "except.h"
41 #include "function.h"
42 #include "recog.h"
43 #include "expr.h"
44 #include "optabs.h"
45 #include "diagnostic-core.h"
46 #include "toplev.h"
47 #include "basic-block.h"
48 #include "ggc.h"
49 #include "target.h"
50 #include "target-def.h"
51 #include "common/common-target.h"
52 #include "langhooks.h"
53 #include "reload.h"
54 #include "cgraph.h"
55 #include "pointer-set.h"
56 #include "hash-table.h"
57 #include "vec.h"
58 #include "basic-block.h"
59 #include "tree-ssa-alias.h"
60 #include "internal-fn.h"
61 #include "gimple-fold.h"
62 #include "tree-eh.h"
63 #include "gimple-expr.h"
64 #include "is-a.h"
65 #include "gimple.h"
66 #include "gimplify.h"
67 #include "cfgloop.h"
68 #include "dwarf2.h"
69 #include "df.h"
70 #include "tm-constrs.h"
71 #include "params.h"
72 #include "cselib.h"
73 #include "debug.h"
74 #include "sched-int.h"
75 #include "sbitmap.h"
76 #include "fibheap.h"
77 #include "opts.h"
78 #include "diagnostic.h"
79 #include "dumpfile.h"
80 #include "tree-pass.h"
81 #include "wide-int.h"
82 #include "context.h"
83 #include "pass_manager.h"
84 #include "target-globals.h"
85 #include "tree-vectorizer.h"
86 #include "shrink-wrap.h"
87 #include "builtins.h"
88
89 static rtx legitimize_dllimport_symbol (rtx, bool);
90 static rtx legitimize_pe_coff_extern_decl (rtx, bool);
91 static rtx legitimize_pe_coff_symbol (rtx, bool);
92
93 #ifndef CHECK_STACK_LIMIT
94 #define CHECK_STACK_LIMIT (-1)
95 #endif
96
97 /* Return index of given mode in mult and division cost tables. */
98 #define MODE_INDEX(mode) \
99 ((mode) == QImode ? 0 \
100 : (mode) == HImode ? 1 \
101 : (mode) == SImode ? 2 \
102 : (mode) == DImode ? 3 \
103 : 4)
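/* MODE_INDEX is the subscript into the per-mode cost arrays of struct
   processor_costs (the "starting multiply" and "divide/mod" initializers
   below).  A minimal usage sketch, assuming the mult_init and mult_bit
   field names of processor_costs as declared in i386.h:

       cost = ix86_cost->mult_init[MODE_INDEX (SImode)]
              + nbits * ix86_cost->mult_bit;

   SImode selects slot 2; any mode other than QI/HI/SI/DImode falls into
   the final "other" slot (index 4).  */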
104
105 /* Processor costs (relative to an add) */
106 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
107 #define COSTS_N_BYTES(N) ((N) * 2)
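/* With COSTS_N_INSNS (N) expanding to (N) * 4 and an add assumed to be
   2 bytes, the two scales coincide: COSTS_N_BYTES (2) == 4 ==
   COSTS_N_INSNS (1), so byte counts used when tuning for size can be
   compared directly against costs expressed in instruction units.  */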
108
109 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall, false}}}
110
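/* How to read the stringop tables below (a sketch, assuming the
   stringop_algs layout declared in i386.h): each array has two entries,
   indexed by whether the target is 64-bit ([0] for 32-bit, [1] for
   64-bit code).  An entry first names the algorithm chosen when the
   block size is unknown, then lists {max_size, algorithm, noalign}
   triples for known sizes, where a max_size of -1 means "all larger
   sizes".  The size-tuned tables just below therefore always pick
   rep_prefix_1_byte (rep movsb / rep stosb).  */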
111 static stringop_algs ix86_size_memcpy[2] = {
112 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
113 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
114 static stringop_algs ix86_size_memset[2] = {
115 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
116 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
117
118 const
119 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
120 COSTS_N_BYTES (2), /* cost of an add instruction */
121 COSTS_N_BYTES (3), /* cost of a lea instruction */
122 COSTS_N_BYTES (2), /* variable shift costs */
123 COSTS_N_BYTES (3), /* constant shift costs */
124 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
125 COSTS_N_BYTES (3), /* HI */
126 COSTS_N_BYTES (3), /* SI */
127 COSTS_N_BYTES (3), /* DI */
128 COSTS_N_BYTES (5)}, /* other */
129 0, /* cost of multiply per each bit set */
130 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
131 COSTS_N_BYTES (3), /* HI */
132 COSTS_N_BYTES (3), /* SI */
133 COSTS_N_BYTES (3), /* DI */
134 COSTS_N_BYTES (5)}, /* other */
135 COSTS_N_BYTES (3), /* cost of movsx */
136 COSTS_N_BYTES (3), /* cost of movzx */
137 0, /* "large" insn */
138 2, /* MOVE_RATIO */
139 2, /* cost for loading QImode using movzbl */
140 {2, 2, 2}, /* cost of loading integer registers
141 in QImode, HImode and SImode.
142 Relative to reg-reg move (2). */
143 {2, 2, 2}, /* cost of storing integer registers */
144 2, /* cost of reg,reg fld/fst */
145 {2, 2, 2}, /* cost of loading fp registers
146 in SFmode, DFmode and XFmode */
147 {2, 2, 2}, /* cost of storing fp registers
148 in SFmode, DFmode and XFmode */
149 3, /* cost of moving MMX register */
150 {3, 3}, /* cost of loading MMX registers
151 in SImode and DImode */
152 {3, 3}, /* cost of storing MMX registers
153 in SImode and DImode */
154 3, /* cost of moving SSE register */
155 {3, 3, 3}, /* cost of loading SSE registers
156 in SImode, DImode and TImode */
157 {3, 3, 3}, /* cost of storing SSE registers
158 in SImode, DImode and TImode */
159 3, /* MMX or SSE register to integer */
160 0, /* size of l1 cache */
161 0, /* size of l2 cache */
162 0, /* size of prefetch block */
163 0, /* number of parallel prefetches */
164 2, /* Branch cost */
165 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
166 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
167 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
168 COSTS_N_BYTES (2), /* cost of FABS instruction. */
169 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
170 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
171 ix86_size_memcpy,
172 ix86_size_memset,
173 1, /* scalar_stmt_cost. */
174 1, /* scalar load_cost. */
175 1, /* scalar_store_cost. */
176 1, /* vec_stmt_cost. */
177 1, /* vec_to_scalar_cost. */
178 1, /* scalar_to_vec_cost. */
179 1, /* vec_align_load_cost. */
180 1, /* vec_unalign_load_cost. */
181 1, /* vec_store_cost. */
182 1, /* cond_taken_branch_cost. */
183 1, /* cond_not_taken_branch_cost. */
184 };
185
186 /* Processor costs (relative to an add) */
187 static stringop_algs i386_memcpy[2] = {
188 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
189 DUMMY_STRINGOP_ALGS};
190 static stringop_algs i386_memset[2] = {
191 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
192 DUMMY_STRINGOP_ALGS};
193
194 static const
195 struct processor_costs i386_cost = { /* 386 specific costs */
196 COSTS_N_INSNS (1), /* cost of an add instruction */
197 COSTS_N_INSNS (1), /* cost of a lea instruction */
198 COSTS_N_INSNS (3), /* variable shift costs */
199 COSTS_N_INSNS (2), /* constant shift costs */
200 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
201 COSTS_N_INSNS (6), /* HI */
202 COSTS_N_INSNS (6), /* SI */
203 COSTS_N_INSNS (6), /* DI */
204 COSTS_N_INSNS (6)}, /* other */
205 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
206 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
207 COSTS_N_INSNS (23), /* HI */
208 COSTS_N_INSNS (23), /* SI */
209 COSTS_N_INSNS (23), /* DI */
210 COSTS_N_INSNS (23)}, /* other */
211 COSTS_N_INSNS (3), /* cost of movsx */
212 COSTS_N_INSNS (2), /* cost of movzx */
213 15, /* "large" insn */
214 3, /* MOVE_RATIO */
215 4, /* cost for loading QImode using movzbl */
216 {2, 4, 2}, /* cost of loading integer registers
217 in QImode, HImode and SImode.
218 Relative to reg-reg move (2). */
219 {2, 4, 2}, /* cost of storing integer registers */
220 2, /* cost of reg,reg fld/fst */
221 {8, 8, 8}, /* cost of loading fp registers
222 in SFmode, DFmode and XFmode */
223 {8, 8, 8}, /* cost of storing fp registers
224 in SFmode, DFmode and XFmode */
225 2, /* cost of moving MMX register */
226 {4, 8}, /* cost of loading MMX registers
227 in SImode and DImode */
228 {4, 8}, /* cost of storing MMX registers
229 in SImode and DImode */
230 2, /* cost of moving SSE register */
231 {4, 8, 16}, /* cost of loading SSE registers
232 in SImode, DImode and TImode */
233 {4, 8, 16}, /* cost of storing SSE registers
234 in SImode, DImode and TImode */
235 3, /* MMX or SSE register to integer */
236 0, /* size of l1 cache */
237 0, /* size of l2 cache */
238 0, /* size of prefetch block */
239 0, /* number of parallel prefetches */
240 1, /* Branch cost */
241 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
242 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
243 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
244 COSTS_N_INSNS (22), /* cost of FABS instruction. */
245 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
246 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
247 i386_memcpy,
248 i386_memset,
249 1, /* scalar_stmt_cost. */
250 1, /* scalar load_cost. */
251 1, /* scalar_store_cost. */
252 1, /* vec_stmt_cost. */
253 1, /* vec_to_scalar_cost. */
254 1, /* scalar_to_vec_cost. */
255 1, /* vec_align_load_cost. */
256 2, /* vec_unalign_load_cost. */
257 1, /* vec_store_cost. */
258 3, /* cond_taken_branch_cost. */
259 1, /* cond_not_taken_branch_cost. */
260 };
261
262 static stringop_algs i486_memcpy[2] = {
263 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
264 DUMMY_STRINGOP_ALGS};
265 static stringop_algs i486_memset[2] = {
266 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
267 DUMMY_STRINGOP_ALGS};
268
269 static const
270 struct processor_costs i486_cost = { /* 486 specific costs */
271 COSTS_N_INSNS (1), /* cost of an add instruction */
272 COSTS_N_INSNS (1), /* cost of a lea instruction */
273 COSTS_N_INSNS (3), /* variable shift costs */
274 COSTS_N_INSNS (2), /* constant shift costs */
275 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
276 COSTS_N_INSNS (12), /* HI */
277 COSTS_N_INSNS (12), /* SI */
278 COSTS_N_INSNS (12), /* DI */
279 COSTS_N_INSNS (12)}, /* other */
280 1, /* cost of multiply per each bit set */
281 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
282 COSTS_N_INSNS (40), /* HI */
283 COSTS_N_INSNS (40), /* SI */
284 COSTS_N_INSNS (40), /* DI */
285 COSTS_N_INSNS (40)}, /* other */
286 COSTS_N_INSNS (3), /* cost of movsx */
287 COSTS_N_INSNS (2), /* cost of movzx */
288 15, /* "large" insn */
289 3, /* MOVE_RATIO */
290 4, /* cost for loading QImode using movzbl */
291 {2, 4, 2}, /* cost of loading integer registers
292 in QImode, HImode and SImode.
293 Relative to reg-reg move (2). */
294 {2, 4, 2}, /* cost of storing integer registers */
295 2, /* cost of reg,reg fld/fst */
296 {8, 8, 8}, /* cost of loading fp registers
297 in SFmode, DFmode and XFmode */
298 {8, 8, 8}, /* cost of storing fp registers
299 in SFmode, DFmode and XFmode */
300 2, /* cost of moving MMX register */
301 {4, 8}, /* cost of loading MMX registers
302 in SImode and DImode */
303 {4, 8}, /* cost of storing MMX registers
304 in SImode and DImode */
305 2, /* cost of moving SSE register */
306 {4, 8, 16}, /* cost of loading SSE registers
307 in SImode, DImode and TImode */
308 {4, 8, 16}, /* cost of storing SSE registers
309 in SImode, DImode and TImode */
310 3, /* MMX or SSE register to integer */
311 4, /* size of l1 cache. 486 has 8kB cache
312 shared for code and data, so 4kB is
313 not really precise. */
314 4, /* size of l2 cache */
315 0, /* size of prefetch block */
316 0, /* number of parallel prefetches */
317 1, /* Branch cost */
318 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
319 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
320 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
321 COSTS_N_INSNS (3), /* cost of FABS instruction. */
322 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
323 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
324 i486_memcpy,
325 i486_memset,
326 1, /* scalar_stmt_cost. */
327 1, /* scalar load_cost. */
328 1, /* scalar_store_cost. */
329 1, /* vec_stmt_cost. */
330 1, /* vec_to_scalar_cost. */
331 1, /* scalar_to_vec_cost. */
332 1, /* vec_align_load_cost. */
333 2, /* vec_unalign_load_cost. */
334 1, /* vec_store_cost. */
335 3, /* cond_taken_branch_cost. */
336 1, /* cond_not_taken_branch_cost. */
337 };
338
339 static stringop_algs pentium_memcpy[2] = {
340 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
341 DUMMY_STRINGOP_ALGS};
342 static stringop_algs pentium_memset[2] = {
343 {libcall, {{-1, rep_prefix_4_byte, false}}},
344 DUMMY_STRINGOP_ALGS};
345
346 static const
347 struct processor_costs pentium_cost = {
348 COSTS_N_INSNS (1), /* cost of an add instruction */
349 COSTS_N_INSNS (1), /* cost of a lea instruction */
350 COSTS_N_INSNS (4), /* variable shift costs */
351 COSTS_N_INSNS (1), /* constant shift costs */
352 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
353 COSTS_N_INSNS (11), /* HI */
354 COSTS_N_INSNS (11), /* SI */
355 COSTS_N_INSNS (11), /* DI */
356 COSTS_N_INSNS (11)}, /* other */
357 0, /* cost of multiply per each bit set */
358 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
359 COSTS_N_INSNS (25), /* HI */
360 COSTS_N_INSNS (25), /* SI */
361 COSTS_N_INSNS (25), /* DI */
362 COSTS_N_INSNS (25)}, /* other */
363 COSTS_N_INSNS (3), /* cost of movsx */
364 COSTS_N_INSNS (2), /* cost of movzx */
365 8, /* "large" insn */
366 6, /* MOVE_RATIO */
367 6, /* cost for loading QImode using movzbl */
368 {2, 4, 2}, /* cost of loading integer registers
369 in QImode, HImode and SImode.
370 Relative to reg-reg move (2). */
371 {2, 4, 2}, /* cost of storing integer registers */
372 2, /* cost of reg,reg fld/fst */
373 {2, 2, 6}, /* cost of loading fp registers
374 in SFmode, DFmode and XFmode */
375 {4, 4, 6}, /* cost of storing fp registers
376 in SFmode, DFmode and XFmode */
377 8, /* cost of moving MMX register */
378 {8, 8}, /* cost of loading MMX registers
379 in SImode and DImode */
380 {8, 8}, /* cost of storing MMX registers
381 in SImode and DImode */
382 2, /* cost of moving SSE register */
383 {4, 8, 16}, /* cost of loading SSE registers
384 in SImode, DImode and TImode */
385 {4, 8, 16}, /* cost of storing SSE registers
386 in SImode, DImode and TImode */
387 3, /* MMX or SSE register to integer */
388 8, /* size of l1 cache. */
389 8, /* size of l2 cache */
390 0, /* size of prefetch block */
391 0, /* number of parallel prefetches */
392 2, /* Branch cost */
393 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
394 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
395 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
396 COSTS_N_INSNS (1), /* cost of FABS instruction. */
397 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
398 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
399 pentium_memcpy,
400 pentium_memset,
401 1, /* scalar_stmt_cost. */
402 1, /* scalar load_cost. */
403 1, /* scalar_store_cost. */
404 1, /* vec_stmt_cost. */
405 1, /* vec_to_scalar_cost. */
406 1, /* scalar_to_vec_cost. */
407 1, /* vec_align_load_cost. */
408 2, /* vec_unalign_load_cost. */
409 1, /* vec_store_cost. */
410 3, /* cond_taken_branch_cost. */
411 1, /* cond_not_taken_branch_cost. */
412 };
413
414 /* PentiumPro has optimized rep instructions for blocks aligned to 8 bytes
415 (we ensure the alignment). For small blocks an inline loop is still a
416 noticeable win; for bigger blocks either rep movsl or rep movsb is the
417 way to go. Rep movsb apparently has a more expensive startup time in the
418 CPU, but after 4K the difference is down in the noise. */
419 static stringop_algs pentiumpro_memcpy[2] = {
420 {rep_prefix_4_byte, {{128, loop, false}, {1024, unrolled_loop, false},
421 {8192, rep_prefix_4_byte, false},
422 {-1, rep_prefix_1_byte, false}}},
423 DUMMY_STRINGOP_ALGS};
424 static stringop_algs pentiumpro_memset[2] = {
425 {rep_prefix_4_byte, {{1024, unrolled_loop, false},
426 {8192, rep_prefix_4_byte, false},
427 {-1, libcall, false}}},
428 DUMMY_STRINGOP_ALGS};
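/* Reading pentiumpro_memcpy above: with an unknown size use rep movsl; for
   known sizes use an inline loop up to 128 bytes, an unrolled loop up to
   1024, rep movsl up to 8192, and rep movsb beyond that.  PentiumPro is a
   32-bit-only target, so the 64-bit entry is DUMMY_STRINGOP_ALGS.  */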
429 static const
430 struct processor_costs pentiumpro_cost = {
431 COSTS_N_INSNS (1), /* cost of an add instruction */
432 COSTS_N_INSNS (1), /* cost of a lea instruction */
433 COSTS_N_INSNS (1), /* variable shift costs */
434 COSTS_N_INSNS (1), /* constant shift costs */
435 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
436 COSTS_N_INSNS (4), /* HI */
437 COSTS_N_INSNS (4), /* SI */
438 COSTS_N_INSNS (4), /* DI */
439 COSTS_N_INSNS (4)}, /* other */
440 0, /* cost of multiply per each bit set */
441 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
442 COSTS_N_INSNS (17), /* HI */
443 COSTS_N_INSNS (17), /* SI */
444 COSTS_N_INSNS (17), /* DI */
445 COSTS_N_INSNS (17)}, /* other */
446 COSTS_N_INSNS (1), /* cost of movsx */
447 COSTS_N_INSNS (1), /* cost of movzx */
448 8, /* "large" insn */
449 6, /* MOVE_RATIO */
450 2, /* cost for loading QImode using movzbl */
451 {4, 4, 4}, /* cost of loading integer registers
452 in QImode, HImode and SImode.
453 Relative to reg-reg move (2). */
454 {2, 2, 2}, /* cost of storing integer registers */
455 2, /* cost of reg,reg fld/fst */
456 {2, 2, 6}, /* cost of loading fp registers
457 in SFmode, DFmode and XFmode */
458 {4, 4, 6}, /* cost of storing fp registers
459 in SFmode, DFmode and XFmode */
460 2, /* cost of moving MMX register */
461 {2, 2}, /* cost of loading MMX registers
462 in SImode and DImode */
463 {2, 2}, /* cost of storing MMX registers
464 in SImode and DImode */
465 2, /* cost of moving SSE register */
466 {2, 2, 8}, /* cost of loading SSE registers
467 in SImode, DImode and TImode */
468 {2, 2, 8}, /* cost of storing SSE registers
469 in SImode, DImode and TImode */
470 3, /* MMX or SSE register to integer */
471 8, /* size of l1 cache. */
472 256, /* size of l2 cache */
473 32, /* size of prefetch block */
474 6, /* number of parallel prefetches */
475 2, /* Branch cost */
476 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
477 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
478 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
479 COSTS_N_INSNS (2), /* cost of FABS instruction. */
480 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
481 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
482 pentiumpro_memcpy,
483 pentiumpro_memset,
484 1, /* scalar_stmt_cost. */
485 1, /* scalar load_cost. */
486 1, /* scalar_store_cost. */
487 1, /* vec_stmt_cost. */
488 1, /* vec_to_scalar_cost. */
489 1, /* scalar_to_vec_cost. */
490 1, /* vec_align_load_cost. */
491 2, /* vec_unalign_load_cost. */
492 1, /* vec_store_cost. */
493 3, /* cond_taken_branch_cost. */
494 1, /* cond_not_taken_branch_cost. */
495 };
496
497 static stringop_algs geode_memcpy[2] = {
498 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
499 DUMMY_STRINGOP_ALGS};
500 static stringop_algs geode_memset[2] = {
501 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
502 DUMMY_STRINGOP_ALGS};
503 static const
504 struct processor_costs geode_cost = {
505 COSTS_N_INSNS (1), /* cost of an add instruction */
506 COSTS_N_INSNS (1), /* cost of a lea instruction */
507 COSTS_N_INSNS (2), /* variable shift costs */
508 COSTS_N_INSNS (1), /* constant shift costs */
509 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
510 COSTS_N_INSNS (4), /* HI */
511 COSTS_N_INSNS (7), /* SI */
512 COSTS_N_INSNS (7), /* DI */
513 COSTS_N_INSNS (7)}, /* other */
514 0, /* cost of multiply per each bit set */
515 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
516 COSTS_N_INSNS (23), /* HI */
517 COSTS_N_INSNS (39), /* SI */
518 COSTS_N_INSNS (39), /* DI */
519 COSTS_N_INSNS (39)}, /* other */
520 COSTS_N_INSNS (1), /* cost of movsx */
521 COSTS_N_INSNS (1), /* cost of movzx */
522 8, /* "large" insn */
523 4, /* MOVE_RATIO */
524 1, /* cost for loading QImode using movzbl */
525 {1, 1, 1}, /* cost of loading integer registers
526 in QImode, HImode and SImode.
527 Relative to reg-reg move (2). */
528 {1, 1, 1}, /* cost of storing integer registers */
529 1, /* cost of reg,reg fld/fst */
530 {1, 1, 1}, /* cost of loading fp registers
531 in SFmode, DFmode and XFmode */
532 {4, 6, 6}, /* cost of storing fp registers
533 in SFmode, DFmode and XFmode */
534
535 1, /* cost of moving MMX register */
536 {1, 1}, /* cost of loading MMX registers
537 in SImode and DImode */
538 {1, 1}, /* cost of storing MMX registers
539 in SImode and DImode */
540 1, /* cost of moving SSE register */
541 {1, 1, 1}, /* cost of loading SSE registers
542 in SImode, DImode and TImode */
543 {1, 1, 1}, /* cost of storing SSE registers
544 in SImode, DImode and TImode */
545 1, /* MMX or SSE register to integer */
546 64, /* size of l1 cache. */
547 128, /* size of l2 cache. */
548 32, /* size of prefetch block */
549 1, /* number of parallel prefetches */
550 1, /* Branch cost */
551 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
552 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
553 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
554 COSTS_N_INSNS (1), /* cost of FABS instruction. */
555 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
556 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
557 geode_memcpy,
558 geode_memset,
559 1, /* scalar_stmt_cost. */
560 1, /* scalar load_cost. */
561 1, /* scalar_store_cost. */
562 1, /* vec_stmt_cost. */
563 1, /* vec_to_scalar_cost. */
564 1, /* scalar_to_vec_cost. */
565 1, /* vec_align_load_cost. */
566 2, /* vec_unalign_load_cost. */
567 1, /* vec_store_cost. */
568 3, /* cond_taken_branch_cost. */
569 1, /* cond_not_taken_branch_cost. */
570 };
571
572 static stringop_algs k6_memcpy[2] = {
573 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
574 DUMMY_STRINGOP_ALGS};
575 static stringop_algs k6_memset[2] = {
576 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
577 DUMMY_STRINGOP_ALGS};
578 static const
579 struct processor_costs k6_cost = {
580 COSTS_N_INSNS (1), /* cost of an add instruction */
581 COSTS_N_INSNS (2), /* cost of a lea instruction */
582 COSTS_N_INSNS (1), /* variable shift costs */
583 COSTS_N_INSNS (1), /* constant shift costs */
584 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
585 COSTS_N_INSNS (3), /* HI */
586 COSTS_N_INSNS (3), /* SI */
587 COSTS_N_INSNS (3), /* DI */
588 COSTS_N_INSNS (3)}, /* other */
589 0, /* cost of multiply per each bit set */
590 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
591 COSTS_N_INSNS (18), /* HI */
592 COSTS_N_INSNS (18), /* SI */
593 COSTS_N_INSNS (18), /* DI */
594 COSTS_N_INSNS (18)}, /* other */
595 COSTS_N_INSNS (2), /* cost of movsx */
596 COSTS_N_INSNS (2), /* cost of movzx */
597 8, /* "large" insn */
598 4, /* MOVE_RATIO */
599 3, /* cost for loading QImode using movzbl */
600 {4, 5, 4}, /* cost of loading integer registers
601 in QImode, HImode and SImode.
602 Relative to reg-reg move (2). */
603 {2, 3, 2}, /* cost of storing integer registers */
604 4, /* cost of reg,reg fld/fst */
605 {6, 6, 6}, /* cost of loading fp registers
606 in SFmode, DFmode and XFmode */
607 {4, 4, 4}, /* cost of storing fp registers
608 in SFmode, DFmode and XFmode */
609 2, /* cost of moving MMX register */
610 {2, 2}, /* cost of loading MMX registers
611 in SImode and DImode */
612 {2, 2}, /* cost of storing MMX registers
613 in SImode and DImode */
614 2, /* cost of moving SSE register */
615 {2, 2, 8}, /* cost of loading SSE registers
616 in SImode, DImode and TImode */
617 {2, 2, 8}, /* cost of storing SSE registers
618 in SImode, DImode and TImode */
619 6, /* MMX or SSE register to integer */
620 32, /* size of l1 cache. */
621 32, /* size of l2 cache. Some models
622 have integrated l2 cache, but
623 optimizing for k6 is not important
624 enough to worry about that. */
625 32, /* size of prefetch block */
626 1, /* number of parallel prefetches */
627 1, /* Branch cost */
628 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
629 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
630 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
631 COSTS_N_INSNS (2), /* cost of FABS instruction. */
632 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
633 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
634 k6_memcpy,
635 k6_memset,
636 1, /* scalar_stmt_cost. */
637 1, /* scalar load_cost. */
638 1, /* scalar_store_cost. */
639 1, /* vec_stmt_cost. */
640 1, /* vec_to_scalar_cost. */
641 1, /* scalar_to_vec_cost. */
642 1, /* vec_align_load_cost. */
643 2, /* vec_unalign_load_cost. */
644 1, /* vec_store_cost. */
645 3, /* cond_taken_branch_cost. */
646 1, /* cond_not_taken_branch_cost. */
647 };
648
649 /* For some reason, Athlon deals better with the REP prefix (relative to loops)
650 than K8 does. Alignment becomes important after 8 bytes for memcpy and
651 128 bytes for memset. */
652 static stringop_algs athlon_memcpy[2] = {
653 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
654 DUMMY_STRINGOP_ALGS};
655 static stringop_algs athlon_memset[2] = {
656 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
657 DUMMY_STRINGOP_ALGS};
658 static const
659 struct processor_costs athlon_cost = {
660 COSTS_N_INSNS (1), /* cost of an add instruction */
661 COSTS_N_INSNS (2), /* cost of a lea instruction */
662 COSTS_N_INSNS (1), /* variable shift costs */
663 COSTS_N_INSNS (1), /* constant shift costs */
664 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
665 COSTS_N_INSNS (5), /* HI */
666 COSTS_N_INSNS (5), /* SI */
667 COSTS_N_INSNS (5), /* DI */
668 COSTS_N_INSNS (5)}, /* other */
669 0, /* cost of multiply per each bit set */
670 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
671 COSTS_N_INSNS (26), /* HI */
672 COSTS_N_INSNS (42), /* SI */
673 COSTS_N_INSNS (74), /* DI */
674 COSTS_N_INSNS (74)}, /* other */
675 COSTS_N_INSNS (1), /* cost of movsx */
676 COSTS_N_INSNS (1), /* cost of movzx */
677 8, /* "large" insn */
678 9, /* MOVE_RATIO */
679 4, /* cost for loading QImode using movzbl */
680 {3, 4, 3}, /* cost of loading integer registers
681 in QImode, HImode and SImode.
682 Relative to reg-reg move (2). */
683 {3, 4, 3}, /* cost of storing integer registers */
684 4, /* cost of reg,reg fld/fst */
685 {4, 4, 12}, /* cost of loading fp registers
686 in SFmode, DFmode and XFmode */
687 {6, 6, 8}, /* cost of storing fp registers
688 in SFmode, DFmode and XFmode */
689 2, /* cost of moving MMX register */
690 {4, 4}, /* cost of loading MMX registers
691 in SImode and DImode */
692 {4, 4}, /* cost of storing MMX registers
693 in SImode and DImode */
694 2, /* cost of moving SSE register */
695 {4, 4, 6}, /* cost of loading SSE registers
696 in SImode, DImode and TImode */
697 {4, 4, 5}, /* cost of storing SSE registers
698 in SImode, DImode and TImode */
699 5, /* MMX or SSE register to integer */
700 64, /* size of l1 cache. */
701 256, /* size of l2 cache. */
702 64, /* size of prefetch block */
703 6, /* number of parallel prefetches */
704 5, /* Branch cost */
705 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
706 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
707 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
708 COSTS_N_INSNS (2), /* cost of FABS instruction. */
709 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
710 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
711 athlon_memcpy,
712 athlon_memset,
713 1, /* scalar_stmt_cost. */
714 1, /* scalar load_cost. */
715 1, /* scalar_store_cost. */
716 1, /* vec_stmt_cost. */
717 1, /* vec_to_scalar_cost. */
718 1, /* scalar_to_vec_cost. */
719 1, /* vec_align_load_cost. */
720 2, /* vec_unalign_load_cost. */
721 1, /* vec_store_cost. */
722 3, /* cond_taken_branch_cost. */
723 1, /* cond_not_taken_branch_cost. */
724 };
725
726 /* K8 has an optimized REP instruction for medium-sized blocks, but for very
727 small blocks it is better to use a loop. For large blocks, a libcall can
728 do nontemporal accesses and beat inline code considerably. */
729 static stringop_algs k8_memcpy[2] = {
730 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
731 {-1, rep_prefix_4_byte, false}}},
732 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
733 {-1, libcall, false}}}};
734 static stringop_algs k8_memset[2] = {
735 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
736 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
737 {libcall, {{48, unrolled_loop, false},
738 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
739 static const
740 struct processor_costs k8_cost = {
741 COSTS_N_INSNS (1), /* cost of an add instruction */
742 COSTS_N_INSNS (2), /* cost of a lea instruction */
743 COSTS_N_INSNS (1), /* variable shift costs */
744 COSTS_N_INSNS (1), /* constant shift costs */
745 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
746 COSTS_N_INSNS (4), /* HI */
747 COSTS_N_INSNS (3), /* SI */
748 COSTS_N_INSNS (4), /* DI */
749 COSTS_N_INSNS (5)}, /* other */
750 0, /* cost of multiply per each bit set */
751 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
752 COSTS_N_INSNS (26), /* HI */
753 COSTS_N_INSNS (42), /* SI */
754 COSTS_N_INSNS (74), /* DI */
755 COSTS_N_INSNS (74)}, /* other */
756 COSTS_N_INSNS (1), /* cost of movsx */
757 COSTS_N_INSNS (1), /* cost of movzx */
758 8, /* "large" insn */
759 9, /* MOVE_RATIO */
760 4, /* cost for loading QImode using movzbl */
761 {3, 4, 3}, /* cost of loading integer registers
762 in QImode, HImode and SImode.
763 Relative to reg-reg move (2). */
764 {3, 4, 3}, /* cost of storing integer registers */
765 4, /* cost of reg,reg fld/fst */
766 {4, 4, 12}, /* cost of loading fp registers
767 in SFmode, DFmode and XFmode */
768 {6, 6, 8}, /* cost of storing fp registers
769 in SFmode, DFmode and XFmode */
770 2, /* cost of moving MMX register */
771 {3, 3}, /* cost of loading MMX registers
772 in SImode and DImode */
773 {4, 4}, /* cost of storing MMX registers
774 in SImode and DImode */
775 2, /* cost of moving SSE register */
776 {4, 3, 6}, /* cost of loading SSE registers
777 in SImode, DImode and TImode */
778 {4, 4, 5}, /* cost of storing SSE registers
779 in SImode, DImode and TImode */
780 5, /* MMX or SSE register to integer */
781 64, /* size of l1 cache. */
782 512, /* size of l2 cache. */
783 64, /* size of prefetch block */
784 /* New AMD processors never drop prefetches; if they cannot be performed
785 immediately, they are queued. We set the number of simultaneous prefetches
786 to a large constant to reflect this (it is probably not a good idea to leave
787 the number of prefetches completely unlimited, as their execution also takes
788 some time). */
789 100, /* number of parallel prefetches */
790 3, /* Branch cost */
791 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
792 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
793 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
794 COSTS_N_INSNS (2), /* cost of FABS instruction. */
795 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
796 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
797
798 k8_memcpy,
799 k8_memset,
800 4, /* scalar_stmt_cost. */
801 2, /* scalar load_cost. */
802 2, /* scalar_store_cost. */
803 5, /* vec_stmt_cost. */
804 0, /* vec_to_scalar_cost. */
805 2, /* scalar_to_vec_cost. */
806 2, /* vec_align_load_cost. */
807 3, /* vec_unalign_load_cost. */
808 3, /* vec_store_cost. */
809 3, /* cond_taken_branch_cost. */
810 2, /* cond_not_taken_branch_cost. */
811 };
812
813 /* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but for
814 very small blocks it is better to use a loop. For large blocks, a libcall can
815 do nontemporal accesses and beat inline code considerably. */
816 static stringop_algs amdfam10_memcpy[2] = {
817 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
818 {-1, rep_prefix_4_byte, false}}},
819 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
820 {-1, libcall, false}}}};
821 static stringop_algs amdfam10_memset[2] = {
822 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
823 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
824 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
825 {-1, libcall, false}}}};
826 struct processor_costs amdfam10_cost = {
827 COSTS_N_INSNS (1), /* cost of an add instruction */
828 COSTS_N_INSNS (2), /* cost of a lea instruction */
829 COSTS_N_INSNS (1), /* variable shift costs */
830 COSTS_N_INSNS (1), /* constant shift costs */
831 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
832 COSTS_N_INSNS (4), /* HI */
833 COSTS_N_INSNS (3), /* SI */
834 COSTS_N_INSNS (4), /* DI */
835 COSTS_N_INSNS (5)}, /* other */
836 0, /* cost of multiply per each bit set */
837 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
838 COSTS_N_INSNS (35), /* HI */
839 COSTS_N_INSNS (51), /* SI */
840 COSTS_N_INSNS (83), /* DI */
841 COSTS_N_INSNS (83)}, /* other */
842 COSTS_N_INSNS (1), /* cost of movsx */
843 COSTS_N_INSNS (1), /* cost of movzx */
844 8, /* "large" insn */
845 9, /* MOVE_RATIO */
846 4, /* cost for loading QImode using movzbl */
847 {3, 4, 3}, /* cost of loading integer registers
848 in QImode, HImode and SImode.
849 Relative to reg-reg move (2). */
850 {3, 4, 3}, /* cost of storing integer registers */
851 4, /* cost of reg,reg fld/fst */
852 {4, 4, 12}, /* cost of loading fp registers
853 in SFmode, DFmode and XFmode */
854 {6, 6, 8}, /* cost of storing fp registers
855 in SFmode, DFmode and XFmode */
856 2, /* cost of moving MMX register */
857 {3, 3}, /* cost of loading MMX registers
858 in SImode and DImode */
859 {4, 4}, /* cost of storing MMX registers
860 in SImode and DImode */
861 2, /* cost of moving SSE register */
862 {4, 4, 3}, /* cost of loading SSE registers
863 in SImode, DImode and TImode */
864 {4, 4, 5}, /* cost of storing SSE registers
865 in SImode, DImode and TImode */
866 3, /* MMX or SSE register to integer */
867 /* On K8:
868 MOVD reg64, xmmreg Double FSTORE 4
869 MOVD reg32, xmmreg Double FSTORE 4
870 On AMDFAM10:
871 MOVD reg64, xmmreg Double FADD 3
872 1/1 1/1
873 MOVD reg32, xmmreg Double FADD 3
874 1/1 1/1 */
875 64, /* size of l1 cache. */
876 512, /* size of l2 cache. */
877 64, /* size of prefetch block */
878 /* New AMD processors never drop prefetches; if they cannot be performed
879 immediately, they are queued. We set the number of simultaneous prefetches
880 to a large constant to reflect this (it is probably not a good idea to leave
881 the number of prefetches completely unlimited, as their execution also takes
882 some time). */
883 100, /* number of parallel prefetches */
884 2, /* Branch cost */
885 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
886 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
887 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
888 COSTS_N_INSNS (2), /* cost of FABS instruction. */
889 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
890 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
891
892 amdfam10_memcpy,
893 amdfam10_memset,
894 4, /* scalar_stmt_cost. */
895 2, /* scalar load_cost. */
896 2, /* scalar_store_cost. */
897 6, /* vec_stmt_cost. */
898 0, /* vec_to_scalar_cost. */
899 2, /* scalar_to_vec_cost. */
900 2, /* vec_align_load_cost. */
901 2, /* vec_unalign_load_cost. */
902 2, /* vec_store_cost. */
903 2, /* cond_taken_branch_cost. */
904 1, /* cond_not_taken_branch_cost. */
905 };
906
907 /* BDVER1 has an optimized REP instruction for medium-sized blocks, but for
908 very small blocks it is better to use a loop. For large blocks, a libcall
909 can do nontemporal accesses and beat inline code considerably. */
910 static stringop_algs bdver1_memcpy[2] = {
911 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
912 {-1, rep_prefix_4_byte, false}}},
913 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
914 {-1, libcall, false}}}};
915 static stringop_algs bdver1_memset[2] = {
916 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
917 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
918 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
919 {-1, libcall, false}}}};
920
921 const struct processor_costs bdver1_cost = {
922 COSTS_N_INSNS (1), /* cost of an add instruction */
923 COSTS_N_INSNS (1), /* cost of a lea instruction */
924 COSTS_N_INSNS (1), /* variable shift costs */
925 COSTS_N_INSNS (1), /* constant shift costs */
926 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
927 COSTS_N_INSNS (4), /* HI */
928 COSTS_N_INSNS (4), /* SI */
929 COSTS_N_INSNS (6), /* DI */
930 COSTS_N_INSNS (6)}, /* other */
931 0, /* cost of multiply per each bit set */
932 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
933 COSTS_N_INSNS (35), /* HI */
934 COSTS_N_INSNS (51), /* SI */
935 COSTS_N_INSNS (83), /* DI */
936 COSTS_N_INSNS (83)}, /* other */
937 COSTS_N_INSNS (1), /* cost of movsx */
938 COSTS_N_INSNS (1), /* cost of movzx */
939 8, /* "large" insn */
940 9, /* MOVE_RATIO */
941 4, /* cost for loading QImode using movzbl */
942 {5, 5, 4}, /* cost of loading integer registers
943 in QImode, HImode and SImode.
944 Relative to reg-reg move (2). */
945 {4, 4, 4}, /* cost of storing integer registers */
946 2, /* cost of reg,reg fld/fst */
947 {5, 5, 12}, /* cost of loading fp registers
948 in SFmode, DFmode and XFmode */
949 {4, 4, 8}, /* cost of storing fp registers
950 in SFmode, DFmode and XFmode */
951 2, /* cost of moving MMX register */
952 {4, 4}, /* cost of loading MMX registers
953 in SImode and DImode */
954 {4, 4}, /* cost of storing MMX registers
955 in SImode and DImode */
956 2, /* cost of moving SSE register */
957 {4, 4, 4}, /* cost of loading SSE registers
958 in SImode, DImode and TImode */
959 {4, 4, 4}, /* cost of storing SSE registers
960 in SImode, DImode and TImode */
961 2, /* MMX or SSE register to integer */
962 /* On K8:
963 MOVD reg64, xmmreg Double FSTORE 4
964 MOVD reg32, xmmreg Double FSTORE 4
965 On AMDFAM10:
966 MOVD reg64, xmmreg Double FADD 3
967 1/1 1/1
968 MOVD reg32, xmmreg Double FADD 3
969 1/1 1/1 */
970 16, /* size of l1 cache. */
971 2048, /* size of l2 cache. */
972 64, /* size of prefetch block */
973 /* New AMD processors never drop prefetches; if they cannot be performed
974 immediately, they are queued. We set the number of simultaneous prefetches
975 to a large constant to reflect this (it is probably not a good idea to leave
976 the number of prefetches completely unlimited, as their execution also takes
977 some time). */
978 100, /* number of parallel prefetches */
979 2, /* Branch cost */
980 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
981 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
982 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
983 COSTS_N_INSNS (2), /* cost of FABS instruction. */
984 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
985 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
986
987 bdver1_memcpy,
988 bdver1_memset,
989 6, /* scalar_stmt_cost. */
990 4, /* scalar load_cost. */
991 4, /* scalar_store_cost. */
992 6, /* vec_stmt_cost. */
993 0, /* vec_to_scalar_cost. */
994 2, /* scalar_to_vec_cost. */
995 4, /* vec_align_load_cost. */
996 4, /* vec_unalign_load_cost. */
997 4, /* vec_store_cost. */
998 2, /* cond_taken_branch_cost. */
999 1, /* cond_not_taken_branch_cost. */
1000 };
1001
1002 /* BDVER2 has an optimized REP instruction for medium-sized blocks, but for
1003 very small blocks it is better to use a loop. For large blocks, a libcall
1004 can do nontemporal accesses and beat inline code considerably. */
1005
1006 static stringop_algs bdver2_memcpy[2] = {
1007 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1008 {-1, rep_prefix_4_byte, false}}},
1009 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1010 {-1, libcall, false}}}};
1011 static stringop_algs bdver2_memset[2] = {
1012 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1013 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1014 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1015 {-1, libcall, false}}}};
1016
1017 const struct processor_costs bdver2_cost = {
1018 COSTS_N_INSNS (1), /* cost of an add instruction */
1019 COSTS_N_INSNS (1), /* cost of a lea instruction */
1020 COSTS_N_INSNS (1), /* variable shift costs */
1021 COSTS_N_INSNS (1), /* constant shift costs */
1022 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1023 COSTS_N_INSNS (4), /* HI */
1024 COSTS_N_INSNS (4), /* SI */
1025 COSTS_N_INSNS (6), /* DI */
1026 COSTS_N_INSNS (6)}, /* other */
1027 0, /* cost of multiply per each bit set */
1028 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1029 COSTS_N_INSNS (35), /* HI */
1030 COSTS_N_INSNS (51), /* SI */
1031 COSTS_N_INSNS (83), /* DI */
1032 COSTS_N_INSNS (83)}, /* other */
1033 COSTS_N_INSNS (1), /* cost of movsx */
1034 COSTS_N_INSNS (1), /* cost of movzx */
1035 8, /* "large" insn */
1036 9, /* MOVE_RATIO */
1037 4, /* cost for loading QImode using movzbl */
1038 {5, 5, 4}, /* cost of loading integer registers
1039 in QImode, HImode and SImode.
1040 Relative to reg-reg move (2). */
1041 {4, 4, 4}, /* cost of storing integer registers */
1042 2, /* cost of reg,reg fld/fst */
1043 {5, 5, 12}, /* cost of loading fp registers
1044 in SFmode, DFmode and XFmode */
1045 {4, 4, 8}, /* cost of storing fp registers
1046 in SFmode, DFmode and XFmode */
1047 2, /* cost of moving MMX register */
1048 {4, 4}, /* cost of loading MMX registers
1049 in SImode and DImode */
1050 {4, 4}, /* cost of storing MMX registers
1051 in SImode and DImode */
1052 2, /* cost of moving SSE register */
1053 {4, 4, 4}, /* cost of loading SSE registers
1054 in SImode, DImode and TImode */
1055 {4, 4, 4}, /* cost of storing SSE registers
1056 in SImode, DImode and TImode */
1057 2, /* MMX or SSE register to integer */
1058 /* On K8:
1059 MOVD reg64, xmmreg Double FSTORE 4
1060 MOVD reg32, xmmreg Double FSTORE 4
1061 On AMDFAM10:
1062 MOVD reg64, xmmreg Double FADD 3
1063 1/1 1/1
1064 MOVD reg32, xmmreg Double FADD 3
1065 1/1 1/1 */
1066 16, /* size of l1 cache. */
1067 2048, /* size of l2 cache. */
1068 64, /* size of prefetch block */
1069 /* New AMD processors never drop prefetches; if they cannot be performed
1070 immediately, they are queued. We set the number of simultaneous prefetches
1071 to a large constant to reflect this (it is probably not a good idea to leave
1072 the number of prefetches completely unlimited, as their execution also takes
1073 some time). */
1074 100, /* number of parallel prefetches */
1075 2, /* Branch cost */
1076 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1077 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1078 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1079 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1080 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1081 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1082
1083 bdver2_memcpy,
1084 bdver2_memset,
1085 6, /* scalar_stmt_cost. */
1086 4, /* scalar load_cost. */
1087 4, /* scalar_store_cost. */
1088 6, /* vec_stmt_cost. */
1089 0, /* vec_to_scalar_cost. */
1090 2, /* scalar_to_vec_cost. */
1091 4, /* vec_align_load_cost. */
1092 4, /* vec_unalign_load_cost. */
1093 4, /* vec_store_cost. */
1094 2, /* cond_taken_branch_cost. */
1095 1, /* cond_not_taken_branch_cost. */
1096 };
1097
1098
1099 /* BDVER3 has an optimized REP instruction for medium-sized blocks, but for
1100 very small blocks it is better to use a loop. For large blocks, a libcall
1101 can do nontemporal accesses and beat inline code considerably. */
1102 static stringop_algs bdver3_memcpy[2] = {
1103 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1104 {-1, rep_prefix_4_byte, false}}},
1105 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1106 {-1, libcall, false}}}};
1107 static stringop_algs bdver3_memset[2] = {
1108 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1109 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1110 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1111 {-1, libcall, false}}}};
1112 struct processor_costs bdver3_cost = {
1113 COSTS_N_INSNS (1), /* cost of an add instruction */
1114 COSTS_N_INSNS (1), /* cost of a lea instruction */
1115 COSTS_N_INSNS (1), /* variable shift costs */
1116 COSTS_N_INSNS (1), /* constant shift costs */
1117 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1118 COSTS_N_INSNS (4), /* HI */
1119 COSTS_N_INSNS (4), /* SI */
1120 COSTS_N_INSNS (6), /* DI */
1121 COSTS_N_INSNS (6)}, /* other */
1122 0, /* cost of multiply per each bit set */
1123 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1124 COSTS_N_INSNS (35), /* HI */
1125 COSTS_N_INSNS (51), /* SI */
1126 COSTS_N_INSNS (83), /* DI */
1127 COSTS_N_INSNS (83)}, /* other */
1128 COSTS_N_INSNS (1), /* cost of movsx */
1129 COSTS_N_INSNS (1), /* cost of movzx */
1130 8, /* "large" insn */
1131 9, /* MOVE_RATIO */
1132 4, /* cost for loading QImode using movzbl */
1133 {5, 5, 4}, /* cost of loading integer registers
1134 in QImode, HImode and SImode.
1135 Relative to reg-reg move (2). */
1136 {4, 4, 4}, /* cost of storing integer registers */
1137 2, /* cost of reg,reg fld/fst */
1138 {5, 5, 12}, /* cost of loading fp registers
1139 in SFmode, DFmode and XFmode */
1140 {4, 4, 8}, /* cost of storing fp registers
1141 in SFmode, DFmode and XFmode */
1142 2, /* cost of moving MMX register */
1143 {4, 4}, /* cost of loading MMX registers
1144 in SImode and DImode */
1145 {4, 4}, /* cost of storing MMX registers
1146 in SImode and DImode */
1147 2, /* cost of moving SSE register */
1148 {4, 4, 4}, /* cost of loading SSE registers
1149 in SImode, DImode and TImode */
1150 {4, 4, 4}, /* cost of storing SSE registers
1151 in SImode, DImode and TImode */
1152 2, /* MMX or SSE register to integer */
1153 16, /* size of l1 cache. */
1154 2048, /* size of l2 cache. */
1155 64, /* size of prefetch block */
1156 /* New AMD processors never drop prefetches; if they cannot be performed
1157 immediately, they are queued. We set the number of simultaneous prefetches
1158 to a large constant to reflect this (it is probably not a good idea to leave
1159 the number of prefetches completely unlimited, as their execution also takes
1160 some time). */
1161 100, /* number of parallel prefetches */
1162 2, /* Branch cost */
1163 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1164 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1165 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1166 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1167 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1168 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1169
1170 bdver3_memcpy,
1171 bdver3_memset,
1172 6, /* scalar_stmt_cost. */
1173 4, /* scalar load_cost. */
1174 4, /* scalar_store_cost. */
1175 6, /* vec_stmt_cost. */
1176 0, /* vec_to_scalar_cost. */
1177 2, /* scalar_to_vec_cost. */
1178 4, /* vec_align_load_cost. */
1179 4, /* vec_unalign_load_cost. */
1180 4, /* vec_store_cost. */
1181 2, /* cond_taken_branch_cost. */
1182 1, /* cond_not_taken_branch_cost. */
1183 };
1184
1185 /* BDVER4 has an optimized REP instruction for medium-sized blocks, but for
1186 very small blocks it is better to use a loop. For large blocks, a libcall
1187 can do nontemporal accesses and beat inline code considerably. */
1188 static stringop_algs bdver4_memcpy[2] = {
1189 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1190 {-1, rep_prefix_4_byte, false}}},
1191 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1192 {-1, libcall, false}}}};
1193 static stringop_algs bdver4_memset[2] = {
1194 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1195 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1196 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1197 {-1, libcall, false}}}};
1198 struct processor_costs bdver4_cost = {
1199 COSTS_N_INSNS (1), /* cost of an add instruction */
1200 COSTS_N_INSNS (1), /* cost of a lea instruction */
1201 COSTS_N_INSNS (1), /* variable shift costs */
1202 COSTS_N_INSNS (1), /* constant shift costs */
1203 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1204 COSTS_N_INSNS (4), /* HI */
1205 COSTS_N_INSNS (4), /* SI */
1206 COSTS_N_INSNS (6), /* DI */
1207 COSTS_N_INSNS (6)}, /* other */
1208 0, /* cost of multiply per each bit set */
1209 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1210 COSTS_N_INSNS (35), /* HI */
1211 COSTS_N_INSNS (51), /* SI */
1212 COSTS_N_INSNS (83), /* DI */
1213 COSTS_N_INSNS (83)}, /* other */
1214 COSTS_N_INSNS (1), /* cost of movsx */
1215 COSTS_N_INSNS (1), /* cost of movzx */
1216 8, /* "large" insn */
1217 9, /* MOVE_RATIO */
1218 4, /* cost for loading QImode using movzbl */
1219 {5, 5, 4}, /* cost of loading integer registers
1220 in QImode, HImode and SImode.
1221 Relative to reg-reg move (2). */
1222 {4, 4, 4}, /* cost of storing integer registers */
1223 2, /* cost of reg,reg fld/fst */
1224 {5, 5, 12}, /* cost of loading fp registers
1225 in SFmode, DFmode and XFmode */
1226 {4, 4, 8}, /* cost of storing fp registers
1227 in SFmode, DFmode and XFmode */
1228 2, /* cost of moving MMX register */
1229 {4, 4}, /* cost of loading MMX registers
1230 in SImode and DImode */
1231 {4, 4}, /* cost of storing MMX registers
1232 in SImode and DImode */
1233 2, /* cost of moving SSE register */
1234 {4, 4, 4}, /* cost of loading SSE registers
1235 in SImode, DImode and TImode */
1236 {4, 4, 4}, /* cost of storing SSE registers
1237 in SImode, DImode and TImode */
1238 2, /* MMX or SSE register to integer */
1239 16, /* size of l1 cache. */
1240 2048, /* size of l2 cache. */
1241 64, /* size of prefetch block */
1242 /* New AMD processors never drop prefetches; if they cannot be performed
1243 immediately, they are queued. We set the number of simultaneous prefetches
1244 to a large constant to reflect this (it is probably not a good idea to leave
1245 the number of prefetches completely unlimited, as their execution also takes
1246 some time). */
1247 100, /* number of parallel prefetches */
1248 2, /* Branch cost */
1249 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1250 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1251 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1252 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1253 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1254 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1255
1256 bdver4_memcpy,
1257 bdver4_memset,
1258 6, /* scalar_stmt_cost. */
1259 4, /* scalar load_cost. */
1260 4, /* scalar_store_cost. */
1261 6, /* vec_stmt_cost. */
1262 0, /* vec_to_scalar_cost. */
1263 2, /* scalar_to_vec_cost. */
1264 4, /* vec_align_load_cost. */
1265 4, /* vec_unalign_load_cost. */
1266 4, /* vec_store_cost. */
1267 2, /* cond_taken_branch_cost. */
1268 1, /* cond_not_taken_branch_cost. */
1269 };
1270
1271 /* BTVER1 has an optimized REP instruction for medium-sized blocks, but for
1272 very small blocks it is better to use a loop. For large blocks, a libcall can
1273 do nontemporal accesses and beat inline code considerably. */
1274 static stringop_algs btver1_memcpy[2] = {
1275 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1276 {-1, rep_prefix_4_byte, false}}},
1277 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1278 {-1, libcall, false}}}};
1279 static stringop_algs btver1_memset[2] = {
1280 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1281 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1282 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1283 {-1, libcall, false}}}};
1284 const struct processor_costs btver1_cost = {
1285 COSTS_N_INSNS (1), /* cost of an add instruction */
1286 COSTS_N_INSNS (2), /* cost of a lea instruction */
1287 COSTS_N_INSNS (1), /* variable shift costs */
1288 COSTS_N_INSNS (1), /* constant shift costs */
1289 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1290 COSTS_N_INSNS (4), /* HI */
1291 COSTS_N_INSNS (3), /* SI */
1292 COSTS_N_INSNS (4), /* DI */
1293 COSTS_N_INSNS (5)}, /* other */
1294 0, /* cost of multiply per each bit set */
1295 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1296 COSTS_N_INSNS (35), /* HI */
1297 COSTS_N_INSNS (51), /* SI */
1298 COSTS_N_INSNS (83), /* DI */
1299 COSTS_N_INSNS (83)}, /* other */
1300 COSTS_N_INSNS (1), /* cost of movsx */
1301 COSTS_N_INSNS (1), /* cost of movzx */
1302 8, /* "large" insn */
1303 9, /* MOVE_RATIO */
1304 4, /* cost for loading QImode using movzbl */
1305 {3, 4, 3}, /* cost of loading integer registers
1306 in QImode, HImode and SImode.
1307 Relative to reg-reg move (2). */
1308 {3, 4, 3}, /* cost of storing integer registers */
1309 4, /* cost of reg,reg fld/fst */
1310 {4, 4, 12}, /* cost of loading fp registers
1311 in SFmode, DFmode and XFmode */
1312 {6, 6, 8}, /* cost of storing fp registers
1313 in SFmode, DFmode and XFmode */
1314 2, /* cost of moving MMX register */
1315 {3, 3}, /* cost of loading MMX registers
1316 in SImode and DImode */
1317 {4, 4}, /* cost of storing MMX registers
1318 in SImode and DImode */
1319 2, /* cost of moving SSE register */
1320 {4, 4, 3}, /* cost of loading SSE registers
1321 in SImode, DImode and TImode */
1322 {4, 4, 5}, /* cost of storing SSE registers
1323 in SImode, DImode and TImode */
1324 3, /* MMX or SSE register to integer */
1325 /* On K8:
1326 MOVD reg64, xmmreg Double FSTORE 4
1327 MOVD reg32, xmmreg Double FSTORE 4
1328 On AMDFAM10:
1329 MOVD reg64, xmmreg Double FADD 3
1330 1/1 1/1
1331 MOVD reg32, xmmreg Double FADD 3
1332 1/1 1/1 */
1333 32, /* size of l1 cache. */
1334 512, /* size of l2 cache. */
1335 64, /* size of prefetch block */
1336 100, /* number of parallel prefetches */
1337 2, /* Branch cost */
1338 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1339 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1340 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1341 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1342 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1343 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1344
1345 btver1_memcpy,
1346 btver1_memset,
1347 4, /* scalar_stmt_cost. */
1348 2, /* scalar load_cost. */
1349 2, /* scalar_store_cost. */
1350 6, /* vec_stmt_cost. */
1351 0, /* vec_to_scalar_cost. */
1352 2, /* scalar_to_vec_cost. */
1353 2, /* vec_align_load_cost. */
1354 2, /* vec_unalign_load_cost. */
1355 2, /* vec_store_cost. */
1356 2, /* cond_taken_branch_cost. */
1357 1, /* cond_not_taken_branch_cost. */
1358 };
1359
1360 static stringop_algs btver2_memcpy[2] = {
1361 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1362 {-1, rep_prefix_4_byte, false}}},
1363 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1364 {-1, libcall, false}}}};
1365 static stringop_algs btver2_memset[2] = {
1366 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1367 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1368 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1369 {-1, libcall, false}}}};
1370 const struct processor_costs btver2_cost = {
1371 COSTS_N_INSNS (1), /* cost of an add instruction */
1372 COSTS_N_INSNS (2), /* cost of a lea instruction */
1373 COSTS_N_INSNS (1), /* variable shift costs */
1374 COSTS_N_INSNS (1), /* constant shift costs */
1375 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1376 COSTS_N_INSNS (4), /* HI */
1377 COSTS_N_INSNS (3), /* SI */
1378 COSTS_N_INSNS (4), /* DI */
1379 COSTS_N_INSNS (5)}, /* other */
1380 0, /* cost of multiply per each bit set */
1381 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1382 COSTS_N_INSNS (35), /* HI */
1383 COSTS_N_INSNS (51), /* SI */
1384 COSTS_N_INSNS (83), /* DI */
1385 COSTS_N_INSNS (83)}, /* other */
1386 COSTS_N_INSNS (1), /* cost of movsx */
1387 COSTS_N_INSNS (1), /* cost of movzx */
1388 8, /* "large" insn */
1389 9, /* MOVE_RATIO */
1390 4, /* cost for loading QImode using movzbl */
1391 {3, 4, 3}, /* cost of loading integer registers
1392 in QImode, HImode and SImode.
1393 Relative to reg-reg move (2). */
1394 {3, 4, 3}, /* cost of storing integer registers */
1395 4, /* cost of reg,reg fld/fst */
1396 {4, 4, 12}, /* cost of loading fp registers
1397 in SFmode, DFmode and XFmode */
1398 {6, 6, 8}, /* cost of storing fp registers
1399 in SFmode, DFmode and XFmode */
1400 2, /* cost of moving MMX register */
1401 {3, 3}, /* cost of loading MMX registers
1402 in SImode and DImode */
1403 {4, 4}, /* cost of storing MMX registers
1404 in SImode and DImode */
1405 2, /* cost of moving SSE register */
1406 {4, 4, 3}, /* cost of loading SSE registers
1407 in SImode, DImode and TImode */
1408 {4, 4, 5}, /* cost of storing SSE registers
1409 in SImode, DImode and TImode */
1410 3, /* MMX or SSE register to integer */
1411 /* On K8:
1412 MOVD reg64, xmmreg Double FSTORE 4
1413 MOVD reg32, xmmreg Double FSTORE 4
1414 On AMDFAM10:
1415 MOVD reg64, xmmreg Double FADD 3
1416 1/1 1/1
1417 MOVD reg32, xmmreg Double FADD 3
1418 1/1 1/1 */
1419 32, /* size of l1 cache. */
1420 2048, /* size of l2 cache. */
1421 64, /* size of prefetch block */
1422 100, /* number of parallel prefetches */
1423 2, /* Branch cost */
1424 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1425 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1426 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1427 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1428 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1429 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1430 btver2_memcpy,
1431 btver2_memset,
1432 4, /* scalar_stmt_cost. */
1433 2, /* scalar load_cost. */
1434 2, /* scalar_store_cost. */
1435 6, /* vec_stmt_cost. */
1436 0, /* vec_to_scalar_cost. */
1437 2, /* scalar_to_vec_cost. */
1438 2, /* vec_align_load_cost. */
1439 2, /* vec_unalign_load_cost. */
1440 2, /* vec_store_cost. */
1441 2, /* cond_taken_branch_cost. */
1442 1, /* cond_not_taken_branch_cost. */
1443 };
1444
1445 static stringop_algs pentium4_memcpy[2] = {
1446 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1447 DUMMY_STRINGOP_ALGS};
1448 static stringop_algs pentium4_memset[2] = {
1449 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1450 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1451 DUMMY_STRINGOP_ALGS};
1452
1453 static const
1454 struct processor_costs pentium4_cost = {
1455 COSTS_N_INSNS (1), /* cost of an add instruction */
1456 COSTS_N_INSNS (3), /* cost of a lea instruction */
1457 COSTS_N_INSNS (4), /* variable shift costs */
1458 COSTS_N_INSNS (4), /* constant shift costs */
1459 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
1460 COSTS_N_INSNS (15), /* HI */
1461 COSTS_N_INSNS (15), /* SI */
1462 COSTS_N_INSNS (15), /* DI */
1463 COSTS_N_INSNS (15)}, /* other */
1464 0, /* cost of multiply per each bit set */
1465 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
1466 COSTS_N_INSNS (56), /* HI */
1467 COSTS_N_INSNS (56), /* SI */
1468 COSTS_N_INSNS (56), /* DI */
1469 COSTS_N_INSNS (56)}, /* other */
1470 COSTS_N_INSNS (1), /* cost of movsx */
1471 COSTS_N_INSNS (1), /* cost of movzx */
1472 16, /* "large" insn */
1473 6, /* MOVE_RATIO */
1474 2, /* cost for loading QImode using movzbl */
1475 {4, 5, 4}, /* cost of loading integer registers
1476 in QImode, HImode and SImode.
1477 Relative to reg-reg move (2). */
1478 {2, 3, 2}, /* cost of storing integer registers */
1479 2, /* cost of reg,reg fld/fst */
1480 {2, 2, 6}, /* cost of loading fp registers
1481 in SFmode, DFmode and XFmode */
1482 {4, 4, 6}, /* cost of storing fp registers
1483 in SFmode, DFmode and XFmode */
1484 2, /* cost of moving MMX register */
1485 {2, 2}, /* cost of loading MMX registers
1486 in SImode and DImode */
1487 {2, 2}, /* cost of storing MMX registers
1488 in SImode and DImode */
1489 12, /* cost of moving SSE register */
1490 {12, 12, 12}, /* cost of loading SSE registers
1491 in SImode, DImode and TImode */
1492 {2, 2, 8}, /* cost of storing SSE registers
1493 in SImode, DImode and TImode */
1494 10, /* MMX or SSE register to integer */
1495 8, /* size of l1 cache. */
1496 256, /* size of l2 cache. */
1497 64, /* size of prefetch block */
1498 6, /* number of parallel prefetches */
1499 2, /* Branch cost */
1500 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1501 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
1502 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
1503 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1504 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1505 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
1506 pentium4_memcpy,
1507 pentium4_memset,
1508 1, /* scalar_stmt_cost. */
1509 1, /* scalar load_cost. */
1510 1, /* scalar_store_cost. */
1511 1, /* vec_stmt_cost. */
1512 1, /* vec_to_scalar_cost. */
1513 1, /* scalar_to_vec_cost. */
1514 1, /* vec_align_load_cost. */
1515 2, /* vec_unalign_load_cost. */
1516 1, /* vec_store_cost. */
1517 3, /* cond_taken_branch_cost. */
1518 1, /* cond_not_taken_branch_cost. */
1519 };
1520
1521 static stringop_algs nocona_memcpy[2] = {
1522 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1523 {libcall, {{32, loop, false}, {20000, rep_prefix_8_byte, false},
1524 {100000, unrolled_loop, false}, {-1, libcall, false}}}};
1525
1526 static stringop_algs nocona_memset[2] = {
1527 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1528 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1529 {libcall, {{24, loop, false}, {64, unrolled_loop, false},
1530 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1531
1532 static const
1533 struct processor_costs nocona_cost = {
1534 COSTS_N_INSNS (1), /* cost of an add instruction */
1535 COSTS_N_INSNS (1), /* cost of a lea instruction */
1536 COSTS_N_INSNS (1), /* variable shift costs */
1537 COSTS_N_INSNS (1), /* constant shift costs */
1538 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
1539 COSTS_N_INSNS (10), /* HI */
1540 COSTS_N_INSNS (10), /* SI */
1541 COSTS_N_INSNS (10), /* DI */
1542 COSTS_N_INSNS (10)}, /* other */
1543 0, /* cost of multiply per each bit set */
1544 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
1545 COSTS_N_INSNS (66), /* HI */
1546 COSTS_N_INSNS (66), /* SI */
1547 COSTS_N_INSNS (66), /* DI */
1548 COSTS_N_INSNS (66)}, /* other */
1549 COSTS_N_INSNS (1), /* cost of movsx */
1550 COSTS_N_INSNS (1), /* cost of movzx */
1551 16, /* "large" insn */
1552 17, /* MOVE_RATIO */
1553 4, /* cost for loading QImode using movzbl */
1554 {4, 4, 4}, /* cost of loading integer registers
1555 in QImode, HImode and SImode.
1556 Relative to reg-reg move (2). */
1557 {4, 4, 4}, /* cost of storing integer registers */
1558 3, /* cost of reg,reg fld/fst */
1559 {12, 12, 12}, /* cost of loading fp registers
1560 in SFmode, DFmode and XFmode */
1561 {4, 4, 4}, /* cost of storing fp registers
1562 in SFmode, DFmode and XFmode */
1563 6, /* cost of moving MMX register */
1564 {12, 12}, /* cost of loading MMX registers
1565 in SImode and DImode */
1566 {12, 12}, /* cost of storing MMX registers
1567 in SImode and DImode */
1568 6, /* cost of moving SSE register */
1569 {12, 12, 12}, /* cost of loading SSE registers
1570 in SImode, DImode and TImode */
1571 {12, 12, 12}, /* cost of storing SSE registers
1572 in SImode, DImode and TImode */
1573 8, /* MMX or SSE register to integer */
1574 8, /* size of l1 cache. */
1575 1024, /* size of l2 cache. */
1576 64, /* size of prefetch block */
1577 8, /* number of parallel prefetches */
1578 1, /* Branch cost */
1579 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1580 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1581 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1582 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1583 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1584 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
1585 nocona_memcpy,
1586 nocona_memset,
1587 1, /* scalar_stmt_cost. */
1588 1, /* scalar load_cost. */
1589 1, /* scalar_store_cost. */
1590 1, /* vec_stmt_cost. */
1591 1, /* vec_to_scalar_cost. */
1592 1, /* scalar_to_vec_cost. */
1593 1, /* vec_align_load_cost. */
1594 2, /* vec_unalign_load_cost. */
1595 1, /* vec_store_cost. */
1596 3, /* cond_taken_branch_cost. */
1597 1, /* cond_not_taken_branch_cost. */
1598 };
1599
1600 static stringop_algs atom_memcpy[2] = {
1601 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1602 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1603 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1604 static stringop_algs atom_memset[2] = {
1605 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1606 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1607 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1608 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1609 static const
1610 struct processor_costs atom_cost = {
1611 COSTS_N_INSNS (1), /* cost of an add instruction */
1612 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1613 COSTS_N_INSNS (1), /* variable shift costs */
1614 COSTS_N_INSNS (1), /* constant shift costs */
1615 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1616 COSTS_N_INSNS (4), /* HI */
1617 COSTS_N_INSNS (3), /* SI */
1618 COSTS_N_INSNS (4), /* DI */
1619 COSTS_N_INSNS (2)}, /* other */
1620 0, /* cost of multiply per each bit set */
1621 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1622 COSTS_N_INSNS (26), /* HI */
1623 COSTS_N_INSNS (42), /* SI */
1624 COSTS_N_INSNS (74), /* DI */
1625 COSTS_N_INSNS (74)}, /* other */
1626 COSTS_N_INSNS (1), /* cost of movsx */
1627 COSTS_N_INSNS (1), /* cost of movzx */
1628 8, /* "large" insn */
1629 17, /* MOVE_RATIO */
1630 4, /* cost for loading QImode using movzbl */
1631 {4, 4, 4}, /* cost of loading integer registers
1632 in QImode, HImode and SImode.
1633 Relative to reg-reg move (2). */
1634 {4, 4, 4}, /* cost of storing integer registers */
1635 4, /* cost of reg,reg fld/fst */
1636 {12, 12, 12}, /* cost of loading fp registers
1637 in SFmode, DFmode and XFmode */
1638 {6, 6, 8}, /* cost of storing fp registers
1639 in SFmode, DFmode and XFmode */
1640 2, /* cost of moving MMX register */
1641 {8, 8}, /* cost of loading MMX registers
1642 in SImode and DImode */
1643 {8, 8}, /* cost of storing MMX registers
1644 in SImode and DImode */
1645 2, /* cost of moving SSE register */
1646 {8, 8, 8}, /* cost of loading SSE registers
1647 in SImode, DImode and TImode */
1648 {8, 8, 8}, /* cost of storing SSE registers
1649 in SImode, DImode and TImode */
1650 5, /* MMX or SSE register to integer */
1651 32, /* size of l1 cache. */
1652 256, /* size of l2 cache. */
1653 64, /* size of prefetch block */
1654 6, /* number of parallel prefetches */
1655 3, /* Branch cost */
1656 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1657 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1658 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1659 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1660 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1661 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1662 atom_memcpy,
1663 atom_memset,
1664 1, /* scalar_stmt_cost. */
1665 1, /* scalar load_cost. */
1666 1, /* scalar_store_cost. */
1667 1, /* vec_stmt_cost. */
1668 1, /* vec_to_scalar_cost. */
1669 1, /* scalar_to_vec_cost. */
1670 1, /* vec_align_load_cost. */
1671 2, /* vec_unalign_load_cost. */
1672 1, /* vec_store_cost. */
1673 3, /* cond_taken_branch_cost. */
1674 1, /* cond_not_taken_branch_cost. */
1675 };
1676
1677 static stringop_algs slm_memcpy[2] = {
1678 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1679 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1680 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1681 static stringop_algs slm_memset[2] = {
1682 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1683 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1684 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1685 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1686 static const
1687 struct processor_costs slm_cost = {
1688 COSTS_N_INSNS (1), /* cost of an add instruction */
1689 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1690 COSTS_N_INSNS (1), /* variable shift costs */
1691 COSTS_N_INSNS (1), /* constant shift costs */
1692 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1693 COSTS_N_INSNS (3), /* HI */
1694 COSTS_N_INSNS (3), /* SI */
1695 COSTS_N_INSNS (4), /* DI */
1696 COSTS_N_INSNS (2)}, /* other */
1697 0, /* cost of multiply per each bit set */
1698 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1699 COSTS_N_INSNS (26), /* HI */
1700 COSTS_N_INSNS (42), /* SI */
1701 COSTS_N_INSNS (74), /* DI */
1702 COSTS_N_INSNS (74)}, /* other */
1703 COSTS_N_INSNS (1), /* cost of movsx */
1704 COSTS_N_INSNS (1), /* cost of movzx */
1705 8, /* "large" insn */
1706 17, /* MOVE_RATIO */
1707 4, /* cost for loading QImode using movzbl */
1708 {4, 4, 4}, /* cost of loading integer registers
1709 in QImode, HImode and SImode.
1710 Relative to reg-reg move (2). */
1711 {4, 4, 4}, /* cost of storing integer registers */
1712 4, /* cost of reg,reg fld/fst */
1713 {12, 12, 12}, /* cost of loading fp registers
1714 in SFmode, DFmode and XFmode */
1715 {6, 6, 8}, /* cost of storing fp registers
1716 in SFmode, DFmode and XFmode */
1717 2, /* cost of moving MMX register */
1718 {8, 8}, /* cost of loading MMX registers
1719 in SImode and DImode */
1720 {8, 8}, /* cost of storing MMX registers
1721 in SImode and DImode */
1722 2, /* cost of moving SSE register */
1723 {8, 8, 8}, /* cost of loading SSE registers
1724 in SImode, DImode and TImode */
1725 {8, 8, 8}, /* cost of storing SSE registers
1726 in SImode, DImode and TImode */
1727 5, /* MMX or SSE register to integer */
1728 32, /* size of l1 cache. */
1729 256, /* size of l2 cache. */
1730 64, /* size of prefetch block */
1731 6, /* number of parallel prefetches */
1732 3, /* Branch cost */
1733 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1734 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1735 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1736 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1737 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1738 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1739 slm_memcpy,
1740 slm_memset,
1741 1, /* scalar_stmt_cost. */
1742 1, /* scalar load_cost. */
1743 1, /* scalar_store_cost. */
1744 1, /* vec_stmt_cost. */
1745 4, /* vec_to_scalar_cost. */
1746 1, /* scalar_to_vec_cost. */
1747 1, /* vec_align_load_cost. */
1748 2, /* vec_unalign_load_cost. */
1749 1, /* vec_store_cost. */
1750 3, /* cond_taken_branch_cost. */
1751 1, /* cond_not_taken_branch_cost. */
1752 };
1753
1754 static stringop_algs intel_memcpy[2] = {
1755 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1756 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1757 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1758 static stringop_algs intel_memset[2] = {
1759 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1760 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1761 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1762 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1763 static const
1764 struct processor_costs intel_cost = {
1765 COSTS_N_INSNS (1), /* cost of an add instruction */
1766 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1767 COSTS_N_INSNS (1), /* variable shift costs */
1768 COSTS_N_INSNS (1), /* constant shift costs */
1769 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1770 COSTS_N_INSNS (3), /* HI */
1771 COSTS_N_INSNS (3), /* SI */
1772 COSTS_N_INSNS (4), /* DI */
1773 COSTS_N_INSNS (2)}, /* other */
1774 0, /* cost of multiply per each bit set */
1775 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1776 COSTS_N_INSNS (26), /* HI */
1777 COSTS_N_INSNS (42), /* SI */
1778 COSTS_N_INSNS (74), /* DI */
1779 COSTS_N_INSNS (74)}, /* other */
1780 COSTS_N_INSNS (1), /* cost of movsx */
1781 COSTS_N_INSNS (1), /* cost of movzx */
1782 8, /* "large" insn */
1783 17, /* MOVE_RATIO */
1784 4, /* cost for loading QImode using movzbl */
1785 {4, 4, 4}, /* cost of loading integer registers
1786 in QImode, HImode and SImode.
1787 Relative to reg-reg move (2). */
1788 {4, 4, 4}, /* cost of storing integer registers */
1789 4, /* cost of reg,reg fld/fst */
1790 {12, 12, 12}, /* cost of loading fp registers
1791 in SFmode, DFmode and XFmode */
1792 {6, 6, 8}, /* cost of storing fp registers
1793 in SFmode, DFmode and XFmode */
1794 2, /* cost of moving MMX register */
1795 {8, 8}, /* cost of loading MMX registers
1796 in SImode and DImode */
1797 {8, 8}, /* cost of storing MMX registers
1798 in SImode and DImode */
1799 2, /* cost of moving SSE register */
1800 {8, 8, 8}, /* cost of loading SSE registers
1801 in SImode, DImode and TImode */
1802 {8, 8, 8}, /* cost of storing SSE registers
1803 in SImode, DImode and TImode */
1804 5, /* MMX or SSE register to integer */
1805 32, /* size of l1 cache. */
1806 256, /* size of l2 cache. */
1807 64, /* size of prefetch block */
1808 6, /* number of parallel prefetches */
1809 3, /* Branch cost */
1810 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1811 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1812 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1813 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1814 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1815 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1816 intel_memcpy,
1817 intel_memset,
1818 1, /* scalar_stmt_cost. */
1819 1, /* scalar load_cost. */
1820 1, /* scalar_store_cost. */
1821 1, /* vec_stmt_cost. */
1822 4, /* vec_to_scalar_cost. */
1823 1, /* scalar_to_vec_cost. */
1824 1, /* vec_align_load_cost. */
1825 2, /* vec_unalign_load_cost. */
1826 1, /* vec_store_cost. */
1827 3, /* cond_taken_branch_cost. */
1828 1, /* cond_not_taken_branch_cost. */
1829 };
1830
1831 /* Generic should produce code tuned for Core-i7 (and newer chips)
1832 and btver1 (and newer chips). */
1833
1834 static stringop_algs generic_memcpy[2] = {
1835 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1836 {-1, libcall, false}}},
1837 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1838 {-1, libcall, false}}}};
1839 static stringop_algs generic_memset[2] = {
1840 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1841 {-1, libcall, false}}},
1842 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1843 {-1, libcall, false}}}};
1844 static const
1845 struct processor_costs generic_cost = {
1846 COSTS_N_INSNS (1), /* cost of an add instruction */
1847 /* On all chips taken into consideration, lea takes 2 cycles or more. With
1848 this cost, however, our current implementation of synth_mult results in
1849 the use of unnecessary temporary registers, causing regressions on several
1850 SPECfp benchmarks. */
1851 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1852 COSTS_N_INSNS (1), /* variable shift costs */
1853 COSTS_N_INSNS (1), /* constant shift costs */
1854 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1855 COSTS_N_INSNS (4), /* HI */
1856 COSTS_N_INSNS (3), /* SI */
1857 COSTS_N_INSNS (4), /* DI */
1858 COSTS_N_INSNS (2)}, /* other */
1859 0, /* cost of multiply per each bit set */
1860 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1861 COSTS_N_INSNS (26), /* HI */
1862 COSTS_N_INSNS (42), /* SI */
1863 COSTS_N_INSNS (74), /* DI */
1864 COSTS_N_INSNS (74)}, /* other */
1865 COSTS_N_INSNS (1), /* cost of movsx */
1866 COSTS_N_INSNS (1), /* cost of movzx */
1867 8, /* "large" insn */
1868 17, /* MOVE_RATIO */
1869 4, /* cost for loading QImode using movzbl */
1870 {4, 4, 4}, /* cost of loading integer registers
1871 in QImode, HImode and SImode.
1872 Relative to reg-reg move (2). */
1873 {4, 4, 4}, /* cost of storing integer registers */
1874 4, /* cost of reg,reg fld/fst */
1875 {12, 12, 12}, /* cost of loading fp registers
1876 in SFmode, DFmode and XFmode */
1877 {6, 6, 8}, /* cost of storing fp registers
1878 in SFmode, DFmode and XFmode */
1879 2, /* cost of moving MMX register */
1880 {8, 8}, /* cost of loading MMX registers
1881 in SImode and DImode */
1882 {8, 8}, /* cost of storing MMX registers
1883 in SImode and DImode */
1884 2, /* cost of moving SSE register */
1885 {8, 8, 8}, /* cost of loading SSE registers
1886 in SImode, DImode and TImode */
1887 {8, 8, 8}, /* cost of storing SSE registers
1888 in SImode, DImode and TImode */
1889 5, /* MMX or SSE register to integer */
1890 32, /* size of l1 cache. */
1891 512, /* size of l2 cache. */
1892 64, /* size of prefetch block */
1893 6, /* number of parallel prefetches */
1894 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
1895 value is increased to the perhaps more appropriate value of 5. */
1896 3, /* Branch cost */
1897 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1898 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1899 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1900 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1901 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1902 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1903 generic_memcpy,
1904 generic_memset,
1905 1, /* scalar_stmt_cost. */
1906 1, /* scalar load_cost. */
1907 1, /* scalar_store_cost. */
1908 1, /* vec_stmt_cost. */
1909 1, /* vec_to_scalar_cost. */
1910 1, /* scalar_to_vec_cost. */
1911 1, /* vec_align_load_cost. */
1912 2, /* vec_unalign_load_cost. */
1913 1, /* vec_store_cost. */
1914 3, /* cond_taken_branch_cost. */
1915 1, /* cond_not_taken_branch_cost. */
1916 };
1917
1918 /* core_cost should produce code tuned for the Core family of CPUs. */
1919 static stringop_algs core_memcpy[2] = {
1920 {libcall, {{1024, rep_prefix_4_byte, true}, {-1, libcall, false}}},
1921 {libcall, {{24, loop, true}, {128, rep_prefix_8_byte, true},
1922 {-1, libcall, false}}}};
1923 static stringop_algs core_memset[2] = {
1924 {libcall, {{6, loop_1_byte, true},
1925 {24, loop, true},
1926 {8192, rep_prefix_4_byte, true},
1927 {-1, libcall, false}}},
1928 {libcall, {{24, loop, true}, {512, rep_prefix_8_byte, true},
1929 {-1, libcall, false}}}};
1930
1931 static const
1932 struct processor_costs core_cost = {
1933 COSTS_N_INSNS (1), /* cost of an add instruction */
1934 /* On all chips taken into consideration, lea takes 2 cycles or more. With
1935 this cost, however, our current implementation of synth_mult results in
1936 the use of unnecessary temporary registers, causing regressions on several
1937 SPECfp benchmarks. */
1938 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1939 COSTS_N_INSNS (1), /* variable shift costs */
1940 COSTS_N_INSNS (1), /* constant shift costs */
1941 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1942 COSTS_N_INSNS (4), /* HI */
1943 COSTS_N_INSNS (3), /* SI */
1944 COSTS_N_INSNS (4), /* DI */
1945 COSTS_N_INSNS (2)}, /* other */
1946 0, /* cost of multiply per each bit set */
1947 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1948 COSTS_N_INSNS (26), /* HI */
1949 COSTS_N_INSNS (42), /* SI */
1950 COSTS_N_INSNS (74), /* DI */
1951 COSTS_N_INSNS (74)}, /* other */
1952 COSTS_N_INSNS (1), /* cost of movsx */
1953 COSTS_N_INSNS (1), /* cost of movzx */
1954 8, /* "large" insn */
1955 17, /* MOVE_RATIO */
1956 4, /* cost for loading QImode using movzbl */
1957 {4, 4, 4}, /* cost of loading integer registers
1958 in QImode, HImode and SImode.
1959 Relative to reg-reg move (2). */
1960 {4, 4, 4}, /* cost of storing integer registers */
1961 4, /* cost of reg,reg fld/fst */
1962 {12, 12, 12}, /* cost of loading fp registers
1963 in SFmode, DFmode and XFmode */
1964 {6, 6, 8}, /* cost of storing fp registers
1965 in SFmode, DFmode and XFmode */
1966 2, /* cost of moving MMX register */
1967 {8, 8}, /* cost of loading MMX registers
1968 in SImode and DImode */
1969 {8, 8}, /* cost of storing MMX registers
1970 in SImode and DImode */
1971 2, /* cost of moving SSE register */
1972 {8, 8, 8}, /* cost of loading SSE registers
1973 in SImode, DImode and TImode */
1974 {8, 8, 8}, /* cost of storing SSE registers
1975 in SImode, DImode and TImode */
1976 5, /* MMX or SSE register to integer */
1977 64, /* size of l1 cache. */
1978 512, /* size of l2 cache. */
1979 64, /* size of prefetch block */
1980 6, /* number of parallel prefetches */
1981 /* FIXME: perhaps a more appropriate value is 5. */
1982 3, /* Branch cost */
1983 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1984 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1985 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1986 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1987 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1988 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1989 core_memcpy,
1990 core_memset,
1991 1, /* scalar_stmt_cost. */
1992 1, /* scalar load_cost. */
1993 1, /* scalar_store_cost. */
1994 1, /* vec_stmt_cost. */
1995 1, /* vec_to_scalar_cost. */
1996 1, /* scalar_to_vec_cost. */
1997 1, /* vec_align_load_cost. */
1998 2, /* vec_unalign_load_cost. */
1999 1, /* vec_store_cost. */
2000 3, /* cond_taken_branch_cost. */
2001 1, /* cond_not_taken_branch_cost. */
2002 };
2003
2004
2005 /* Set by -mtune. */
2006 const struct processor_costs *ix86_tune_cost = &pentium_cost;
2007
2008 /* Set by -mtune or -Os. */
2009 const struct processor_costs *ix86_cost = &pentium_cost;
2010
2011 /* Processor feature/optimization bitmasks. */
2012 #define m_386 (1<<PROCESSOR_I386)
2013 #define m_486 (1<<PROCESSOR_I486)
2014 #define m_PENT (1<<PROCESSOR_PENTIUM)
2015 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
2016 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
2017 #define m_NOCONA (1<<PROCESSOR_NOCONA)
2018 #define m_P4_NOCONA (m_PENT4 | m_NOCONA)
2019 #define m_CORE2 (1<<PROCESSOR_CORE2)
2020 #define m_NEHALEM (1<<PROCESSOR_NEHALEM)
2021 #define m_SANDYBRIDGE (1<<PROCESSOR_SANDYBRIDGE)
2022 #define m_HASWELL (1<<PROCESSOR_HASWELL)
2023 #define m_CORE_ALL (m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_HASWELL)
2024 #define m_BONNELL (1<<PROCESSOR_BONNELL)
2025 #define m_SILVERMONT (1<<PROCESSOR_SILVERMONT)
2026 #define m_INTEL (1<<PROCESSOR_INTEL)
2027
2028 #define m_GEODE (1<<PROCESSOR_GEODE)
2029 #define m_K6 (1<<PROCESSOR_K6)
2030 #define m_K6_GEODE (m_K6 | m_GEODE)
2031 #define m_K8 (1<<PROCESSOR_K8)
2032 #define m_ATHLON (1<<PROCESSOR_ATHLON)
2033 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
2034 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
2035 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
2036 #define m_BDVER2 (1<<PROCESSOR_BDVER2)
2037 #define m_BDVER3 (1<<PROCESSOR_BDVER3)
2038 #define m_BDVER4 (1<<PROCESSOR_BDVER4)
2039 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
2040 #define m_BTVER2 (1<<PROCESSOR_BTVER2)
2041 #define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3 | m_BDVER4)
2042 #define m_BTVER (m_BTVER1 | m_BTVER2)
2043 #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER)
2044
2045 #define m_GENERIC (1<<PROCESSOR_GENERIC)
2046
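/* Names of the tuning features, generated from x86-tune.def via the
   DEF_TUNE macro; indexed the same way as ix86_tune_features and
   initial_ix86_tune_features below.  */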
2047 const char* ix86_tune_feature_names[X86_TUNE_LAST] = {
2048 #undef DEF_TUNE
2049 #define DEF_TUNE(tune, name, selector) name,
2050 #include "x86-tune.def"
2051 #undef DEF_TUNE
2052 };
2053
2054 /* Feature tests against the various tunings. */
2055 unsigned char ix86_tune_features[X86_TUNE_LAST];
2056
2057 /* Feature tests against the various tunings used to create ix86_tune_features
2058 based on the processor mask. */
2059 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
2060 #undef DEF_TUNE
2061 #define DEF_TUNE(tune, name, selector) selector,
2062 #include "x86-tune.def"
2063 #undef DEF_TUNE
2064 };
2065
2066 /* Feature tests against the various architecture variations. */
2067 unsigned char ix86_arch_features[X86_ARCH_LAST];
2068
2069 /* Feature tests against the various architecture variations, used to create
2070 ix86_arch_features based on the processor mask. */
2071 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
2072 /* X86_ARCH_CMOV: Conditional move was added for pentiumpro. */
2073 ~(m_386 | m_486 | m_PENT | m_K6),
2074
2075 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2076 ~m_386,
2077
2078 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2079 ~(m_386 | m_486),
2080
2081 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2082 ~m_386,
2083
2084 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
2085 ~m_386,
2086 };
2087
2088 /* In case the average insn count for a single function invocation is
2089 lower than this constant, emit fast (but longer) prologue and
2090 epilogue code. */
2091 #define FAST_PROLOGUE_INSN_COUNT 20
2092
2093 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
2094 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
2095 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
2096 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
2097
2098 /* Array of the smallest class containing reg number REGNO, indexed by
2099 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2100
2101 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
2102 {
2103 /* ax, dx, cx, bx */
2104 AREG, DREG, CREG, BREG,
2105 /* si, di, bp, sp */
2106 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
2107 /* FP registers */
2108 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
2109 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
2110 /* arg pointer */
2111 NON_Q_REGS,
2112 /* flags, fpsr, fpcr, frame */
2113 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
2114 /* SSE registers */
2115 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2116 SSE_REGS, SSE_REGS,
2117 /* MMX registers */
2118 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
2119 MMX_REGS, MMX_REGS,
2120 /* REX registers */
2121 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2122 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2123 /* SSE REX registers */
2124 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2125 SSE_REGS, SSE_REGS,
2126 /* AVX-512 SSE registers */
2127 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2128 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2129 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2130 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2131 /* Mask registers. */
2132 MASK_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2133 MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2134 };
2135
2136 /* The "default" register map used in 32bit mode. */
2137
2138 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2139 {
2140 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2141 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2142 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2143 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2144 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2145 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2146 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2147 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2148 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2149 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2150 };
2151
2152 /* The "default" register map used in 64bit mode. */
2153
2154 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2155 {
2156 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2157 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2158 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2159 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2160 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2161 8,9,10,11,12,13,14,15, /* extended integer registers */
2162 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2163 67, 68, 69, 70, 71, 72, 73, 74, /* AVX-512 registers 16-23 */
2164 75, 76, 77, 78, 79, 80, 81, 82, /* AVX-512 registers 24-31 */
2165 118, 119, 120, 121, 122, 123, 124, 125, /* Mask registers */
2166 };
2167
2168 /* Define the register numbers to be used in Dwarf debugging information.
2169 The SVR4 reference port C compiler uses the following register numbers
2170 in its Dwarf output code:
2171 0 for %eax (gcc regno = 0)
2172 1 for %ecx (gcc regno = 2)
2173 2 for %edx (gcc regno = 1)
2174 3 for %ebx (gcc regno = 3)
2175 4 for %esp (gcc regno = 7)
2176 5 for %ebp (gcc regno = 6)
2177 6 for %esi (gcc regno = 4)
2178 7 for %edi (gcc regno = 5)
2179 The following three DWARF register numbers are never generated by
2180 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2181 believes these numbers have these meanings.
2182 8 for %eip (no gcc equivalent)
2183 9 for %eflags (gcc regno = 17)
2184 10 for %trapno (no gcc equivalent)
2185 It is not at all clear how we should number the FP stack registers
2186 for the x86 architecture. If the version of SDB on x86/svr4 were
2187 a bit less brain dead with respect to floating-point then we would
2188 have a precedent to follow with respect to DWARF register numbers
2189 for x86 FP registers, but the SDB on x86/svr4 is so completely
2190 broken with respect to FP registers that it is hardly worth thinking
2191 of it as something to strive for compatibility with.
2192 The version of x86/svr4 SDB I have at the moment does (partially)
2193 seem to believe that DWARF register number 11 is associated with
2194 the x86 register %st(0), but that's about all. Higher DWARF
2195 register numbers don't seem to be associated with anything in
2196 particular, and even for DWARF regno 11, SDB only seems to under-
2197 stand that it should say that a variable lives in %st(0) (when
2198 asked via an `=' command) if we said it was in DWARF regno 11,
2199 but SDB still prints garbage when asked for the value of the
2200 variable in question (via a `/' command).
2201 (Also note that the labels SDB prints for various FP stack regs
2202 when doing an `x' command are all wrong.)
2203 Note that these problems generally don't affect the native SVR4
2204 C compiler because it doesn't allow the use of -O with -g and
2205 because when it is *not* optimizing, it allocates a memory
2206 location for each floating-point variable, and the memory
2207 location is what gets described in the DWARF AT_location
2208 attribute for the variable in question.
2209 Regardless of the severe mental illness of the x86/svr4 SDB, we
2210 do something sensible here and we use the following DWARF
2211 register numbers. Note that these are all stack-top-relative
2212 numbers.
2213 11 for %st(0) (gcc regno = 8)
2214 12 for %st(1) (gcc regno = 9)
2215 13 for %st(2) (gcc regno = 10)
2216 14 for %st(3) (gcc regno = 11)
2217 15 for %st(4) (gcc regno = 12)
2218 16 for %st(5) (gcc regno = 13)
2219 17 for %st(6) (gcc regno = 14)
2220 18 for %st(7) (gcc regno = 15)
2221 */
2222 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2223 {
2224 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2225 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2226 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2227 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2228 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2229 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2230 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2231 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2232 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2233 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2234 };
2235
2236 /* Define parameter passing and return registers. */
2237
2238 static int const x86_64_int_parameter_registers[6] =
2239 {
2240 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2241 };
2242
2243 static int const x86_64_ms_abi_int_parameter_registers[4] =
2244 {
2245 CX_REG, DX_REG, R8_REG, R9_REG
2246 };
2247
2248 static int const x86_64_int_return_registers[4] =
2249 {
2250 AX_REG, DX_REG, DI_REG, SI_REG
2251 };
2252
2253 /* Additional registers that are clobbered by SYSV calls. */
2254
2255 int const x86_64_ms_sysv_extra_clobbered_registers[12] =
2256 {
2257 SI_REG, DI_REG,
2258 XMM6_REG, XMM7_REG,
2259 XMM8_REG, XMM9_REG, XMM10_REG, XMM11_REG,
2260 XMM12_REG, XMM13_REG, XMM14_REG, XMM15_REG
2261 };
2262
2263 /* Define the structure for the machine field in struct function. */
2264
2265 struct GTY(()) stack_local_entry {
2266 unsigned short mode;
2267 unsigned short n;
2268 rtx rtl;
2269 struct stack_local_entry *next;
2270 };
2271
2272 /* Structure describing stack frame layout.
2273 Stack grows downward:
2274
2275 [arguments]
2276 <- ARG_POINTER
2277 saved pc
2278
2279 saved static chain if ix86_static_chain_on_stack
2280
2281 saved frame pointer if frame_pointer_needed
2282 <- HARD_FRAME_POINTER
2283 [saved regs]
2284 <- regs_save_offset
2285 [padding0]
2286
2287 [saved SSE regs]
2288 <- sse_regs_save_offset
2289 [padding1] |
2290 | <- FRAME_POINTER
2291 [va_arg registers] |
2292 |
2293 [frame] |
2294 |
2295 [padding2] | = to_allocate
2296 <- STACK_POINTER
2297 */
2298 struct ix86_frame
2299 {
2300 int nsseregs;
2301 int nregs;
2302 int va_arg_size;
2303 int red_zone_size;
2304 int outgoing_arguments_size;
2305
2306 /* The offsets relative to ARG_POINTER. */
2307 HOST_WIDE_INT frame_pointer_offset;
2308 HOST_WIDE_INT hard_frame_pointer_offset;
2309 HOST_WIDE_INT stack_pointer_offset;
2310 HOST_WIDE_INT hfp_save_offset;
2311 HOST_WIDE_INT reg_save_offset;
2312 HOST_WIDE_INT sse_reg_save_offset;
2313
2314 /* When save_regs_using_mov is set, emit prologue using
2315 move instead of push instructions. */
2316 bool save_regs_using_mov;
2317 };
2318
2319 /* Which cpu are we scheduling for. */
2320 enum attr_cpu ix86_schedule;
2321
2322 /* Which cpu are we optimizing for. */
2323 enum processor_type ix86_tune;
2324
2325 /* Which instruction set architecture to use. */
2326 enum processor_type ix86_arch;
2327
2328 /* True if processor has SSE prefetch instruction. */
2329 unsigned char x86_prefetch_sse;
2330
2331 /* -mstackrealign option */
2332 static const char ix86_force_align_arg_pointer_string[]
2333 = "force_align_arg_pointer";
2334
2335 static rtx (*ix86_gen_leave) (void);
2336 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2337 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2338 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2339 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2340 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2341 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2342 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2343 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2344 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
2345 static rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx);
2346 static rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx);
2347
2348 /* Preferred alignment for stack boundary in bits. */
2349 unsigned int ix86_preferred_stack_boundary;
2350
2351 /* Alignment for incoming stack boundary in bits specified at
2352 command line. */
2353 static unsigned int ix86_user_incoming_stack_boundary;
2354
2355 /* Default alignment for incoming stack boundary in bits. */
2356 static unsigned int ix86_default_incoming_stack_boundary;
2357
2358 /* Alignment for incoming stack boundary in bits. */
2359 unsigned int ix86_incoming_stack_boundary;
2360
2361 /* Calling abi specific va_list type nodes. */
2362 static GTY(()) tree sysv_va_list_type_node;
2363 static GTY(()) tree ms_va_list_type_node;
2364
2365 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2366 char internal_label_prefix[16];
2367 int internal_label_prefix_len;
2368
2369 /* Fence to use after loop using movnt. */
2370 tree x86_mfence;
2371
2372 /* Register class used for passing a given 64-bit part of the argument.
2373 These represent the classes as documented by the psABI, with the exception
2374 of the SSESF and SSEDF classes, which are basically the SSE class; gcc just
2375 uses an SFmode or DFmode move instead of DImode to avoid reformatting penalties.
2376
2377 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
2378 whenever possible (the upper half then contains only padding). */
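/* Illustrative example (assuming the usual SysV x86-64 classification):
   a struct such as { double d; int i; } spans two eightbytes; the first
   would get X86_64_SSEDF_CLASS, so it is moved in DFmode, and the second
   X86_64_INTEGERSI_CLASS, since only its low 32 bits carry data and the
   upper half is padding.  */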
2379 enum x86_64_reg_class
2380 {
2381 X86_64_NO_CLASS,
2382 X86_64_INTEGER_CLASS,
2383 X86_64_INTEGERSI_CLASS,
2384 X86_64_SSE_CLASS,
2385 X86_64_SSESF_CLASS,
2386 X86_64_SSEDF_CLASS,
2387 X86_64_SSEUP_CLASS,
2388 X86_64_X87_CLASS,
2389 X86_64_X87UP_CLASS,
2390 X86_64_COMPLEX_X87_CLASS,
2391 X86_64_MEMORY_CLASS
2392 };
2393
2394 #define MAX_CLASSES 8
2395
2396 /* Table of constants used by fldpi, fldln2, etc.... */
2397 static REAL_VALUE_TYPE ext_80387_constants_table [5];
2398 static bool ext_80387_constants_init = 0;
2399
2400 \f
2401 static struct machine_function * ix86_init_machine_status (void);
2402 static rtx ix86_function_value (const_tree, const_tree, bool);
2403 static bool ix86_function_value_regno_p (const unsigned int);
2404 static unsigned int ix86_function_arg_boundary (enum machine_mode,
2405 const_tree);
2406 static rtx ix86_static_chain (const_tree, bool);
2407 static int ix86_function_regparm (const_tree, const_tree);
2408 static void ix86_compute_frame_layout (struct ix86_frame *);
2409 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
2410 rtx, rtx, int);
2411 static void ix86_add_new_builtins (HOST_WIDE_INT);
2412 static tree ix86_canonical_va_list_type (tree);
2413 static void predict_jump (int);
2414 static unsigned int split_stack_prologue_scratch_regno (void);
2415 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2416
2417 enum ix86_function_specific_strings
2418 {
2419 IX86_FUNCTION_SPECIFIC_ARCH,
2420 IX86_FUNCTION_SPECIFIC_TUNE,
2421 IX86_FUNCTION_SPECIFIC_MAX
2422 };
2423
2424 static char *ix86_target_string (HOST_WIDE_INT, int, const char *,
2425 const char *, enum fpmath_unit, bool);
2426 static void ix86_function_specific_save (struct cl_target_option *,
2427 struct gcc_options *opts);
2428 static void ix86_function_specific_restore (struct gcc_options *opts,
2429 struct cl_target_option *);
2430 static void ix86_function_specific_print (FILE *, int,
2431 struct cl_target_option *);
2432 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2433 static bool ix86_valid_target_attribute_inner_p (tree, char *[],
2434 struct gcc_options *,
2435 struct gcc_options *,
2436 struct gcc_options *);
2437 static bool ix86_can_inline_p (tree, tree);
2438 static void ix86_set_current_function (tree);
2439 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2440
2441 static enum calling_abi ix86_function_abi (const_tree);
2442
2443 \f
2444 #ifndef SUBTARGET32_DEFAULT_CPU
2445 #define SUBTARGET32_DEFAULT_CPU "i386"
2446 #endif
2447
2448 /* Whether -mtune= or -march= were specified */
2449 static int ix86_tune_defaulted;
2450 static int ix86_arch_specified;
2451
2452 /* Vectorization library interface and handlers. */
2453 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2454
2455 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2456 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2457
2458 /* Processor target table, indexed by processor number */
2459 struct ptt
2460 {
2461 const char *const name; /* processor name */
2462 const struct processor_costs *cost; /* Processor costs */
2463 const int align_loop; /* Default alignments. */
2464 const int align_loop_max_skip;
2465 const int align_jump;
2466 const int align_jump_max_skip;
2467 const int align_func;
2468 };
2469
2470 /* This table must be in sync with enum processor_type in i386.h. */
2471 static const struct ptt processor_target_table[PROCESSOR_max] =
2472 {
2473 {"generic", &generic_cost, 16, 10, 16, 10, 16},
2474 {"i386", &i386_cost, 4, 3, 4, 3, 4},
2475 {"i486", &i486_cost, 16, 15, 16, 15, 16},
2476 {"pentium", &pentium_cost, 16, 7, 16, 7, 16},
2477 {"pentiumpro", &pentiumpro_cost, 16, 15, 16, 10, 16},
2478 {"pentium4", &pentium4_cost, 0, 0, 0, 0, 0},
2479 {"nocona", &nocona_cost, 0, 0, 0, 0, 0},
2480 {"core2", &core_cost, 16, 10, 16, 10, 16},
2481 {"nehalem", &core_cost, 16, 10, 16, 10, 16},
2482 {"sandybridge", &core_cost, 16, 10, 16, 10, 16},
2483 {"haswell", &core_cost, 16, 10, 16, 10, 16},
2484 {"bonnell", &atom_cost, 16, 15, 16, 7, 16},
2485 {"silvermont", &slm_cost, 16, 15, 16, 7, 16},
2486 {"intel", &intel_cost, 16, 15, 16, 7, 16},
2487 {"geode", &geode_cost, 0, 0, 0, 0, 0},
2488 {"k6", &k6_cost, 32, 7, 32, 7, 32},
2489 {"athlon", &athlon_cost, 16, 7, 16, 7, 16},
2490 {"k8", &k8_cost, 16, 7, 16, 7, 16},
2491 {"amdfam10", &amdfam10_cost, 32, 24, 32, 7, 32},
2492 {"bdver1", &bdver1_cost, 16, 10, 16, 7, 11},
2493 {"bdver2", &bdver2_cost, 16, 10, 16, 7, 11},
2494 {"bdver3", &bdver3_cost, 16, 10, 16, 7, 11},
2495 {"bdver4", &bdver4_cost, 16, 10, 16, 7, 11},
2496 {"btver1", &btver1_cost, 16, 10, 16, 7, 11},
2497 {"btver2", &btver2_cost, 16, 10, 16, 7, 11}
2498 };
2499 \f
2500 static unsigned int
2501 rest_of_handle_insert_vzeroupper (void)
2502 {
2503 int i;
2504
2505 /* vzeroupper instructions are inserted immediately after reload to
2506 account for possible spills from 256bit registers. The pass
2507 reuses the mode switching infrastructure by re-running the mode
2508 insertion pass, so disable entities that have already been processed. */
2509 for (i = 0; i < MAX_386_ENTITIES; i++)
2510 ix86_optimize_mode_switching[i] = 0;
2511
2512 ix86_optimize_mode_switching[AVX_U128] = 1;
2513
2514 /* Call optimize_mode_switching. */
2515 g->get_passes ()->execute_pass_mode_switching ();
2516 return 0;
2517 }
2518
2519 namespace {
2520
2521 const pass_data pass_data_insert_vzeroupper =
2522 {
2523 RTL_PASS, /* type */
2524 "vzeroupper", /* name */
2525 OPTGROUP_NONE, /* optinfo_flags */
2526 TV_NONE, /* tv_id */
2527 0, /* properties_required */
2528 0, /* properties_provided */
2529 0, /* properties_destroyed */
2530 0, /* todo_flags_start */
2531 TODO_df_finish, /* todo_flags_finish */
2532 };
2533
2534 class pass_insert_vzeroupper : public rtl_opt_pass
2535 {
2536 public:
2537 pass_insert_vzeroupper(gcc::context *ctxt)
2538 : rtl_opt_pass(pass_data_insert_vzeroupper, ctxt)
2539 {}
2540
2541 /* opt_pass methods: */
2542 virtual bool gate (function *)
2543 {
2544 return TARGET_AVX && !TARGET_AVX512F && TARGET_VZEROUPPER;
2545 }
2546
2547 virtual unsigned int execute (function *)
2548 {
2549 return rest_of_handle_insert_vzeroupper ();
2550 }
2551
2552 }; // class pass_insert_vzeroupper
2553
2554 } // anon namespace
2555
2556 rtl_opt_pass *
2557 make_pass_insert_vzeroupper (gcc::context *ctxt)
2558 {
2559 return new pass_insert_vzeroupper (ctxt);
2560 }
2561
2562 /* Return true if a red-zone is in use. */
2563
2564 static inline bool
2565 ix86_using_red_zone (void)
2566 {
2567 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
2568 }
2569 \f
2570 /* Return a string that documents the current -m options. The caller is
2571 responsible for freeing the string. */
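/* Purely as an illustration, a call along the lines of

     char *s = ix86_target_string (ix86_isa_flags, target_flags,
                                   "core2", "generic", FPMATH_SSE, true);

   yields a freshly allocated string such as
   "-m64 -march=core2 -mtune=generic -msse3 ... -mfpmath=sse", where the
   ISA options printed depend on whatever bits are set in the first
   argument; the caller must free () the result.  */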
2572
2573 static char *
2574 ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
2575 const char *tune, enum fpmath_unit fpmath,
2576 bool add_nl_p)
2577 {
2578 struct ix86_target_opts
2579 {
2580 const char *option; /* option string */
2581 HOST_WIDE_INT mask; /* isa mask options */
2582 };
2583
2584 /* This table is ordered so that options like -msse4.2, which imply
2585 preceding options, are matched first. */
2586 static struct ix86_target_opts isa_opts[] =
2587 {
2588 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2589 { "-mfma", OPTION_MASK_ISA_FMA },
2590 { "-mxop", OPTION_MASK_ISA_XOP },
2591 { "-mlwp", OPTION_MASK_ISA_LWP },
2592 { "-mavx512f", OPTION_MASK_ISA_AVX512F },
2593 { "-mavx512er", OPTION_MASK_ISA_AVX512ER },
2594 { "-mavx512cd", OPTION_MASK_ISA_AVX512CD },
2595 { "-mavx512pf", OPTION_MASK_ISA_AVX512PF },
2596 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2597 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2598 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2599 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2600 { "-msse3", OPTION_MASK_ISA_SSE3 },
2601 { "-msse2", OPTION_MASK_ISA_SSE2 },
2602 { "-msse", OPTION_MASK_ISA_SSE },
2603 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2604 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2605 { "-mmmx", OPTION_MASK_ISA_MMX },
2606 { "-mabm", OPTION_MASK_ISA_ABM },
2607 { "-mbmi", OPTION_MASK_ISA_BMI },
2608 { "-mbmi2", OPTION_MASK_ISA_BMI2 },
2609 { "-mlzcnt", OPTION_MASK_ISA_LZCNT },
2610 { "-mhle", OPTION_MASK_ISA_HLE },
2611 { "-mfxsr", OPTION_MASK_ISA_FXSR },
2612 { "-mrdseed", OPTION_MASK_ISA_RDSEED },
2613 { "-mprfchw", OPTION_MASK_ISA_PRFCHW },
2614 { "-madx", OPTION_MASK_ISA_ADX },
2615 { "-mtbm", OPTION_MASK_ISA_TBM },
2616 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2617 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2618 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2619 { "-maes", OPTION_MASK_ISA_AES },
2620 { "-msha", OPTION_MASK_ISA_SHA },
2621 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2622 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2623 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2624 { "-mf16c", OPTION_MASK_ISA_F16C },
2625 { "-mrtm", OPTION_MASK_ISA_RTM },
2626 { "-mxsave", OPTION_MASK_ISA_XSAVE },
2627 { "-mxsaveopt", OPTION_MASK_ISA_XSAVEOPT },
2628 { "-mprefetchwt1", OPTION_MASK_ISA_PREFETCHWT1 },
2629 { "-mclflushopt", OPTION_MASK_ISA_CLFLUSHOPT },
2630 { "-mxsavec", OPTION_MASK_ISA_XSAVEC },
2631 { "-mxsaves", OPTION_MASK_ISA_XSAVES },
2632 };
2633
2634 /* Flag options. */
2635 static struct ix86_target_opts flag_opts[] =
2636 {
2637 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2638 { "-mlong-double-128", MASK_LONG_DOUBLE_128 },
2639 { "-mlong-double-64", MASK_LONG_DOUBLE_64 },
2640 { "-m80387", MASK_80387 },
2641 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2642 { "-malign-double", MASK_ALIGN_DOUBLE },
2643 { "-mcld", MASK_CLD },
2644 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2645 { "-mieee-fp", MASK_IEEE_FP },
2646 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2647 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2648 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2649 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2650 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2651 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2652 { "-mno-red-zone", MASK_NO_RED_ZONE },
2653 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2654 { "-mrecip", MASK_RECIP },
2655 { "-mrtd", MASK_RTD },
2656 { "-msseregparm", MASK_SSEREGPARM },
2657 { "-mstack-arg-probe", MASK_STACK_PROBE },
2658 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2659 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
2660 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2661 { "-mvzeroupper", MASK_VZEROUPPER },
2662 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
2663 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE},
2664 { "-mprefer-avx128", MASK_PREFER_AVX128},
2665 };
2666
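/* Room for every ISA option and every flag option, plus up to six extra
   entries: -march=, -mtune=, the ABI switch, the "(other isa: ...)" and
   "(other flags: ...)" notes, and -mfpmath=.  */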
2667 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2668
2669 char isa_other[40];
2670 char target_other[40];
2671 unsigned num = 0;
2672 unsigned i, j;
2673 char *ret;
2674 char *ptr;
2675 size_t len;
2676 size_t line_len;
2677 size_t sep_len;
2678 const char *abi;
2679
2680 memset (opts, '\0', sizeof (opts));
2681
2682 /* Add -march= option. */
2683 if (arch)
2684 {
2685 opts[num][0] = "-march=";
2686 opts[num++][1] = arch;
2687 }
2688
2689 /* Add -mtune= option. */
2690 if (tune)
2691 {
2692 opts[num][0] = "-mtune=";
2693 opts[num++][1] = tune;
2694 }
2695
2696 /* Add -m32/-m64/-mx32. */
2697 if ((isa & OPTION_MASK_ISA_64BIT) != 0)
2698 {
2699 if ((isa & OPTION_MASK_ABI_64) != 0)
2700 abi = "-m64";
2701 else
2702 abi = "-mx32";
2703 isa &= ~ (OPTION_MASK_ISA_64BIT
2704 | OPTION_MASK_ABI_64
2705 | OPTION_MASK_ABI_X32);
2706 }
2707 else
2708 abi = "-m32";
2709 opts[num++][0] = abi;
2710
2711 /* Pick out the options in isa options. */
2712 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2713 {
2714 if ((isa & isa_opts[i].mask) != 0)
2715 {
2716 opts[num++][0] = isa_opts[i].option;
2717 isa &= ~ isa_opts[i].mask;
2718 }
2719 }
2720
2721 if (isa && add_nl_p)
2722 {
2723 opts[num++][0] = isa_other;
2724 sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)",
2725 isa);
2726 }
2727
2728 /* Add flag options. */
2729 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2730 {
2731 if ((flags & flag_opts[i].mask) != 0)
2732 {
2733 opts[num++][0] = flag_opts[i].option;
2734 flags &= ~ flag_opts[i].mask;
2735 }
2736 }
2737
2738 if (flags && add_nl_p)
2739 {
2740 opts[num++][0] = target_other;
2741 sprintf (target_other, "(other flags: %#x)", flags);
2742 }
2743
2744 /* Add -fpmath= option. */
2745 if (fpmath)
2746 {
2747 opts[num][0] = "-mfpmath=";
2748 switch ((int) fpmath)
2749 {
2750 case FPMATH_387:
2751 opts[num++][1] = "387";
2752 break;
2753
2754 case FPMATH_SSE:
2755 opts[num++][1] = "sse";
2756 break;
2757
2758 case FPMATH_387 | FPMATH_SSE:
2759 opts[num++][1] = "sse+387";
2760 break;
2761
2762 default:
2763 gcc_unreachable ();
2764 }
2765 }
2766
2767 /* Any options? */
2768 if (num == 0)
2769 return NULL;
2770
2771 gcc_assert (num < ARRAY_SIZE (opts));
2772
2773 /* Size the string. */
2774 len = 0;
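/* Each option after the first is preceded by a space and, when line
   wrapping kicks in, by "\\\n" as well (three bytes in the worst case);
   sep_len reserves that worst case per option.  */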
2775 sep_len = (add_nl_p) ? 3 : 1;
2776 for (i = 0; i < num; i++)
2777 {
2778 len += sep_len;
2779 for (j = 0; j < 2; j++)
2780 if (opts[i][j])
2781 len += strlen (opts[i][j]);
2782 }
2783
2784 /* Build the string. */
2785 ret = ptr = (char *) xmalloc (len);
2786 line_len = 0;
2787
2788 for (i = 0; i < num; i++)
2789 {
2790 size_t len2[2];
2791
2792 for (j = 0; j < 2; j++)
2793 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2794
2795 if (i != 0)
2796 {
2797 *ptr++ = ' ';
2798 line_len++;
2799
2800 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2801 {
2802 *ptr++ = '\\';
2803 *ptr++ = '\n';
2804 line_len = 0;
2805 }
2806 }
2807
2808 for (j = 0; j < 2; j++)
2809 if (opts[i][j])
2810 {
2811 memcpy (ptr, opts[i][j], len2[j]);
2812 ptr += len2[j];
2813 line_len += len2[j];
2814 }
2815 }
2816
2817 *ptr = '\0';
2818 gcc_assert (ret + len >= ptr);
2819
2820 return ret;
2821 }
2822
2823 /* Return true if profiling code should be emitted before the
2824 prologue, and false otherwise.
2825 Note: for x86 with "hotfix" this is unfortunate. */
2826 static bool
2827 ix86_profile_before_prologue (void)
2828 {
2829 return flag_fentry != 0;
2830 }
2831
2832 /* Function that is callable from the debugger to print the current
2833 options. */
2834 void ATTRIBUTE_UNUSED
2835 ix86_debug_options (void)
2836 {
2837 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2838 ix86_arch_string, ix86_tune_string,
2839 ix86_fpmath, true);
2840
2841 if (opts)
2842 {
2843 fprintf (stderr, "%s\n\n", opts);
2844 free (opts);
2845 }
2846 else
2847 fputs ("<no options>\n\n", stderr);
2848
2849 return;
2850 }
2851
2852 static const char *stringop_alg_names[] = {
2853 #define DEF_ENUM
2854 #define DEF_ALG(alg, name) #name,
2855 #include "stringop.def"
2856 #undef DEF_ENUM
2857 #undef DEF_ALG
2858 };
2859
2860 /* Parse the parameter string passed to -mmemcpy-strategy= or -mmemset-strategy=.
2861 The string is of the following form (or a comma-separated list of such entries):
2862
2863 strategy_alg:max_size:[align|noalign]
2864
2865 where the full size range for the strategy is either [0, max_size] or
2866 [min_size, max_size], in which min_size is the max_size + 1 of the
2867 preceding range. The last size range must have max_size == -1.
2868
2869 Examples:
2870
2871 1.
2872 -mmemcpy-strategy=libcall:-1:noalign
2873
2874 this is equivalent to (for known size memcpy) -mstringop-strategy=libcall
2875
2876
2877 2.
2878 -mmemset-strategy=rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign
2879
2880 This is to tell the compiler to use the following strategy for memset
2881 1) when the expected size is between [1, 16], use rep_8byte strategy;
2882 2) when the size is between [17, 2048], use vector_loop;
2883 3) when the size is > 2048, use libcall. */
2884
2885 struct stringop_size_range
2886 {
2887 int max;
2888 stringop_alg alg;
2889 bool noalign;
2890 };
2891
2892 static void
2893 ix86_parse_stringop_strategy_string (char *strategy_str, bool is_memset)
2894 {
2895 const struct stringop_algs *default_algs;
2896 stringop_size_range input_ranges[MAX_STRINGOP_ALGS];
2897 char *curr_range_str, *next_range_str;
2898 int i = 0, n = 0;
2899
2900 if (is_memset)
2901 default_algs = &ix86_cost->memset[TARGET_64BIT != 0];
2902 else
2903 default_algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
2904
2905 curr_range_str = strategy_str;
2906
2907 do
2908 {
2909 int maxs;
2910 char alg_name[128];
2911 char align[16];
2912 next_range_str = strchr (curr_range_str, ',');
2913 if (next_range_str)
2914 *next_range_str++ = '\0';
2915
2916 if (3 != sscanf (curr_range_str, "%20[^:]:%d:%10s",
2917 alg_name, &maxs, align))
2918 {
2919 error ("wrong arg %s to option %s", curr_range_str,
2920 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2921 return;
2922 }
2923
2924 if (n > 0 && (maxs < (input_ranges[n - 1].max + 1) && maxs != -1))
2925 {
2926 error ("size ranges of option %s should be increasing",
2927 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2928 return;
2929 }
2930
2931 for (i = 0; i < last_alg; i++)
2932 if (!strcmp (alg_name, stringop_alg_names[i]))
2933 break;
2934
2935 if (i == last_alg)
2936 {
2937 error ("wrong stringop strategy name %s specified for option %s",
2938 alg_name,
2939 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2940 return;
2941 }
2942
2943 input_ranges[n].max = maxs;
2944 input_ranges[n].alg = (stringop_alg) i;
2945 if (!strcmp (align, "align"))
2946 input_ranges[n].noalign = false;
2947 else if (!strcmp (align, "noalign"))
2948 input_ranges[n].noalign = true;
2949 else
2950 {
2951 error ("unknown alignment %s specified for option %s",
2952 align, is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2953 return;
2954 }
2955 n++;
2956 curr_range_str = next_range_str;
2957 }
2958 while (curr_range_str);
2959
2960 if (input_ranges[n - 1].max != -1)
2961 {
2962 error ("the max value for the last size range should be -1"
2963 " for option %s",
2964 is_memset ? "-mmemset-strategy=" : "-mmemcpy-strategy=");
2965 return;
2966 }
2967
2968 if (n > MAX_STRINGOP_ALGS)
2969 {
2970 error ("too many size ranges specified in option %s",
2971 is_memset ? "-mmemset-strategy=" : "-mmemcpy-strategy=");
2972 return;
2973 }
2974
2975 /* Now override the default algs array. */
2976 for (i = 0; i < n; i++)
2977 {
2978 *const_cast<int *>(&default_algs->size[i].max) = input_ranges[i].max;
2979 *const_cast<stringop_alg *>(&default_algs->size[i].alg)
2980 = input_ranges[i].alg;
2981 *const_cast<int *>(&default_algs->size[i].noalign)
2982 = input_ranges[i].noalign;
2983 }
2984 }
2985
2986 \f
2987 /* Parse the -mtune-ctrl= option. When DUMP is true,
2988 print the features that are explicitly set. */
2989
2990 static void
2991 parse_mtune_ctrl_str (bool dump)
2992 {
2993 if (!ix86_tune_ctrl_string)
2994 return;
2995
2996 char *next_feature_string = NULL;
2997 char *curr_feature_string = xstrdup (ix86_tune_ctrl_string);
2998 char *orig = curr_feature_string;
2999 int i;
3000 do
3001 {
3002 bool clear = false;
3003
3004 next_feature_string = strchr (curr_feature_string, ',');
3005 if (next_feature_string)
3006 *next_feature_string++ = '\0';
3007 if (*curr_feature_string == '^')
3008 {
3009 curr_feature_string++;
3010 clear = true;
3011 }
3012 for (i = 0; i < X86_TUNE_LAST; i++)
3013 {
3014 if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
3015 {
3016 ix86_tune_features[i] = !clear;
3017 if (dump)
3018 fprintf (stderr, "Explicitly %s feature %s\n",
3019 clear ? "clear" : "set", ix86_tune_feature_names[i]);
3020 break;
3021 }
3022 }
3023 if (i == X86_TUNE_LAST)
3024 error ("Unknown parameter to option -mtune-ctrl: %s",
3025 clear ? curr_feature_string - 1 : curr_feature_string);
3026 curr_feature_string = next_feature_string;
3027 }
3028 while (curr_feature_string);
3029 free (orig);
3030 }
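
/* For example, -mtune-ctrl=use_leave,^use_incdec explicitly sets the first
   tuning feature and clears the second; the recognized names are the strings
   in ix86_tune_feature_names, generated from x86-tune.def.  */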
3031
3032 /* Helper function to set ix86_tune_features. IX86_TUNE is the
3033 processor type. */
3034
3035 static void
3036 set_ix86_tune_features (enum processor_type ix86_tune, bool dump)
3037 {
3038 unsigned int ix86_tune_mask = 1u << ix86_tune;
3039 int i;
3040
3041 for (i = 0; i < X86_TUNE_LAST; ++i)
3042 {
3043 if (ix86_tune_no_default)
3044 ix86_tune_features[i] = 0;
3045 else
3046 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3047 }
3048
3049 if (dump)
3050 {
3051 fprintf (stderr, "List of x86 specific tuning parameter names:\n");
3052 for (i = 0; i < X86_TUNE_LAST; i++)
3053 fprintf (stderr, "%s : %s\n", ix86_tune_feature_names[i],
3054 ix86_tune_features[i] ? "on" : "off");
3055 }
3056
3057 parse_mtune_ctrl_str (dump);
3058 }
3059
3060
3061 /* Override various settings based on options. If MAIN_ARGS_P, the
3062 options are from the command line, otherwise they are from
3063 attributes. */
3064
3065 static void
3066 ix86_option_override_internal (bool main_args_p,
3067 struct gcc_options *opts,
3068 struct gcc_options *opts_set)
3069 {
3070 int i;
3071 unsigned int ix86_arch_mask;
3072 const bool ix86_tune_specified = (opts->x_ix86_tune_string != NULL);
3073 const char *prefix;
3074 const char *suffix;
3075 const char *sw;
3076
3077 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
3078 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
3079 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
3080 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
3081 #define PTA_AES (HOST_WIDE_INT_1 << 4)
3082 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
3083 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
3084 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
3085 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
3086 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
3087 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
3088 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
3089 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
3090 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
3091 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
3092 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
3093 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
3094 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
3095 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
3096 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
3097 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
3098 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
3099 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
3100 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
3101 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
3102 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
3103 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
3104 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
3105 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
3106 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
3107 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
3108 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
3109 #define PTA_RTM (HOST_WIDE_INT_1 << 32)
3110 #define PTA_HLE (HOST_WIDE_INT_1 << 33)
3111 #define PTA_PRFCHW (HOST_WIDE_INT_1 << 34)
3112 #define PTA_RDSEED (HOST_WIDE_INT_1 << 35)
3113 #define PTA_ADX (HOST_WIDE_INT_1 << 36)
3114 #define PTA_FXSR (HOST_WIDE_INT_1 << 37)
3115 #define PTA_XSAVE (HOST_WIDE_INT_1 << 38)
3116 #define PTA_XSAVEOPT (HOST_WIDE_INT_1 << 39)
3117 #define PTA_AVX512F (HOST_WIDE_INT_1 << 40)
3118 #define PTA_AVX512ER (HOST_WIDE_INT_1 << 41)
3119 #define PTA_AVX512PF (HOST_WIDE_INT_1 << 42)
3120 #define PTA_AVX512CD (HOST_WIDE_INT_1 << 43)
3121 #define PTA_SHA (HOST_WIDE_INT_1 << 45)
3122 #define PTA_PREFETCHWT1 (HOST_WIDE_INT_1 << 46)
3123 #define PTA_CLFLUSHOPT (HOST_WIDE_INT_1 << 47)
3124 #define PTA_XSAVEC (HOST_WIDE_INT_1 << 48)
3125 #define PTA_XSAVES (HOST_WIDE_INT_1 << 49)
3126
3127 #define PTA_CORE2 \
3128 (PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 \
3129 | PTA_CX16 | PTA_FXSR)
3130 #define PTA_NEHALEM \
3131 (PTA_CORE2 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_POPCNT)
3132 #define PTA_WESTMERE \
3133 (PTA_NEHALEM | PTA_AES | PTA_PCLMUL)
3134 #define PTA_SANDYBRIDGE \
3135 (PTA_WESTMERE | PTA_AVX | PTA_XSAVE | PTA_XSAVEOPT)
3136 #define PTA_IVYBRIDGE \
3137 (PTA_SANDYBRIDGE | PTA_FSGSBASE | PTA_RDRND | PTA_F16C)
3138 #define PTA_HASWELL \
3139 (PTA_IVYBRIDGE | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_LZCNT \
3140 | PTA_FMA | PTA_MOVBE | PTA_HLE)
3141 #define PTA_BROADWELL \
3142 (PTA_HASWELL | PTA_ADX | PTA_PRFCHW | PTA_RDSEED)
3143 #define PTA_BONNELL \
3144 (PTA_CORE2 | PTA_MOVBE)
3145 #define PTA_SILVERMONT \
3146 (PTA_WESTMERE | PTA_MOVBE)
3147
3148 /* If this reaches 64, the flags field of struct pta below needs to be widened. */
3149
3150 static struct pta
3151 {
3152 const char *const name; /* processor name or nickname. */
3153 const enum processor_type processor;
3154 const enum attr_cpu schedule;
3155 const unsigned HOST_WIDE_INT flags;
3156 }
3157 const processor_alias_table[] =
3158 {
3159 {"i386", PROCESSOR_I386, CPU_NONE, 0},
3160 {"i486", PROCESSOR_I486, CPU_NONE, 0},
3161 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3162 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3163 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
3164 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
3165 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3166 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3167 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3168 PTA_MMX | PTA_SSE | PTA_FXSR},
3169 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3170 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3171 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_FXSR},
3172 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3173 PTA_MMX | PTA_SSE | PTA_FXSR},
3174 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3175 PTA_MMX | PTA_SSE | PTA_FXSR},
3176 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3177 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3178 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
3179 PTA_MMX |PTA_SSE | PTA_SSE2 | PTA_FXSR},
3180 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
3181 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3182 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
3183 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
3184 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
3185 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3186 | PTA_CX16 | PTA_NO_SAHF | PTA_FXSR},
3187 {"core2", PROCESSOR_CORE2, CPU_CORE2, PTA_CORE2},
3188 {"nehalem", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3189 {"corei7", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3190 {"westmere", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_WESTMERE},
3191 {"sandybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3192 PTA_SANDYBRIDGE},
3193 {"corei7-avx", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3194 PTA_SANDYBRIDGE},
3195 {"ivybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3196 PTA_IVYBRIDGE},
3197 {"core-avx-i", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3198 PTA_IVYBRIDGE},
3199 {"haswell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3200 {"core-avx2", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3201 {"broadwell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_BROADWELL},
3202 {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3203 {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3204 {"silvermont", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3205 {"slm", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3206 {"intel", PROCESSOR_INTEL, CPU_SLM, PTA_NEHALEM},
3207 {"geode", PROCESSOR_GEODE, CPU_GEODE,
3208 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3209 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
3210 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3211 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3212 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
3213 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3214 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
3215 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3216 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
3217 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3218 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
3219 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3220 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
3221 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3222 {"x86-64", PROCESSOR_K8, CPU_K8,
3223 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
3224 {"k8", PROCESSOR_K8, CPU_K8,
3225 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3226 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3227 {"k8-sse3", PROCESSOR_K8, CPU_K8,
3228 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3229 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3230 {"opteron", PROCESSOR_K8, CPU_K8,
3231 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3232 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3233 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3234 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3235 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3236 {"athlon64", PROCESSOR_K8, CPU_K8,
3237 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3238 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3239 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3240 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3241 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3242 {"athlon-fx", PROCESSOR_K8, CPU_K8,
3243 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3244 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3245 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3246 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3247 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3248 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3249 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3250 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3251 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3252 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3253 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3254 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3255 | PTA_XOP | PTA_LWP | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3256 {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
3257 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3258 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3259 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3260 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3261 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3262 {"bdver3", PROCESSOR_BDVER3, CPU_BDVER3,
3263 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3264 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3265 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3266 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3267 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE
3268 | PTA_XSAVEOPT | PTA_FSGSBASE},
3269 {"bdver4", PROCESSOR_BDVER4, CPU_BDVER4,
3270 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3271 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3272 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
3273 | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2
3274 | PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR
3275 | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE | PTA_RDRND
3276 | PTA_MOVBE},
3277 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC,
3278 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3279 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_PRFCHW
3280 | PTA_FXSR | PTA_XSAVE},
3281 {"btver2", PROCESSOR_BTVER2, CPU_BTVER2,
3282 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3283 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_SSE4_1
3284 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
3285 | PTA_BMI | PTA_F16C | PTA_MOVBE | PTA_PRFCHW
3286 | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT},
3287
3288 {"generic", PROCESSOR_GENERIC, CPU_GENERIC,
3289 PTA_64BIT
3290 | PTA_HLE /* flags are only used for -march switch. */ },
3291 };
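
/* For instance, with -march=bdver4 the loop over this table below ORs the
   ISA bits implied by PTA_MOVBE, PTA_RDRND and the other bdver4 flags into
   opts->x_ix86_isa_flags, unless the user set the corresponding -m option
   explicitly (tracked in opts->x_ix86_isa_flags_explicit).  */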
3292
3293 /* -mrecip options. */
3294 static struct
3295 {
3296 const char *string; /* option name */
3297 unsigned int mask; /* mask bits to set */
3298 }
3299 const recip_options[] =
3300 {
3301 { "all", RECIP_MASK_ALL },
3302 { "none", RECIP_MASK_NONE },
3303 { "div", RECIP_MASK_DIV },
3304 { "sqrt", RECIP_MASK_SQRT },
3305 { "vec-div", RECIP_MASK_VEC_DIV },
3306 { "vec-sqrt", RECIP_MASK_VEC_SQRT },
3307 };
3308
3309 int const pta_size = ARRAY_SIZE (processor_alias_table);
3310
3311 /* Set up prefix/suffix so the error messages refer to either the command
3312 line argument, or the attribute(target). */
3313 if (main_args_p)
3314 {
3315 prefix = "-m";
3316 suffix = "";
3317 sw = "switch";
3318 }
3319 else
3320 {
3321 prefix = "option(\"";
3322 suffix = "\")";
3323 sw = "attribute";
3324 }
3325
3326 /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
3327 TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */
3328 if (TARGET_64BIT_DEFAULT && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3329 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ABI_64 | OPTION_MASK_ABI_X32);
3330 #ifdef TARGET_BI_ARCH
3331 else
3332 {
3333 #if TARGET_BI_ARCH == 1
3334 /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
3335 is on and OPTION_MASK_ABI_X32 is off. We turn off
3336 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
3337 -mx32. */
3338 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3339 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3340 #else
3341 /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
3342 on and OPTION_MASK_ABI_64 is off. We turn off
3343 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
3344 -m64. */
3345 if (TARGET_LP64_P (opts->x_ix86_isa_flags))
3346 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3347 #endif
3348 }
3349 #endif
3350
3351 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3352 {
3353 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3354 OPTION_MASK_ABI_64 for TARGET_X32. */
3355 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3356 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3357 }
3358 else if (TARGET_16BIT_P (opts->x_ix86_isa_flags))
3359 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ISA_64BIT
3360 | OPTION_MASK_ABI_X32
3361 | OPTION_MASK_ABI_64);
3362 else if (TARGET_LP64_P (opts->x_ix86_isa_flags))
3363 {
3364 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3365 OPTION_MASK_ABI_X32 for TARGET_LP64. */
3366 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3367 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3368 }
3369
3370 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3371 SUBTARGET_OVERRIDE_OPTIONS;
3372 #endif
3373
3374 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3375 SUBSUBTARGET_OVERRIDE_OPTIONS;
3376 #endif
3377
3378 /* -fPIC is the default for x86_64. */
3379 if (TARGET_MACHO && TARGET_64BIT_P (opts->x_ix86_isa_flags))
3380 opts->x_flag_pic = 2;
3381
3382 /* Need to check -mtune=generic first. */
3383 if (opts->x_ix86_tune_string)
3384 {
3385 /* As special support for cross compilers we read -mtune=native
3386 as -mtune=generic. With native compilers we won't see the
3387 -mtune=native, as it was changed by the driver. */
3388 if (!strcmp (opts->x_ix86_tune_string, "native"))
3389 {
3390 opts->x_ix86_tune_string = "generic";
3391 }
3392 else if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3393 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
3394 "%stune=k8%s or %stune=generic%s instead as appropriate",
3395 prefix, suffix, prefix, suffix, prefix, suffix);
3396 }
3397 else
3398 {
3399 if (opts->x_ix86_arch_string)
3400 opts->x_ix86_tune_string = opts->x_ix86_arch_string;
3401 if (!opts->x_ix86_tune_string)
3402 {
3403 opts->x_ix86_tune_string
3404 = processor_target_table[TARGET_CPU_DEFAULT].name;
3405 ix86_tune_defaulted = 1;
3406 }
3407
3408 /* opts->x_ix86_tune_string is set to opts->x_ix86_arch_string
3409 or defaulted. We need to use a sensible tune option. */
3410 if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3411 {
3412 opts->x_ix86_tune_string = "generic";
3413 }
3414 }
3415
3416 if (opts->x_ix86_stringop_alg == rep_prefix_8_byte
3417 && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3418 {
3419 /* rep; movq isn't available in 32-bit code. */
3420 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3421 opts->x_ix86_stringop_alg = no_stringop;
3422 }
3423
3424 if (!opts->x_ix86_arch_string)
3425 opts->x_ix86_arch_string
3426 = TARGET_64BIT_P (opts->x_ix86_isa_flags)
3427 ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3428 else
3429 ix86_arch_specified = 1;
3430
3431 if (opts_set->x_ix86_pmode)
3432 {
3433 if ((TARGET_LP64_P (opts->x_ix86_isa_flags)
3434 && opts->x_ix86_pmode == PMODE_SI)
3435 || (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
3436 && opts->x_ix86_pmode == PMODE_DI))
3437 error ("address mode %qs not supported in the %s bit mode",
3438 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "short" : "long",
3439 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "64" : "32");
3440 }
3441 else
3442 opts->x_ix86_pmode = TARGET_LP64_P (opts->x_ix86_isa_flags)
3443 ? PMODE_DI : PMODE_SI;
3444
3445 if (!opts_set->x_ix86_abi)
3446 opts->x_ix86_abi = DEFAULT_ABI;
3447
3448 /* For targets using the MS ABI, enable ms-extensions unless it was
3449 explicitly turned off. For non-MS ABIs we turn off this
3450 option. */
3451 if (!opts_set->x_flag_ms_extensions)
3452 opts->x_flag_ms_extensions = (MS_ABI == DEFAULT_ABI);
3453
3454 if (opts_set->x_ix86_cmodel)
3455 {
3456 switch (opts->x_ix86_cmodel)
3457 {
3458 case CM_SMALL:
3459 case CM_SMALL_PIC:
3460 if (opts->x_flag_pic)
3461 opts->x_ix86_cmodel = CM_SMALL_PIC;
3462 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3463 error ("code model %qs not supported in the %s bit mode",
3464 "small", "32");
3465 break;
3466
3467 case CM_MEDIUM:
3468 case CM_MEDIUM_PIC:
3469 if (opts->x_flag_pic)
3470 opts->x_ix86_cmodel = CM_MEDIUM_PIC;
3471 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3472 error ("code model %qs not supported in the %s bit mode",
3473 "medium", "32");
3474 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3475 error ("code model %qs not supported in x32 mode",
3476 "medium");
3477 break;
3478
3479 case CM_LARGE:
3480 case CM_LARGE_PIC:
3481 if (opts->x_flag_pic)
3482 opts->x_ix86_cmodel = CM_LARGE_PIC;
3483 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3484 error ("code model %qs not supported in the %s bit mode",
3485 "large", "32");
3486 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3487 error ("code model %qs not supported in x32 mode",
3488 "large");
3489 break;
3490
3491 case CM_32:
3492 if (opts->x_flag_pic)
3493 error ("code model %s does not support PIC mode", "32");
3494 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3495 error ("code model %qs not supported in the %s bit mode",
3496 "32", "64");
3497 break;
3498
3499 case CM_KERNEL:
3500 if (opts->x_flag_pic)
3501 {
3502 error ("code model %s does not support PIC mode", "kernel");
3503 opts->x_ix86_cmodel = CM_32;
3504 }
3505 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3506 error ("code model %qs not supported in the %s bit mode",
3507 "kernel", "32");
3508 break;
3509
3510 default:
3511 gcc_unreachable ();
3512 }
3513 }
3514 else
3515 {
3516 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3517 use of rip-relative addressing. This eliminates fixups that
3518 would otherwise be needed if this object is to be placed in a
3519 DLL, and is essentially just as efficient as direct addressing. */
3520 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3521 && (TARGET_RDOS || TARGET_PECOFF))
3522 opts->x_ix86_cmodel = CM_MEDIUM_PIC, opts->x_flag_pic = 1;
3523 else if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3524 opts->x_ix86_cmodel = opts->x_flag_pic ? CM_SMALL_PIC : CM_SMALL;
3525 else
3526 opts->x_ix86_cmodel = CM_32;
3527 }
3528 if (TARGET_MACHO && opts->x_ix86_asm_dialect == ASM_INTEL)
3529 {
3530 error ("-masm=intel not supported in this configuration");
3531 opts->x_ix86_asm_dialect = ASM_ATT;
3532 }
3533 if ((TARGET_64BIT_P (opts->x_ix86_isa_flags) != 0)
3534 != ((opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3535 sorry ("%i-bit mode not compiled in",
3536 (opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3537
3538 for (i = 0; i < pta_size; i++)
3539 if (! strcmp (opts->x_ix86_arch_string, processor_alias_table[i].name))
3540 {
3541 ix86_schedule = processor_alias_table[i].schedule;
3542 ix86_arch = processor_alias_table[i].processor;
3543 /* Default cpu tuning to the architecture. */
3544 ix86_tune = ix86_arch;
3545
3546 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3547 && !(processor_alias_table[i].flags & PTA_64BIT))
3548 error ("CPU you selected does not support x86-64 "
3549 "instruction set");
3550
3551 if (processor_alias_table[i].flags & PTA_MMX
3552 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3553 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3554 if (processor_alias_table[i].flags & PTA_3DNOW
3555 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3556 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3557 if (processor_alias_table[i].flags & PTA_3DNOW_A
3558 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3559 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3560 if (processor_alias_table[i].flags & PTA_SSE
3561 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3562 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3563 if (processor_alias_table[i].flags & PTA_SSE2
3564 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3565 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3566 if (processor_alias_table[i].flags & PTA_SSE3
3567 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3568 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3569 if (processor_alias_table[i].flags & PTA_SSSE3
3570 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3571 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3572 if (processor_alias_table[i].flags & PTA_SSE4_1
3573 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3574 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3575 if (processor_alias_table[i].flags & PTA_SSE4_2
3576 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3577 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3578 if (processor_alias_table[i].flags & PTA_AVX
3579 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3580 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3581 if (processor_alias_table[i].flags & PTA_AVX2
3582 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
3583 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
3584 if (processor_alias_table[i].flags & PTA_FMA
3585 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3586 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3587 if (processor_alias_table[i].flags & PTA_SSE4A
3588 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3589 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3590 if (processor_alias_table[i].flags & PTA_FMA4
3591 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3592 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3593 if (processor_alias_table[i].flags & PTA_XOP
3594 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3595 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3596 if (processor_alias_table[i].flags & PTA_LWP
3597 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3598 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3599 if (processor_alias_table[i].flags & PTA_ABM
3600 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3601 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3602 if (processor_alias_table[i].flags & PTA_BMI
3603 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
3604 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI;
3605 if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
3606 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
3607 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
3608 if (processor_alias_table[i].flags & PTA_TBM
3609 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
3610 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_TBM;
3611 if (processor_alias_table[i].flags & PTA_BMI2
3612 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
3613 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
3614 if (processor_alias_table[i].flags & PTA_CX16
3615 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3616 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3617 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3618 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3619 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3620 if (!(TARGET_64BIT_P (opts->x_ix86_isa_flags)
3621 && (processor_alias_table[i].flags & PTA_NO_SAHF))
3622 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3623 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3624 if (processor_alias_table[i].flags & PTA_MOVBE
3625 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3626 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3627 if (processor_alias_table[i].flags & PTA_AES
3628 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3629 ix86_isa_flags |= OPTION_MASK_ISA_AES;
3630 if (processor_alias_table[i].flags & PTA_SHA
3631 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SHA))
3632 ix86_isa_flags |= OPTION_MASK_ISA_SHA;
3633 if (processor_alias_table[i].flags & PTA_PCLMUL
3634 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3635 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3636 if (processor_alias_table[i].flags & PTA_FSGSBASE
3637 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3638 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3639 if (processor_alias_table[i].flags & PTA_RDRND
3640 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3641 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3642 if (processor_alias_table[i].flags & PTA_F16C
3643 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3644 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3645 if (processor_alias_table[i].flags & PTA_RTM
3646 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RTM))
3647 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RTM;
3648 if (processor_alias_table[i].flags & PTA_HLE
3649 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_HLE))
3650 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_HLE;
3651 if (processor_alias_table[i].flags & PTA_PRFCHW
3652 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PRFCHW))
3653 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PRFCHW;
3654 if (processor_alias_table[i].flags & PTA_RDSEED
3655 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDSEED))
3656 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDSEED;
3657 if (processor_alias_table[i].flags & PTA_ADX
3658 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ADX))
3659 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ADX;
3660 if (processor_alias_table[i].flags & PTA_FXSR
3661 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FXSR))
3662 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FXSR;
3663 if (processor_alias_table[i].flags & PTA_XSAVE
3664 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVE))
3665 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVE;
3666 if (processor_alias_table[i].flags & PTA_XSAVEOPT
3667 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEOPT))
3668 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEOPT;
3669 if (processor_alias_table[i].flags & PTA_AVX512F
3670 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F))
3671 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512F;
3672 if (processor_alias_table[i].flags & PTA_AVX512ER
3673 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512ER))
3674 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512ER;
3675 if (processor_alias_table[i].flags & PTA_AVX512PF
3676 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512PF))
3677 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512PF;
3678 if (processor_alias_table[i].flags & PTA_AVX512CD
3679 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512CD))
3680 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512CD;
3681 if (processor_alias_table[i].flags & PTA_PREFETCHWT1
3682 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PREFETCHWT1))
3683 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PREFETCHWT1;
3684 if (processor_alias_table[i].flags & PTA_CLFLUSHOPT
3685 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLFLUSHOPT))
3686 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLFLUSHOPT;
3687 if (processor_alias_table[i].flags & PTA_XSAVEC
3688 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEC))
3689 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEC;
3690 if (processor_alias_table[i].flags & PTA_XSAVES
3691 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVES))
3692 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVES;
3693 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3694 x86_prefetch_sse = true;
3695
3696 break;
3697 }
3698
3699 if (!strcmp (opts->x_ix86_arch_string, "generic"))
3700 error ("generic CPU can be used only for %stune=%s %s",
3701 prefix, suffix, sw);
3702 else if (!strcmp (opts->x_ix86_arch_string, "intel"))
3703 error ("intel CPU can be used only for %stune=%s %s",
3704 prefix, suffix, sw);
3705 else if (i == pta_size)
3706 error ("bad value (%s) for %sarch=%s %s",
3707 opts->x_ix86_arch_string, prefix, suffix, sw);
3708
3709 ix86_arch_mask = 1u << ix86_arch;
3710 for (i = 0; i < X86_ARCH_LAST; ++i)
3711 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3712
3713 for (i = 0; i < pta_size; i++)
3714 if (! strcmp (opts->x_ix86_tune_string, processor_alias_table[i].name))
3715 {
3716 ix86_schedule = processor_alias_table[i].schedule;
3717 ix86_tune = processor_alias_table[i].processor;
3718 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3719 {
3720 if (!(processor_alias_table[i].flags & PTA_64BIT))
3721 {
3722 if (ix86_tune_defaulted)
3723 {
3724 opts->x_ix86_tune_string = "x86-64";
3725 for (i = 0; i < pta_size; i++)
3726 if (! strcmp (opts->x_ix86_tune_string,
3727 processor_alias_table[i].name))
3728 break;
3729 ix86_schedule = processor_alias_table[i].schedule;
3730 ix86_tune = processor_alias_table[i].processor;
3731 }
3732 else
3733 error ("CPU you selected does not support x86-64 "
3734 "instruction set");
3735 }
3736 }
3737 /* Intel CPUs have always interpreted SSE prefetch instructions as
3738 NOPs; so, we can enable SSE prefetch instructions even when
3739 -mtune (rather than -march) points us to a processor that has them.
3740 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3741 higher processors. */
3742 if (TARGET_CMOV
3743 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3744 x86_prefetch_sse = true;
3745 break;
3746 }
3747
3748 if (ix86_tune_specified && i == pta_size)
3749 error ("bad value (%s) for %stune=%s %s",
3750 opts->x_ix86_tune_string, prefix, suffix, sw);
3751
3752 set_ix86_tune_features (ix86_tune, opts->x_ix86_dump_tunes);
3753
3754 #ifndef USE_IX86_FRAME_POINTER
3755 #define USE_IX86_FRAME_POINTER 0
3756 #endif
3757
3758 #ifndef USE_X86_64_FRAME_POINTER
3759 #define USE_X86_64_FRAME_POINTER 0
3760 #endif
3761
3762 /* Set the default values for switches whose default depends on TARGET_64BIT
3763 in case they weren't overwritten by command line options. */
3764 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3765 {
3766 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3767 opts->x_flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
3768 if (opts->x_flag_asynchronous_unwind_tables
3769 && !opts_set->x_flag_unwind_tables
3770 && TARGET_64BIT_MS_ABI)
3771 opts->x_flag_unwind_tables = 1;
3772 if (opts->x_flag_asynchronous_unwind_tables == 2)
3773 opts->x_flag_unwind_tables
3774 = opts->x_flag_asynchronous_unwind_tables = 1;
3775 if (opts->x_flag_pcc_struct_return == 2)
3776 opts->x_flag_pcc_struct_return = 0;
3777 }
3778 else
3779 {
3780 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3781 opts->x_flag_omit_frame_pointer
3782 = !(USE_IX86_FRAME_POINTER || opts->x_optimize_size);
3783 if (opts->x_flag_asynchronous_unwind_tables == 2)
3784 opts->x_flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3785 if (opts->x_flag_pcc_struct_return == 2)
3786 opts->x_flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3787 }
3788
3789 ix86_tune_cost = processor_target_table[ix86_tune].cost;
3790 if (opts->x_optimize_size)
3791 ix86_cost = &ix86_size_cost;
3792 else
3793 ix86_cost = ix86_tune_cost;
3794
3795 /* Arrange to set up i386_stack_locals for all functions. */
3796 init_machine_status = ix86_init_machine_status;
3797
3798 /* Validate -mregparm= value. */
3799 if (opts_set->x_ix86_regparm)
3800 {
3801 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3802 warning (0, "-mregparm is ignored in 64-bit mode");
3803 if (opts->x_ix86_regparm > REGPARM_MAX)
3804 {
3805 error ("-mregparm=%d is not between 0 and %d",
3806 opts->x_ix86_regparm, REGPARM_MAX);
3807 opts->x_ix86_regparm = 0;
3808 }
3809 }
3810 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3811 opts->x_ix86_regparm = REGPARM_MAX;
3812
3813 /* Default align_* from the processor table. */
3814 if (opts->x_align_loops == 0)
3815 {
3816 opts->x_align_loops = processor_target_table[ix86_tune].align_loop;
3817 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3818 }
3819 if (opts->x_align_jumps == 0)
3820 {
3821 opts->x_align_jumps = processor_target_table[ix86_tune].align_jump;
3822 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3823 }
3824 if (opts->x_align_functions == 0)
3825 {
3826 opts->x_align_functions = processor_target_table[ix86_tune].align_func;
3827 }
3828
3829 /* Provide default for -mbranch-cost= value. */
3830 if (!opts_set->x_ix86_branch_cost)
3831 opts->x_ix86_branch_cost = ix86_cost->branch_cost;
3832
3833 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3834 {
3835 opts->x_target_flags
3836 |= TARGET_SUBTARGET64_DEFAULT & ~opts_set->x_target_flags;
3837
3838 /* Enable by default the SSE and MMX builtins. Do allow the user to
3839 explicitly disable any of these. In particular, disabling SSE and
3840 MMX for kernel code is extremely useful. */
3841 if (!ix86_arch_specified)
3842 opts->x_ix86_isa_flags
3843 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3844 | TARGET_SUBTARGET64_ISA_DEFAULT)
3845 & ~opts->x_ix86_isa_flags_explicit);
3846
3847 if (TARGET_RTD_P (opts->x_target_flags))
3848 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
3849 }
3850 else
3851 {
3852 opts->x_target_flags
3853 |= TARGET_SUBTARGET32_DEFAULT & ~opts_set->x_target_flags;
3854
3855 if (!ix86_arch_specified)
3856 opts->x_ix86_isa_flags
3857 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit;
3858
3859 /* The i386 ABI does not specify a red zone. It still makes sense to use it
3860 when the programmer takes care to keep the stack from being destroyed. */
3861 if (!(opts_set->x_target_flags & MASK_NO_RED_ZONE))
3862 opts->x_target_flags |= MASK_NO_RED_ZONE;
3863 }
3864
3865 /* Keep nonleaf frame pointers. */
3866 if (opts->x_flag_omit_frame_pointer)
3867 opts->x_target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3868 else if (TARGET_OMIT_LEAF_FRAME_POINTER_P (opts->x_target_flags))
3869 opts->x_flag_omit_frame_pointer = 1;
3870
3871 /* If we're doing fast math, we don't care about comparison order
3872 wrt NaNs. This lets us use a shorter comparison sequence. */
3873 if (opts->x_flag_finite_math_only)
3874 opts->x_target_flags &= ~MASK_IEEE_FP;
3875
3876 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3877 since the insns won't need emulation. */
3878 if (ix86_tune_features [X86_TUNE_ALWAYS_FANCY_MATH_387])
3879 opts->x_target_flags &= ~MASK_NO_FANCY_MATH_387;
3880
3881 /* Likewise, if the target doesn't have a 387, or we've specified
3882 software floating point, don't use 387 inline intrinsics. */
3883 if (!TARGET_80387_P (opts->x_target_flags))
3884 opts->x_target_flags |= MASK_NO_FANCY_MATH_387;
3885
3886 /* Turn on MMX builtins for -msse. */
3887 if (TARGET_SSE_P (opts->x_ix86_isa_flags))
3888 opts->x_ix86_isa_flags
3889 |= OPTION_MASK_ISA_MMX & ~opts->x_ix86_isa_flags_explicit;
3890
3891 /* Enable SSE prefetch. */
3892 if (TARGET_SSE_P (opts->x_ix86_isa_flags)
3893 || (TARGET_PRFCHW && !TARGET_3DNOW_P (opts->x_ix86_isa_flags)))
3894 x86_prefetch_sse = true;
3895
3896 /* Enable prefetch{,w} instructions for -m3dnow and -mprefetchwt1. */
3897 if (TARGET_3DNOW_P (opts->x_ix86_isa_flags)
3898 || TARGET_PREFETCHWT1_P (opts->x_ix86_isa_flags))
3899 opts->x_ix86_isa_flags
3900 |= OPTION_MASK_ISA_PRFCHW & ~opts->x_ix86_isa_flags_explicit;
3901
3902 /* Enable popcnt instruction for -msse4.2 or -mabm. */
3903 if (TARGET_SSE4_2_P (opts->x_ix86_isa_flags)
3904 || TARGET_ABM_P (opts->x_ix86_isa_flags))
3905 opts->x_ix86_isa_flags
3906 |= OPTION_MASK_ISA_POPCNT & ~opts->x_ix86_isa_flags_explicit;
3907
3908 /* Enable lzcnt instruction for -mabm. */
3909 if (TARGET_ABM_P(opts->x_ix86_isa_flags))
3910 opts->x_ix86_isa_flags
3911 |= OPTION_MASK_ISA_LZCNT & ~opts->x_ix86_isa_flags_explicit;
3912
3913 /* Validate -mpreferred-stack-boundary= value or default it to
3914 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3915 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3916 if (opts_set->x_ix86_preferred_stack_boundary_arg)
3917 {
3918 int min = (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3919 ? (TARGET_SSE_P (opts->x_ix86_isa_flags) ? 4 : 3) : 2);
3920 int max = (TARGET_SEH ? 4 : 12);
3921
3922 if (opts->x_ix86_preferred_stack_boundary_arg < min
3923 || opts->x_ix86_preferred_stack_boundary_arg > max)
3924 {
3925 if (min == max)
3926 error ("-mpreferred-stack-boundary is not supported "
3927 "for this target");
3928 else
3929 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
3930 opts->x_ix86_preferred_stack_boundary_arg, min, max);
3931 }
3932 else
3933 ix86_preferred_stack_boundary
3934 = (1 << opts->x_ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
3935 }
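
/* The argument is log2 of the boundary in bytes: e.g.
   -mpreferred-stack-boundary=4 yields (1 << 4) * BITS_PER_UNIT = 128 bits,
   i.e. 16-byte stack alignment.  */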
3936
3937 /* Set the default value for -mstackrealign. */
3938 if (opts->x_ix86_force_align_arg_pointer == -1)
3939 opts->x_ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3940
3941 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3942
3943 /* Validate -mincoming-stack-boundary= value or default it to
3944 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3945 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3946 if (opts_set->x_ix86_incoming_stack_boundary_arg)
3947 {
3948 if (opts->x_ix86_incoming_stack_boundary_arg
3949 < (TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2)
3950 || opts->x_ix86_incoming_stack_boundary_arg > 12)
3951 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3952 opts->x_ix86_incoming_stack_boundary_arg,
3953 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2);
3954 else
3955 {
3956 ix86_user_incoming_stack_boundary
3957 = (1 << opts->x_ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
3958 ix86_incoming_stack_boundary
3959 = ix86_user_incoming_stack_boundary;
3960 }
3961 }
3962
3963 /* Accept -msseregparm only if at least SSE support is enabled. */
3964 if (TARGET_SSEREGPARM_P (opts->x_target_flags)
3965 && ! TARGET_SSE_P (opts->x_ix86_isa_flags))
3966 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3967
3968 if (opts_set->x_ix86_fpmath)
3969 {
3970 if (opts->x_ix86_fpmath & FPMATH_SSE)
3971 {
3972 if (!TARGET_SSE_P (opts->x_ix86_isa_flags))
3973 {
3974 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3975 opts->x_ix86_fpmath = FPMATH_387;
3976 }
3977 else if ((opts->x_ix86_fpmath & FPMATH_387)
3978 && !TARGET_80387_P (opts->x_target_flags))
3979 {
3980 warning (0, "387 instruction set disabled, using SSE arithmetics");
3981 opts->x_ix86_fpmath = FPMATH_SSE;
3982 }
3983 }
3984 }
3985 /* For all chips supporting SSE2, -mfpmath=sse performs better than
3986 -mfpmath=387. The latter is, however, the default on many targets, since
3987 the extra 80-bit precision of temporaries is considered part of the ABI.
3988 Overwrite the default at least for -ffast-math.
3989 TODO: -mfpmath=both seems to produce equally performing code with
3990 slightly smaller binaries. It is however not clear whether register
3991 allocation is ready for this setting.
3992 Also, -mfpmath=387 is overall a lot more compact (about 4-5%) than SSE
3993 codegen. We may switch to 387 with -ffast-math for size-optimized
3994 functions. */
3995 else if (fast_math_flags_set_p (&global_options)
3996 && TARGET_SSE2_P (opts->x_ix86_isa_flags))
3997 opts->x_ix86_fpmath = FPMATH_SSE;
3998 else
3999 opts->x_ix86_fpmath = TARGET_FPMATH_DEFAULT_P (opts->x_ix86_isa_flags);
4000
4001 /* If the i387 is disabled, then do not return values in it. */
4002 if (!TARGET_80387_P (opts->x_target_flags))
4003 opts->x_target_flags &= ~MASK_FLOAT_RETURNS;
4004
4005 /* Use external vectorized library in vectorizing intrinsics. */
4006 if (opts_set->x_ix86_veclibabi_type)
4007 switch (opts->x_ix86_veclibabi_type)
4008 {
4009 case ix86_veclibabi_type_svml:
4010 ix86_veclib_handler = ix86_veclibabi_svml;
4011 break;
4012
4013 case ix86_veclibabi_type_acml:
4014 ix86_veclib_handler = ix86_veclibabi_acml;
4015 break;
4016
4017 default:
4018 gcc_unreachable ();
4019 }
4020
4021 if (ix86_tune_features [X86_TUNE_ACCUMULATE_OUTGOING_ARGS]
4022 && !(opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4023 && !opts->x_optimize_size)
4024 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4025
4026 /* If stack probes are required, the space used for large function
4027 arguments on the stack must also be probed, so enable
4028 -maccumulate-outgoing-args so this happens in the prologue. */
4029 if (TARGET_STACK_PROBE_P (opts->x_target_flags)
4030 && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4031 {
4032 if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4033 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
4034 "for correctness", prefix, suffix);
4035 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4036 }
4037
4038 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
4039 {
4040 char *p;
4041 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
4042 p = strchr (internal_label_prefix, 'X');
4043 internal_label_prefix_len = p - internal_label_prefix;
4044 *p = '\0';
4045 }
4046
4047 /* When no scheduling description is available, disable the scheduler pass
4048 so it won't slow down compilation and make x87 code slower. */
4049 if (!TARGET_SCHEDULE)
4050 opts->x_flag_schedule_insns_after_reload = opts->x_flag_schedule_insns = 0;
4051
4052 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4053 ix86_tune_cost->simultaneous_prefetches,
4054 opts->x_param_values,
4055 opts_set->x_param_values);
4056 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
4057 ix86_tune_cost->prefetch_block,
4058 opts->x_param_values,
4059 opts_set->x_param_values);
4060 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
4061 ix86_tune_cost->l1_cache_size,
4062 opts->x_param_values,
4063 opts_set->x_param_values);
4064 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
4065 ix86_tune_cost->l2_cache_size,
4066 opts->x_param_values,
4067 opts_set->x_param_values);
4068
4069 /* Enable software prefetching at -O3 for CPUs where prefetching is helpful. */
4070 if (opts->x_flag_prefetch_loop_arrays < 0
4071 && HAVE_prefetch
4072 && (opts->x_optimize >= 3 || opts->x_flag_profile_use)
4073 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
4074 opts->x_flag_prefetch_loop_arrays = 1;
4075
4076 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
4077 can be optimized to ap = __builtin_next_arg (0). */
4078 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && !opts->x_flag_split_stack)
4079 targetm.expand_builtin_va_start = NULL;
4080
4081 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4082 {
4083 ix86_gen_leave = gen_leave_rex64;
4084 if (Pmode == DImode)
4085 {
4086 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_di;
4087 ix86_gen_tls_local_dynamic_base_64
4088 = gen_tls_local_dynamic_base_64_di;
4089 }
4090 else
4091 {
4092 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_si;
4093 ix86_gen_tls_local_dynamic_base_64
4094 = gen_tls_local_dynamic_base_64_si;
4095 }
4096 }
4097 else
4098 ix86_gen_leave = gen_leave;
4099
4100 if (Pmode == DImode)
4101 {
4102 ix86_gen_add3 = gen_adddi3;
4103 ix86_gen_sub3 = gen_subdi3;
4104 ix86_gen_sub3_carry = gen_subdi3_carry;
4105 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
4106 ix86_gen_andsp = gen_anddi3;
4107 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
4108 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
4109 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
4110 ix86_gen_monitor = gen_sse3_monitor_di;
4111 }
4112 else
4113 {
4114 ix86_gen_add3 = gen_addsi3;
4115 ix86_gen_sub3 = gen_subsi3;
4116 ix86_gen_sub3_carry = gen_subsi3_carry;
4117 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
4118 ix86_gen_andsp = gen_andsi3;
4119 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
4120 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
4121 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
4122 ix86_gen_monitor = gen_sse3_monitor_si;
4123 }
4124
4125 #ifdef USE_IX86_CLD
4126 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
4127 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
4128 opts->x_target_flags |= MASK_CLD & ~opts_set->x_target_flags;
4129 #endif
4130
4131 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && opts->x_flag_pic)
4132 {
4133 if (opts->x_flag_fentry > 0)
4134 sorry ("-mfentry isn%'t supported for 32-bit in combination "
4135 "with -fpic");
4136 opts->x_flag_fentry = 0;
4137 }
4138 else if (TARGET_SEH)
4139 {
4140 if (opts->x_flag_fentry == 0)
4141 sorry ("-mno-fentry isn%'t compatible with SEH");
4142 opts->x_flag_fentry = 1;
4143 }
4144 else if (opts->x_flag_fentry < 0)
4145 {
4146 #if defined(PROFILE_BEFORE_PROLOGUE)
4147 opts->x_flag_fentry = 1;
4148 #else
4149 opts->x_flag_fentry = 0;
4150 #endif
4151 }
4152
4153 /* When not optimizing for size, enable vzeroupper optimization for
4154 TARGET_AVX with -fexpensive-optimizations and split 32-byte
4155 AVX unaligned load/store. */
4156 if (!opts->x_optimize_size)
4157 {
4158 if (flag_expensive_optimizations
4159 && !(opts_set->x_target_flags & MASK_VZEROUPPER))
4160 opts->x_target_flags |= MASK_VZEROUPPER;
4161 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL]
4162 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
4163 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
4164 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL]
4165 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_STORE))
4166 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
4167 /* Enable 128-bit AVX instruction generation
4168 for the auto-vectorizer. */
4169 if (TARGET_AVX128_OPTIMAL
4170 && !(opts_set->x_target_flags & MASK_PREFER_AVX128))
4171 opts->x_target_flags |= MASK_PREFER_AVX128;
4172 }
4173
4174 if (opts->x_ix86_recip_name)
4175 {
4176 char *p = ASTRDUP (opts->x_ix86_recip_name);
4177 char *q;
4178 unsigned int mask, i;
4179 bool invert;
4180
4181 while ((q = strtok (p, ",")) != NULL)
4182 {
4183 p = NULL;
4184 if (*q == '!')
4185 {
4186 invert = true;
4187 q++;
4188 }
4189 else
4190 invert = false;
4191
4192 if (!strcmp (q, "default"))
4193 mask = RECIP_MASK_ALL;
4194 else
4195 {
4196 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4197 if (!strcmp (q, recip_options[i].string))
4198 {
4199 mask = recip_options[i].mask;
4200 break;
4201 }
4202
4203 if (i == ARRAY_SIZE (recip_options))
4204 {
4205 error ("unknown option for -mrecip=%s", q);
4206 invert = false;
4207 mask = RECIP_MASK_NONE;
4208 }
4209 }
4210
4211 opts->x_recip_mask_explicit |= mask;
4212 if (invert)
4213 opts->x_recip_mask &= ~mask;
4214 else
4215 opts->x_recip_mask |= mask;
4216 }
4217 }
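
/* For example, -mrecip=all,!sqrt first sets every RECIP_MASK_ALL bit and
   then clears RECIP_MASK_SQRT, so reciprocal approximations are used
   everywhere except for scalar square roots.  */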
4218
4219 if (TARGET_RECIP_P (opts->x_target_flags))
4220 opts->x_recip_mask |= RECIP_MASK_ALL & ~opts->x_recip_mask_explicit;
4221 else if (opts_set->x_target_flags & MASK_RECIP)
4222 opts->x_recip_mask &= ~(RECIP_MASK_ALL & ~opts->x_recip_mask_explicit);
4223
4224 /* Default long double to 64-bit for 32-bit Bionic and to __float128
4225 for 64-bit Bionic. */
4226 if (TARGET_HAS_BIONIC
4227 && !(opts_set->x_target_flags
4228 & (MASK_LONG_DOUBLE_64 | MASK_LONG_DOUBLE_128)))
4229 opts->x_target_flags |= (TARGET_64BIT
4230 ? MASK_LONG_DOUBLE_128
4231 : MASK_LONG_DOUBLE_64);
4232
4233 /* Only one of them can be active. */
4234 gcc_assert ((opts->x_target_flags & MASK_LONG_DOUBLE_64) == 0
4235 || (opts->x_target_flags & MASK_LONG_DOUBLE_128) == 0);
4236
4237 /* Save the initial options in case the user uses function-specific
4238 options. */
4239 if (main_args_p)
4240 target_option_default_node = target_option_current_node
4241 = build_target_option_node (opts);
4242
4243 /* Handle stack protector */
4244 if (!opts_set->x_ix86_stack_protector_guard)
4245 opts->x_ix86_stack_protector_guard
4246 = TARGET_HAS_BIONIC ? SSP_GLOBAL : SSP_TLS;
4247
4248 /* Handle -mmemcpy-strategy= and -mmemset-strategy= */
4249 if (opts->x_ix86_tune_memcpy_strategy)
4250 {
4251 char *str = xstrdup (opts->x_ix86_tune_memcpy_strategy);
4252 ix86_parse_stringop_strategy_string (str, false);
4253 free (str);
4254 }
4255
4256 if (opts->x_ix86_tune_memset_strategy)
4257 {
4258 char *str = xstrdup (opts->x_ix86_tune_memset_strategy);
4259 ix86_parse_stringop_strategy_string (str, true);
4260 free (str);
4261 }
4262 }
4263
4264 /* Implement the TARGET_OPTION_OVERRIDE hook. */
4265
4266 static void
4267 ix86_option_override (void)
4268 {
4269 opt_pass *pass_insert_vzeroupper = make_pass_insert_vzeroupper (g);
4270 static struct register_pass_info insert_vzeroupper_info
4271 = { pass_insert_vzeroupper, "reload",
4272 1, PASS_POS_INSERT_AFTER
4273 };
4274
4275 ix86_option_override_internal (true, &global_options, &global_options_set);
4276
4277
4278 /* This needs to be done at start up. It's convenient to do it here. */
4279 register_pass (&insert_vzeroupper_info);
4280 }
4281
4282 /* Update register usage after having seen the compiler flags. */
4283
4284 static void
4285 ix86_conditional_register_usage (void)
4286 {
4287 int i, c_mask;
4288 unsigned int j;
4289
4290 /* The PIC register, if it exists, is fixed. */
4291 j = PIC_OFFSET_TABLE_REGNUM;
4292 if (j != INVALID_REGNUM)
4293 fixed_regs[j] = call_used_regs[j] = 1;
4294
4295 /* For 32-bit targets, squash the REX registers. */
4296 if (! TARGET_64BIT)
4297 {
4298 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
4299 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4300 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
4301 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4302 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4303 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4304 }
4305
4306 /* See the definition of CALL_USED_REGISTERS in i386.h. */
4307 c_mask = (TARGET_64BIT_MS_ABI ? (1 << 3)
4308 : TARGET_64BIT ? (1 << 2)
4309 : (1 << 1));
4310
4311 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
4312
4313 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4314 {
4315 /* Set/reset conditionally defined registers from
4316 CALL_USED_REGISTERS initializer. */
4317 if (call_used_regs[i] > 1)
4318 call_used_regs[i] = !!(call_used_regs[i] & c_mask);
4319
4320 /* Calculate registers of CLOBBERED_REGS register set
4321 as call used registers from GENERAL_REGS register set. */
4322 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
4323 && call_used_regs[i])
4324 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
4325 }
4326
4327 /* If MMX is disabled, squash the registers. */
4328 if (! TARGET_MMX)
4329 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4330 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
4331 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4332
4333 /* If SSE is disabled, squash the registers. */
4334 if (! TARGET_SSE)
4335 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4336 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
4337 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4338
4339 /* If the FPU is disabled, squash the registers. */
4340 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
4341 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4342 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
4343 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4344
4345 /* If AVX512F is disabled, squash the registers. */
4346 if (! TARGET_AVX512F)
4347 {
4348 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4349 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4350
4351 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
4352 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4353 }
4354 }
4355
4356 \f
4357 /* Save the current options */
4358
4359 static void
4360 ix86_function_specific_save (struct cl_target_option *ptr,
4361 struct gcc_options *opts)
4362 {
4363 ptr->arch = ix86_arch;
4364 ptr->schedule = ix86_schedule;
4365 ptr->tune = ix86_tune;
4366 ptr->branch_cost = ix86_branch_cost;
4367 ptr->tune_defaulted = ix86_tune_defaulted;
4368 ptr->arch_specified = ix86_arch_specified;
4369 ptr->x_ix86_isa_flags_explicit = opts->x_ix86_isa_flags_explicit;
4370 ptr->x_ix86_target_flags_explicit = opts->x_ix86_target_flags_explicit;
4371 ptr->x_recip_mask_explicit = opts->x_recip_mask_explicit;
4372 ptr->x_ix86_arch_string = opts->x_ix86_arch_string;
4373 ptr->x_ix86_tune_string = opts->x_ix86_tune_string;
4374 ptr->x_ix86_cmodel = opts->x_ix86_cmodel;
4375 ptr->x_ix86_abi = opts->x_ix86_abi;
4376 ptr->x_ix86_asm_dialect = opts->x_ix86_asm_dialect;
4377 ptr->x_ix86_branch_cost = opts->x_ix86_branch_cost;
4378 ptr->x_ix86_dump_tunes = opts->x_ix86_dump_tunes;
4379 ptr->x_ix86_force_align_arg_pointer = opts->x_ix86_force_align_arg_pointer;
4380 ptr->x_ix86_force_drap = opts->x_ix86_force_drap;
4381 ptr->x_ix86_incoming_stack_boundary_arg = opts->x_ix86_incoming_stack_boundary_arg;
4382 ptr->x_ix86_pmode = opts->x_ix86_pmode;
4383 ptr->x_ix86_preferred_stack_boundary_arg = opts->x_ix86_preferred_stack_boundary_arg;
4384 ptr->x_ix86_recip_name = opts->x_ix86_recip_name;
4385 ptr->x_ix86_regparm = opts->x_ix86_regparm;
4386 ptr->x_ix86_section_threshold = opts->x_ix86_section_threshold;
4387 ptr->x_ix86_sse2avx = opts->x_ix86_sse2avx;
4388 ptr->x_ix86_stack_protector_guard = opts->x_ix86_stack_protector_guard;
4389 ptr->x_ix86_stringop_alg = opts->x_ix86_stringop_alg;
4390 ptr->x_ix86_tls_dialect = opts->x_ix86_tls_dialect;
4391 ptr->x_ix86_tune_ctrl_string = opts->x_ix86_tune_ctrl_string;
4392 ptr->x_ix86_tune_memcpy_strategy = opts->x_ix86_tune_memcpy_strategy;
4393 ptr->x_ix86_tune_memset_strategy = opts->x_ix86_tune_memset_strategy;
4394 ptr->x_ix86_tune_no_default = opts->x_ix86_tune_no_default;
4395 ptr->x_ix86_veclibabi_type = opts->x_ix86_veclibabi_type;
4396
4397 /* The fields are char but the variables are not; make sure the
4398 values fit in the fields. */
4399 gcc_assert (ptr->arch == ix86_arch);
4400 gcc_assert (ptr->schedule == ix86_schedule);
4401 gcc_assert (ptr->tune == ix86_tune);
4402 gcc_assert (ptr->branch_cost == ix86_branch_cost);
4403 }
4404
4405 /* Restore the current options */
4406
4407 static void
4408 ix86_function_specific_restore (struct gcc_options *opts,
4409 struct cl_target_option *ptr)
4410 {
4411 enum processor_type old_tune = ix86_tune;
4412 enum processor_type old_arch = ix86_arch;
4413 unsigned int ix86_arch_mask;
4414 int i;
4415
4416 /* We don't change -fPIC. */
4417 opts->x_flag_pic = flag_pic;
4418
4419 ix86_arch = (enum processor_type) ptr->arch;
4420 ix86_schedule = (enum attr_cpu) ptr->schedule;
4421 ix86_tune = (enum processor_type) ptr->tune;
4422 opts->x_ix86_branch_cost = ptr->branch_cost;
4423 ix86_tune_defaulted = ptr->tune_defaulted;
4424 ix86_arch_specified = ptr->arch_specified;
4425 opts->x_ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
4426 opts->x_ix86_target_flags_explicit = ptr->x_ix86_target_flags_explicit;
4427 opts->x_recip_mask_explicit = ptr->x_recip_mask_explicit;
4428 opts->x_ix86_arch_string = ptr->x_ix86_arch_string;
4429 opts->x_ix86_tune_string = ptr->x_ix86_tune_string;
4430 opts->x_ix86_cmodel = ptr->x_ix86_cmodel;
4431 opts->x_ix86_abi = ptr->x_ix86_abi;
4432 opts->x_ix86_asm_dialect = ptr->x_ix86_asm_dialect;
4433 opts->x_ix86_branch_cost = ptr->x_ix86_branch_cost;
4434 opts->x_ix86_dump_tunes = ptr->x_ix86_dump_tunes;
4435 opts->x_ix86_force_align_arg_pointer = ptr->x_ix86_force_align_arg_pointer;
4436 opts->x_ix86_force_drap = ptr->x_ix86_force_drap;
4437 opts->x_ix86_incoming_stack_boundary_arg = ptr->x_ix86_incoming_stack_boundary_arg;
4438 opts->x_ix86_pmode = ptr->x_ix86_pmode;
4439 opts->x_ix86_preferred_stack_boundary_arg = ptr->x_ix86_preferred_stack_boundary_arg;
4440 opts->x_ix86_recip_name = ptr->x_ix86_recip_name;
4441 opts->x_ix86_regparm = ptr->x_ix86_regparm;
4442 opts->x_ix86_section_threshold = ptr->x_ix86_section_threshold;
4443 opts->x_ix86_sse2avx = ptr->x_ix86_sse2avx;
4444 opts->x_ix86_stack_protector_guard = ptr->x_ix86_stack_protector_guard;
4445 opts->x_ix86_stringop_alg = ptr->x_ix86_stringop_alg;
4446 opts->x_ix86_tls_dialect = ptr->x_ix86_tls_dialect;
4447 opts->x_ix86_tune_ctrl_string = ptr->x_ix86_tune_ctrl_string;
4448 opts->x_ix86_tune_memcpy_strategy = ptr->x_ix86_tune_memcpy_strategy;
4449 opts->x_ix86_tune_memset_strategy = ptr->x_ix86_tune_memset_strategy;
4450 opts->x_ix86_tune_no_default = ptr->x_ix86_tune_no_default;
4451 opts->x_ix86_veclibabi_type = ptr->x_ix86_veclibabi_type;
4452
4453 /* Recreate the arch feature tests if the arch changed */
4454 if (old_arch != ix86_arch)
4455 {
4456 ix86_arch_mask = 1u << ix86_arch;
4457 for (i = 0; i < X86_ARCH_LAST; ++i)
4458 ix86_arch_features[i]
4459 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
4460 }
4461
4462 /* Recreate the tune optimization tests */
4463 if (old_tune != ix86_tune)
4464 set_ix86_tune_features (ix86_tune, false);
4465 }
4466
4467 /* Print the current options */
4468
4469 static void
4470 ix86_function_specific_print (FILE *file, int indent,
4471 struct cl_target_option *ptr)
4472 {
4473 char *target_string
4474 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
4475 NULL, NULL, ptr->x_ix86_fpmath, false);
4476
4477 gcc_assert (ptr->arch < PROCESSOR_max);
4478 fprintf (file, "%*sarch = %d (%s)\n",
4479 indent, "",
4480 ptr->arch, processor_target_table[ptr->arch].name);
4481
4482 gcc_assert (ptr->tune < PROCESSOR_max);
4483 fprintf (file, "%*stune = %d (%s)\n",
4484 indent, "",
4485 ptr->tune, processor_target_table[ptr->tune].name);
4486
4487 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
4488
4489 if (target_string)
4490 {
4491 fprintf (file, "%*s%s\n", indent, "", target_string);
4492 free (target_string);
4493 }
4494 }
4495
4496 \f
4497 /* Inner function to process the attribute((target(...))), take an argument and
4498 set the current options from the argument. If we have a list, recursively go
4499 over the list. */
4500
4501 static bool
4502 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
4503 struct gcc_options *opts,
4504 struct gcc_options *opts_set,
4505 struct gcc_options *enum_opts_set)
4506 {
4507 char *next_optstr;
4508 bool ret = true;
4509
4510 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4511 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4512 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
4513 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4514 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
4515
4516 enum ix86_opt_type
4517 {
4518 ix86_opt_unknown,
4519 ix86_opt_yes,
4520 ix86_opt_no,
4521 ix86_opt_str,
4522 ix86_opt_enum,
4523 ix86_opt_isa
4524 };
4525
4526 static const struct
4527 {
4528 const char *string;
4529 size_t len;
4530 enum ix86_opt_type type;
4531 int opt;
4532 int mask;
4533 } attrs[] = {
4534 /* isa options */
4535 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
4536 IX86_ATTR_ISA ("abm", OPT_mabm),
4537 IX86_ATTR_ISA ("bmi", OPT_mbmi),
4538 IX86_ATTR_ISA ("bmi2", OPT_mbmi2),
4539 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt),
4540 IX86_ATTR_ISA ("tbm", OPT_mtbm),
4541 IX86_ATTR_ISA ("aes", OPT_maes),
4542 IX86_ATTR_ISA ("sha", OPT_msha),
4543 IX86_ATTR_ISA ("avx", OPT_mavx),
4544 IX86_ATTR_ISA ("avx2", OPT_mavx2),
4545 IX86_ATTR_ISA ("avx512f", OPT_mavx512f),
4546 IX86_ATTR_ISA ("avx512pf", OPT_mavx512pf),
4547 IX86_ATTR_ISA ("avx512er", OPT_mavx512er),
4548 IX86_ATTR_ISA ("avx512cd", OPT_mavx512cd),
4549 IX86_ATTR_ISA ("mmx", OPT_mmmx),
4550 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
4551 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
4552 IX86_ATTR_ISA ("sse", OPT_msse),
4553 IX86_ATTR_ISA ("sse2", OPT_msse2),
4554 IX86_ATTR_ISA ("sse3", OPT_msse3),
4555 IX86_ATTR_ISA ("sse4", OPT_msse4),
4556 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
4557 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
4558 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
4559 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
4560 IX86_ATTR_ISA ("fma4", OPT_mfma4),
4561 IX86_ATTR_ISA ("fma", OPT_mfma),
4562 IX86_ATTR_ISA ("xop", OPT_mxop),
4563 IX86_ATTR_ISA ("lwp", OPT_mlwp),
4564 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
4565 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
4566 IX86_ATTR_ISA ("f16c", OPT_mf16c),
4567 IX86_ATTR_ISA ("rtm", OPT_mrtm),
4568 IX86_ATTR_ISA ("hle", OPT_mhle),
4569 IX86_ATTR_ISA ("prfchw", OPT_mprfchw),
4570 IX86_ATTR_ISA ("rdseed", OPT_mrdseed),
4571 IX86_ATTR_ISA ("adx", OPT_madx),
4572 IX86_ATTR_ISA ("fxsr", OPT_mfxsr),
4573 IX86_ATTR_ISA ("xsave", OPT_mxsave),
4574 IX86_ATTR_ISA ("xsaveopt", OPT_mxsaveopt),
4575 IX86_ATTR_ISA ("prefetchwt1", OPT_mprefetchwt1),
4576 IX86_ATTR_ISA ("clflushopt", OPT_mclflushopt),
4577 IX86_ATTR_ISA ("xsavec", OPT_mxsavec),
4578 IX86_ATTR_ISA ("xsaves", OPT_mxsaves),
4579
4580 /* enum options */
4581 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
4582
4583 /* string options */
4584 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
4585 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
4586
4587 /* flag options */
4588 IX86_ATTR_YES ("cld",
4589 OPT_mcld,
4590 MASK_CLD),
4591
4592 IX86_ATTR_NO ("fancy-math-387",
4593 OPT_mfancy_math_387,
4594 MASK_NO_FANCY_MATH_387),
4595
4596 IX86_ATTR_YES ("ieee-fp",
4597 OPT_mieee_fp,
4598 MASK_IEEE_FP),
4599
4600 IX86_ATTR_YES ("inline-all-stringops",
4601 OPT_minline_all_stringops,
4602 MASK_INLINE_ALL_STRINGOPS),
4603
4604 IX86_ATTR_YES ("inline-stringops-dynamically",
4605 OPT_minline_stringops_dynamically,
4606 MASK_INLINE_STRINGOPS_DYNAMICALLY),
4607
4608 IX86_ATTR_NO ("align-stringops",
4609 OPT_mno_align_stringops,
4610 MASK_NO_ALIGN_STRINGOPS),
4611
4612 IX86_ATTR_YES ("recip",
4613 OPT_mrecip,
4614 MASK_RECIP),
4615
4616 };
4617
4618 /* If this is a list, recurse to get the options. */
4619 if (TREE_CODE (args) == TREE_LIST)
4620 {
4621 bool ret = true;
4622
4623 for (; args; args = TREE_CHAIN (args))
4624 if (TREE_VALUE (args)
4625 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
4626 p_strings, opts, opts_set,
4627 enum_opts_set))
4628 ret = false;
4629
4630 return ret;
4631 }
4632
4633 else if (TREE_CODE (args) != STRING_CST)
4634 {
4635 error ("attribute %<target%> argument not a string");
4636 return false;
4637 }
4638
4639 /* Handle multiple arguments separated by commas. */
4640 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
4641
4642 while (next_optstr && *next_optstr != '\0')
4643 {
4644 char *p = next_optstr;
4645 char *orig_p = p;
4646 char *comma = strchr (next_optstr, ',');
4647 const char *opt_string;
4648 size_t len, opt_len;
4649 int opt;
4650 bool opt_set_p;
4651 char ch;
4652 unsigned i;
4653 enum ix86_opt_type type = ix86_opt_unknown;
4654 int mask = 0;
4655
4656 if (comma)
4657 {
4658 *comma = '\0';
4659 len = comma - next_optstr;
4660 next_optstr = comma + 1;
4661 }
4662 else
4663 {
4664 len = strlen (p);
4665 next_optstr = NULL;
4666 }
4667
4668 /* Recognize no-xxx. */
4669 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4670 {
4671 opt_set_p = false;
4672 p += 3;
4673 len -= 3;
4674 }
4675 else
4676 opt_set_p = true;
4677
4678 /* Find the option. */
4679 ch = *p;
4680 opt = N_OPTS;
4681 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4682 {
4683 type = attrs[i].type;
4684 opt_len = attrs[i].len;
4685 if (ch == attrs[i].string[0]
4686 && ((type != ix86_opt_str && type != ix86_opt_enum)
4687 ? len == opt_len
4688 : len > opt_len)
4689 && memcmp (p, attrs[i].string, opt_len) == 0)
4690 {
4691 opt = attrs[i].opt;
4692 mask = attrs[i].mask;
4693 opt_string = attrs[i].string;
4694 break;
4695 }
4696 }
4697
4698 /* Process the option. */
4699 if (opt == N_OPTS)
4700 {
4701 error ("attribute(target(\"%s\")) is unknown", orig_p);
4702 ret = false;
4703 }
4704
4705 else if (type == ix86_opt_isa)
4706 {
4707 struct cl_decoded_option decoded;
4708
4709 generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
4710 ix86_handle_option (opts, opts_set,
4711 &decoded, input_location);
4712 }
4713
4714 else if (type == ix86_opt_yes || type == ix86_opt_no)
4715 {
4716 if (type == ix86_opt_no)
4717 opt_set_p = !opt_set_p;
4718
4719 if (opt_set_p)
4720 opts->x_target_flags |= mask;
4721 else
4722 opts->x_target_flags &= ~mask;
4723 }
4724
4725 else if (type == ix86_opt_str)
4726 {
4727 if (p_strings[opt])
4728 {
4729 error ("option(\"%s\") was already specified", opt_string);
4730 ret = false;
4731 }
4732 else
4733 p_strings[opt] = xstrdup (p + opt_len);
4734 }
4735
4736 else if (type == ix86_opt_enum)
4737 {
4738 bool arg_ok;
4739 int value;
4740
4741 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
4742 if (arg_ok)
4743 set_option (opts, enum_opts_set, opt, value,
4744 p + opt_len, DK_UNSPECIFIED, input_location,
4745 global_dc);
4746 else
4747 {
4748 error ("attribute(target(\"%s\")) is unknown", orig_p);
4749 ret = false;
4750 }
4751 }
4752
4753 else
4754 gcc_unreachable ();
4755 }
4756
4757 return ret;
4758 }
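
/* Illustrative example (user-level code, not part of this file; function
   names are hypothetical).  Source code that the parser above would
   process; each comma-separated entry of the string is matched against
   the attrs[] table, with an optional "no-" prefix clearing an option:

     __attribute__((target ("sse4.2,popcnt")))
     int count_bits (const unsigned int *v, int n);

     __attribute__((target ("no-sse3,arch=core2")))
     void legacy_path (void);

   "sse4.2" and "popcnt" are handled as ISA options, "arch=" as a string
   option, and "fpmath=" would be handled as an enum option.  */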
4759
4760 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4761
4762 tree
4763 ix86_valid_target_attribute_tree (tree args,
4764 struct gcc_options *opts,
4765 struct gcc_options *opts_set)
4766 {
4767 const char *orig_arch_string = opts->x_ix86_arch_string;
4768 const char *orig_tune_string = opts->x_ix86_tune_string;
4769 enum fpmath_unit orig_fpmath_set = opts_set->x_ix86_fpmath;
4770 int orig_tune_defaulted = ix86_tune_defaulted;
4771 int orig_arch_specified = ix86_arch_specified;
4772 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
4773 tree t = NULL_TREE;
4774 int i;
4775 struct cl_target_option *def
4776 = TREE_TARGET_OPTION (target_option_default_node);
4777 struct gcc_options enum_opts_set;
4778
4779 memset (&enum_opts_set, 0, sizeof (enum_opts_set));
4780
4781 /* Process each of the options on the chain. */
4782 if (! ix86_valid_target_attribute_inner_p (args, option_strings, opts,
4783 opts_set, &enum_opts_set))
4784 return error_mark_node;
4785
4786 /* If the changed options are different from the default, rerun
4787 ix86_option_override_internal, and then save the options away.
4788 The string options are are attribute options, and will be undone
4789 when we copy the save structure. */
4790 if (opts->x_ix86_isa_flags != def->x_ix86_isa_flags
4791 || opts->x_target_flags != def->x_target_flags
4792 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4793 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4794 || enum_opts_set.x_ix86_fpmath)
4795 {
4796 /* If we are using the default tune= or arch=, undo the string assigned,
4797 and use the default. */
4798 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4799 opts->x_ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4800 else if (!orig_arch_specified)
4801 opts->x_ix86_arch_string = NULL;
4802
4803 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4804 opts->x_ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4805 else if (orig_tune_defaulted)
4806 opts->x_ix86_tune_string = NULL;
4807
4808 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4809 if (enum_opts_set.x_ix86_fpmath)
4810 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
4811 else if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
4812 && TARGET_SSE_P (opts->x_ix86_isa_flags))
4813 {
4814 opts->x_ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
4815 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
4816 }
4817
4818 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4819 ix86_option_override_internal (false, opts, opts_set);
4820
4821 /* Add any builtin functions with the new isa if any. */
4822 ix86_add_new_builtins (opts->x_ix86_isa_flags);
4823
4824 /* Save the current options unless we are validating options for
4825 #pragma. */
4826 t = build_target_option_node (opts);
4827
4828 opts->x_ix86_arch_string = orig_arch_string;
4829 opts->x_ix86_tune_string = orig_tune_string;
4830 opts_set->x_ix86_fpmath = orig_fpmath_set;
4831
4832 /* Free up memory allocated to hold the strings */
4833 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
4834 free (option_strings[i]);
4835 }
4836
4837 return t;
4838 }
4839
4840 /* Hook to validate attribute((target("string"))). */
4841
4842 static bool
4843 ix86_valid_target_attribute_p (tree fndecl,
4844 tree ARG_UNUSED (name),
4845 tree args,
4846 int ARG_UNUSED (flags))
4847 {
4848 struct gcc_options func_options;
4849 tree new_target, new_optimize;
4850 bool ret = true;
4851
4852 /* attribute((target("default"))) does nothing, beyond
4853 affecting multi-versioning. */
4854 if (TREE_VALUE (args)
4855 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
4856 && TREE_CHAIN (args) == NULL_TREE
4857 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
4858 return true;
4859
4860 tree old_optimize = build_optimization_node (&global_options);
4861
4862 /* Get the optimization options of the current function. */
4863 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
4864
4865 if (!func_optimize)
4866 func_optimize = old_optimize;
4867
4868 /* Init func_options. */
4869 memset (&func_options, 0, sizeof (func_options));
4870 init_options_struct (&func_options, NULL);
4871 lang_hooks.init_options_struct (&func_options);
4872
4873 cl_optimization_restore (&func_options,
4874 TREE_OPTIMIZATION (func_optimize));
4875
4876 /* Initialize func_options to the default before its target options can
4877 be set. */
4878 cl_target_option_restore (&func_options,
4879 TREE_TARGET_OPTION (target_option_default_node));
4880
4881 new_target = ix86_valid_target_attribute_tree (args, &func_options,
4882 &global_options_set);
4883
4884 new_optimize = build_optimization_node (&func_options);
4885
4886 if (new_target == error_mark_node)
4887 ret = false;
4888
4889 else if (fndecl && new_target)
4890 {
4891 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
4892
4893 if (old_optimize != new_optimize)
4894 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
4895 }
4896
4897 return ret;
4898 }
4899
4900 \f
4901 /* Hook to determine if one function can safely inline another. */
4902
4903 static bool
4904 ix86_can_inline_p (tree caller, tree callee)
4905 {
4906 bool ret = false;
4907 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
4908 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
4909
4910 /* If callee has no option attributes, then it is ok to inline. */
4911 if (!callee_tree)
4912 ret = true;
4913
4914 /* If caller has no option attributes, but callee does then it is not ok to
4915 inline. */
4916 else if (!caller_tree)
4917 ret = false;
4918
4919 else
4920 {
4921 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
4922 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
4923
4924 /* Callee's isa options should be a subset of the caller's, i.e. an SSE4
4925 function can inline an SSE2 function but an SSE2 function can't inline
4926 an SSE4 function. */
4927 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
4928 != callee_opts->x_ix86_isa_flags)
4929 ret = false;
4930
4931 /* See if we have the same non-isa options. */
4932 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
4933 ret = false;
4934
4935 /* See if arch, tune, etc. are the same. */
4936 else if (caller_opts->arch != callee_opts->arch)
4937 ret = false;
4938
4939 else if (caller_opts->tune != callee_opts->tune)
4940 ret = false;
4941
4942 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
4943 ret = false;
4944
4945 else if (caller_opts->branch_cost != callee_opts->branch_cost)
4946 ret = false;
4947
4948 else
4949 ret = true;
4950 }
4951
4952 return ret;
4953 }
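
/* Illustrative example (user-level code, not part of this file; names are
   hypothetical).  Under the subset rule above,

     __attribute__((target ("avx2"))) static int fast (int x);
     __attribute__((target ("sse2"))) int wrapper (int x);

   fast cannot be inlined into wrapper, because the callee's avx2 ISA flags
   are not a subset of the sse2 caller's; the reverse direction, an sse2
   callee inlined into an avx2 caller, is allowed as far as the ISA check
   is concerned, provided the remaining option checks above also match.  */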
4954
4955 \f
4956 /* Remember the last target of ix86_set_current_function. */
4957 static GTY(()) tree ix86_previous_fndecl;
4958
4959 /* Invalidate ix86_previous_fndecl cache. */
4960 void
4961 ix86_reset_previous_fndecl (void)
4962 {
4963 ix86_previous_fndecl = NULL_TREE;
4964 }
4965
4966 /* Establish appropriate back-end context for processing the function
4967 FNDECL. The argument might be NULL to indicate processing at top
4968 level, outside of any function scope. */
4969 static void
4970 ix86_set_current_function (tree fndecl)
4971 {
4972 /* Only change the context if the function changes. This hook is called
4973 several times in the course of compiling a function, and we don't want to
4974 slow things down too much or call target_reinit when it isn't safe. */
4975 if (fndecl && fndecl != ix86_previous_fndecl)
4976 {
4977 tree old_tree = (ix86_previous_fndecl
4978 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
4979 : NULL_TREE);
4980
4981 tree new_tree = (fndecl
4982 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
4983 : NULL_TREE);
4984
4985 ix86_previous_fndecl = fndecl;
4986 if (old_tree == new_tree)
4987 ;
4988
4989 else if (new_tree)
4990 {
4991 cl_target_option_restore (&global_options,
4992 TREE_TARGET_OPTION (new_tree));
4993 if (TREE_TARGET_GLOBALS (new_tree))
4994 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
4995 else
4996 TREE_TARGET_GLOBALS (new_tree)
4997 = save_target_globals_default_opts ();
4998 }
4999
5000 else if (old_tree)
5001 {
5002 new_tree = target_option_current_node;
5003 cl_target_option_restore (&global_options,
5004 TREE_TARGET_OPTION (new_tree));
5005 if (TREE_TARGET_GLOBALS (new_tree))
5006 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5007 else if (new_tree == target_option_default_node)
5008 restore_target_globals (&default_target_globals);
5009 else
5010 TREE_TARGET_GLOBALS (new_tree)
5011 = save_target_globals_default_opts ();
5012 }
5013 }
5014 }
5015
5016 \f
5017 /* Return true if this goes in large data/bss. */
5018
5019 static bool
5020 ix86_in_large_data_p (tree exp)
5021 {
5022 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
5023 return false;
5024
5025 /* Functions are never large data. */
5026 if (TREE_CODE (exp) == FUNCTION_DECL)
5027 return false;
5028
5029 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
5030 {
5031 const char *section = DECL_SECTION_NAME (exp);
5032 if (strcmp (section, ".ldata") == 0
5033 || strcmp (section, ".lbss") == 0)
5034 return true;
5035 return false;
5036 }
5037 else
5038 {
5039 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
5040
5041 /* If this is an incomplete type with size 0, then we can't put it
5042 in data because it might be too big when completed. Also,
5043 int_size_in_bytes returns -1 if the size can vary or is larger than
5044 an integer, in which case it is also safer to assume that it goes in
5045 large data. */
5046 if (size <= 0 || size > ix86_section_threshold)
5047 return true;
5048 }
5049
5050 return false;
5051 }
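
/* Illustrative example (user-level code, not part of this file; names are
   hypothetical).  With -mcmodel=medium and the default
   -mlarge-data-threshold (65536), a variable such as

     static double table[16384];

   of 128 KiB exceeds ix86_section_threshold and is treated as large data,
   so the section hooks below place it in .ldata/.lbss, while smaller
   variables stay in the normal .data/.bss sections.  */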
5052
5053 /* Switch to the appropriate section for output of DECL.
5054 DECL is either a `VAR_DECL' node or a constant of some sort.
5055 RELOC indicates whether forming the initial value of DECL requires
5056 link-time relocations. */
5057
5058 ATTRIBUTE_UNUSED static section *
5059 x86_64_elf_select_section (tree decl, int reloc,
5060 unsigned HOST_WIDE_INT align)
5061 {
5062 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5063 && ix86_in_large_data_p (decl))
5064 {
5065 const char *sname = NULL;
5066 unsigned int flags = SECTION_WRITE;
5067 switch (categorize_decl_for_section (decl, reloc))
5068 {
5069 case SECCAT_DATA:
5070 sname = ".ldata";
5071 break;
5072 case SECCAT_DATA_REL:
5073 sname = ".ldata.rel";
5074 break;
5075 case SECCAT_DATA_REL_LOCAL:
5076 sname = ".ldata.rel.local";
5077 break;
5078 case SECCAT_DATA_REL_RO:
5079 sname = ".ldata.rel.ro";
5080 break;
5081 case SECCAT_DATA_REL_RO_LOCAL:
5082 sname = ".ldata.rel.ro.local";
5083 break;
5084 case SECCAT_BSS:
5085 sname = ".lbss";
5086 flags |= SECTION_BSS;
5087 break;
5088 case SECCAT_RODATA:
5089 case SECCAT_RODATA_MERGE_STR:
5090 case SECCAT_RODATA_MERGE_STR_INIT:
5091 case SECCAT_RODATA_MERGE_CONST:
5092 sname = ".lrodata";
5093 flags = 0;
5094 break;
5095 case SECCAT_SRODATA:
5096 case SECCAT_SDATA:
5097 case SECCAT_SBSS:
5098 gcc_unreachable ();
5099 case SECCAT_TEXT:
5100 case SECCAT_TDATA:
5101 case SECCAT_TBSS:
5102 /* We don't split these for medium model. Place them into
5103 default sections and hope for the best. */
5104 break;
5105 }
5106 if (sname)
5107 {
5108 /* We might get called with string constants, but get_named_section
5109 doesn't like them as they are not DECLs. Also, we need to set
5110 flags in that case. */
5111 if (!DECL_P (decl))
5112 return get_section (sname, flags, NULL);
5113 return get_named_section (decl, sname, reloc);
5114 }
5115 }
5116 return default_elf_select_section (decl, reloc, align);
5117 }
5118
5119 /* Select a set of attributes for section NAME based on the properties
5120 of DECL and whether or not RELOC indicates that DECL's initializer
5121 might contain runtime relocations. */
5122
5123 static unsigned int ATTRIBUTE_UNUSED
5124 x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
5125 {
5126 unsigned int flags = default_section_type_flags (decl, name, reloc);
5127
5128 if (decl == NULL_TREE
5129 && (strcmp (name, ".ldata.rel.ro") == 0
5130 || strcmp (name, ".ldata.rel.ro.local") == 0))
5131 flags |= SECTION_RELRO;
5132
5133 if (strcmp (name, ".lbss") == 0
5134 || strncmp (name, ".lbss.", 6) == 0
5135 || strncmp (name, ".gnu.linkonce.lb.", 17) == 0)
5136 flags |= SECTION_BSS;
5137
5138 return flags;
5139 }
5140
5141 /* Build up a unique section name and assign it to
5142 DECL_SECTION_NAME (decl).
5143 RELOC indicates whether the initial value of DECL requires
5144 link-time relocations. */
5145
5146 static void ATTRIBUTE_UNUSED
5147 x86_64_elf_unique_section (tree decl, int reloc)
5148 {
5149 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5150 && ix86_in_large_data_p (decl))
5151 {
5152 const char *prefix = NULL;
5153 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
5154 bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP;
5155
5156 switch (categorize_decl_for_section (decl, reloc))
5157 {
5158 case SECCAT_DATA:
5159 case SECCAT_DATA_REL:
5160 case SECCAT_DATA_REL_LOCAL:
5161 case SECCAT_DATA_REL_RO:
5162 case SECCAT_DATA_REL_RO_LOCAL:
5163 prefix = one_only ? ".ld" : ".ldata";
5164 break;
5165 case SECCAT_BSS:
5166 prefix = one_only ? ".lb" : ".lbss";
5167 break;
5168 case SECCAT_RODATA:
5169 case SECCAT_RODATA_MERGE_STR:
5170 case SECCAT_RODATA_MERGE_STR_INIT:
5171 case SECCAT_RODATA_MERGE_CONST:
5172 prefix = one_only ? ".lr" : ".lrodata";
5173 break;
5174 case SECCAT_SRODATA:
5175 case SECCAT_SDATA:
5176 case SECCAT_SBSS:
5177 gcc_unreachable ();
5178 case SECCAT_TEXT:
5179 case SECCAT_TDATA:
5180 case SECCAT_TBSS:
5181 /* We don't split these for medium model. Place them into
5182 default sections and hope for the best. */
5183 break;
5184 }
5185 if (prefix)
5186 {
5187 const char *name, *linkonce;
5188 char *string;
5189
5190 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
5191 name = targetm.strip_name_encoding (name);
5192
5193 /* If we're using one_only, then there needs to be a .gnu.linkonce
5194 prefix to the section name. */
5195 linkonce = one_only ? ".gnu.linkonce" : "";
5196
5197 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
5198
5199 set_decl_section_name (decl, string);
5200 return;
5201 }
5202 }
5203 default_unique_section (decl, reloc);
5204 }
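
/* Illustrative example (not part of this file; names are hypothetical).
   When a unique section is requested, e.g. by -fdata-sections, for a
   medium-model large-data variable "table" categorized as SECCAT_DATA,
   the routine above concatenates the pieces into ".ldata.table"; if
   .gnu.linkonce must be used instead of COMDAT groups, the result is
   ".gnu.linkonce.ld.table".  */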
5205
5206 #ifdef COMMON_ASM_OP
5207 /* This says how to output assembler code to declare an
5208 uninitialized external linkage data object.
5209
5210 For the medium model on x86-64 we need to use the .largecomm
5211 directive for large objects. */
5212 void
5213 x86_elf_aligned_common (FILE *file,
5214 const char *name, unsigned HOST_WIDE_INT size,
5215 int align)
5216 {
5217 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5218 && size > (unsigned int)ix86_section_threshold)
5219 fputs (".largecomm\t", file);
5220 else
5221 fputs (COMMON_ASM_OP, file);
5222 assemble_name (file, name);
5223 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
5224 size, align / BITS_PER_UNIT);
5225 }
5226 #endif
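
/* Illustrative example (not part of this file; names are hypothetical).
   With -mcmodel=medium, an uninitialized common object larger than
   ix86_section_threshold, e.g.

     char big_buffer[1 << 20];

   is announced with the ".largecomm" directive followed by its name, size
   and byte alignment, while smaller commons use the ordinary
   COMMON_ASM_OP (".comm") directive.  */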
5227
5228 /* Utility function for targets to use in implementing
5229 ASM_OUTPUT_ALIGNED_BSS. */
5230
5231 void
5232 x86_output_aligned_bss (FILE *file, tree decl, const char *name,
5233 unsigned HOST_WIDE_INT size, int align)
5234 {
5235 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5236 && size > (unsigned int)ix86_section_threshold)
5237 switch_to_section (get_named_section (decl, ".lbss", 0));
5238 else
5239 switch_to_section (bss_section);
5240 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
5241 #ifdef ASM_DECLARE_OBJECT_NAME
5242 last_assemble_variable_decl = decl;
5243 ASM_DECLARE_OBJECT_NAME (file, name, decl);
5244 #else
5245 /* Standard thing is just output label for the object. */
5246 ASM_OUTPUT_LABEL (file, name);
5247 #endif /* ASM_DECLARE_OBJECT_NAME */
5248 ASM_OUTPUT_SKIP (file, size ? size : 1);
5249 }
5250 \f
5251 /* Decide whether we must probe the stack before any space allocation
5252 on this target. It's essentially TARGET_STACK_PROBE except when
5253 -fstack-check causes the stack to be already probed differently. */
5254
5255 bool
5256 ix86_target_stack_probe (void)
5257 {
5258 /* Do not probe the stack twice if static stack checking is enabled. */
5259 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5260 return false;
5261
5262 return TARGET_STACK_PROBE;
5263 }
5264 \f
5265 /* Decide whether we can make a sibling call to a function. DECL is the
5266 declaration of the function being targeted by the call and EXP is the
5267 CALL_EXPR representing the call. */
5268
5269 static bool
5270 ix86_function_ok_for_sibcall (tree decl, tree exp)
5271 {
5272 tree type, decl_or_type;
5273 rtx a, b;
5274
5275 /* If we are generating position-independent code, we cannot sibcall
5276 optimize any indirect call, or a direct call to a global function,
5277 as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
5278 if (!TARGET_MACHO
5279 && !TARGET_64BIT
5280 && flag_pic
5281 && (!decl || !targetm.binds_local_p (decl)))
5282 return false;
5283
5284 /* If we need to align the outgoing stack, then sibcalling would
5285 unalign the stack, which may break the called function. */
5286 if (ix86_minimum_incoming_stack_boundary (true)
5287 < PREFERRED_STACK_BOUNDARY)
5288 return false;
5289
5290 if (decl)
5291 {
5292 decl_or_type = decl;
5293 type = TREE_TYPE (decl);
5294 }
5295 else
5296 {
5297 /* We're looking at the CALL_EXPR, we need the type of the function. */
5298 type = CALL_EXPR_FN (exp); /* pointer expression */
5299 type = TREE_TYPE (type); /* pointer type */
5300 type = TREE_TYPE (type); /* function type */
5301 decl_or_type = type;
5302 }
5303
5304 /* Check that the return value locations are the same. For example,
5305 if we are returning floats on the 80387 register stack, we cannot
5306 make a sibcall from a function that doesn't return a float to a
5307 function that does or, conversely, from a function that does return
5308 a float to a function that doesn't; the necessary stack adjustment
5309 would not be executed. This is also the place we notice
5310 differences in the return value ABI. Note that it is ok for one
5311 of the functions to have void return type as long as the return
5312 value of the other is passed in a register. */
5313 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
5314 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5315 cfun->decl, false);
5316 if (STACK_REG_P (a) || STACK_REG_P (b))
5317 {
5318 if (!rtx_equal_p (a, b))
5319 return false;
5320 }
5321 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5322 ;
5323 else if (!rtx_equal_p (a, b))
5324 return false;
5325
5326 if (TARGET_64BIT)
5327 {
5328 /* The SYSV ABI has more call-clobbered registers;
5329 disallow sibcalls from MS to SYSV. */
5330 if (cfun->machine->call_abi == MS_ABI
5331 && ix86_function_type_abi (type) == SYSV_ABI)
5332 return false;
5333 }
5334 else
5335 {
5336 /* If this call is indirect, we'll need to be able to use a
5337 call-clobbered register for the address of the target function.
5338 Make sure that all such registers are not used for passing
5339 parameters. Note that DLLIMPORT functions are indirect. */
5340 if (!decl
5341 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
5342 {
5343 if (ix86_function_regparm (type, NULL) >= 3)
5344 {
5345 /* ??? Need to count the actual number of registers to be used,
5346 not the possible number of registers. Fix later. */
5347 return false;
5348 }
5349 }
5350 }
5351
5352 /* Otherwise okay. That also includes certain types of indirect calls. */
5353 return true;
5354 }
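
/* Illustrative example (user-level code, not part of this file; names are
   hypothetical).  In 32-bit PIC code,

     extern int helper (int);
     int wrap (int x) { return helper (x); }

   the tail call to the global helper is rejected as a sibcall because the
   PLT call requires %ebx to be live, per the first check above; a static,
   locally bound helper could still be sibcalled, subject to the remaining
   checks.  */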
5355
5356 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
5357 and "sseregparm" calling convention attributes;
5358 arguments as in struct attribute_spec.handler. */
5359
5360 static tree
5361 ix86_handle_cconv_attribute (tree *node, tree name,
5362 tree args,
5363 int,
5364 bool *no_add_attrs)
5365 {
5366 if (TREE_CODE (*node) != FUNCTION_TYPE
5367 && TREE_CODE (*node) != METHOD_TYPE
5368 && TREE_CODE (*node) != FIELD_DECL
5369 && TREE_CODE (*node) != TYPE_DECL)
5370 {
5371 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5372 name);
5373 *no_add_attrs = true;
5374 return NULL_TREE;
5375 }
5376
5377 /* Can combine regparm with all attributes but fastcall, and thiscall. */
5378 if (is_attribute_p ("regparm", name))
5379 {
5380 tree cst;
5381
5382 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5383 {
5384 error ("fastcall and regparm attributes are not compatible");
5385 }
5386
5387 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5388 {
4389 error ("regparm and thiscall attributes are not compatible");
5390 }
5391
5392 cst = TREE_VALUE (args);
5393 if (TREE_CODE (cst) != INTEGER_CST)
5394 {
5395 warning (OPT_Wattributes,
5396 "%qE attribute requires an integer constant argument",
5397 name);
5398 *no_add_attrs = true;
5399 }
5400 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
5401 {
5402 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
5403 name, REGPARM_MAX);
5404 *no_add_attrs = true;
5405 }
5406
5407 return NULL_TREE;
5408 }
5409
5410 if (TARGET_64BIT)
5411 {
5412 /* Do not warn when emulating the MS ABI. */
5413 if ((TREE_CODE (*node) != FUNCTION_TYPE
5414 && TREE_CODE (*node) != METHOD_TYPE)
5415 || ix86_function_type_abi (*node) != MS_ABI)
5416 warning (OPT_Wattributes, "%qE attribute ignored",
5417 name);
5418 *no_add_attrs = true;
5419 return NULL_TREE;
5420 }
5421
5422 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
5423 if (is_attribute_p ("fastcall", name))
5424 {
5425 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5426 {
5427 error ("fastcall and cdecl attributes are not compatible");
5428 }
5429 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5430 {
5431 error ("fastcall and stdcall attributes are not compatible");
5432 }
5433 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
5434 {
5435 error ("fastcall and regparm attributes are not compatible");
5436 }
5437 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5438 {
5439 error ("fastcall and thiscall attributes are not compatible");
5440 }
5441 }
5442
5443 /* Can combine stdcall with fastcall (redundant), regparm and
5444 sseregparm. */
5445 else if (is_attribute_p ("stdcall", name))
5446 {
5447 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5448 {
5449 error ("stdcall and cdecl attributes are not compatible");
5450 }
5451 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5452 {
5453 error ("stdcall and fastcall attributes are not compatible");
5454 }
5455 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5456 {
5457 error ("stdcall and thiscall attributes are not compatible");
5458 }
5459 }
5460
5461 /* Can combine cdecl with regparm and sseregparm. */
5462 else if (is_attribute_p ("cdecl", name))
5463 {
5464 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5465 {
5466 error ("stdcall and cdecl attributes are not compatible");
5467 }
5468 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5469 {
5470 error ("fastcall and cdecl attributes are not compatible");
5471 }
5472 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5473 {
5474 error ("cdecl and thiscall attributes are not compatible");
5475 }
5476 }
5477 else if (is_attribute_p ("thiscall", name))
5478 {
5479 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
4480 warning (OPT_Wattributes, "%qE attribute is used for non-class method",
5481 name);
5482 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5483 {
5484 error ("stdcall and thiscall attributes are not compatible");
5485 }
5486 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5487 {
5488 error ("fastcall and thiscall attributes are not compatible");
5489 }
5490 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5491 {
5492 error ("cdecl and thiscall attributes are not compatible");
5493 }
5494 }
5495
5496 /* Can combine sseregparm with all attributes. */
5497
5498 return NULL_TREE;
5499 }
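
/* Illustrative example (user-level code, not part of this file; names are
   hypothetical).  32-bit declarations checked by the handler above:

     int __attribute__((stdcall, regparm (2))) f1 (int, int);
     int __attribute__((cdecl, sseregparm)) f2 (float);
     int __attribute__((fastcall, regparm (3))) f3 (int);

   The first two combinations are accepted; the third triggers "fastcall
   and regparm attributes are not compatible".  In 64-bit mode these
   attributes are ignored, with a warning unless the function type uses
   the MS ABI.  */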
5500
5501 /* The transactional memory builtins are implicitly regparm or fastcall
5502 depending on the ABI. Override the generic do-nothing attribute that
5503 these builtins were declared with, and replace it with one of the two
5504 attributes that we expect elsewhere. */
5505
5506 static tree
5507 ix86_handle_tm_regparm_attribute (tree *node, tree, tree,
5508 int flags, bool *no_add_attrs)
5509 {
5510 tree alt;
5511
5512 /* In no case do we want to add the placeholder attribute. */
5513 *no_add_attrs = true;
5514
5515 /* The 64-bit ABI is unchanged for transactional memory. */
5516 if (TARGET_64BIT)
5517 return NULL_TREE;
5518
5519 /* ??? Is there a better way to validate 32-bit windows? We have
5520 cfun->machine->call_abi, but that seems to be set only for 64-bit. */
5521 if (CHECK_STACK_LIMIT > 0)
5522 alt = tree_cons (get_identifier ("fastcall"), NULL, NULL);
5523 else
5524 {
5525 alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL);
5526 alt = tree_cons (get_identifier ("regparm"), alt, NULL);
5527 }
5528 decl_attributes (node, alt, flags);
5529
5530 return NULL_TREE;
5531 }
5532
5533 /* This function determines from TYPE the calling-convention. */
5534
5535 unsigned int
5536 ix86_get_callcvt (const_tree type)
5537 {
5538 unsigned int ret = 0;
5539 bool is_stdarg;
5540 tree attrs;
5541
5542 if (TARGET_64BIT)
5543 return IX86_CALLCVT_CDECL;
5544
5545 attrs = TYPE_ATTRIBUTES (type);
5546 if (attrs != NULL_TREE)
5547 {
5548 if (lookup_attribute ("cdecl", attrs))
5549 ret |= IX86_CALLCVT_CDECL;
5550 else if (lookup_attribute ("stdcall", attrs))
5551 ret |= IX86_CALLCVT_STDCALL;
5552 else if (lookup_attribute ("fastcall", attrs))
5553 ret |= IX86_CALLCVT_FASTCALL;
5554 else if (lookup_attribute ("thiscall", attrs))
5555 ret |= IX86_CALLCVT_THISCALL;
5556
4557 /* Regparm isn't allowed for thiscall and fastcall. */
5558 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
5559 {
5560 if (lookup_attribute ("regparm", attrs))
5561 ret |= IX86_CALLCVT_REGPARM;
5562 if (lookup_attribute ("sseregparm", attrs))
5563 ret |= IX86_CALLCVT_SSEREGPARM;
5564 }
5565
5566 if (IX86_BASE_CALLCVT(ret) != 0)
5567 return ret;
5568 }
5569
5570 is_stdarg = stdarg_p (type);
5571 if (TARGET_RTD && !is_stdarg)
5572 return IX86_CALLCVT_STDCALL | ret;
5573
5574 if (ret != 0
5575 || is_stdarg
5576 || TREE_CODE (type) != METHOD_TYPE
5577 || ix86_function_type_abi (type) != MS_ABI)
5578 return IX86_CALLCVT_CDECL | ret;
5579
5580 return IX86_CALLCVT_THISCALL;
5581 }
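
/* Illustrative example (not part of this file; names are hypothetical).
   For a plain 32-bit prototype such as "int f (int, int)" with no
   attributes, the routine above returns IX86_CALLCVT_CDECL; with -mrtd in
   effect the same non-stdarg prototype gets IX86_CALLCVT_STDCALL, while a
   stdarg prototype like "int g (const char *, ...)" always stays cdecl.  */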
5582
5583 /* Return 0 if the attributes for two types are incompatible, 1 if they
5584 are compatible, and 2 if they are nearly compatible (which causes a
5585 warning to be generated). */
5586
5587 static int
5588 ix86_comp_type_attributes (const_tree type1, const_tree type2)
5589 {
5590 unsigned int ccvt1, ccvt2;
5591
5592 if (TREE_CODE (type1) != FUNCTION_TYPE
5593 && TREE_CODE (type1) != METHOD_TYPE)
5594 return 1;
5595
5596 ccvt1 = ix86_get_callcvt (type1);
5597 ccvt2 = ix86_get_callcvt (type2);
5598 if (ccvt1 != ccvt2)
5599 return 0;
5600 if (ix86_function_regparm (type1, NULL)
5601 != ix86_function_regparm (type2, NULL))
5602 return 0;
5603
5604 return 1;
5605 }
5606 \f
5607 /* Return the regparm value for a function with the indicated TYPE and DECL.
5608 DECL may be NULL when calling function indirectly
5609 or considering a libcall. */
5610
5611 static int
5612 ix86_function_regparm (const_tree type, const_tree decl)
5613 {
5614 tree attr;
5615 int regparm;
5616 unsigned int ccvt;
5617
5618 if (TARGET_64BIT)
5619 return (ix86_function_type_abi (type) == SYSV_ABI
5620 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
5621 ccvt = ix86_get_callcvt (type);
5622 regparm = ix86_regparm;
5623
5624 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
5625 {
5626 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
5627 if (attr)
5628 {
5629 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
5630 return regparm;
5631 }
5632 }
5633 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5634 return 2;
5635 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5636 return 1;
5637
5638 /* Use register calling convention for local functions when possible. */
5639 if (decl
5640 && TREE_CODE (decl) == FUNCTION_DECL
5641 /* Caller and callee must agree on the calling convention, so
5642 checking just the global optimize flag here would mean that with
5643 __attribute__((optimize (...))) the caller could use the regparm
5644 convention and the callee not, or vice versa. Instead look at
5645 whether the callee itself is optimized or not. */
5646 && opt_for_fn (decl, optimize)
5647 && !(profile_flag && !flag_fentry))
5648 {
5649 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5650 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
5651 if (i && i->local && i->can_change_signature)
5652 {
5653 int local_regparm, globals = 0, regno;
5654
5655 /* Make sure no regparm register is taken by a
5656 fixed register variable. */
5657 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
5658 if (fixed_regs[local_regparm])
5659 break;
5660
5661 /* We don't want to use regparm(3) for nested functions as
5662 these use a static chain pointer in the third argument. */
5663 if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
5664 local_regparm = 2;
5665
5666 /* In 32-bit mode save a register for the split stack. */
5667 if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
5668 local_regparm = 2;
5669
5670 /* Each fixed register usage increases register pressure,
5671 so fewer registers should be used for argument passing.
5672 This functionality can be overridden by an explicit
5673 regparm value. */
5674 for (regno = AX_REG; regno <= DI_REG; regno++)
5675 if (fixed_regs[regno])
5676 globals++;
5677
5678 local_regparm
5679 = globals < local_regparm ? local_regparm - globals : 0;
5680
5681 if (local_regparm > regparm)
5682 regparm = local_regparm;
5683 }
5684 }
5685
5686 return regparm;
5687 }
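
/* Illustrative example (user-level code, not part of this file; names are
   hypothetical).  For the 32-bit declarations

     void __attribute__((regparm (3))) r3 (int, int, int);
     void __attribute__((fastcall)) fc (int, int);
     void __attribute__((thiscall)) tc (void *, int);

   the routine above returns 3, 2 and 1 respectively; a plain cdecl
   declaration falls back to the -mregparm= default, possibly bumped for
   local functions by the cgraph-based logic above.  */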
5688
5689 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5690 DFmode (2) arguments in SSE registers for a function with the
5691 indicated TYPE and DECL. DECL may be NULL when calling function
5692 indirectly or considering a libcall. Otherwise return 0. */
5693
5694 static int
5695 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
5696 {
5697 gcc_assert (!TARGET_64BIT);
5698
5699 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5700 by the sseregparm attribute. */
5701 if (TARGET_SSEREGPARM
5702 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
5703 {
5704 if (!TARGET_SSE)
5705 {
5706 if (warn)
5707 {
5708 if (decl)
5709 error ("calling %qD with attribute sseregparm without "
5710 "SSE/SSE2 enabled", decl);
5711 else
5712 error ("calling %qT with attribute sseregparm without "
5713 "SSE/SSE2 enabled", type);
5714 }
5715 return 0;
5716 }
5717
5718 return 2;
5719 }
5720
5721 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5722 (and DFmode for SSE2) arguments in SSE registers. */
5723 if (decl && TARGET_SSE_MATH && optimize
5724 && !(profile_flag && !flag_fentry))
5725 {
5726 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5727 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
5728 if (i && i->local && i->can_change_signature)
5729 return TARGET_SSE2 ? 2 : 1;
5730 }
5731
5732 return 0;
5733 }
5734
5735 /* Return true if EAX is live at the start of the function. Used by
5736 ix86_expand_prologue to determine if we need special help before
5737 calling allocate_stack_worker. */
5738
5739 static bool
5740 ix86_eax_live_at_start_p (void)
5741 {
5742 /* Cheat. Don't bother working forward from ix86_function_regparm
5743 to the function type to whether an actual argument is located in
5744 eax. Instead just look at cfg info, which is still close enough
5745 to correct at this point. This gives false positives for broken
5746 functions that might use uninitialized data that happens to be
5747 allocated in eax, but who cares? */
5748 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
5749 }
5750
5751 static bool
5752 ix86_keep_aggregate_return_pointer (tree fntype)
5753 {
5754 tree attr;
5755
5756 if (!TARGET_64BIT)
5757 {
5758 attr = lookup_attribute ("callee_pop_aggregate_return",
5759 TYPE_ATTRIBUTES (fntype));
5760 if (attr)
5761 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
5762
5763 /* For 32-bit MS-ABI the default is to keep aggregate
5764 return pointer. */
5765 if (ix86_function_type_abi (fntype) == MS_ABI)
5766 return true;
5767 }
5768 return KEEP_AGGREGATE_RETURN_POINTER != 0;
5769 }
5770
5771 /* Value is the number of bytes of arguments automatically
5772 popped when returning from a subroutine call.
5773 FUNDECL is the declaration node of the function (as a tree),
5774 FUNTYPE is the data type of the function (as a tree),
5775 or for a library call it is an identifier node for the subroutine name.
5776 SIZE is the number of bytes of arguments passed on the stack.
5777
5778 On the 80386, the RTD insn may be used to pop them if the number
5779 of args is fixed, but if the number is variable then the caller
5780 must pop them all. RTD can't be used for library calls now
5781 because the library is compiled with the Unix compiler.
5782 Use of RTD is a selectable option, since it is incompatible with
5783 standard Unix calling sequences. If the option is not selected,
5784 the caller must always pop the args.
5785
5786 The attribute stdcall is equivalent to RTD on a per module basis. */
5787
5788 static int
5789 ix86_return_pops_args (tree fundecl, tree funtype, int size)
5790 {
5791 unsigned int ccvt;
5792
5793 /* None of the 64-bit ABIs pop arguments. */
5794 if (TARGET_64BIT)
5795 return 0;
5796
5797 ccvt = ix86_get_callcvt (funtype);
5798
5799 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
5800 | IX86_CALLCVT_THISCALL)) != 0
5801 && ! stdarg_p (funtype))
5802 return size;
5803
5804 /* Lose any fake structure return argument if it is passed on the stack. */
5805 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
5806 && !ix86_keep_aggregate_return_pointer (funtype))
5807 {
5808 int nregs = ix86_function_regparm (funtype, fundecl);
5809 if (nregs == 0)
5810 return GET_MODE_SIZE (Pmode);
5811 }
5812
5813 return 0;
5814 }
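
/* Illustrative example (user-level code, not part of this file; names are
   hypothetical).  For the 32-bit declarations

     void __attribute__((stdcall)) s (int a, int b);
     void c (int a, int b);

   a call to s returns 8 here (two ints popped by the callee, i.e. a
   "ret $8"), while a call to c returns 0 and the caller adjusts the
   stack itself.  */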
5815
5816 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
5817
5818 static bool
5819 ix86_legitimate_combined_insn (rtx insn)
5820 {
5821 /* Check operand constraints in case hard registers were propagated
5822 into insn pattern. This check prevents combine pass from
5823 generating insn patterns with invalid hard register operands.
5824 These invalid insns can eventually confuse reload to error out
5825 with a spill failure. See also PRs 46829 and 46843. */
5826 if ((INSN_CODE (insn) = recog (PATTERN (insn), insn, 0)) >= 0)
5827 {
5828 int i;
5829
5830 extract_insn (insn);
5831 preprocess_constraints (insn);
5832
5833 int n_operands = recog_data.n_operands;
5834 int n_alternatives = recog_data.n_alternatives;
5835 for (i = 0; i < n_operands; i++)
5836 {
5837 rtx op = recog_data.operand[i];
5838 enum machine_mode mode = GET_MODE (op);
5839 const operand_alternative *op_alt;
5840 int offset = 0;
5841 bool win;
5842 int j;
5843
5844 /* For pre-AVX disallow unaligned loads/stores where the
5845 instructions don't support it. */
5846 if (!TARGET_AVX
5847 && VECTOR_MODE_P (GET_MODE (op))
5848 && misaligned_operand (op, GET_MODE (op)))
5849 {
5850 int min_align = get_attr_ssememalign (insn);
5851 if (min_align == 0)
5852 return false;
5853 }
5854
5855 /* A unary operator may be accepted by the predicate, but it
5856 is irrelevant for matching constraints. */
5857 if (UNARY_P (op))
5858 op = XEXP (op, 0);
5859
5860 if (GET_CODE (op) == SUBREG)
5861 {
5862 if (REG_P (SUBREG_REG (op))
5863 && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
5864 offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
5865 GET_MODE (SUBREG_REG (op)),
5866 SUBREG_BYTE (op),
5867 GET_MODE (op));
5868 op = SUBREG_REG (op);
5869 }
5870
5871 if (!(REG_P (op) && HARD_REGISTER_P (op)))
5872 continue;
5873
5874 op_alt = recog_op_alt;
5875
5876 /* Operand has no constraints, anything is OK. */
5877 win = !n_alternatives;
5878
5879 alternative_mask enabled = recog_data.enabled_alternatives;
5880 for (j = 0; j < n_alternatives; j++, op_alt += n_operands)
5881 {
5882 if (!TEST_BIT (enabled, j))
5883 continue;
5884 if (op_alt[i].anything_ok
5885 || (op_alt[i].matches != -1
5886 && operands_match_p
5887 (recog_data.operand[i],
5888 recog_data.operand[op_alt[i].matches]))
5889 || reg_fits_class_p (op, op_alt[i].cl, offset, mode))
5890 {
5891 win = true;
5892 break;
5893 }
5894 }
5895
5896 if (!win)
5897 return false;
5898 }
5899 }
5900
5901 return true;
5902 }
5903 \f
5904 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
5905
5906 static unsigned HOST_WIDE_INT
5907 ix86_asan_shadow_offset (void)
5908 {
5909 return TARGET_LP64 ? (TARGET_MACHO ? (HOST_WIDE_INT_1 << 44)
5910 : HOST_WIDE_INT_C (0x7fff8000))
5911 : (HOST_WIDE_INT_1 << 29);
5912 }
5913 \f
5914 /* Argument support functions. */
5915
5916 /* Return true when register may be used to pass function parameters. */
5917 bool
5918 ix86_function_arg_regno_p (int regno)
5919 {
5920 int i;
5921 const int *parm_regs;
5922
5923 if (!TARGET_64BIT)
5924 {
5925 if (TARGET_MACHO)
5926 return (regno < REGPARM_MAX
5927 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
5928 else
5929 return (regno < REGPARM_MAX
5930 || (TARGET_MMX && MMX_REGNO_P (regno)
5931 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
5932 || (TARGET_SSE && SSE_REGNO_P (regno)
5933 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
5934 }
5935
5936 if (TARGET_SSE && SSE_REGNO_P (regno)
5937 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
5938 return true;
5939
5940 /* TODO: The function should depend on current function ABI but
5941 builtins.c would need updating then. Therefore we use the
5942 default ABI. */
5943
5944 /* RAX is used as hidden argument to va_arg functions. */
5945 if (ix86_abi == SYSV_ABI && regno == AX_REG)
5946 return true;
5947
5948 if (ix86_abi == MS_ABI)
5949 parm_regs = x86_64_ms_abi_int_parameter_registers;
5950 else
5951 parm_regs = x86_64_int_parameter_registers;
5952 for (i = 0; i < (ix86_abi == MS_ABI
5953 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
5954 if (regno == parm_regs[i])
5955 return true;
5956 return false;
5957 }
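
/* Illustrative example (not part of this file).  Under the 64-bit SysV
   ABI, x86_64_int_parameter_registers holds %rdi, %rsi, %rdx, %rcx, %r8
   and %r9, so those register numbers (plus %rax, used to pass the hidden
   varargs vector count, and the SSE argument registers) are accepted
   above; the MS ABI table holds only %rcx, %rdx, %r8 and %r9.  */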
5958
5959 /* Return true if we do not know how to pass TYPE solely in registers. */
5960
5961 static bool
5962 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
5963 {
5964 if (must_pass_in_stack_var_size_or_pad (mode, type))
5965 return true;
5966
5967 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
5968 The layout_type routine is crafty and tries to trick us into passing
5969 currently unsupported vector types on the stack by using TImode. */
5970 return (!TARGET_64BIT && mode == TImode
5971 && type && TREE_CODE (type) != VECTOR_TYPE);
5972 }
5973
5974 /* Return the size, in bytes, of the area reserved for arguments passed
5975 in registers for the function represented by FNDECL, depending on the
5976 ABI used. */
5977 int
5978 ix86_reg_parm_stack_space (const_tree fndecl)
5979 {
5980 enum calling_abi call_abi = SYSV_ABI;
5981 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
5982 call_abi = ix86_function_abi (fndecl);
5983 else
5984 call_abi = ix86_function_type_abi (fndecl);
5985 if (TARGET_64BIT && call_abi == MS_ABI)
5986 return 32;
5987 return 0;
5988 }
5989
5990 /* Return SYSV_ABI or MS_ABI, depending on FNTYPE, specifying the
5991 call ABI used. */
5992 enum calling_abi
5993 ix86_function_type_abi (const_tree fntype)
5994 {
5995 if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
5996 {
5997 enum calling_abi abi = ix86_abi;
5998 if (abi == SYSV_ABI)
5999 {
6000 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
6001 abi = MS_ABI;
6002 }
6003 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
6004 abi = SYSV_ABI;
6005 return abi;
6006 }
6007 return ix86_abi;
6008 }
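
/* Illustrative example (user-level code, not part of this file; names are
   hypothetical).  With the SysV ABI as the default,

     int __attribute__((ms_abi)) win_call (int, int);

   makes ix86_function_type_abi return MS_ABI for this prototype, so in
   64-bit mode ix86_reg_parm_stack_space above reports 32 bytes of
   register-parameter (shadow) area for it.  */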
6009
6010 /* We add this as a workaround in order to use libc_has_function
6011 hook in i386.md. */
6012 bool
6013 ix86_libc_has_function (enum function_class fn_class)
6014 {
6015 return targetm.libc_has_function (fn_class);
6016 }
6017
6018 static bool
6019 ix86_function_ms_hook_prologue (const_tree fn)
6020 {
6021 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
6022 {
6023 if (decl_function_context (fn) != NULL_TREE)
6024 error_at (DECL_SOURCE_LOCATION (fn),
6025 "ms_hook_prologue is not compatible with nested function");
6026 else
6027 return true;
6028 }
6029 return false;
6030 }
6031
6032 static enum calling_abi
6033 ix86_function_abi (const_tree fndecl)
6034 {
6035 if (! fndecl)
6036 return ix86_abi;
6037 return ix86_function_type_abi (TREE_TYPE (fndecl));
6038 }
6039
6040 /* Return SYSV_ABI or MS_ABI, depending on CFUN, specifying the
6041 call ABI used. */
6042 enum calling_abi
6043 ix86_cfun_abi (void)
6044 {
6045 if (! cfun)
6046 return ix86_abi;
6047 return cfun->machine->call_abi;
6048 }
6049
6050 /* Write the extra assembler code needed to declare a function properly. */
6051
6052 void
6053 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
6054 tree decl)
6055 {
6056 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
6057
6058 if (is_ms_hook)
6059 {
6060 int i, filler_count = (TARGET_64BIT ? 32 : 16);
6061 unsigned int filler_cc = 0xcccccccc;
6062
6063 for (i = 0; i < filler_count; i += 4)
6064 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
6065 }
6066
6067 #ifdef SUBTARGET_ASM_UNWIND_INIT
6068 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
6069 #endif
6070
6071 ASM_OUTPUT_LABEL (asm_out_file, fname);
6072
6073 /* Output magic byte marker, if hot-patch attribute is set. */
6074 if (is_ms_hook)
6075 {
6076 if (TARGET_64BIT)
6077 {
6078 /* leaq [%rsp + 0], %rsp */
6079 asm_fprintf (asm_out_file, ASM_BYTE
6080 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
6081 }
6082 else
6083 {
6084 /* movl.s %edi, %edi
6085 push %ebp
6086 movl.s %esp, %ebp */
6087 asm_fprintf (asm_out_file, ASM_BYTE
6088 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
6089 }
6090 }
6091 }
6092
6093 /* regclass.c */
6094 extern void init_regs (void);
6095
6096 /* Implementation of the call ABI switching target hook. The call-used
6097 register sets specific to FNDECL are selected. See also
6098 ix86_conditional_register_usage for more details. */
6099 void
6100 ix86_call_abi_override (const_tree fndecl)
6101 {
6102 if (fndecl == NULL_TREE)
6103 cfun->machine->call_abi = ix86_abi;
6104 else
6105 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
6106 }
6107
6108 /* The 64-bit MS and SYSV ABIs have different sets of call-used registers.
6109 Avoid expensive re-initialization of init_regs each time we switch function context
6110 since this is needed only during RTL expansion. */
6111 static void
6112 ix86_maybe_switch_abi (void)
6113 {
6114 if (TARGET_64BIT
6115 && call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
6116 reinit_regs ();
6117 }
6118
6119 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6120 for a call to a function whose data type is FNTYPE.
6121 For a library call, FNTYPE is 0. */
6122
6123 void
6124 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
6125 tree fntype, /* tree ptr for function decl */
6126 rtx libname, /* SYMBOL_REF of library name or 0 */
6127 tree fndecl,
6128 int caller)
6129 {
6130 struct cgraph_local_info *i;
6131
6132 memset (cum, 0, sizeof (*cum));
6133
6134 if (fndecl)
6135 {
6136 i = cgraph_local_info (fndecl);
6137 cum->call_abi = ix86_function_abi (fndecl);
6138 }
6139 else
6140 {
6141 i = NULL;
6142 cum->call_abi = ix86_function_type_abi (fntype);
6143 }
6144
6145 cum->caller = caller;
6146
6147 /* Set up the number of registers to use for passing arguments. */
6148 cum->nregs = ix86_regparm;
6149 if (TARGET_64BIT)
6150 {
6151 cum->nregs = (cum->call_abi == SYSV_ABI
6152 ? X86_64_REGPARM_MAX
6153 : X86_64_MS_REGPARM_MAX);
6154 }
6155 if (TARGET_SSE)
6156 {
6157 cum->sse_nregs = SSE_REGPARM_MAX;
6158 if (TARGET_64BIT)
6159 {
6160 cum->sse_nregs = (cum->call_abi == SYSV_ABI
6161 ? X86_64_SSE_REGPARM_MAX
6162 : X86_64_MS_SSE_REGPARM_MAX);
6163 }
6164 }
6165 if (TARGET_MMX)
6166 cum->mmx_nregs = MMX_REGPARM_MAX;
6167 cum->warn_avx512f = true;
6168 cum->warn_avx = true;
6169 cum->warn_sse = true;
6170 cum->warn_mmx = true;
6171
6172 /* Because the type might mismatch between caller and callee, we need to
6173 use the actual type of the function for local calls.
6174 FIXME: cgraph_analyze can be told to actually record if function uses
6175 va_start so for local functions maybe_vaarg can be made aggressive
6176 helping K&R code.
6177 FIXME: once the type system is fixed, we won't need this code anymore. */
6178 if (i && i->local && i->can_change_signature)
6179 fntype = TREE_TYPE (fndecl);
6180 cum->maybe_vaarg = (fntype
6181 ? (!prototype_p (fntype) || stdarg_p (fntype))
6182 : !libname);
6183
6184 if (!TARGET_64BIT)
6185 {
6186 /* If there are variable arguments, then we won't pass anything
6187 in registers in 32-bit mode. */
6188 if (stdarg_p (fntype))
6189 {
6190 cum->nregs = 0;
6191 cum->sse_nregs = 0;
6192 cum->mmx_nregs = 0;
6193 cum->warn_avx512f = false;
6194 cum->warn_avx = false;
6195 cum->warn_sse = false;
6196 cum->warn_mmx = false;
6197 return;
6198 }
6199
6200 /* Use ecx and edx registers if function has fastcall attribute,
6201 else look for regparm information. */
6202 if (fntype)
6203 {
6204 unsigned int ccvt = ix86_get_callcvt (fntype);
6205 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
6206 {
6207 cum->nregs = 1;
6208 cum->fastcall = 1; /* Same first register as in fastcall. */
6209 }
6210 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
6211 {
6212 cum->nregs = 2;
6213 cum->fastcall = 1;
6214 }
6215 else
6216 cum->nregs = ix86_function_regparm (fntype, fndecl);
6217 }
6218
6219 /* Set up the number of SSE registers used for passing SFmode
6220 and DFmode arguments. Warn for mismatching ABI. */
6221 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
6222 }
6223 }
6224
6225 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
6226 But in the case of vector types, it is some vector mode.
6227
6228 When we have only some of our vector isa extensions enabled, then there
6229 are some modes for which vector_mode_supported_p is false. For these
6230 modes, the generic vector support in gcc will choose some non-vector mode
6231 in order to implement the type. By computing the natural mode, we'll
6232 select the proper ABI location for the operand and not depend on whatever
6233 the middle-end decides to do with these vector types.
6234
6235 The middle-end can't deal with vector types larger than 16 bytes.  In this
6236 case, we return the original mode and warn ABI change if CUM isn't
6237 NULL.
6238
6239 If IN_RETURN is true, warn about an ABI change if the vector mode isn't
6240 available for the function return value. */
6241
6242 static enum machine_mode
6243 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
6244 bool in_return)
6245 {
6246 enum machine_mode mode = TYPE_MODE (type);
6247
6248 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
6249 {
6250 HOST_WIDE_INT size = int_size_in_bytes (type);
6251 if ((size == 8 || size == 16 || size == 32 || size == 64)
6252 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
6253 && TYPE_VECTOR_SUBPARTS (type) > 1)
6254 {
6255 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
6256
6257 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6258 mode = MIN_MODE_VECTOR_FLOAT;
6259 else
6260 mode = MIN_MODE_VECTOR_INT;
6261
6262 /* Get the mode which has this inner mode and number of units. */
6263 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
6264 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
6265 && GET_MODE_INNER (mode) == innermode)
6266 {
6267 if (size == 64 && !TARGET_AVX512F)
6268 {
6269 static bool warnedavx512f;
6270 static bool warnedavx512f_ret;
6271
6272 if (cum && cum->warn_avx512f && !warnedavx512f)
6273 {
6274 if (warning (OPT_Wpsabi, "AVX512F vector argument "
6275 "without AVX512F enabled changes the ABI"))
6276 warnedavx512f = true;
6277 }
6278 else if (in_return && !warnedavx512f_ret)
6279 {
6280 if (warning (OPT_Wpsabi, "AVX512F vector return "
6281 "without AVX512F enabled changes the ABI"))
6282 warnedavx512f_ret = true;
6283 }
6284
6285 return TYPE_MODE (type);
6286 }
6287 else if (size == 32 && !TARGET_AVX)
6288 {
6289 static bool warnedavx;
6290 static bool warnedavx_ret;
6291
6292 if (cum && cum->warn_avx && !warnedavx)
6293 {
6294 if (warning (OPT_Wpsabi, "AVX vector argument "
6295 "without AVX enabled changes the ABI"))
6296 warnedavx = true;
6297 }
6298 else if (in_return && !warnedavx_ret)
6299 {
6300 if (warning (OPT_Wpsabi, "AVX vector return "
6301 "without AVX enabled changes the ABI"))
6302 warnedavx_ret = true;
6303 }
6304
6305 return TYPE_MODE (type);
6306 }
6307 else if (((size == 8 && TARGET_64BIT) || size == 16)
6308 && !TARGET_SSE)
6309 {
6310 static bool warnedsse;
6311 static bool warnedsse_ret;
6312
6313 if (cum && cum->warn_sse && !warnedsse)
6314 {
6315 if (warning (OPT_Wpsabi, "SSE vector argument "
6316 "without SSE enabled changes the ABI"))
6317 warnedsse = true;
6318 }
6319 else if (!TARGET_64BIT && in_return && !warnedsse_ret)
6320 {
6321 if (warning (OPT_Wpsabi, "SSE vector return "
6322 "without SSE enabled changes the ABI"))
6323 warnedsse_ret = true;
6324 }
6325 }
6326 else if ((size == 8 && !TARGET_64BIT) && !TARGET_MMX)
6327 {
6328 static bool warnedmmx;
6329 static bool warnedmmx_ret;
6330
6331 if (cum && cum->warn_mmx && !warnedmmx)
6332 {
6333 if (warning (OPT_Wpsabi, "MMX vector argument "
6334 "without MMX enabled changes the ABI"))
6335 warnedmmx = true;
6336 }
6337 else if (in_return && !warnedmmx_ret)
6338 {
6339 if (warning (OPT_Wpsabi, "MMX vector return "
6340 "without MMX enabled changes the ABI"))
6341 warnedmmx_ret = true;
6342 }
6343 }
6344 return mode;
6345 }
6346
6347 gcc_unreachable ();
6348 }
6349 }
6350
6351 return mode;
6352 }
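
/* Illustration (the declarations below are invented examples, not part of
   the original sources): a 32-byte vector argument such as

	typedef float v8sf __attribute__ ((vector_size (32)));
	extern void use_v8sf (v8sf);

   compiled without -mavx hits the size == 32 && !TARGET_AVX case above,
   emits the -Wpsabi warning "AVX vector argument without AVX enabled
   changes the ABI", and is passed using TYPE_MODE of the type rather
   than a vector mode.  */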
6353
6354 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
6355 this may not agree with the mode that the type system has chosen for the
6356 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
6357 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
6358
6359 static rtx
6360 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
6361 unsigned int regno)
6362 {
6363 rtx tmp;
6364
6365 if (orig_mode != BLKmode)
6366 tmp = gen_rtx_REG (orig_mode, regno);
6367 else
6368 {
6369 tmp = gen_rtx_REG (mode, regno);
6370 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
6371 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
6372 }
6373
6374 return tmp;
6375 }
6376
6377 /* x86-64 register passing implementation.  See the x86-64 ABI for details.
6378 The goal of this code is to classify each 8 bytes of the incoming argument
6379 by register class and assign registers accordingly. */
6380
6381 /* Return the union class of CLASS1 and CLASS2.
6382 See the x86-64 PS ABI for details. */
6383
6384 static enum x86_64_reg_class
6385 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
6386 {
6387 /* Rule #1: If both classes are equal, this is the resulting class. */
6388 if (class1 == class2)
6389 return class1;
6390
6391 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
6392 the other class. */
6393 if (class1 == X86_64_NO_CLASS)
6394 return class2;
6395 if (class2 == X86_64_NO_CLASS)
6396 return class1;
6397
6398 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
6399 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
6400 return X86_64_MEMORY_CLASS;
6401
6402 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
6403 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
6404 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
6405 return X86_64_INTEGERSI_CLASS;
6406 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
6407 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
6408 return X86_64_INTEGER_CLASS;
6409
6410 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
6411 MEMORY is used. */
6412 if (class1 == X86_64_X87_CLASS
6413 || class1 == X86_64_X87UP_CLASS
6414 || class1 == X86_64_COMPLEX_X87_CLASS
6415 || class2 == X86_64_X87_CLASS
6416 || class2 == X86_64_X87UP_CLASS
6417 || class2 == X86_64_COMPLEX_X87_CLASS)
6418 return X86_64_MEMORY_CLASS;
6419
6420 /* Rule #6: Otherwise class SSE is used. */
6421 return X86_64_SSE_CLASS;
6422 }
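
/* Illustration (example type invented for exposition, not part of the
   original sources): by rule #4 above, a union whose members classify as
   INTEGER and SSE, e.g.

	union u { long l; double d; };

   merges to X86_64_INTEGER_CLASS for its single eightbyte, so an argument
   of this type travels in a general-purpose register.  */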
6423
6424 /* Classify the argument of type TYPE and mode MODE.
6425 CLASSES will be filled by the register class used to pass each word
6426 of the operand. The number of words is returned. In case the parameter
6427 should be passed in memory, 0 is returned. As a special case for zero
6428 sized containers, classes[0] will be NO_CLASS and 1 is returned.
6429
6430 BIT_OFFSET is used internally for handling records and specifies the
6431 offset in bits modulo 512 to avoid overflow cases.
6432
6433 See the x86-64 PS ABI for details.
6434 */
6435
6436 static int
6437 classify_argument (enum machine_mode mode, const_tree type,
6438 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
6439 {
6440 HOST_WIDE_INT bytes =
6441 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6442 int words
6443 = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6444
6445 /* Variable sized entities are always passed/returned in memory. */
6446 if (bytes < 0)
6447 return 0;
6448
6449 if (mode != VOIDmode
6450 && targetm.calls.must_pass_in_stack (mode, type))
6451 return 0;
6452
6453 if (type && AGGREGATE_TYPE_P (type))
6454 {
6455 int i;
6456 tree field;
6457 enum x86_64_reg_class subclasses[MAX_CLASSES];
6458
6459 /* On x86-64 we pass structures larger than 64 bytes on the stack. */
6460 if (bytes > 64)
6461 return 0;
6462
6463 for (i = 0; i < words; i++)
6464 classes[i] = X86_64_NO_CLASS;
6465
6466 /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
6467 signal the memory class, so handle it as a special case. */
6468 if (!words)
6469 {
6470 classes[0] = X86_64_NO_CLASS;
6471 return 1;
6472 }
6473
6474 /* Classify each field of record and merge classes. */
6475 switch (TREE_CODE (type))
6476 {
6477 case RECORD_TYPE:
6478 /* And now merge the fields of structure. */
6479 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6480 {
6481 if (TREE_CODE (field) == FIELD_DECL)
6482 {
6483 int num;
6484
6485 if (TREE_TYPE (field) == error_mark_node)
6486 continue;
6487
6488 /* Bitfields are always classified as integer. Handle them
6489 early, since later code would consider them to be
6490 misaligned integers. */
6491 if (DECL_BIT_FIELD (field))
6492 {
6493 for (i = (int_bit_position (field)
6494 + (bit_offset % 64)) / 8 / 8;
6495 i < ((int_bit_position (field) + (bit_offset % 64))
6496 + tree_to_shwi (DECL_SIZE (field))
6497 + 63) / 8 / 8; i++)
6498 classes[i] =
6499 merge_classes (X86_64_INTEGER_CLASS,
6500 classes[i]);
6501 }
6502 else
6503 {
6504 int pos;
6505
6506 type = TREE_TYPE (field);
6507
6508 /* Flexible array member is ignored. */
6509 if (TYPE_MODE (type) == BLKmode
6510 && TREE_CODE (type) == ARRAY_TYPE
6511 && TYPE_SIZE (type) == NULL_TREE
6512 && TYPE_DOMAIN (type) != NULL_TREE
6513 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
6514 == NULL_TREE))
6515 {
6516 static bool warned;
6517
6518 if (!warned && warn_psabi)
6519 {
6520 warned = true;
6521 inform (input_location,
6522 "the ABI of passing struct with"
6523 " a flexible array member has"
6524 " changed in GCC 4.4");
6525 }
6526 continue;
6527 }
6528 num = classify_argument (TYPE_MODE (type), type,
6529 subclasses,
6530 (int_bit_position (field)
6531 + bit_offset) % 512);
6532 if (!num)
6533 return 0;
6534 pos = (int_bit_position (field)
6535 + (bit_offset % 64)) / 8 / 8;
6536 for (i = 0; i < num && (i + pos) < words; i++)
6537 classes[i + pos] =
6538 merge_classes (subclasses[i], classes[i + pos]);
6539 }
6540 }
6541 }
6542 break;
6543
6544 case ARRAY_TYPE:
6545 /* Arrays are handled as small records. */
6546 {
6547 int num;
6548 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
6549 TREE_TYPE (type), subclasses, bit_offset);
6550 if (!num)
6551 return 0;
6552
6553 /* The partial classes are now full classes. */
6554 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
6555 subclasses[0] = X86_64_SSE_CLASS;
6556 if (subclasses[0] == X86_64_INTEGERSI_CLASS
6557 && !((bit_offset % 64) == 0 && bytes == 4))
6558 subclasses[0] = X86_64_INTEGER_CLASS;
6559
6560 for (i = 0; i < words; i++)
6561 classes[i] = subclasses[i % num];
6562
6563 break;
6564 }
6565 case UNION_TYPE:
6566 case QUAL_UNION_TYPE:
6567 /* Unions are similar to RECORD_TYPE but offset is always 0.
6568 */
6569 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6570 {
6571 if (TREE_CODE (field) == FIELD_DECL)
6572 {
6573 int num;
6574
6575 if (TREE_TYPE (field) == error_mark_node)
6576 continue;
6577
6578 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
6579 TREE_TYPE (field), subclasses,
6580 bit_offset);
6581 if (!num)
6582 return 0;
6583 for (i = 0; i < num && i < words; i++)
6584 classes[i] = merge_classes (subclasses[i], classes[i]);
6585 }
6586 }
6587 break;
6588
6589 default:
6590 gcc_unreachable ();
6591 }
6592
6593 if (words > 2)
6594 {
6595 /* When the size is larger than 16 bytes, if the first eightbyte isn't
6596 X86_64_SSE_CLASS or any of the remaining ones isn't
6597 X86_64_SSEUP_CLASS, everything should be passed in
6598 memory. */
6599 if (classes[0] != X86_64_SSE_CLASS)
6600 return 0;
6601
6602 for (i = 1; i < words; i++)
6603 if (classes[i] != X86_64_SSEUP_CLASS)
6604 return 0;
6605 }
6606
6607 /* Final merger cleanup. */
6608 for (i = 0; i < words; i++)
6609 {
6610 /* If one class is MEMORY, everything should be passed in
6611 memory. */
6612 if (classes[i] == X86_64_MEMORY_CLASS)
6613 return 0;
6614
6615 /* X86_64_SSEUP_CLASS should always be preceded by
6616 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
6617 if (classes[i] == X86_64_SSEUP_CLASS
6618 && classes[i - 1] != X86_64_SSE_CLASS
6619 && classes[i - 1] != X86_64_SSEUP_CLASS)
6620 {
6621 /* The first one should never be X86_64_SSEUP_CLASS. */
6622 gcc_assert (i != 0);
6623 classes[i] = X86_64_SSE_CLASS;
6624 }
6625
6626 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
6627 everything should be passed in memory. */
6628 if (classes[i] == X86_64_X87UP_CLASS
6629 && (classes[i - 1] != X86_64_X87_CLASS))
6630 {
6631 static bool warned;
6632
6633 /* The first one should never be X86_64_X87UP_CLASS. */
6634 gcc_assert (i != 0);
6635 if (!warned && warn_psabi)
6636 {
6637 warned = true;
6638 inform (input_location,
6639 "the ABI of passing union with long double"
6640 " has changed in GCC 4.4");
6641 }
6642 return 0;
6643 }
6644 }
6645 return words;
6646 }
6647
6648 /* Compute the alignment needed.  We align all types to their natural
6649 boundaries, with the exception of XFmode, which is aligned to 128 bits. */
6650 if (mode != VOIDmode && mode != BLKmode)
6651 {
6652 int mode_alignment = GET_MODE_BITSIZE (mode);
6653
6654 if (mode == XFmode)
6655 mode_alignment = 128;
6656 else if (mode == XCmode)
6657 mode_alignment = 256;
6658 if (COMPLEX_MODE_P (mode))
6659 mode_alignment /= 2;
6660 /* Misaligned fields are always returned in memory. */
6661 if (bit_offset % mode_alignment)
6662 return 0;
6663 }
6664
6665 /* For V1xx modes, just use the base mode. */
6666 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
6667 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
6668 mode = GET_MODE_INNER (mode);
6669
6670 /* Classification of atomic types. */
6671 switch (mode)
6672 {
6673 case SDmode:
6674 case DDmode:
6675 classes[0] = X86_64_SSE_CLASS;
6676 return 1;
6677 case TDmode:
6678 classes[0] = X86_64_SSE_CLASS;
6679 classes[1] = X86_64_SSEUP_CLASS;
6680 return 2;
6681 case DImode:
6682 case SImode:
6683 case HImode:
6684 case QImode:
6685 case CSImode:
6686 case CHImode:
6687 case CQImode:
6688 {
6689 int size = bit_offset + (int) GET_MODE_BITSIZE (mode);
6690
6691 /* Analyze last 128 bits only. */
6692 size = (size - 1) & 0x7f;
6693
6694 if (size < 32)
6695 {
6696 classes[0] = X86_64_INTEGERSI_CLASS;
6697 return 1;
6698 }
6699 else if (size < 64)
6700 {
6701 classes[0] = X86_64_INTEGER_CLASS;
6702 return 1;
6703 }
6704 else if (size < 64+32)
6705 {
6706 classes[0] = X86_64_INTEGER_CLASS;
6707 classes[1] = X86_64_INTEGERSI_CLASS;
6708 return 2;
6709 }
6710 else if (size < 64+64)
6711 {
6712 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6713 return 2;
6714 }
6715 else
6716 gcc_unreachable ();
6717 }
6718 case CDImode:
6719 case TImode:
6720 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6721 return 2;
6722 case COImode:
6723 case OImode:
6724 /* OImode shouldn't be used directly. */
6725 gcc_unreachable ();
6726 case CTImode:
6727 return 0;
6728 case SFmode:
6729 if (!(bit_offset % 64))
6730 classes[0] = X86_64_SSESF_CLASS;
6731 else
6732 classes[0] = X86_64_SSE_CLASS;
6733 return 1;
6734 case DFmode:
6735 classes[0] = X86_64_SSEDF_CLASS;
6736 return 1;
6737 case XFmode:
6738 classes[0] = X86_64_X87_CLASS;
6739 classes[1] = X86_64_X87UP_CLASS;
6740 return 2;
6741 case TFmode:
6742 classes[0] = X86_64_SSE_CLASS;
6743 classes[1] = X86_64_SSEUP_CLASS;
6744 return 2;
6745 case SCmode:
6746 classes[0] = X86_64_SSE_CLASS;
6747 if (!(bit_offset % 64))
6748 return 1;
6749 else
6750 {
6751 static bool warned;
6752
6753 if (!warned && warn_psabi)
6754 {
6755 warned = true;
6756 inform (input_location,
6757 "the ABI of passing structure with complex float"
6758 " member has changed in GCC 4.4");
6759 }
6760 classes[1] = X86_64_SSESF_CLASS;
6761 return 2;
6762 }
6763 case DCmode:
6764 classes[0] = X86_64_SSEDF_CLASS;
6765 classes[1] = X86_64_SSEDF_CLASS;
6766 return 2;
6767 case XCmode:
6768 classes[0] = X86_64_COMPLEX_X87_CLASS;
6769 return 1;
6770 case TCmode:
6771 /* This mode is larger than 16 bytes. */
6772 return 0;
6773 case V8SFmode:
6774 case V8SImode:
6775 case V32QImode:
6776 case V16HImode:
6777 case V4DFmode:
6778 case V4DImode:
6779 classes[0] = X86_64_SSE_CLASS;
6780 classes[1] = X86_64_SSEUP_CLASS;
6781 classes[2] = X86_64_SSEUP_CLASS;
6782 classes[3] = X86_64_SSEUP_CLASS;
6783 return 4;
6784 case V8DFmode:
6785 case V16SFmode:
6786 case V8DImode:
6787 case V16SImode:
6788 case V32HImode:
6789 case V64QImode:
6790 classes[0] = X86_64_SSE_CLASS;
6791 classes[1] = X86_64_SSEUP_CLASS;
6792 classes[2] = X86_64_SSEUP_CLASS;
6793 classes[3] = X86_64_SSEUP_CLASS;
6794 classes[4] = X86_64_SSEUP_CLASS;
6795 classes[5] = X86_64_SSEUP_CLASS;
6796 classes[6] = X86_64_SSEUP_CLASS;
6797 classes[7] = X86_64_SSEUP_CLASS;
6798 return 8;
6799 case V4SFmode:
6800 case V4SImode:
6801 case V16QImode:
6802 case V8HImode:
6803 case V2DFmode:
6804 case V2DImode:
6805 classes[0] = X86_64_SSE_CLASS;
6806 classes[1] = X86_64_SSEUP_CLASS;
6807 return 2;
6808 case V1TImode:
6809 case V1DImode:
6810 case V2SFmode:
6811 case V2SImode:
6812 case V4HImode:
6813 case V8QImode:
6814 classes[0] = X86_64_SSE_CLASS;
6815 return 1;
6816 case BLKmode:
6817 case VOIDmode:
6818 return 0;
6819 default:
6820 gcc_assert (VECTOR_MODE_P (mode));
6821
6822 if (bytes > 16)
6823 return 0;
6824
6825 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
6826
6827 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
6828 classes[0] = X86_64_INTEGERSI_CLASS;
6829 else
6830 classes[0] = X86_64_INTEGER_CLASS;
6831 classes[1] = X86_64_INTEGER_CLASS;
6832 return 1 + (bytes > 8);
6833 }
6834 }
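
/* Illustration (example type invented for exposition, not part of the
   original sources): applying the classification above to

	struct s { double d; long l; };

   yields classes[0] == X86_64_SSEDF_CLASS for the first eightbyte (the
   double) and classes[1] == X86_64_INTEGER_CLASS for the second (the
   long), and the function returns 2.  */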
6835
6836 /* Examine the argument and set the number of registers required in each
6837 class.  Return true iff the parameter should be passed in memory. */
6838
6839 static bool
6840 examine_argument (enum machine_mode mode, const_tree type, int in_return,
6841 int *int_nregs, int *sse_nregs)
6842 {
6843 enum x86_64_reg_class regclass[MAX_CLASSES];
6844 int n = classify_argument (mode, type, regclass, 0);
6845
6846 *int_nregs = 0;
6847 *sse_nregs = 0;
6848
6849 if (!n)
6850 return true;
6851 for (n--; n >= 0; n--)
6852 switch (regclass[n])
6853 {
6854 case X86_64_INTEGER_CLASS:
6855 case X86_64_INTEGERSI_CLASS:
6856 (*int_nregs)++;
6857 break;
6858 case X86_64_SSE_CLASS:
6859 case X86_64_SSESF_CLASS:
6860 case X86_64_SSEDF_CLASS:
6861 (*sse_nregs)++;
6862 break;
6863 case X86_64_NO_CLASS:
6864 case X86_64_SSEUP_CLASS:
6865 break;
6866 case X86_64_X87_CLASS:
6867 case X86_64_X87UP_CLASS:
6868 case X86_64_COMPLEX_X87_CLASS:
6869 if (!in_return)
6870 return true;
6871 break;
6872 case X86_64_MEMORY_CLASS:
6873 gcc_unreachable ();
6874 }
6875
6876 return false;
6877 }
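
/* Illustration (not part of the original sources): with the logic above, a
   "long double" (XFmode) value classifies as
   { X86_64_X87_CLASS, X86_64_X87UP_CLASS }, so examine_argument returns
   true when IN_RETURN is zero (the argument goes to memory) but false for
   a return value, which is then handled by construct_container below and
   placed in %st(0).  */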
6878
6879 /* Construct container for the argument used by GCC interface. See
6880 FUNCTION_ARG for the detailed description. */
6881
6882 static rtx
6883 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
6884 const_tree type, int in_return, int nintregs, int nsseregs,
6885 const int *intreg, int sse_regno)
6886 {
6887 /* The following variables hold the static issued_error state. */
6888 static bool issued_sse_arg_error;
6889 static bool issued_sse_ret_error;
6890 static bool issued_x87_ret_error;
6891
6892 enum machine_mode tmpmode;
6893 int bytes =
6894 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6895 enum x86_64_reg_class regclass[MAX_CLASSES];
6896 int n;
6897 int i;
6898 int nexps = 0;
6899 int needed_sseregs, needed_intregs;
6900 rtx exp[MAX_CLASSES];
6901 rtx ret;
6902
6903 n = classify_argument (mode, type, regclass, 0);
6904 if (!n)
6905 return NULL;
6906 if (examine_argument (mode, type, in_return, &needed_intregs,
6907 &needed_sseregs))
6908 return NULL;
6909 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
6910 return NULL;
6911
6912 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
6913 some less clueful developer tries to use floating-point anyway. */
6914 if (needed_sseregs && !TARGET_SSE)
6915 {
6916 if (in_return)
6917 {
6918 if (!issued_sse_ret_error)
6919 {
6920 error ("SSE register return with SSE disabled");
6921 issued_sse_ret_error = true;
6922 }
6923 }
6924 else if (!issued_sse_arg_error)
6925 {
6926 error ("SSE register argument with SSE disabled");
6927 issued_sse_arg_error = true;
6928 }
6929 return NULL;
6930 }
6931
6932 /* Likewise, error if the ABI requires us to return values in the
6933 x87 registers and the user specified -mno-80387. */
6934 if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
6935 for (i = 0; i < n; i++)
6936 if (regclass[i] == X86_64_X87_CLASS
6937 || regclass[i] == X86_64_X87UP_CLASS
6938 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
6939 {
6940 if (!issued_x87_ret_error)
6941 {
6942 error ("x87 register return with x87 disabled");
6943 issued_x87_ret_error = true;
6944 }
6945 return NULL;
6946 }
6947
6948 /* First construct the simple cases.  Avoid SCmode, since we want to use
6949 a single register to pass this type. */
6950 if (n == 1 && mode != SCmode)
6951 switch (regclass[0])
6952 {
6953 case X86_64_INTEGER_CLASS:
6954 case X86_64_INTEGERSI_CLASS:
6955 return gen_rtx_REG (mode, intreg[0]);
6956 case X86_64_SSE_CLASS:
6957 case X86_64_SSESF_CLASS:
6958 case X86_64_SSEDF_CLASS:
6959 if (mode != BLKmode)
6960 return gen_reg_or_parallel (mode, orig_mode,
6961 SSE_REGNO (sse_regno));
6962 break;
6963 case X86_64_X87_CLASS:
6964 case X86_64_COMPLEX_X87_CLASS:
6965 return gen_rtx_REG (mode, FIRST_STACK_REG);
6966 case X86_64_NO_CLASS:
6967 /* Zero sized array, struct or class. */
6968 return NULL;
6969 default:
6970 gcc_unreachable ();
6971 }
6972 if (n == 2
6973 && regclass[0] == X86_64_SSE_CLASS
6974 && regclass[1] == X86_64_SSEUP_CLASS
6975 && mode != BLKmode)
6976 return gen_reg_or_parallel (mode, orig_mode,
6977 SSE_REGNO (sse_regno));
6978 if (n == 4
6979 && regclass[0] == X86_64_SSE_CLASS
6980 && regclass[1] == X86_64_SSEUP_CLASS
6981 && regclass[2] == X86_64_SSEUP_CLASS
6982 && regclass[3] == X86_64_SSEUP_CLASS
6983 && mode != BLKmode)
6984 return gen_reg_or_parallel (mode, orig_mode,
6985 SSE_REGNO (sse_regno));
6986 if (n == 8
6987 && regclass[0] == X86_64_SSE_CLASS
6988 && regclass[1] == X86_64_SSEUP_CLASS
6989 && regclass[2] == X86_64_SSEUP_CLASS
6990 && regclass[3] == X86_64_SSEUP_CLASS
6991 && regclass[4] == X86_64_SSEUP_CLASS
6992 && regclass[5] == X86_64_SSEUP_CLASS
6993 && regclass[6] == X86_64_SSEUP_CLASS
6994 && regclass[7] == X86_64_SSEUP_CLASS
6995 && mode != BLKmode)
6996 return gen_reg_or_parallel (mode, orig_mode,
6997 SSE_REGNO (sse_regno));
6998 if (n == 2
6999 && regclass[0] == X86_64_X87_CLASS
7000 && regclass[1] == X86_64_X87UP_CLASS)
7001 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
7002
7003 if (n == 2
7004 && regclass[0] == X86_64_INTEGER_CLASS
7005 && regclass[1] == X86_64_INTEGER_CLASS
7006 && (mode == CDImode || mode == TImode)
7007 && intreg[0] + 1 == intreg[1])
7008 return gen_rtx_REG (mode, intreg[0]);
7009
7010 /* Otherwise figure out the entries of the PARALLEL. */
7011 for (i = 0; i < n; i++)
7012 {
7013 int pos;
7014
7015 switch (regclass[i])
7016 {
7017 case X86_64_NO_CLASS:
7018 break;
7019 case X86_64_INTEGER_CLASS:
7020 case X86_64_INTEGERSI_CLASS:
7021 /* Merge TImodes on aligned occasions here too. */
7022 if (i * 8 + 8 > bytes)
7023 tmpmode
7024 = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
7025 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
7026 tmpmode = SImode;
7027 else
7028 tmpmode = DImode;
7029 /* We've requested 24 bytes for which we
7030 don't have a mode.  Use DImode. */
7031 if (tmpmode == BLKmode)
7032 tmpmode = DImode;
7033 exp [nexps++]
7034 = gen_rtx_EXPR_LIST (VOIDmode,
7035 gen_rtx_REG (tmpmode, *intreg),
7036 GEN_INT (i*8));
7037 intreg++;
7038 break;
7039 case X86_64_SSESF_CLASS:
7040 exp [nexps++]
7041 = gen_rtx_EXPR_LIST (VOIDmode,
7042 gen_rtx_REG (SFmode,
7043 SSE_REGNO (sse_regno)),
7044 GEN_INT (i*8));
7045 sse_regno++;
7046 break;
7047 case X86_64_SSEDF_CLASS:
7048 exp [nexps++]
7049 = gen_rtx_EXPR_LIST (VOIDmode,
7050 gen_rtx_REG (DFmode,
7051 SSE_REGNO (sse_regno)),
7052 GEN_INT (i*8));
7053 sse_regno++;
7054 break;
7055 case X86_64_SSE_CLASS:
7056 pos = i;
7057 switch (n)
7058 {
7059 case 1:
7060 tmpmode = DImode;
7061 break;
7062 case 2:
7063 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
7064 {
7065 tmpmode = TImode;
7066 i++;
7067 }
7068 else
7069 tmpmode = DImode;
7070 break;
7071 case 4:
7072 gcc_assert (i == 0
7073 && regclass[1] == X86_64_SSEUP_CLASS
7074 && regclass[2] == X86_64_SSEUP_CLASS
7075 && regclass[3] == X86_64_SSEUP_CLASS);
7076 tmpmode = OImode;
7077 i += 3;
7078 break;
7079 case 8:
7080 gcc_assert (i == 0
7081 && regclass[1] == X86_64_SSEUP_CLASS
7082 && regclass[2] == X86_64_SSEUP_CLASS
7083 && regclass[3] == X86_64_SSEUP_CLASS
7084 && regclass[4] == X86_64_SSEUP_CLASS
7085 && regclass[5] == X86_64_SSEUP_CLASS
7086 && regclass[6] == X86_64_SSEUP_CLASS
7087 && regclass[7] == X86_64_SSEUP_CLASS);
7088 tmpmode = XImode;
7089 i += 7;
7090 break;
7091 default:
7092 gcc_unreachable ();
7093 }
7094 exp [nexps++]
7095 = gen_rtx_EXPR_LIST (VOIDmode,
7096 gen_rtx_REG (tmpmode,
7097 SSE_REGNO (sse_regno)),
7098 GEN_INT (pos*8));
7099 sse_regno++;
7100 break;
7101 default:
7102 gcc_unreachable ();
7103 }
7104 }
7105
7106 /* Empty aligned struct, union or class. */
7107 if (nexps == 0)
7108 return NULL;
7109
7110 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
7111 for (i = 0; i < nexps; i++)
7112 XVECEXP (ret, 0, i) = exp [i];
7113 return ret;
7114 }
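
/* Illustration (example type invented for exposition, not part of the
   original sources): for the struct used in the classify_argument example
   above,

	struct s { double d; long l; };

   passed as the first argument of a SysV x86-64 function, the PARALLEL
   built here looks roughly like

	(parallel [(expr_list (reg:DF xmm0) (const_int 0))
		   (expr_list (reg:DI di) (const_int 8))])

   i.e. the double travels in %xmm0 and the long in %rdi.  */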
7115
7116 /* Update the data in CUM to advance over an argument of mode MODE
7117 and data type TYPE. (TYPE is null for libcalls where that information
7118 may not be available.) */
7119
7120 static void
7121 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7122 const_tree type, HOST_WIDE_INT bytes,
7123 HOST_WIDE_INT words)
7124 {
7125 switch (mode)
7126 {
7127 default:
7128 break;
7129
7130 case BLKmode:
7131 if (bytes < 0)
7132 break;
7133 /* FALLTHRU */
7134
7135 case DImode:
7136 case SImode:
7137 case HImode:
7138 case QImode:
7139 cum->words += words;
7140 cum->nregs -= words;
7141 cum->regno += words;
7142
7143 if (cum->nregs <= 0)
7144 {
7145 cum->nregs = 0;
7146 cum->regno = 0;
7147 }
7148 break;
7149
7150 case OImode:
7151 /* OImode shouldn't be used directly. */
7152 gcc_unreachable ();
7153
7154 case DFmode:
7155 if (cum->float_in_sse < 2)
7156 break;
7157 case SFmode:
7158 if (cum->float_in_sse < 1)
7159 break;
7160 /* FALLTHRU */
7161
7162 case V8SFmode:
7163 case V8SImode:
7164 case V64QImode:
7165 case V32HImode:
7166 case V16SImode:
7167 case V8DImode:
7168 case V16SFmode:
7169 case V8DFmode:
7170 case V32QImode:
7171 case V16HImode:
7172 case V4DFmode:
7173 case V4DImode:
7174 case TImode:
7175 case V16QImode:
7176 case V8HImode:
7177 case V4SImode:
7178 case V2DImode:
7179 case V4SFmode:
7180 case V2DFmode:
7181 if (!type || !AGGREGATE_TYPE_P (type))
7182 {
7183 cum->sse_words += words;
7184 cum->sse_nregs -= 1;
7185 cum->sse_regno += 1;
7186 if (cum->sse_nregs <= 0)
7187 {
7188 cum->sse_nregs = 0;
7189 cum->sse_regno = 0;
7190 }
7191 }
7192 break;
7193
7194 case V8QImode:
7195 case V4HImode:
7196 case V2SImode:
7197 case V2SFmode:
7198 case V1TImode:
7199 case V1DImode:
7200 if (!type || !AGGREGATE_TYPE_P (type))
7201 {
7202 cum->mmx_words += words;
7203 cum->mmx_nregs -= 1;
7204 cum->mmx_regno += 1;
7205 if (cum->mmx_nregs <= 0)
7206 {
7207 cum->mmx_nregs = 0;
7208 cum->mmx_regno = 0;
7209 }
7210 }
7211 break;
7212 }
7213 }
7214
7215 static void
7216 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7217 const_tree type, HOST_WIDE_INT words, bool named)
7218 {
7219 int int_nregs, sse_nregs;
7220
7221 /* Unnamed 512 and 256bit vector mode parameters are passed on stack. */
7222 if (!named && (VALID_AVX512F_REG_MODE (mode)
7223 || VALID_AVX256_REG_MODE (mode)))
7224 return;
7225
7226 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
7227 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
7228 {
7229 cum->nregs -= int_nregs;
7230 cum->sse_nregs -= sse_nregs;
7231 cum->regno += int_nregs;
7232 cum->sse_regno += sse_nregs;
7233 }
7234 else
7235 {
7236 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
7237 cum->words = (cum->words + align - 1) & ~(align - 1);
7238 cum->words += words;
7239 }
7240 }
7241
7242 static void
7243 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
7244 HOST_WIDE_INT words)
7245 {
7246 /* Otherwise, this should be passed indirect. */
7247 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
7248
7249 cum->words += words;
7250 if (cum->nregs > 0)
7251 {
7252 cum->nregs -= 1;
7253 cum->regno += 1;
7254 }
7255 }
7256
7257 /* Update the data in CUM to advance over an argument of mode MODE and
7258 data type TYPE. (TYPE is null for libcalls where that information
7259 may not be available.) */
7260
7261 static void
7262 ix86_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
7263 const_tree type, bool named)
7264 {
7265 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7266 HOST_WIDE_INT bytes, words;
7267
7268 if (mode == BLKmode)
7269 bytes = int_size_in_bytes (type);
7270 else
7271 bytes = GET_MODE_SIZE (mode);
7272 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7273
7274 if (type)
7275 mode = type_natural_mode (type, NULL, false);
7276
7277 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7278 function_arg_advance_ms_64 (cum, bytes, words);
7279 else if (TARGET_64BIT)
7280 function_arg_advance_64 (cum, mode, type, words, named);
7281 else
7282 function_arg_advance_32 (cum, mode, type, bytes, words);
7283 }
7284
7285 /* Define where to put the arguments to a function.
7286 Value is zero to push the argument on the stack,
7287 or a hard register in which to store the argument.
7288
7289 MODE is the argument's machine mode.
7290 TYPE is the data type of the argument (as a tree).
7291 This is null for libcalls where that information may
7292 not be available.
7293 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7294 the preceding args and about the function being called.
7295 NAMED is nonzero if this argument is a named parameter
7296 (otherwise it is an extra parameter matching an ellipsis). */
7297
7298 static rtx
7299 function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7300 enum machine_mode orig_mode, const_tree type,
7301 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
7302 {
7303 /* Avoid the AL settings for the Unix64 ABI. */
7304 if (mode == VOIDmode)
7305 return constm1_rtx;
7306
7307 switch (mode)
7308 {
7309 default:
7310 break;
7311
7312 case BLKmode:
7313 if (bytes < 0)
7314 break;
7315 /* FALLTHRU */
7316 case DImode:
7317 case SImode:
7318 case HImode:
7319 case QImode:
7320 if (words <= cum->nregs)
7321 {
7322 int regno = cum->regno;
7323
7324 /* Fastcall allocates the first two DWORD (SImode) or
7325 smaller arguments to ECX and EDX if they aren't
7326 aggregate types. */
7327 if (cum->fastcall)
7328 {
7329 if (mode == BLKmode
7330 || mode == DImode
7331 || (type && AGGREGATE_TYPE_P (type)))
7332 break;
7333
7334 /* ECX, not EAX, is the first allocated register. */
7335 if (regno == AX_REG)
7336 regno = CX_REG;
7337 }
7338 return gen_rtx_REG (mode, regno);
7339 }
7340 break;
7341
7342 case DFmode:
7343 if (cum->float_in_sse < 2)
7344 break;
7345 case SFmode:
7346 if (cum->float_in_sse < 1)
7347 break;
7348 /* FALLTHRU */
7349 case TImode:
7350 /* In 32bit, we pass TImode in xmm registers. */
7351 case V16QImode:
7352 case V8HImode:
7353 case V4SImode:
7354 case V2DImode:
7355 case V4SFmode:
7356 case V2DFmode:
7357 if (!type || !AGGREGATE_TYPE_P (type))
7358 {
7359 if (cum->sse_nregs)
7360 return gen_reg_or_parallel (mode, orig_mode,
7361 cum->sse_regno + FIRST_SSE_REG);
7362 }
7363 break;
7364
7365 case OImode:
7366 case XImode:
7367 /* OImode and XImode shouldn't be used directly. */
7368 gcc_unreachable ();
7369
7370 case V64QImode:
7371 case V32HImode:
7372 case V16SImode:
7373 case V8DImode:
7374 case V16SFmode:
7375 case V8DFmode:
7376 case V8SFmode:
7377 case V8SImode:
7378 case V32QImode:
7379 case V16HImode:
7380 case V4DFmode:
7381 case V4DImode:
7382 if (!type || !AGGREGATE_TYPE_P (type))
7383 {
7384 if (cum->sse_nregs)
7385 return gen_reg_or_parallel (mode, orig_mode,
7386 cum->sse_regno + FIRST_SSE_REG);
7387 }
7388 break;
7389
7390 case V8QImode:
7391 case V4HImode:
7392 case V2SImode:
7393 case V2SFmode:
7394 case V1TImode:
7395 case V1DImode:
7396 if (!type || !AGGREGATE_TYPE_P (type))
7397 {
7398 if (cum->mmx_nregs)
7399 return gen_reg_or_parallel (mode, orig_mode,
7400 cum->mmx_regno + FIRST_MMX_REG);
7401 }
7402 break;
7403 }
7404
7405 return NULL_RTX;
7406 }
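
/* Illustration (declaration invented for exposition, not part of the
   original sources): with the fastcall handling above,

	int __attribute__ ((fastcall)) f (int a, int b, int c);

   passes A in %ecx, B in %edx and C on the stack, while DImode or
   aggregate arguments fall through to the stack even when registers
   remain free.  */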
7407
7408 static rtx
7409 function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7410 enum machine_mode orig_mode, const_tree type, bool named)
7411 {
7412 /* Handle a hidden AL argument containing number of registers
7413 for varargs x86-64 functions. */
7414 if (mode == VOIDmode)
7415 return GEN_INT (cum->maybe_vaarg
7416 ? (cum->sse_nregs < 0
7417 ? X86_64_SSE_REGPARM_MAX
7418 : cum->sse_regno)
7419 : -1);
7420
7421 switch (mode)
7422 {
7423 default:
7424 break;
7425
7426 case V8SFmode:
7427 case V8SImode:
7428 case V32QImode:
7429 case V16HImode:
7430 case V4DFmode:
7431 case V4DImode:
7432 case V16SFmode:
7433 case V16SImode:
7434 case V64QImode:
7435 case V32HImode:
7436 case V8DFmode:
7437 case V8DImode:
7438 /* Unnamed 256 and 512bit vector mode parameters are passed on stack. */
7439 if (!named)
7440 return NULL;
7441 break;
7442 }
7443
7444 return construct_container (mode, orig_mode, type, 0, cum->nregs,
7445 cum->sse_nregs,
7446 &x86_64_int_parameter_registers [cum->regno],
7447 cum->sse_regno);
7448 }
7449
7450 static rtx
7451 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7452 enum machine_mode orig_mode, bool named,
7453 HOST_WIDE_INT bytes)
7454 {
7455 unsigned int regno;
7456
7457 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
7458 We use the value -2 to specify that the current function call is MSABI. */
7459 if (mode == VOIDmode)
7460 return GEN_INT (-2);
7461
7462 /* If we've run out of registers, it goes on the stack. */
7463 if (cum->nregs == 0)
7464 return NULL_RTX;
7465
7466 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
7467
7468 /* Only floating point modes are passed in anything but integer regs. */
7469 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
7470 {
7471 if (named)
7472 regno = cum->regno + FIRST_SSE_REG;
7473 else
7474 {
7475 rtx t1, t2;
7476
7477 /* Unnamed floating parameters are passed in both the
7478 SSE and integer registers. */
7479 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
7480 t2 = gen_rtx_REG (mode, regno);
7481 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
7482 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
7483 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
7484 }
7485 }
7486 /* Handle aggregate types passed in a register. */
7487 if (orig_mode == BLKmode)
7488 {
7489 if (bytes > 0 && bytes <= 8)
7490 mode = (bytes > 4 ? DImode : SImode);
7491 if (mode == BLKmode)
7492 mode = DImode;
7493 }
7494
7495 return gen_reg_or_parallel (mode, orig_mode, regno);
7496 }
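
/* Illustration (my_printf is an invented example function, not part of the
   original sources): under the MS ABI handling above, the double passed in
   the second slot of a call such as

	extern int my_printf (const char *, ...);
	my_printf ("%f", 3.14);

   is unnamed, so it is described here by a PARALLEL naming both %xmm1 and
   the matching integer argument register (%rdx), making the value
   available in either place as the Win64 convention expects.  */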
7497
7498 /* Return where to put the arguments to a function.
7499 Return zero to push the argument on the stack, or a hard register in which to store the argument.
7500
7501 MODE is the argument's machine mode. TYPE is the data type of the
7502 argument. It is null for libcalls where that information may not be
7503 available. CUM gives information about the preceding args and about
7504 the function being called. NAMED is nonzero if this argument is a
7505 named parameter (otherwise it is an extra parameter matching an
7506 ellipsis). */
7507
7508 static rtx
7509 ix86_function_arg (cumulative_args_t cum_v, enum machine_mode omode,
7510 const_tree type, bool named)
7511 {
7512 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7513 enum machine_mode mode = omode;
7514 HOST_WIDE_INT bytes, words;
7515 rtx arg;
7516
7517 if (mode == BLKmode)
7518 bytes = int_size_in_bytes (type);
7519 else
7520 bytes = GET_MODE_SIZE (mode);
7521 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7522
7523 /* To simplify the code below, represent vector types with a vector mode
7524 even if MMX/SSE are not active. */
7525 if (type && TREE_CODE (type) == VECTOR_TYPE)
7526 mode = type_natural_mode (type, cum, false);
7527
7528 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7529 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
7530 else if (TARGET_64BIT)
7531 arg = function_arg_64 (cum, mode, omode, type, named);
7532 else
7533 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
7534
7535 return arg;
7536 }
7537
7538 /* A C expression that indicates when an argument must be passed by
7539 reference. If nonzero for an argument, a copy of that argument is
7540 made in memory and a pointer to the argument is passed instead of
7541 the argument itself. The pointer is passed in whatever way is
7542 appropriate for passing a pointer to that type. */
7543
7544 static bool
7545 ix86_pass_by_reference (cumulative_args_t cum_v, enum machine_mode mode,
7546 const_tree type, bool)
7547 {
7548 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7549
7550 /* See Windows x64 Software Convention. */
7551 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7552 {
7553 int msize = (int) GET_MODE_SIZE (mode);
7554 if (type)
7555 {
7556 /* Arrays are passed by reference. */
7557 if (TREE_CODE (type) == ARRAY_TYPE)
7558 return true;
7559
7560 if (AGGREGATE_TYPE_P (type))
7561 {
7562 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
7563 are passed by reference. */
7564 msize = int_size_in_bytes (type);
7565 }
7566 }
7567
7568 /* __m128 is passed by reference. */
7569 switch (msize) {
7570 case 1: case 2: case 4: case 8:
7571 break;
7572 default:
7573 return true;
7574 }
7575 }
7576 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
7577 return 1;
7578
7579 return 0;
7580 }
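
/* Illustration (example type invented for exposition, not part of the
   original sources): following the Win64 rules above, an __m128 argument,
   or an aggregate of e.g. 3 or 24 bytes, is passed by reference because
   its size is not 1, 2, 4 or 8, whereas

	struct pair { int a, b; };

   is 8 bytes and is passed by value (in a register if one is left).  */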
7581
7582 /* Return true when TYPE should be 128bit aligned for 32bit argument
7583 passing ABI. XXX: This function is obsolete and is only used for
7584 checking psABI compatibility with previous versions of GCC. */
7585
7586 static bool
7587 ix86_compat_aligned_value_p (const_tree type)
7588 {
7589 enum machine_mode mode = TYPE_MODE (type);
7590 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
7591 || mode == TDmode
7592 || mode == TFmode
7593 || mode == TCmode)
7594 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
7595 return true;
7596 if (TYPE_ALIGN (type) < 128)
7597 return false;
7598
7599 if (AGGREGATE_TYPE_P (type))
7600 {
7601 /* Walk the aggregates recursively. */
7602 switch (TREE_CODE (type))
7603 {
7604 case RECORD_TYPE:
7605 case UNION_TYPE:
7606 case QUAL_UNION_TYPE:
7607 {
7608 tree field;
7609
7610 /* Walk all the structure fields. */
7611 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7612 {
7613 if (TREE_CODE (field) == FIELD_DECL
7614 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
7615 return true;
7616 }
7617 break;
7618 }
7619
7620 case ARRAY_TYPE:
7621 /* Just for use if some languages pass arrays by value. */
7622 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
7623 return true;
7624 break;
7625
7626 default:
7627 gcc_unreachable ();
7628 }
7629 }
7630 return false;
7631 }
7632
7633 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
7634 XXX: This function is obsolete and is only used for checking psABI
7635 compatibility with previous versions of GCC. */
7636
7637 static unsigned int
7638 ix86_compat_function_arg_boundary (enum machine_mode mode,
7639 const_tree type, unsigned int align)
7640 {
7641 /* In 32bit, only _Decimal128 and __float128 are aligned to their
7642 natural boundaries. */
7643 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
7644 {
7645 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
7646 make an exception for SSE modes since these require 128bit
7647 alignment.
7648
7649 The handling here differs from field_alignment. ICC aligns MMX
7650 arguments to 4 byte boundaries, while structure fields are aligned
7651 to 8 byte boundaries. */
7652 if (!type)
7653 {
7654 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
7655 align = PARM_BOUNDARY;
7656 }
7657 else
7658 {
7659 if (!ix86_compat_aligned_value_p (type))
7660 align = PARM_BOUNDARY;
7661 }
7662 }
7663 if (align > BIGGEST_ALIGNMENT)
7664 align = BIGGEST_ALIGNMENT;
7665 return align;
7666 }
7667
7668 /* Return true when TYPE should be 128bit aligned for 32bit argument
7669 passing ABI. */
7670
7671 static bool
7672 ix86_contains_aligned_value_p (const_tree type)
7673 {
7674 enum machine_mode mode = TYPE_MODE (type);
7675
7676 if (mode == XFmode || mode == XCmode)
7677 return false;
7678
7679 if (TYPE_ALIGN (type) < 128)
7680 return false;
7681
7682 if (AGGREGATE_TYPE_P (type))
7683 {
7684 /* Walk the aggregates recursively. */
7685 switch (TREE_CODE (type))
7686 {
7687 case RECORD_TYPE:
7688 case UNION_TYPE:
7689 case QUAL_UNION_TYPE:
7690 {
7691 tree field;
7692
7693 /* Walk all the structure fields. */
7694 for (field = TYPE_FIELDS (type);
7695 field;
7696 field = DECL_CHAIN (field))
7697 {
7698 if (TREE_CODE (field) == FIELD_DECL
7699 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
7700 return true;
7701 }
7702 break;
7703 }
7704
7705 case ARRAY_TYPE:
7706 /* Just for use if some languages pass arrays by value. */
7707 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
7708 return true;
7709 break;
7710
7711 default:
7712 gcc_unreachable ();
7713 }
7714 }
7715 else
7716 return TYPE_ALIGN (type) >= 128;
7717
7718 return false;
7719 }
7720
7721 /* Gives the alignment boundary, in bits, of an argument with the
7722 specified mode and type. */
7723
7724 static unsigned int
7725 ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
7726 {
7727 unsigned int align;
7728 if (type)
7729 {
7730 /* Since the main variant type is used for the call, convert the
7731 given type to its main variant. */
7732 type = TYPE_MAIN_VARIANT (type);
7733 align = TYPE_ALIGN (type);
7734 }
7735 else
7736 align = GET_MODE_ALIGNMENT (mode);
7737 if (align < PARM_BOUNDARY)
7738 align = PARM_BOUNDARY;
7739 else
7740 {
7741 static bool warned;
7742 unsigned int saved_align = align;
7743
7744 if (!TARGET_64BIT)
7745 {
7746 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
7747 if (!type)
7748 {
7749 if (mode == XFmode || mode == XCmode)
7750 align = PARM_BOUNDARY;
7751 }
7752 else if (!ix86_contains_aligned_value_p (type))
7753 align = PARM_BOUNDARY;
7754
7755 if (align < 128)
7756 align = PARM_BOUNDARY;
7757 }
7758
7759 if (warn_psabi
7760 && !warned
7761 && align != ix86_compat_function_arg_boundary (mode, type,
7762 saved_align))
7763 {
7764 warned = true;
7765 inform (input_location,
7766 "The ABI for passing parameters with %d-byte"
7767 " alignment has changed in GCC 4.6",
7768 align / BITS_PER_UNIT);
7769 }
7770 }
7771
7772 return align;
7773 }
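
/* Illustration (not part of the original sources): on ia32 the logic above
   gives a plain "double" argument only PARM_BOUNDARY (32-bit) alignment,
   while a 16-byte vector such as __m128 keeps its natural 128-bit
   alignment; whenever the result differs from what
   ix86_compat_function_arg_boundary would have computed, the -Wpsabi note
   about the GCC 4.6 change is emitted once.  */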
7774
7775 /* Return true if REGNO is a possible register number of a function value. */
7776
7777 static bool
7778 ix86_function_value_regno_p (const unsigned int regno)
7779 {
7780 switch (regno)
7781 {
7782 case AX_REG:
7783 return true;
7784 case DX_REG:
7785 return (!TARGET_64BIT || ix86_abi != MS_ABI);
7786 case DI_REG:
7787 case SI_REG:
7788 return TARGET_64BIT && ix86_abi != MS_ABI;
7789
7790 /* Complex values are returned in %st(0)/%st(1) pair. */
7791 case ST0_REG:
7792 case ST1_REG:
7793 /* TODO: The function should depend on current function ABI but
7794 builtins.c would need updating then. Therefore we use the
7795 default ABI. */
7796 if (TARGET_64BIT && ix86_abi == MS_ABI)
7797 return false;
7798 return TARGET_FLOAT_RETURNS_IN_80387;
7799
7800 /* Complex values are returned in %xmm0/%xmm1 pair. */
7801 case XMM0_REG:
7802 case XMM1_REG:
7803 return TARGET_SSE;
7804
7805 case MM0_REG:
7806 if (TARGET_MACHO || TARGET_64BIT)
7807 return false;
7808 return TARGET_MMX;
7809 }
7810
7811 return false;
7812 }
7813
7814 /* Define how to find the value returned by a function.
7815 VALTYPE is the data type of the value (as a tree).
7816 If the precise function being called is known, FUNC is its FUNCTION_DECL;
7817 otherwise, FUNC is 0. */
7818
7819 static rtx
7820 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
7821 const_tree fntype, const_tree fn)
7822 {
7823 unsigned int regno;
7824
7825 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
7826 we normally prevent this case when mmx is not available. However
7827 some ABIs may require the result to be returned like DImode. */
7828 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7829 regno = FIRST_MMX_REG;
7830
7831 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
7832 we prevent this case when sse is not available. However some ABIs
7833 may require the result to be returned like integer TImode. */
7834 else if (mode == TImode
7835 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7836 regno = FIRST_SSE_REG;
7837
7838 /* 32-byte vector modes in %ymm0. */
7839 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
7840 regno = FIRST_SSE_REG;
7841
7842 /* 64-byte vector modes in %zmm0. */
7843 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
7844 regno = FIRST_SSE_REG;
7845
7846 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
7847 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
7848 regno = FIRST_FLOAT_REG;
7849 else
7850 /* Most things go in %eax. */
7851 regno = AX_REG;
7852
7853 /* Override FP return register with %xmm0 for local functions when
7854 SSE math is enabled or for functions with sseregparm attribute. */
7855 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
7856 {
7857 int sse_level = ix86_function_sseregparm (fntype, fn, false);
7858 if ((sse_level >= 1 && mode == SFmode)
7859 || (sse_level == 2 && mode == DFmode))
7860 regno = FIRST_SSE_REG;
7861 }
7862
7863 /* OImode shouldn't be used directly. */
7864 gcc_assert (mode != OImode);
7865
7866 return gen_rtx_REG (orig_mode, regno);
7867 }
7868
7869 static rtx
7870 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
7871 const_tree valtype)
7872 {
7873 rtx ret;
7874
7875 /* Handle libcalls, which don't provide a type node. */
7876 if (valtype == NULL)
7877 {
7878 unsigned int regno;
7879
7880 switch (mode)
7881 {
7882 case SFmode:
7883 case SCmode:
7884 case DFmode:
7885 case DCmode:
7886 case TFmode:
7887 case SDmode:
7888 case DDmode:
7889 case TDmode:
7890 regno = FIRST_SSE_REG;
7891 break;
7892 case XFmode:
7893 case XCmode:
7894 regno = FIRST_FLOAT_REG;
7895 break;
7896 case TCmode:
7897 return NULL;
7898 default:
7899 regno = AX_REG;
7900 }
7901
7902 return gen_rtx_REG (mode, regno);
7903 }
7904 else if (POINTER_TYPE_P (valtype))
7905 {
7906 /* Pointers are always returned in word_mode. */
7907 mode = word_mode;
7908 }
7909
7910 ret = construct_container (mode, orig_mode, valtype, 1,
7911 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
7912 x86_64_int_return_registers, 0);
7913
7914 /* For zero sized structures, construct_container returns NULL, but we need
7915 to keep the rest of the compiler happy by returning a meaningful value. */
7916 if (!ret)
7917 ret = gen_rtx_REG (orig_mode, AX_REG);
7918
7919 return ret;
7920 }
7921
7922 static rtx
7923 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode,
7924 const_tree valtype)
7925 {
7926 unsigned int regno = AX_REG;
7927
7928 if (TARGET_SSE)
7929 {
7930 switch (GET_MODE_SIZE (mode))
7931 {
7932 case 16:
7933 if (valtype != NULL_TREE
7934 && !VECTOR_INTEGER_TYPE_P (valtype)
7936 && !INTEGRAL_TYPE_P (valtype)
7937 && !VECTOR_FLOAT_TYPE_P (valtype))
7938 break;
7939 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
7940 && !COMPLEX_MODE_P (mode))
7941 regno = FIRST_SSE_REG;
7942 break;
7943 case 8:
7944 case 4:
7945 if (mode == SFmode || mode == DFmode)
7946 regno = FIRST_SSE_REG;
7947 break;
7948 default:
7949 break;
7950 }
7951 }
7952 return gen_rtx_REG (orig_mode, regno);
7953 }
7954
7955 static rtx
7956 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
7957 enum machine_mode orig_mode, enum machine_mode mode)
7958 {
7959 const_tree fn, fntype;
7960
7961 fn = NULL_TREE;
7962 if (fntype_or_decl && DECL_P (fntype_or_decl))
7963 fn = fntype_or_decl;
7964 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
7965
7966 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
7967 return function_value_ms_64 (orig_mode, mode, valtype);
7968 else if (TARGET_64BIT)
7969 return function_value_64 (orig_mode, mode, valtype);
7970 else
7971 return function_value_32 (orig_mode, mode, fntype, fn);
7972 }
7973
7974 static rtx
7975 ix86_function_value (const_tree valtype, const_tree fntype_or_decl, bool)
7976 {
7977 enum machine_mode mode, orig_mode;
7978
7979 orig_mode = TYPE_MODE (valtype);
7980 mode = type_natural_mode (valtype, NULL, true);
7981 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
7982 }
7983
7984 /* Pointer function arguments and return values are promoted to
7985 word_mode. */
7986
7987 static enum machine_mode
7988 ix86_promote_function_mode (const_tree type, enum machine_mode mode,
7989 int *punsignedp, const_tree fntype,
7990 int for_return)
7991 {
7992 if (type != NULL_TREE && POINTER_TYPE_P (type))
7993 {
7994 *punsignedp = POINTERS_EXTEND_UNSIGNED;
7995 return word_mode;
7996 }
7997 return default_promote_function_mode (type, mode, punsignedp, fntype,
7998 for_return);
7999 }
8000
8001 /* Return true if a structure, union or array with MODE containing FIELD
8002 should be accessed using BLKmode. */
8003
8004 static bool
8005 ix86_member_type_forces_blk (const_tree field, enum machine_mode mode)
8006 {
8007 /* Union with XFmode must be in BLKmode. */
8008 return (mode == XFmode
8009 && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
8010 || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
8011 }
8012
8013 rtx
8014 ix86_libcall_value (enum machine_mode mode)
8015 {
8016 return ix86_function_value_1 (NULL, NULL, mode, mode);
8017 }
8018
8019 /* Return true iff type is returned in memory. */
8020
8021 static bool
8022 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
8023 {
8024 #ifdef SUBTARGET_RETURN_IN_MEMORY
8025 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
8026 #else
8027 const enum machine_mode mode = type_natural_mode (type, NULL, true);
8028 HOST_WIDE_INT size;
8029
8030 if (TARGET_64BIT)
8031 {
8032 if (ix86_function_type_abi (fntype) == MS_ABI)
8033 {
8034 size = int_size_in_bytes (type);
8035
8036 /* __m128 is returned in xmm0. */
8037 if ((!type || VECTOR_INTEGER_TYPE_P (type)
8038 || INTEGRAL_TYPE_P (type)
8039 || VECTOR_FLOAT_TYPE_P (type))
8040 && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
8041 && !COMPLEX_MODE_P (mode)
8042 && (GET_MODE_SIZE (mode) == 16 || size == 16))
8043 return false;
8044
8045 /* Otherwise, the size must be exactly 1, 2, 4 or 8. */
8046 return size != 1 && size != 2 && size != 4 && size != 8;
8047 }
8048 else
8049 {
8050 int needed_intregs, needed_sseregs;
8051
8052 return examine_argument (mode, type, 1,
8053 &needed_intregs, &needed_sseregs);
8054 }
8055 }
8056 else
8057 {
8058 if (mode == BLKmode)
8059 return true;
8060
8061 size = int_size_in_bytes (type);
8062
8063 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
8064 return false;
8065
8066 if (VECTOR_MODE_P (mode) || mode == TImode)
8067 {
8068 /* User-created vectors small enough to fit in EAX. */
8069 if (size < 8)
8070 return false;
8071
8072 /* Unless the ABI prescribes otherwise,
8073 MMX/3dNow values are returned in MM0 if available. */
8074
8075 if (size == 8)
8076 return TARGET_VECT8_RETURNS || !TARGET_MMX;
8077
8078 /* SSE values are returned in XMM0 if available. */
8079 if (size == 16)
8080 return !TARGET_SSE;
8081
8082 /* AVX values are returned in YMM0 if available. */
8083 if (size == 32)
8084 return !TARGET_AVX;
8085
8086 /* AVX512F values are returned in ZMM0 if available. */
8087 if (size == 64)
8088 return !TARGET_AVX512F;
8089 }
8090
8091 if (mode == XFmode)
8092 return false;
8093
8094 if (size > 12)
8095 return true;
8096
8097 /* OImode shouldn't be used directly. */
8098 gcc_assert (mode != OImode);
8099
8100 return false;
8101 }
8102 #endif
8103 }
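
/* Illustration (example type invented for exposition, not part of the
   original sources): with the 32-bit rules above, a 16-byte aggregate

	struct big { int a, b, c, d; };

   is returned in memory (BLKmode), whereas an __m128 value is returned in
   %xmm0 when SSE is enabled and only falls back to memory under
   -mno-sse.  */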
8104
8105 \f
8106 /* Create the va_list data type. */
8107
8108 /* Returns the calling-convention-specific va_list data type.
8109 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
8110
8111 static tree
8112 ix86_build_builtin_va_list_abi (enum calling_abi abi)
8113 {
8114 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
8115
8116 /* For i386 we use a plain pointer to the argument area. */
8117 if (!TARGET_64BIT || abi == MS_ABI)
8118 return build_pointer_type (char_type_node);
8119
8120 record = lang_hooks.types.make_type (RECORD_TYPE);
8121 type_decl = build_decl (BUILTINS_LOCATION,
8122 TYPE_DECL, get_identifier ("__va_list_tag"), record);
8123
8124 f_gpr = build_decl (BUILTINS_LOCATION,
8125 FIELD_DECL, get_identifier ("gp_offset"),
8126 unsigned_type_node);
8127 f_fpr = build_decl (BUILTINS_LOCATION,
8128 FIELD_DECL, get_identifier ("fp_offset"),
8129 unsigned_type_node);
8130 f_ovf = build_decl (BUILTINS_LOCATION,
8131 FIELD_DECL, get_identifier ("overflow_arg_area"),
8132 ptr_type_node);
8133 f_sav = build_decl (BUILTINS_LOCATION,
8134 FIELD_DECL, get_identifier ("reg_save_area"),
8135 ptr_type_node);
8136
8137 va_list_gpr_counter_field = f_gpr;
8138 va_list_fpr_counter_field = f_fpr;
8139
8140 DECL_FIELD_CONTEXT (f_gpr) = record;
8141 DECL_FIELD_CONTEXT (f_fpr) = record;
8142 DECL_FIELD_CONTEXT (f_ovf) = record;
8143 DECL_FIELD_CONTEXT (f_sav) = record;
8144
8145 TYPE_STUB_DECL (record) = type_decl;
8146 TYPE_NAME (record) = type_decl;
8147 TYPE_FIELDS (record) = f_gpr;
8148 DECL_CHAIN (f_gpr) = f_fpr;
8149 DECL_CHAIN (f_fpr) = f_ovf;
8150 DECL_CHAIN (f_ovf) = f_sav;
8151
8152 layout_type (record);
8153
8154 /* The correct type is an array type of one element. */
8155 return build_array_type (record, build_index_type (size_zero_node));
8156 }
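
/* Illustration (not part of the original sources): the record built above
   corresponds to the C-level SysV x86-64 va_list

	typedef struct __va_list_tag {
	    unsigned int gp_offset;
	    unsigned int fp_offset;
	    void *overflow_arg_area;
	    void *reg_save_area;
	} va_list[1];

   i.e. an array of one element, as the psABI defines it.  */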
8157
8158 /* Set up the builtin va_list data type and, for 64-bit, the additional
8159 calling-convention-specific va_list data types. */
8160
8161 static tree
8162 ix86_build_builtin_va_list (void)
8163 {
8164 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
8165
8166 /* Initialize abi specific va_list builtin types. */
8167 if (TARGET_64BIT)
8168 {
8169 tree t;
8170 if (ix86_abi == MS_ABI)
8171 {
8172 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
8173 if (TREE_CODE (t) != RECORD_TYPE)
8174 t = build_variant_type_copy (t);
8175 sysv_va_list_type_node = t;
8176 }
8177 else
8178 {
8179 t = ret;
8180 if (TREE_CODE (t) != RECORD_TYPE)
8181 t = build_variant_type_copy (t);
8182 sysv_va_list_type_node = t;
8183 }
8184 if (ix86_abi != MS_ABI)
8185 {
8186 t = ix86_build_builtin_va_list_abi (MS_ABI);
8187 if (TREE_CODE (t) != RECORD_TYPE)
8188 t = build_variant_type_copy (t);
8189 ms_va_list_type_node = t;
8190 }
8191 else
8192 {
8193 t = ret;
8194 if (TREE_CODE (t) != RECORD_TYPE)
8195 t = build_variant_type_copy (t);
8196 ms_va_list_type_node = t;
8197 }
8198 }
8199
8200 return ret;
8201 }
8202
8203 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
8204
8205 static void
8206 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
8207 {
8208 rtx save_area, mem;
8209 alias_set_type set;
8210 int i, max;
8211
8212 /* GPR size of varargs save area. */
8213 if (cfun->va_list_gpr_size)
8214 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
8215 else
8216 ix86_varargs_gpr_size = 0;
8217
8218 /* FPR size of varargs save area. We don't need it if we don't pass
8219 anything in SSE registers. */
8220 if (TARGET_SSE && cfun->va_list_fpr_size)
8221 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
8222 else
8223 ix86_varargs_fpr_size = 0;
8224
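/* With the usual SysV limits (X86_64_REGPARM_MAX == 6 integer registers and
   X86_64_SSE_REGPARM_MAX == 8 SSE registers), the full save area is
   6 * 8 + 8 * 16 = 176 bytes: the GP slots come first, followed by the
   16-byte SSE slots saved below.  */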
8225 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
8226 return;
8227
8228 save_area = frame_pointer_rtx;
8229 set = get_varargs_alias_set ();
8230
8231 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
8232 if (max > X86_64_REGPARM_MAX)
8233 max = X86_64_REGPARM_MAX;
8234
8235 for (i = cum->regno; i < max; i++)
8236 {
8237 mem = gen_rtx_MEM (word_mode,
8238 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
8239 MEM_NOTRAP_P (mem) = 1;
8240 set_mem_alias_set (mem, set);
8241 emit_move_insn (mem,
8242 gen_rtx_REG (word_mode,
8243 x86_64_int_parameter_registers[i]));
8244 }
8245
8246 if (ix86_varargs_fpr_size)
8247 {
8248 enum machine_mode smode;
8249 rtx label, test;
8250
8251 /* Now emit code to save SSE registers. The AX parameter contains the
8252 number of SSE parameter registers used to call this function, though
8253 all we actually check here is the zero/non-zero status. */
8254
8255 label = gen_label_rtx ();
8256 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
8257 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
8258 label));
8259
8260 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
8261 we used movdqa (i.e. TImode) instead? Perhaps even better would
8262 be if we could determine the real mode of the data, via a hook
8263 into pass_stdarg. Ignore all that for now. */
8264 smode = V4SFmode;
8265 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
8266 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
8267
8268 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
8269 if (max > X86_64_SSE_REGPARM_MAX)
8270 max = X86_64_SSE_REGPARM_MAX;
8271
8272 for (i = cum->sse_regno; i < max; ++i)
8273 {
8274 mem = plus_constant (Pmode, save_area,
8275 i * 16 + ix86_varargs_gpr_size);
8276 mem = gen_rtx_MEM (smode, mem);
8277 MEM_NOTRAP_P (mem) = 1;
8278 set_mem_alias_set (mem, set);
8279 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
8280
8281 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
8282 }
8283
8284 emit_label (label);
8285 }
8286 }
8287
8288 static void
8289 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
8290 {
8291 alias_set_type set = get_varargs_alias_set ();
8292 int i;
8293
8294 /* Reset to zero, as a SysV va_arg save area might have been set up
8295 before. */
8296 ix86_varargs_gpr_size = 0;
8297 ix86_varargs_fpr_size = 0;
8298
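/* The MS ABI reserves a caller-allocated home area (shadow space) above the
   return address for the four register parameters (rcx, rdx, r8, r9); the
   loop below spills the remaining unnamed register arguments into their home
   slots so va_arg can walk them as ordinary stack arguments.  */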
8299 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
8300 {
8301 rtx reg, mem;
8302
8303 mem = gen_rtx_MEM (Pmode,
8304 plus_constant (Pmode, virtual_incoming_args_rtx,
8305 i * UNITS_PER_WORD));
8306 MEM_NOTRAP_P (mem) = 1;
8307 set_mem_alias_set (mem, set);
8308
8309 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
8310 emit_move_insn (mem, reg);
8311 }
8312 }
8313
8314 static void
8315 ix86_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
8316 tree type, int *, int no_rtl)
8317 {
8318 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8319 CUMULATIVE_ARGS next_cum;
8320 tree fntype;
8321
8322 /* This argument doesn't appear to be used anymore, which is good,
8323 because the old code here didn't suppress rtl generation. */
8324 gcc_assert (!no_rtl);
8325
8326 if (!TARGET_64BIT)
8327 return;
8328
8329 fntype = TREE_TYPE (current_function_decl);
8330
8331 /* For varargs, we do not want to skip the dummy va_dcl argument.
8332 For stdargs, we do want to skip the last named argument. */
8333 next_cum = *cum;
8334 if (stdarg_p (fntype))
8335 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
8336 true);
8337
8338 if (cum->call_abi == MS_ABI)
8339 setup_incoming_varargs_ms_64 (&next_cum);
8340 else
8341 setup_incoming_varargs_64 (&next_cum);
8342 }
8343
8344 /* Return true if TYPE is a va_list type that is just a plain char pointer. */
8345
8346 static bool
8347 is_va_list_char_pointer (tree type)
8348 {
8349 tree canonic;
8350
8351 /* For 32-bit it is always true. */
8352 if (!TARGET_64BIT)
8353 return true;
8354 canonic = ix86_canonical_va_list_type (type);
8355 return (canonic == ms_va_list_type_node
8356 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
8357 }
8358
8359 /* Implement va_start. */
8360
8361 static void
8362 ix86_va_start (tree valist, rtx nextarg)
8363 {
8364 HOST_WIDE_INT words, n_gpr, n_fpr;
8365 tree f_gpr, f_fpr, f_ovf, f_sav;
8366 tree gpr, fpr, ovf, sav, t;
8367 tree type;
8368 rtx ovf_rtx;
8369
8370 if (flag_split_stack
8371 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8372 {
8373 unsigned int scratch_regno;
8374
8375 /* When we are splitting the stack, we can't refer to the stack
8376 arguments using internal_arg_pointer, because they may be on
8377 the old stack. The split stack prologue will arrange to
8378 leave a pointer to the old stack arguments in a scratch
8379 register, which we here copy to a pseudo-register. The split
8380 stack prologue can't set the pseudo-register directly because
8381 it (the prologue) runs before any registers have been saved. */
8382
8383 scratch_regno = split_stack_prologue_scratch_regno ();
8384 if (scratch_regno != INVALID_REGNUM)
8385 {
8386 rtx reg, seq;
8387
8388 reg = gen_reg_rtx (Pmode);
8389 cfun->machine->split_stack_varargs_pointer = reg;
8390
8391 start_sequence ();
8392 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
8393 seq = get_insns ();
8394 end_sequence ();
8395
8396 push_topmost_sequence ();
8397 emit_insn_after (seq, entry_of_function ());
8398 pop_topmost_sequence ();
8399 }
8400 }
8401
8402 /* Only 64-bit targets need something special. */
8403 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8404 {
8405 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8406 std_expand_builtin_va_start (valist, nextarg);
8407 else
8408 {
8409 rtx va_r, next;
8410
8411 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
8412 next = expand_binop (ptr_mode, add_optab,
8413 cfun->machine->split_stack_varargs_pointer,
8414 crtl->args.arg_offset_rtx,
8415 NULL_RTX, 0, OPTAB_LIB_WIDEN);
8416 convert_move (va_r, next, 0);
8417 }
8418 return;
8419 }
8420
8421 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8422 f_fpr = DECL_CHAIN (f_gpr);
8423 f_ovf = DECL_CHAIN (f_fpr);
8424 f_sav = DECL_CHAIN (f_ovf);
8425
8426 valist = build_simple_mem_ref (valist);
8427 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
8428 /* The following should be folded into the MEM_REF offset. */
8429 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
8430 f_gpr, NULL_TREE);
8431 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
8432 f_fpr, NULL_TREE);
8433 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
8434 f_ovf, NULL_TREE);
8435 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
8436 f_sav, NULL_TREE);
8437
8438 /* Count number of gp and fp argument registers used. */
8439 words = crtl->args.info.words;
8440 n_gpr = crtl->args.info.regno;
8441 n_fpr = crtl->args.info.sse_regno;
8442
8443 if (cfun->va_list_gpr_size)
8444 {
8445 type = TREE_TYPE (gpr);
8446 t = build2 (MODIFY_EXPR, type,
8447 gpr, build_int_cst (type, n_gpr * 8));
8448 TREE_SIDE_EFFECTS (t) = 1;
8449 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8450 }
8451
8452 if (TARGET_SSE && cfun->va_list_fpr_size)
8453 {
8454 type = TREE_TYPE (fpr);
8455 t = build2 (MODIFY_EXPR, type, fpr,
8456 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
8457 TREE_SIDE_EFFECTS (t) = 1;
8458 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8459 }
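/* Note the offsets stored above: gp_offset counts 8 bytes per integer
   register already consumed by named arguments, and fp_offset starts past
   all the GP slots (8 * X86_64_REGPARM_MAX bytes) and then counts 16 bytes
   per SSE register already consumed, matching the save-area layout emitted
   by setup_incoming_varargs_64.  */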
8460
8461 /* Find the overflow area. */
8462 type = TREE_TYPE (ovf);
8463 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8464 ovf_rtx = crtl->args.internal_arg_pointer;
8465 else
8466 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
8467 t = make_tree (type, ovf_rtx);
8468 if (words != 0)
8469 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
8470 t = build2 (MODIFY_EXPR, type, ovf, t);
8471 TREE_SIDE_EFFECTS (t) = 1;
8472 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8473
8474 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
8475 {
8476 /* Find the register save area.
8477 The prologue of the function saves it right above the stack frame. */
8478 type = TREE_TYPE (sav);
8479 t = make_tree (type, frame_pointer_rtx);
8480 if (!ix86_varargs_gpr_size)
8481 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
8482 t = build2 (MODIFY_EXPR, type, sav, t);
8483 TREE_SIDE_EFFECTS (t) = 1;
8484 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8485 }
8486 }
8487
8488 /* Implement va_arg. */
8489
8490 static tree
8491 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
8492 gimple_seq *post_p)
8493 {
8494 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
8495 tree f_gpr, f_fpr, f_ovf, f_sav;
8496 tree gpr, fpr, ovf, sav, t;
8497 int size, rsize;
8498 tree lab_false, lab_over = NULL_TREE;
8499 tree addr, t2;
8500 rtx container;
8501 int indirect_p = 0;
8502 tree ptrtype;
8503 enum machine_mode nat_mode;
8504 unsigned int arg_boundary;
8505
8506 /* Only 64-bit targets need something special. */
8507 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8508 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
8509
8510 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8511 f_fpr = DECL_CHAIN (f_gpr);
8512 f_ovf = DECL_CHAIN (f_fpr);
8513 f_sav = DECL_CHAIN (f_ovf);
8514
8515 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
8516 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
8517 valist = build_va_arg_indirect_ref (valist);
8518 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
8519 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
8520 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
8521
8522 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
8523 if (indirect_p)
8524 type = build_pointer_type (type);
8525 size = int_size_in_bytes (type);
8526 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
8527
8528 nat_mode = type_natural_mode (type, NULL, false);
8529 switch (nat_mode)
8530 {
8531 case V8SFmode:
8532 case V8SImode:
8533 case V32QImode:
8534 case V16HImode:
8535 case V4DFmode:
8536 case V4DImode:
8537 case V16SFmode:
8538 case V16SImode:
8539 case V64QImode:
8540 case V32HImode:
8541 case V8DFmode:
8542 case V8DImode:
8543 /* Unnamed 256- and 512-bit vector mode parameters are passed on the stack. */
8544 if (!TARGET_64BIT_MS_ABI)
8545 {
8546 container = NULL;
8547 break;
8548 }
8549
8550 default:
8551 container = construct_container (nat_mode, TYPE_MODE (type),
8552 type, 0, X86_64_REGPARM_MAX,
8553 X86_64_SSE_REGPARM_MAX, intreg,
8554 0);
8555 break;
8556 }
8557
8558 /* Pull the value out of the saved registers. */
8559
8560 addr = create_tmp_var (ptr_type_node, "addr");
8561
8562 if (container)
8563 {
8564 int needed_intregs, needed_sseregs;
8565 bool need_temp;
8566 tree int_addr, sse_addr;
8567
8568 lab_false = create_artificial_label (UNKNOWN_LOCATION);
8569 lab_over = create_artificial_label (UNKNOWN_LOCATION);
8570
8571 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
8572
8573 need_temp = (!REG_P (container)
8574 && ((needed_intregs && TYPE_ALIGN (type) > 64)
8575 || TYPE_ALIGN (type) > 128));
8576
8577 /* When passing a structure, verify that it occupies a consecutive block
8578 in the register save area. If not, we need to do moves. */
8579 if (!need_temp && !REG_P (container))
8580 {
8581 /* Verify that all registers are strictly consecutive. */
8582 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
8583 {
8584 int i;
8585
8586 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
8587 {
8588 rtx slot = XVECEXP (container, 0, i);
8589 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
8590 || INTVAL (XEXP (slot, 1)) != i * 16)
8591 need_temp = 1;
8592 }
8593 }
8594 else
8595 {
8596 int i;
8597
8598 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
8599 {
8600 rtx slot = XVECEXP (container, 0, i);
8601 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
8602 || INTVAL (XEXP (slot, 1)) != i * 8)
8603 need_temp = 1;
8604 }
8605 }
8606 }
8607 if (!need_temp)
8608 {
8609 int_addr = addr;
8610 sse_addr = addr;
8611 }
8612 else
8613 {
8614 int_addr = create_tmp_var (ptr_type_node, "int_addr");
8615 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
8616 }
8617
8618 /* First ensure that we fit completely in registers. */
8619 if (needed_intregs)
8620 {
8621 t = build_int_cst (TREE_TYPE (gpr),
8622 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
8623 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
8624 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
8625 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
8626 gimplify_and_add (t, pre_p);
8627 }
8628 if (needed_sseregs)
8629 {
8630 t = build_int_cst (TREE_TYPE (fpr),
8631 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
8632 + X86_64_REGPARM_MAX * 8);
8633 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
8634 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
8635 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
8636 gimplify_and_add (t, pre_p);
8637 }
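/* The two GE_EXPR tests above check whether the argument still fits in
   registers: control jumps to lab_false (the overflow-area path below) when
   gp_offset >= (X86_64_REGPARM_MAX - needed_intregs + 1) * 8, or when
   fp_offset >= X86_64_REGPARM_MAX * 8
                + (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16,
   i.e. when not enough unused register-save slots remain.  */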
8638
8639 /* Compute index to start of area used for integer regs. */
8640 if (needed_intregs)
8641 {
8642 /* int_addr = gpr + sav; */
8643 t = fold_build_pointer_plus (sav, gpr);
8644 gimplify_assign (int_addr, t, pre_p);
8645 }
8646 if (needed_sseregs)
8647 {
8648 /* sse_addr = fpr + sav; */
8649 t = fold_build_pointer_plus (sav, fpr);
8650 gimplify_assign (sse_addr, t, pre_p);
8651 }
8652 if (need_temp)
8653 {
8654 int i, prev_size = 0;
8655 tree temp = create_tmp_var (type, "va_arg_tmp");
8656
8657 /* addr = &temp; */
8658 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
8659 gimplify_assign (addr, t, pre_p);
8660
8661 for (i = 0; i < XVECLEN (container, 0); i++)
8662 {
8663 rtx slot = XVECEXP (container, 0, i);
8664 rtx reg = XEXP (slot, 0);
8665 enum machine_mode mode = GET_MODE (reg);
8666 tree piece_type;
8667 tree addr_type;
8668 tree daddr_type;
8669 tree src_addr, src;
8670 int src_offset;
8671 tree dest_addr, dest;
8672 int cur_size = GET_MODE_SIZE (mode);
8673
8674 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
8675 prev_size = INTVAL (XEXP (slot, 1));
8676 if (prev_size + cur_size > size)
8677 {
8678 cur_size = size - prev_size;
8679 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
8680 if (mode == BLKmode)
8681 mode = QImode;
8682 }
8683 piece_type = lang_hooks.types.type_for_mode (mode, 1);
8684 if (mode == GET_MODE (reg))
8685 addr_type = build_pointer_type (piece_type);
8686 else
8687 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8688 true);
8689 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8690 true);
8691
8692 if (SSE_REGNO_P (REGNO (reg)))
8693 {
8694 src_addr = sse_addr;
8695 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
8696 }
8697 else
8698 {
8699 src_addr = int_addr;
8700 src_offset = REGNO (reg) * 8;
8701 }
8702 src_addr = fold_convert (addr_type, src_addr);
8703 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
8704
8705 dest_addr = fold_convert (daddr_type, addr);
8706 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
8707 if (cur_size == GET_MODE_SIZE (mode))
8708 {
8709 src = build_va_arg_indirect_ref (src_addr);
8710 dest = build_va_arg_indirect_ref (dest_addr);
8711
8712 gimplify_assign (dest, src, pre_p);
8713 }
8714 else
8715 {
8716 tree copy
8717 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
8718 3, dest_addr, src_addr,
8719 size_int (cur_size));
8720 gimplify_and_add (copy, pre_p);
8721 }
8722 prev_size += cur_size;
8723 }
8724 }
8725
8726 if (needed_intregs)
8727 {
8728 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
8729 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
8730 gimplify_assign (gpr, t, pre_p);
8731 }
8732
8733 if (needed_sseregs)
8734 {
8735 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
8736 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
8737 gimplify_assign (fpr, t, pre_p);
8738 }
8739
8740 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
8741
8742 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
8743 }
8744
8745 /* ... otherwise out of the overflow area. */
8746
8747 /* When the caller aligns a parameter on the stack, a parameter whose
8748 alignment exceeds MAX_SUPPORTED_STACK_ALIGNMENT is only aligned to
8749 MAX_SUPPORTED_STACK_ALIGNMENT. Match the caller's behavior here in
8750 the callee. */
8751 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
8752 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
8753 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
8754
8755 /* Care for on-stack alignment if needed. */
8756 if (arg_boundary <= 64 || size == 0)
8757 t = ovf;
8758 else
8759 {
8760 HOST_WIDE_INT align = arg_boundary / 8;
8761 t = fold_build_pointer_plus_hwi (ovf, align - 1);
8762 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
8763 build_int_cst (TREE_TYPE (t), -align));
8764 }
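/* The computation above is the usual round-up-to-alignment idiom; for
   example, for a 32-byte aligned argument, addr = (ovf + 31) & -32.  */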
8765
8766 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
8767 gimplify_assign (addr, t, pre_p);
8768
8769 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
8770 gimplify_assign (unshare_expr (ovf), t, pre_p);
8771
8772 if (container)
8773 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
8774
8775 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
8776 addr = fold_convert (ptrtype, addr);
8777
8778 if (indirect_p)
8779 addr = build_va_arg_indirect_ref (addr);
8780 return build_va_arg_indirect_ref (addr);
8781 }
8782 \f
8783 /* Return true if OPNUM's MEM should be matched
8784 in movabs* patterns. */
8785
8786 bool
8787 ix86_check_movabs (rtx insn, int opnum)
8788 {
8789 rtx set, mem;
8790
8791 set = PATTERN (insn);
8792 if (GET_CODE (set) == PARALLEL)
8793 set = XVECEXP (set, 0, 0);
8794 gcc_assert (GET_CODE (set) == SET);
8795 mem = XEXP (set, opnum);
8796 while (GET_CODE (mem) == SUBREG)
8797 mem = SUBREG_REG (mem);
8798 gcc_assert (MEM_P (mem));
8799 return volatile_ok || !MEM_VOLATILE_P (mem);
8800 }
8801 \f
8802 /* Initialize the table of extra 80387 mathematical constants. */
8803
8804 static void
8805 init_ext_80387_constants (void)
8806 {
8807 static const char * cst[5] =
8808 {
8809 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
8810 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
8811 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
8812 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
8813 "3.1415926535897932385128089594061862044", /* 4: fldpi */
8814 };
8815 int i;
8816
8817 for (i = 0; i < 5; i++)
8818 {
8819 real_from_string (&ext_80387_constants_table[i], cst[i]);
8820 /* Ensure each constant is rounded to XFmode precision. */
8821 real_convert (&ext_80387_constants_table[i],
8822 XFmode, &ext_80387_constants_table[i]);
8823 }
8824
8825 ext_80387_constants_init = 1;
8826 }
8827
8828 /* Return non-zero if the constant is something that
8829 can be loaded with a special instruction. */
8830
8831 int
8832 standard_80387_constant_p (rtx x)
8833 {
8834 enum machine_mode mode = GET_MODE (x);
8835
8836 REAL_VALUE_TYPE r;
8837
8838 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
8839 return -1;
8840
8841 if (x == CONST0_RTX (mode))
8842 return 1;
8843 if (x == CONST1_RTX (mode))
8844 return 2;
8845
8846 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8847
8848 /* For XFmode constants, try to find a special 80387 instruction when
8849 optimizing for size or on those CPUs that benefit from them. */
8850 if (mode == XFmode
8851 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
8852 {
8853 int i;
8854
8855 if (! ext_80387_constants_init)
8856 init_ext_80387_constants ();
8857
8858 for (i = 0; i < 5; i++)
8859 if (real_identical (&r, &ext_80387_constants_table[i]))
8860 return i + 3;
8861 }
8862
8863 /* Load of the constant -0.0 or -1.0 will be split as
8864 fldz;fchs or fld1;fchs sequence. */
8865 if (real_isnegzero (&r))
8866 return 8;
8867 if (real_identical (&r, &dconstm1))
8868 return 9;
8869
8870 return 0;
8871 }
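/* The non-zero return values above encode which load sequence to use:
   1 = fldz, 2 = fld1, 3..7 = the ext_80387_constants_table entries
   (fldlg2, fldln2, fldl2e, fldl2t, fldpi), and 8/9 = constants split into
   fldz;fchs or fld1;fchs.  See standard_80387_constant_opcode below.  */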
8872
8873 /* Return the opcode of the special instruction to be used to load
8874 the constant X. */
8875
8876 const char *
8877 standard_80387_constant_opcode (rtx x)
8878 {
8879 switch (standard_80387_constant_p (x))
8880 {
8881 case 1:
8882 return "fldz";
8883 case 2:
8884 return "fld1";
8885 case 3:
8886 return "fldlg2";
8887 case 4:
8888 return "fldln2";
8889 case 5:
8890 return "fldl2e";
8891 case 6:
8892 return "fldl2t";
8893 case 7:
8894 return "fldpi";
8895 case 8:
8896 case 9:
8897 return "#";
8898 default:
8899 gcc_unreachable ();
8900 }
8901 }
8902
8903 /* Return the CONST_DOUBLE representing the 80387 constant that is
8904 loaded by the specified special instruction. The argument IDX
8905 matches the return value from standard_80387_constant_p. */
8906
8907 rtx
8908 standard_80387_constant_rtx (int idx)
8909 {
8910 int i;
8911
8912 if (! ext_80387_constants_init)
8913 init_ext_80387_constants ();
8914
8915 switch (idx)
8916 {
8917 case 3:
8918 case 4:
8919 case 5:
8920 case 6:
8921 case 7:
8922 i = idx - 3;
8923 break;
8924
8925 default:
8926 gcc_unreachable ();
8927 }
8928
8929 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
8930 XFmode);
8931 }
8932
8933 /* Return 1 if X is all 0s and 2 if X is all 1s
8934 in a supported SSE/AVX vector mode. */
8935
8936 int
8937 standard_sse_constant_p (rtx x)
8938 {
8939 enum machine_mode mode = GET_MODE (x);
8940
8941 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
8942 return 1;
8943 if (vector_all_ones_operand (x, mode))
8944 switch (mode)
8945 {
8946 case V16QImode:
8947 case V8HImode:
8948 case V4SImode:
8949 case V2DImode:
8950 if (TARGET_SSE2)
8951 return 2;
8952 case V32QImode:
8953 case V16HImode:
8954 case V8SImode:
8955 case V4DImode:
8956 if (TARGET_AVX2)
8957 return 2;
8958 case V64QImode:
8959 case V32HImode:
8960 case V16SImode:
8961 case V8DImode:
8962 if (TARGET_AVX512F)
8963 return 2;
8964 default:
8965 break;
8966 }
8967
8968 return 0;
8969 }
8970
8971 /* Return the opcode of the special instruction to be used to load
8972 the constant X. */
8973
8974 const char *
8975 standard_sse_constant_opcode (rtx insn, rtx x)
8976 {
8977 switch (standard_sse_constant_p (x))
8978 {
8979 case 1:
8980 switch (get_attr_mode (insn))
8981 {
8982 case MODE_XI:
8983 case MODE_V16SF:
8984 return "vpxord\t%g0, %g0, %g0";
8985 case MODE_V8DF:
8986 return "vpxorq\t%g0, %g0, %g0";
8987 case MODE_TI:
8988 return "%vpxor\t%0, %d0";
8989 case MODE_V2DF:
8990 return "%vxorpd\t%0, %d0";
8991 case MODE_V4SF:
8992 return "%vxorps\t%0, %d0";
8993
8994 case MODE_OI:
8995 return "vpxor\t%x0, %x0, %x0";
8996 case MODE_V4DF:
8997 return "vxorpd\t%x0, %x0, %x0";
8998 case MODE_V8SF:
8999 return "vxorps\t%x0, %x0, %x0";
9000
9001 default:
9002 break;
9003 }
9004
9005 case 2:
9006 if (get_attr_mode (insn) == MODE_XI
9007 || get_attr_mode (insn) == MODE_V8DF
9008 || get_attr_mode (insn) == MODE_V16SF)
9009 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
9010 if (TARGET_AVX)
9011 return "vpcmpeqd\t%0, %0, %0";
9012 else
9013 return "pcmpeqd\t%0, %0";
9014
9015 default:
9016 break;
9017 }
9018 gcc_unreachable ();
9019 }
9020
9021 /* Return true if OP contains a symbol reference. */
9022
9023 bool
9024 symbolic_reference_mentioned_p (rtx op)
9025 {
9026 const char *fmt;
9027 int i;
9028
9029 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
9030 return true;
9031
9032 fmt = GET_RTX_FORMAT (GET_CODE (op));
9033 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
9034 {
9035 if (fmt[i] == 'E')
9036 {
9037 int j;
9038
9039 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
9040 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
9041 return true;
9042 }
9043
9044 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
9045 return true;
9046 }
9047
9048 return false;
9049 }
9050
9051 /* Return true if it is appropriate to emit `ret' instructions in the
9052 body of a function. Do this only if the epilogue is simple, needing a
9053 couple of insns. Prior to reloading, we can't tell how many registers
9054 must be saved, so return false then. Return false if there is no frame
9055 marker to de-allocate. */
9056
9057 bool
9058 ix86_can_use_return_insn_p (void)
9059 {
9060 struct ix86_frame frame;
9061
9062 if (! reload_completed || frame_pointer_needed)
9063 return 0;
9064
9065 /* Don't allow more than 32k pop, since that's all we can do
9066 with one instruction. */
9067 if (crtl->args.pops_args && crtl->args.size >= 32768)
9068 return 0;
9069
9070 ix86_compute_frame_layout (&frame);
9071 return (frame.stack_pointer_offset == UNITS_PER_WORD
9072 && (frame.nregs + frame.nsseregs) == 0);
9073 }
9074 \f
9075 /* Value should be nonzero if functions must have frame pointers.
9076 Zero means the frame pointer need not be set up (and parms may
9077 be accessed via the stack pointer) in functions that seem suitable. */
9078
9079 static bool
9080 ix86_frame_pointer_required (void)
9081 {
9082 /* If we accessed previous frames, then the generated code expects
9083 to be able to access the saved ebp value in our frame. */
9084 if (cfun->machine->accesses_prev_frame)
9085 return true;
9086
9087 /* Several x86 OSes need a frame pointer for other reasons,
9088 usually pertaining to setjmp. */
9089 if (SUBTARGET_FRAME_POINTER_REQUIRED)
9090 return true;
9091
9092 /* For older 32-bit runtimes setjmp requires a valid frame pointer. */
9093 if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
9094 return true;
9095
9096 /* With Win64 SEH, very large frames need a frame pointer since the
9097 maximum stack allocation is 4GB. */
9098 if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
9099 return true;
9100
9101 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
9102 turns off the frame pointer by default. Turn it back on now if
9103 we've not got a leaf function. */
9104 if (TARGET_OMIT_LEAF_FRAME_POINTER
9105 && (!crtl->is_leaf
9106 || ix86_current_function_calls_tls_descriptor))
9107 return true;
9108
9109 if (crtl->profile && !flag_fentry)
9110 return true;
9111
9112 return false;
9113 }
9114
9115 /* Record that the current function accesses previous call frames. */
9116
9117 void
9118 ix86_setup_frame_addresses (void)
9119 {
9120 cfun->machine->accesses_prev_frame = 1;
9121 }
9122 \f
9123 #ifndef USE_HIDDEN_LINKONCE
9124 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
9125 # define USE_HIDDEN_LINKONCE 1
9126 # else
9127 # define USE_HIDDEN_LINKONCE 0
9128 # endif
9129 #endif
9130
9131 static int pic_labels_used;
9132
9133 /* Fills in the label name that should be used for a pc thunk for
9134 the given register. */
9135
9136 static void
9137 get_pc_thunk_name (char name[32], unsigned int regno)
9138 {
9139 gcc_assert (!TARGET_64BIT);
9140
9141 if (USE_HIDDEN_LINKONCE)
9142 sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
9143 else
9144 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
9145 }
9146
9147
9148 /* This function generates the pc thunks used by -fpic code; each thunk
9149 loads its register with the caller's return address and then returns. */
9150
9151 static void
9152 ix86_code_end (void)
9153 {
9154 rtx xops[2];
9155 int regno;
9156
9157 for (regno = AX_REG; regno <= SP_REG; regno++)
9158 {
9159 char name[32];
9160 tree decl;
9161
9162 if (!(pic_labels_used & (1 << regno)))
9163 continue;
9164
9165 get_pc_thunk_name (name, regno);
9166
9167 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
9168 get_identifier (name),
9169 build_function_type_list (void_type_node, NULL_TREE));
9170 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
9171 NULL_TREE, void_type_node);
9172 TREE_PUBLIC (decl) = 1;
9173 TREE_STATIC (decl) = 1;
9174 DECL_IGNORED_P (decl) = 1;
9175
9176 #if TARGET_MACHO
9177 if (TARGET_MACHO)
9178 {
9179 switch_to_section (darwin_sections[text_coal_section]);
9180 fputs ("\t.weak_definition\t", asm_out_file);
9181 assemble_name (asm_out_file, name);
9182 fputs ("\n\t.private_extern\t", asm_out_file);
9183 assemble_name (asm_out_file, name);
9184 putc ('\n', asm_out_file);
9185 ASM_OUTPUT_LABEL (asm_out_file, name);
9186 DECL_WEAK (decl) = 1;
9187 }
9188 else
9189 #endif
9190 if (USE_HIDDEN_LINKONCE)
9191 {
9192 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
9193
9194 targetm.asm_out.unique_section (decl, 0);
9195 switch_to_section (get_named_section (decl, NULL, 0));
9196
9197 targetm.asm_out.globalize_label (asm_out_file, name);
9198 fputs ("\t.hidden\t", asm_out_file);
9199 assemble_name (asm_out_file, name);
9200 putc ('\n', asm_out_file);
9201 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
9202 }
9203 else
9204 {
9205 switch_to_section (text_section);
9206 ASM_OUTPUT_LABEL (asm_out_file, name);
9207 }
9208
9209 DECL_INITIAL (decl) = make_node (BLOCK);
9210 current_function_decl = decl;
9211 init_function_start (decl);
9212 first_function_block_is_cold = false;
9213 /* Make sure unwind info is emitted for the thunk if needed. */
9214 final_start_function (emit_barrier (), asm_out_file, 1);
9215
9216 /* Pad stack IP move with 4 instructions (two NOPs count
9217 as one instruction). */
9218 if (TARGET_PAD_SHORT_FUNCTION)
9219 {
9220 int i = 8;
9221
9222 while (i--)
9223 fputs ("\tnop\n", asm_out_file);
9224 }
9225
9226 xops[0] = gen_rtx_REG (Pmode, regno);
9227 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
9228 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
9229 fputs ("\tret\n", asm_out_file);
9230 final_end_function ();
9231 init_insn_lengths ();
9232 free_after_compilation (cfun);
9233 set_cfun (NULL);
9234 current_function_decl = NULL;
9235 }
9236
9237 if (flag_split_stack)
9238 file_end_indicate_split_stack ();
9239 }
9240
9241 /* Emit code for the SET_GOT patterns. */
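/* For the common -fpic case on i386 with DEST == %ebx, the output below
   typically amounts to (an illustrative sketch of the emitted assembly):

	call	__x86.get_pc_thunk.bx
	addl	$_GLOBAL_OFFSET_TABLE_, %ebx

   The thunk (emitted by ix86_code_end above) loads the return address,
   i.e. the address following the call, into %ebx, and the assembler turns
   the _GLOBAL_OFFSET_TABLE_ operand into a PC-relative GOT offset.  */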
9242
9243 const char *
9244 output_set_got (rtx dest, rtx label)
9245 {
9246 rtx xops[3];
9247
9248 xops[0] = dest;
9249
9250 if (TARGET_VXWORKS_RTP && flag_pic)
9251 {
9252 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
9253 xops[2] = gen_rtx_MEM (Pmode,
9254 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
9255 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
9256
9257 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
9258 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
9259 an unadorned address. */
9260 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
9261 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
9262 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
9263 return "";
9264 }
9265
9266 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
9267
9268 if (!flag_pic)
9269 {
9270 if (TARGET_MACHO)
9271 /* We don't need a pic base, we're not producing pic. */
9272 gcc_unreachable ();
9273
9274 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
9275 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
9276 targetm.asm_out.internal_label (asm_out_file, "L",
9277 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
9278 }
9279 else
9280 {
9281 char name[32];
9282 get_pc_thunk_name (name, REGNO (dest));
9283 pic_labels_used |= 1 << REGNO (dest);
9284
9285 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
9286 xops[2] = gen_rtx_MEM (QImode, xops[2]);
9287 output_asm_insn ("call\t%X2", xops);
9288
9289 #if TARGET_MACHO
9290 /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
9291 This is what will be referenced by the Mach-O PIC subsystem. */
9292 if (machopic_should_output_picbase_label () || !label)
9293 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
9294
9295 /* When we are restoring the pic base at the site of a nonlocal label,
9296 and we decided to emit the pic base above, we will still output a
9297 local label used for calculating the correction offset (even though
9298 the offset will be 0 in that case). */
9299 if (label)
9300 targetm.asm_out.internal_label (asm_out_file, "L",
9301 CODE_LABEL_NUMBER (label));
9302 #endif
9303 }
9304
9305 if (!TARGET_MACHO)
9306 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
9307
9308 return "";
9309 }
9310
9311 /* Generate a "push" pattern for input ARG. */
9312
9313 static rtx
9314 gen_push (rtx arg)
9315 {
9316 struct machine_function *m = cfun->machine;
9317
9318 if (m->fs.cfa_reg == stack_pointer_rtx)
9319 m->fs.cfa_offset += UNITS_PER_WORD;
9320 m->fs.sp_offset += UNITS_PER_WORD;
9321
9322 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9323 arg = gen_rtx_REG (word_mode, REGNO (arg));
9324
9325 return gen_rtx_SET (VOIDmode,
9326 gen_rtx_MEM (word_mode,
9327 gen_rtx_PRE_DEC (Pmode,
9328 stack_pointer_rtx)),
9329 arg);
9330 }
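/* For example, on x86-64 a push of %rdi is represented as the RTL
   (set (mem:DI (pre_dec:DI (reg:DI sp))) (reg:DI di)),
   and gen_pop below builds the mirror-image POST_INC load.  */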
9331
9332 /* Generate a "pop" pattern for input ARG. */
9333
9334 static rtx
9335 gen_pop (rtx arg)
9336 {
9337 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9338 arg = gen_rtx_REG (word_mode, REGNO (arg));
9339
9340 return gen_rtx_SET (VOIDmode,
9341 arg,
9342 gen_rtx_MEM (word_mode,
9343 gen_rtx_POST_INC (Pmode,
9344 stack_pointer_rtx)));
9345 }
9346
9347 /* Return >= 0 if there is an unused call-clobbered register available
9348 for the entire function. */
9349
9350 static unsigned int
9351 ix86_select_alt_pic_regnum (void)
9352 {
9353 if (crtl->is_leaf
9354 && !crtl->profile
9355 && !ix86_current_function_calls_tls_descriptor)
9356 {
9357 int i, drap;
9358 /* Can't use the same register for both PIC and DRAP. */
9359 if (crtl->drap_reg)
9360 drap = REGNO (crtl->drap_reg);
9361 else
9362 drap = -1;
9363 for (i = 2; i >= 0; --i)
9364 if (i != drap && !df_regs_ever_live_p (i))
9365 return i;
9366 }
9367
9368 return INVALID_REGNUM;
9369 }
9370
9371 /* Return TRUE if we need to save REGNO. */
9372
9373 static bool
9374 ix86_save_reg (unsigned int regno, bool maybe_eh_return)
9375 {
9376 if (pic_offset_table_rtx
9377 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
9378 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
9379 || crtl->profile
9380 || crtl->calls_eh_return
9381 || crtl->uses_const_pool
9382 || cfun->has_nonlocal_label))
9383 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
9384
9385 if (crtl->calls_eh_return && maybe_eh_return)
9386 {
9387 unsigned i;
9388 for (i = 0; ; i++)
9389 {
9390 unsigned test = EH_RETURN_DATA_REGNO (i);
9391 if (test == INVALID_REGNUM)
9392 break;
9393 if (test == regno)
9394 return true;
9395 }
9396 }
9397
9398 if (crtl->drap_reg
9399 && regno == REGNO (crtl->drap_reg)
9400 && !cfun->machine->no_drap_save_restore)
9401 return true;
9402
9403 return (df_regs_ever_live_p (regno)
9404 && !call_used_regs[regno]
9405 && !fixed_regs[regno]
9406 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
9407 }
9408
9409 /* Return the number of saved general purpose registers. */
9410
9411 static int
9412 ix86_nsaved_regs (void)
9413 {
9414 int nregs = 0;
9415 int regno;
9416
9417 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9418 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9419 nregs ++;
9420 return nregs;
9421 }
9422
9423 /* Return the number of saved SSE registers. */
9424
9425 static int
9426 ix86_nsaved_sseregs (void)
9427 {
9428 int nregs = 0;
9429 int regno;
9430
9431 if (!TARGET_64BIT_MS_ABI)
9432 return 0;
9433 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9434 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9435 nregs ++;
9436 return nregs;
9437 }
9438
9439 /* Given FROM and TO register numbers, say whether this elimination is
9440 allowed. If stack alignment is needed, we can only replace argument
9441 pointer with hard frame pointer, or replace frame pointer with stack
9442 pointer. Otherwise, frame pointer elimination is automatically
9443 handled and all other eliminations are valid. */
9444
9445 static bool
9446 ix86_can_eliminate (const int from, const int to)
9447 {
9448 if (stack_realign_fp)
9449 return ((from == ARG_POINTER_REGNUM
9450 && to == HARD_FRAME_POINTER_REGNUM)
9451 || (from == FRAME_POINTER_REGNUM
9452 && to == STACK_POINTER_REGNUM));
9453 else
9454 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
9455 }
9456
9457 /* Return the offset between two registers, one to be eliminated, and the other
9458 its replacement, at the start of a routine. */
9459
9460 HOST_WIDE_INT
9461 ix86_initial_elimination_offset (int from, int to)
9462 {
9463 struct ix86_frame frame;
9464 ix86_compute_frame_layout (&frame);
9465
9466 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
9467 return frame.hard_frame_pointer_offset;
9468 else if (from == FRAME_POINTER_REGNUM
9469 && to == HARD_FRAME_POINTER_REGNUM)
9470 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
9471 else
9472 {
9473 gcc_assert (to == STACK_POINTER_REGNUM);
9474
9475 if (from == ARG_POINTER_REGNUM)
9476 return frame.stack_pointer_offset;
9477
9478 gcc_assert (from == FRAME_POINTER_REGNUM);
9479 return frame.stack_pointer_offset - frame.frame_pointer_offset;
9480 }
9481 }
9482
9483 /* In a dynamically-aligned function, we can't know the offset from
9484 stack pointer to frame pointer, so we must ensure that setjmp
9485 eliminates fp against the hard fp (%ebp) rather than trying to
9486 index from %esp up to the top of the frame across a gap that is
9487 of unknown (at compile-time) size. */
9488 static rtx
9489 ix86_builtin_setjmp_frame_value (void)
9490 {
9491 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
9492 }
9493
9494 /* When using -fsplit-stack, the allocation routines set a field in
9495 the TCB to the bottom of the stack plus this much space, measured
9496 in bytes. */
9497
9498 #define SPLIT_STACK_AVAILABLE 256
9499
9500 /* Fill the ix86_frame structure describing the frame of the current function. */
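/* Roughly, the offsets computed below describe the frame from the highest
   address downwards:

     return address
     pushed static chain          (if ix86_static_chain_on_stack)
     saved frame pointer          (if frame_pointer_needed)
     callee-saved GP registers    (frame->nregs words)
     callee-saved SSE registers   (Win64 only, 16-byte aligned slots)
     va_arg register save area
     local variables              (aligned to stack_alignment_needed)
     outgoing argument area       (if ACCUMULATE_OUTGOING_ARGS)

   with the red zone, when usable, subtracted from the final
   stack_pointer_offset.  This is a summary of the code below, not a
   normative layout.  */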
9501
9502 static void
9503 ix86_compute_frame_layout (struct ix86_frame *frame)
9504 {
9505 unsigned HOST_WIDE_INT stack_alignment_needed;
9506 HOST_WIDE_INT offset;
9507 unsigned HOST_WIDE_INT preferred_alignment;
9508 HOST_WIDE_INT size = get_frame_size ();
9509 HOST_WIDE_INT to_allocate;
9510
9511 frame->nregs = ix86_nsaved_regs ();
9512 frame->nsseregs = ix86_nsaved_sseregs ();
9513
9514 /* The 64-bit MS ABI seems to require stack alignment to always be 16,
9515 except in function prologues and in leaf functions. */
9516 if ((TARGET_64BIT_MS_ABI && crtl->preferred_stack_boundary < 128)
9517 && (!crtl->is_leaf || cfun->calls_alloca != 0
9518 || ix86_current_function_calls_tls_descriptor))
9519 {
9520 crtl->preferred_stack_boundary = 128;
9521 crtl->stack_alignment_needed = 128;
9522 }
9523 /* preferred_stack_boundary is never updated for calls
9524 expanded from a TLS descriptor. Update it here. We don't update it at
9525 expand time because, according to the comments before
9526 ix86_current_function_calls_tls_descriptor, TLS calls may be optimized
9527 away. */
9528 else if (ix86_current_function_calls_tls_descriptor
9529 && crtl->preferred_stack_boundary < PREFERRED_STACK_BOUNDARY)
9530 {
9531 crtl->preferred_stack_boundary = PREFERRED_STACK_BOUNDARY;
9532 if (crtl->stack_alignment_needed < PREFERRED_STACK_BOUNDARY)
9533 crtl->stack_alignment_needed = PREFERRED_STACK_BOUNDARY;
9534 }
9535
9536 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
9537 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
9538
9539 gcc_assert (!size || stack_alignment_needed);
9540 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
9541 gcc_assert (preferred_alignment <= stack_alignment_needed);
9542
9543 /* For SEH we have to limit the amount of code movement into the prologue.
9544 At present we do this via a BLOCKAGE, at which point there's very little
9545 scheduling that can be done, which means that there's very little point
9546 in doing anything except PUSHs. */
9547 if (TARGET_SEH)
9548 cfun->machine->use_fast_prologue_epilogue = false;
9549
9550 /* During a reload iteration the number of registers saved can change.
9551 Recompute the value as needed. Do not recompute when the number of
9552 registers didn't change, as reload calls this function multiple times
9553 and does not expect the decision to change within a single iteration. */
9554 else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun))
9555 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
9556 {
9557 int count = frame->nregs;
9558 struct cgraph_node *node = cgraph_node::get (current_function_decl);
9559
9560 cfun->machine->use_fast_prologue_epilogue_nregs = count;
9561
9562 /* The fast prologue uses move instead of push to save registers. This
9563 is significantly longer, but also executes faster as modern hardware
9564 can execute the moves in parallel, but can't do that for push/pop.
9565
9566 Be careful about choosing which prologue to emit: when the function
9567 takes many instructions to execute, we may as well use the slow
9568 version, and likewise when the function is known to be outside a hot
9569 spot (this is known with feedback only). Weight the size of the
9570 function by the number of registers to save, as it is cheap to use
9571 one or two push instructions but very slow to use many of them. */
9572 if (count)
9573 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
9574 if (node->frequency < NODE_FREQUENCY_NORMAL
9575 || (flag_branch_probabilities
9576 && node->frequency < NODE_FREQUENCY_HOT))
9577 cfun->machine->use_fast_prologue_epilogue = false;
9578 else
9579 cfun->machine->use_fast_prologue_epilogue
9580 = !expensive_function_p (count);
9581 }
9582
9583 frame->save_regs_using_mov
9584 = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
9585 /* If static stack checking is enabled and done with probes,
9586 the registers need to be saved before allocating the frame. */
9587 && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
9588
9589 /* Skip return address. */
9590 offset = UNITS_PER_WORD;
9591
9592 /* Skip pushed static chain. */
9593 if (ix86_static_chain_on_stack)
9594 offset += UNITS_PER_WORD;
9595
9596 /* Skip saved base pointer. */
9597 if (frame_pointer_needed)
9598 offset += UNITS_PER_WORD;
9599 frame->hfp_save_offset = offset;
9600
9601 /* The traditional frame pointer location is at the top of the frame. */
9602 frame->hard_frame_pointer_offset = offset;
9603
9604 /* Register save area */
9605 offset += frame->nregs * UNITS_PER_WORD;
9606 frame->reg_save_offset = offset;
9607
9608 /* On SEH target, registers are pushed just before the frame pointer
9609 location. */
9610 if (TARGET_SEH)
9611 frame->hard_frame_pointer_offset = offset;
9612
9613 /* Align and set SSE register save area. */
9614 if (frame->nsseregs)
9615 {
9616 /* The only ABI that has saved SSE registers (Win64) also has a
9617 16-byte aligned default stack, and thus we don't need to be
9618 within the re-aligned local stack frame to save them. */
9619 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
9620 offset = (offset + 16 - 1) & -16;
9621 offset += frame->nsseregs * 16;
9622 }
9623 frame->sse_reg_save_offset = offset;
9624
9625 /* The re-aligned stack starts here. Values before this point are not
9626 directly comparable with values below this point. In order to make
9627 sure that no value happens to be the same before and after, force
9628 the alignment computation below to add a non-zero value. */
9629 if (stack_realign_fp)
9630 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
9631
9632 /* Va-arg area */
9633 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
9634 offset += frame->va_arg_size;
9635
9636 /* Align start of frame for local function. */
9637 if (stack_realign_fp
9638 || offset != frame->sse_reg_save_offset
9639 || size != 0
9640 || !crtl->is_leaf
9641 || cfun->calls_alloca
9642 || ix86_current_function_calls_tls_descriptor)
9643 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
9644
9645 /* Frame pointer points here. */
9646 frame->frame_pointer_offset = offset;
9647
9648 offset += size;
9649
9650 /* Add outgoing arguments area. Can be skipped if we eliminated
9651 all the function calls as dead code.
9652 Skipping is however impossible when function calls alloca. Alloca
9653 expander assumes that last crtl->outgoing_args_size
9654 of stack frame are unused. */
9655 if (ACCUMULATE_OUTGOING_ARGS
9656 && (!crtl->is_leaf || cfun->calls_alloca
9657 || ix86_current_function_calls_tls_descriptor))
9658 {
9659 offset += crtl->outgoing_args_size;
9660 frame->outgoing_arguments_size = crtl->outgoing_args_size;
9661 }
9662 else
9663 frame->outgoing_arguments_size = 0;
9664
9665 /* Align stack boundary. Only needed if we're calling another function
9666 or using alloca. */
9667 if (!crtl->is_leaf || cfun->calls_alloca
9668 || ix86_current_function_calls_tls_descriptor)
9669 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
9670
9671 /* We've reached end of stack frame. */
9672 frame->stack_pointer_offset = offset;
9673
9674 /* Size prologue needs to allocate. */
9675 to_allocate = offset - frame->sse_reg_save_offset;
9676
9677 if ((!to_allocate && frame->nregs <= 1)
9678 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
9679 frame->save_regs_using_mov = false;
9680
9681 if (ix86_using_red_zone ()
9682 && crtl->sp_is_unchanging
9683 && crtl->is_leaf
9684 && !ix86_current_function_calls_tls_descriptor)
9685 {
9686 frame->red_zone_size = to_allocate;
9687 if (frame->save_regs_using_mov)
9688 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
9689 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
9690 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
9691 }
9692 else
9693 frame->red_zone_size = 0;
9694 frame->stack_pointer_offset -= frame->red_zone_size;
9695
9696 /* The SEH frame pointer location is near the bottom of the frame.
9697 This is enforced by the fact that the difference between the
9698 stack pointer and the frame pointer is limited to 240 bytes in
9699 the unwind data structure. */
9700 if (TARGET_SEH)
9701 {
9702 HOST_WIDE_INT diff;
9703
9704 /* If we can leave the frame pointer where it is, do so. This also
9705 returns the establisher frame for __builtin_frame_address (0). */
9706 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
9707 if (diff <= SEH_MAX_FRAME_SIZE
9708 && (diff > 240 || (diff & 15) != 0)
9709 && !crtl->accesses_prior_frames)
9710 {
9711 /* Ideally we'd determine what portion of the local stack frame
9712 (within the constraint of the lowest 240) is most heavily used.
9713 But without that complication, simply bias the frame pointer
9714 by 128 bytes so as to maximize the amount of the local stack
9715 frame that is addressable with 8-bit offsets. */
9716 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
9717 }
9718 }
9719 }
9720
9721 /* This is semi-inlined memory_address_length, but simplified
9722 since we know that we're always dealing with reg+offset, and
9723 to avoid having to create and discard all that rtl. */
9724
9725 static inline int
9726 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
9727 {
9728 int len = 4;
9729
9730 if (offset == 0)
9731 {
9732 /* EBP and R13 cannot be encoded without an offset. */
9733 len = (regno == BP_REG || regno == R13_REG);
9734 }
9735 else if (IN_RANGE (offset, -128, 127))
9736 len = 1;
9737
9738 /* ESP and R12 must be encoded with a SIB byte. */
9739 if (regno == SP_REG || regno == R12_REG)
9740 len++;
9741
9742 return len;
9743 }
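/* For example: (%eax) encodes with no displacement, so the result is 0;
   (%ebp) needs an explicit 8-bit zero displacement, so 1; 16(%esp) needs a
   disp8 plus a SIB byte, so 2; and 4096(%ebx) needs a 32-bit displacement,
   so 4.  */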
9744
9745 /* Return an RTX that points to CFA_OFFSET within the stack frame.
9746 The valid base registers are taken from CFUN->MACHINE->FS. */
9747
9748 static rtx
9749 choose_baseaddr (HOST_WIDE_INT cfa_offset)
9750 {
9751 const struct machine_function *m = cfun->machine;
9752 rtx base_reg = NULL;
9753 HOST_WIDE_INT base_offset = 0;
9754
9755 if (m->use_fast_prologue_epilogue)
9756 {
9757 /* Choose the base register most likely to allow the most scheduling
9758 opportunities. Generally FP is valid throughout the function,
9759 while DRAP must be reloaded within the epilogue. But choose either
9760 over the SP due to increased encoding size. */
9761
9762 if (m->fs.fp_valid)
9763 {
9764 base_reg = hard_frame_pointer_rtx;
9765 base_offset = m->fs.fp_offset - cfa_offset;
9766 }
9767 else if (m->fs.drap_valid)
9768 {
9769 base_reg = crtl->drap_reg;
9770 base_offset = 0 - cfa_offset;
9771 }
9772 else if (m->fs.sp_valid)
9773 {
9774 base_reg = stack_pointer_rtx;
9775 base_offset = m->fs.sp_offset - cfa_offset;
9776 }
9777 }
9778 else
9779 {
9780 HOST_WIDE_INT toffset;
9781 int len = 16, tlen;
9782
9783 /* Choose the base register with the smallest address encoding.
9784 With a tie, choose FP > DRAP > SP. */
9785 if (m->fs.sp_valid)
9786 {
9787 base_reg = stack_pointer_rtx;
9788 base_offset = m->fs.sp_offset - cfa_offset;
9789 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
9790 }
9791 if (m->fs.drap_valid)
9792 {
9793 toffset = 0 - cfa_offset;
9794 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
9795 if (tlen <= len)
9796 {
9797 base_reg = crtl->drap_reg;
9798 base_offset = toffset;
9799 len = tlen;
9800 }
9801 }
9802 if (m->fs.fp_valid)
9803 {
9804 toffset = m->fs.fp_offset - cfa_offset;
9805 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
9806 if (tlen <= len)
9807 {
9808 base_reg = hard_frame_pointer_rtx;
9809 base_offset = toffset;
9810 len = tlen;
9811 }
9812 }
9813 }
9814 gcc_assert (base_reg != NULL);
9815
9816 return plus_constant (Pmode, base_reg, base_offset);
9817 }
9818
9819 /* Emit code to save registers in the prologue. */
9820
9821 static void
9822 ix86_emit_save_regs (void)
9823 {
9824 unsigned int regno;
9825 rtx insn;
9826
9827 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
9828 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9829 {
9830 insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
9831 RTX_FRAME_RELATED_P (insn) = 1;
9832 }
9833 }
9834
9835 /* Emit a single register save at CFA - CFA_OFFSET. */
9836
9837 static void
9838 ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno,
9839 HOST_WIDE_INT cfa_offset)
9840 {
9841 struct machine_function *m = cfun->machine;
9842 rtx reg = gen_rtx_REG (mode, regno);
9843 rtx mem, addr, base, insn;
9844
9845 addr = choose_baseaddr (cfa_offset);
9846 mem = gen_frame_mem (mode, addr);
9847
9848 /* For SSE saves, we need to indicate the 128-bit alignment. */
9849 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
9850
9851 insn = emit_move_insn (mem, reg);
9852 RTX_FRAME_RELATED_P (insn) = 1;
9853
9854 base = addr;
9855 if (GET_CODE (base) == PLUS)
9856 base = XEXP (base, 0);
9857 gcc_checking_assert (REG_P (base));
9858
9859 /* When saving registers into a re-aligned local stack frame, avoid
9860 any tricky guessing by dwarf2out. */
9861 if (m->fs.realigned)
9862 {
9863 gcc_checking_assert (stack_realign_drap);
9864
9865 if (regno == REGNO (crtl->drap_reg))
9866 {
9867 /* A bit of a hack. We force the DRAP register to be saved in
9868 the re-aligned stack frame, which provides us with a copy
9869 of the CFA that will last past the prologue. Install it. */
9870 gcc_checking_assert (cfun->machine->fs.fp_valid);
9871 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
9872 cfun->machine->fs.fp_offset - cfa_offset);
9873 mem = gen_rtx_MEM (mode, addr);
9874 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
9875 }
9876 else
9877 {
9878 /* The frame pointer is a stable reference within the
9879 aligned frame. Use it. */
9880 gcc_checking_assert (cfun->machine->fs.fp_valid);
9881 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
9882 cfun->machine->fs.fp_offset - cfa_offset);
9883 mem = gen_rtx_MEM (mode, addr);
9884 add_reg_note (insn, REG_CFA_EXPRESSION,
9885 gen_rtx_SET (VOIDmode, mem, reg));
9886 }
9887 }
9888
9889 /* The memory may not be relative to the current CFA register,
9890 which means that we may need to generate a new pattern for
9891 use by the unwind info. */
9892 else if (base != m->fs.cfa_reg)
9893 {
9894 addr = plus_constant (Pmode, m->fs.cfa_reg,
9895 m->fs.cfa_offset - cfa_offset);
9896 mem = gen_rtx_MEM (mode, addr);
9897 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
9898 }
9899 }
9900
9901 /* Emit code to save registers using MOV insns.
9902 First register is stored at CFA - CFA_OFFSET. */
9903 static void
9904 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
9905 {
9906 unsigned int regno;
9907
9908 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9909 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9910 {
9911 ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
9912 cfa_offset -= UNITS_PER_WORD;
9913 }
9914 }
9915
9916 /* Emit code to save SSE registers using MOV insns.
9917 First register is stored at CFA - CFA_OFFSET. */
9918 static void
9919 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
9920 {
9921 unsigned int regno;
9922
9923 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9924 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9925 {
9926 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
9927 cfa_offset -= 16;
9928 }
9929 }
9930
9931 static GTY(()) rtx queued_cfa_restores;
9932
9933 /* Add a REG_CFA_RESTORE REG note to INSN, or queue it until the next
9934 stack-manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
9935 Don't add the note if the previously saved value will be left untouched
9936 within the stack red zone until return, as unwinders can find the same
9937 value in the register and on the stack. */
9938
9939 static void
9940 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
9941 {
9942 if (!crtl->shrink_wrapped
9943 && cfa_offset <= cfun->machine->fs.red_zone_offset)
9944 return;
9945
9946 if (insn)
9947 {
9948 add_reg_note (insn, REG_CFA_RESTORE, reg);
9949 RTX_FRAME_RELATED_P (insn) = 1;
9950 }
9951 else
9952 queued_cfa_restores
9953 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
9954 }
9955
9956 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
9957
9958 static void
9959 ix86_add_queued_cfa_restore_notes (rtx insn)
9960 {
9961 rtx last;
9962 if (!queued_cfa_restores)
9963 return;
9964 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
9965 ;
9966 XEXP (last, 1) = REG_NOTES (insn);
9967 REG_NOTES (insn) = queued_cfa_restores;
9968 queued_cfa_restores = NULL_RTX;
9969 RTX_FRAME_RELATED_P (insn) = 1;
9970 }
9971
9972 /* Expand prologue or epilogue stack adjustment.
9973 The pattern exists to put a dependency on all ebp-based memory accesses.
9974 STYLE should be negative if instructions should be marked as frame
9975 related, zero if the %r11 register is live and cannot be freely used,
9976 and positive otherwise. */
9977
9978 static void
9979 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
9980 int style, bool set_cfa)
9981 {
9982 struct machine_function *m = cfun->machine;
9983 rtx insn;
9984 bool add_frame_related_expr = false;
9985
9986 if (Pmode == SImode)
9987 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
9988 else if (x86_64_immediate_operand (offset, DImode))
9989 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
9990 else
9991 {
9992 rtx tmp;
9993 /* r11 is used by indirect sibcall return as well, set before the
9994 epilogue and used after the epilogue. */
9995 if (style)
9996 tmp = gen_rtx_REG (DImode, R11_REG);
9997 else
9998 {
9999 gcc_assert (src != hard_frame_pointer_rtx
10000 && dest != hard_frame_pointer_rtx);
10001 tmp = hard_frame_pointer_rtx;
10002 }
10003 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
10004 if (style < 0)
10005 add_frame_related_expr = true;
10006
10007 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
10008 }
10009
10010 insn = emit_insn (insn);
10011 if (style >= 0)
10012 ix86_add_queued_cfa_restore_notes (insn);
10013
10014 if (set_cfa)
10015 {
10016 rtx r;
10017
10018 gcc_assert (m->fs.cfa_reg == src);
10019 m->fs.cfa_offset += INTVAL (offset);
10020 m->fs.cfa_reg = dest;
10021
10022 r = gen_rtx_PLUS (Pmode, src, offset);
10023 r = gen_rtx_SET (VOIDmode, dest, r);
10024 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
10025 RTX_FRAME_RELATED_P (insn) = 1;
10026 }
10027 else if (style < 0)
10028 {
10029 RTX_FRAME_RELATED_P (insn) = 1;
10030 if (add_frame_related_expr)
10031 {
10032 rtx r = gen_rtx_PLUS (Pmode, src, offset);
10033 r = gen_rtx_SET (VOIDmode, dest, r);
10034 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
10035 }
10036 }
10037
10038 if (dest == stack_pointer_rtx)
10039 {
10040 HOST_WIDE_INT ooffset = m->fs.sp_offset;
10041 bool valid = m->fs.sp_valid;
10042
10043 if (src == hard_frame_pointer_rtx)
10044 {
10045 valid = m->fs.fp_valid;
10046 ooffset = m->fs.fp_offset;
10047 }
10048 else if (src == crtl->drap_reg)
10049 {
10050 valid = m->fs.drap_valid;
10051 ooffset = 0;
10052 }
10053 else
10054 {
10055 /* Else there are two possibilities: SP itself, which we set
10056 up as the default above, or EH_RETURN_STACKADJ_RTX, which is
10057 taken care of by hand along the eh_return path. */
10058 gcc_checking_assert (src == stack_pointer_rtx
10059 || offset == const0_rtx);
10060 }
10061
10062 m->fs.sp_offset = ooffset - INTVAL (offset);
10063 m->fs.sp_valid = valid;
10064 }
10065 }
10066
10067 /* Find an available register to be used as the dynamic realign argument
10068 pointer register. Such a register will be written in the prologue and
10069 used at the beginning of the body, so it must not be
10070 1. a parameter passing register.
10071 2. the GOT pointer.
10072 We reuse the static-chain register if it is available. Otherwise, we
10073 use DI for i386 and R13 for x86-64. We chose R13 since it has
10074 a shorter encoding.
10075
10076 Return: the regno of the chosen register. */
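/* As an illustration of the choices below: a plain 32-bit cdecl function
   (regparm <= 2, no static chain, no tail call) reuses %ecx, whereas a
   fastcall/thiscall function, one compiled with regparm(3), or one that
   needs the static chain or makes a tail call falls back to %edi; in
   64-bit mode the usual pick is %r10, with %r13 used when a static chain
   or a tail call is involved.  */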
10077
10078 static unsigned int
10079 find_drap_reg (void)
10080 {
10081 tree decl = cfun->decl;
10082
10083 if (TARGET_64BIT)
10084 {
10085 /* Use R13 for nested functions or functions that need a static chain.
10086 Since a function with a tail call may use any caller-saved
10087 register in the epilogue, DRAP must not use a caller-saved
10088 register in such a case. */
10089 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10090 return R13_REG;
10091
10092 return R10_REG;
10093 }
10094 else
10095 {
10096 /* Use DI for nested functions or functions that need a static chain.
10097 Since a function with a tail call may use any caller-saved
10098 register in the epilogue, DRAP must not use a caller-saved
10099 register in such a case. */
10100 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10101 return DI_REG;
10102
10103 /* Reuse static chain register if it isn't used for parameter
10104 passing. */
10105 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
10106 {
10107 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
10108 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
10109 return CX_REG;
10110 }
10111 return DI_REG;
10112 }
10113 }
10114
10115 /* Return minimum incoming stack alignment. */
10116
10117 static unsigned int
10118 ix86_minimum_incoming_stack_boundary (bool sibcall)
10119 {
10120 unsigned int incoming_stack_boundary;
10121
10122 /* Prefer the one specified at command line. */
10123 if (ix86_user_incoming_stack_boundary)
10124 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
10125 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
10126 if -mstackrealign is used, this isn't a sibcall check, and the
10127 estimated stack alignment is 128 bits. */
10128 else if (!sibcall
10129 && !TARGET_64BIT
10130 && ix86_force_align_arg_pointer
10131 && crtl->stack_alignment_estimated == 128)
10132 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10133 else
10134 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
10135
10136 /* Incoming stack alignment can be changed on individual functions
10137 via force_align_arg_pointer attribute. We use the smallest
10138 incoming stack boundary. */
10139 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
10140 && lookup_attribute (ix86_force_align_arg_pointer_string,
10141 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
10142 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10143
10144 /* The incoming stack frame has to be aligned at least at
10145 parm_stack_boundary. */
10146 if (incoming_stack_boundary < crtl->parm_stack_boundary)
10147 incoming_stack_boundary = crtl->parm_stack_boundary;
10148
10149 /* The stack at the entry of main is aligned by the runtime. We use
10150 the smallest incoming stack boundary. */
10151 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
10152 && DECL_NAME (current_function_decl)
10153 && MAIN_NAME_P (DECL_NAME (current_function_decl))
10154 && DECL_FILE_SCOPE_P (current_function_decl))
10155 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
10156
10157 return incoming_stack_boundary;
10158 }
10159
10160 /* Update incoming stack boundary and estimated stack alignment. */
10161
10162 static void
10163 ix86_update_stack_boundary (void)
10164 {
10165 ix86_incoming_stack_boundary
10166 = ix86_minimum_incoming_stack_boundary (false);
10167
10168 /* x86_64 varargs functions need 16-byte stack alignment for the
10169 register save area. */
10170 if (TARGET_64BIT
10171 && cfun->stdarg
10172 && crtl->stack_alignment_estimated < 128)
10173 crtl->stack_alignment_estimated = 128;
10174 }
10175
10176 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
10177 needed or an rtx for DRAP otherwise. */
10178
10179 static rtx
10180 ix86_get_drap_rtx (void)
10181 {
10182 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
10183 crtl->need_drap = true;
10184
10185 if (stack_realign_drap)
10186 {
10187 /* Assign DRAP to vDRAP and return vDRAP. */
10188 unsigned int regno = find_drap_reg ();
10189 rtx drap_vreg;
10190 rtx arg_ptr;
10191 rtx seq, insn;
10192
10193 arg_ptr = gen_rtx_REG (Pmode, regno);
10194 crtl->drap_reg = arg_ptr;
10195
10196 start_sequence ();
10197 drap_vreg = copy_to_reg (arg_ptr);
10198 seq = get_insns ();
10199 end_sequence ();
10200
10201 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
10202 if (!optimize)
10203 {
10204 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
10205 RTX_FRAME_RELATED_P (insn) = 1;
10206 }
10207 return drap_vreg;
10208 }
10209 else
10210 return NULL;
10211 }
10212
10213 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
10214
10215 static rtx
10216 ix86_internal_arg_pointer (void)
10217 {
10218 return virtual_incoming_args_rtx;
10219 }
10220
10221 struct scratch_reg {
10222 rtx reg;
10223 bool saved;
10224 };
10225
10226 /* Return a short-lived scratch register for use on function entry.
10227 In 32-bit mode, it is valid only after the registers are saved
10228 in the prologue. This register must be released by means of
10229 release_scratch_register_on_entry once it is dead. */
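/* As a concrete illustration of the 32-bit selection below (a sketch,
   not an exhaustive list): a plain cdecl function with no DRAP register
   typically gets %eax; a fastcall function whose static chain already
   occupies %eax skips %eax/%edx/%ecx and falls back to a callee-saved
   register that the prologue saves anyway (%ebx, %esi or %edi); if no
   candidate qualifies, a register is pushed and SR->SAVED is set so
   that release_scratch_register_on_entry pops it again.  */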
10230
10231 static void
10232 get_scratch_register_on_entry (struct scratch_reg *sr)
10233 {
10234 int regno;
10235
10236 sr->saved = false;
10237
10238 if (TARGET_64BIT)
10239 {
10240 /* We always use R11 in 64-bit mode. */
10241 regno = R11_REG;
10242 }
10243 else
10244 {
10245 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
10246 bool fastcall_p
10247 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10248 bool thiscall_p
10249 = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10250 bool static_chain_p = DECL_STATIC_CHAIN (decl);
10251 int regparm = ix86_function_regparm (fntype, decl);
10252 int drap_regno
10253 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
10254
10255 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
10256 for the static chain register. */
10257 if ((regparm < 1 || (fastcall_p && !static_chain_p))
10258 && drap_regno != AX_REG)
10259 regno = AX_REG;
10260 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
10261 for the static chain register. */
10262 else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
10263 regno = AX_REG;
10264 else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
10265 regno = DX_REG;
10266 /* ecx is the static chain register. */
10267 else if (regparm < 3 && !fastcall_p && !thiscall_p
10268 && !static_chain_p
10269 && drap_regno != CX_REG)
10270 regno = CX_REG;
10271 else if (ix86_save_reg (BX_REG, true))
10272 regno = BX_REG;
10273 /* esi is the static chain register. */
10274 else if (!(regparm == 3 && static_chain_p)
10275 && ix86_save_reg (SI_REG, true))
10276 regno = SI_REG;
10277 else if (ix86_save_reg (DI_REG, true))
10278 regno = DI_REG;
10279 else
10280 {
10281 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
10282 sr->saved = true;
10283 }
10284 }
10285
10286 sr->reg = gen_rtx_REG (Pmode, regno);
10287 if (sr->saved)
10288 {
10289 rtx insn = emit_insn (gen_push (sr->reg));
10290 RTX_FRAME_RELATED_P (insn) = 1;
10291 }
10292 }
10293
10294 /* Release a scratch register obtained from the preceding function. */
10295
10296 static void
10297 release_scratch_register_on_entry (struct scratch_reg *sr)
10298 {
10299 if (sr->saved)
10300 {
10301 struct machine_function *m = cfun->machine;
10302 rtx x, insn = emit_insn (gen_pop (sr->reg));
10303
10304 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
10305 RTX_FRAME_RELATED_P (insn) = 1;
10306 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
10307 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
10308 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
10309 m->fs.sp_offset -= UNITS_PER_WORD;
10310 }
10311 }
10312
10313 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
10314
10315 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
10316
10317 static void
10318 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
10319 {
10320 /* We skip the probe for the first interval + a small dope of 4 words and
10321 probe that many bytes past the specified size to maintain a protection
10322 area at the bottom of the stack. */
10323 const int dope = 4 * UNITS_PER_WORD;
10324 rtx size_rtx = GEN_INT (size), last;
10325
10326 /* See if we have a constant small number of probes to generate. If so,
10327 that's the easy case. The run-time loop is made up of 11 insns in the
10328 generic case while the compile-time loop is made up of 3+2*(n-1) insns
10329 for n # of intervals. */
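  /* A worked example of the constant case, assuming the default 4096-byte
     PROBE_INTERVAL and 8-byte words (so dope == 32): for SIZE == 10000 the
     loop below emits "SP -= 8224; probe" and "SP -= 4096; probe", the tail
     emits "SP -= 1808; probe", and the final adjustment adds back 4128,
     for a net decrease of exactly 10000 bytes.  */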
10330 if (size <= 5 * PROBE_INTERVAL)
10331 {
10332 HOST_WIDE_INT i, adjust;
10333 bool first_probe = true;
10334
10335 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
10336 values of N from 1 until it exceeds SIZE. If only one probe is
10337 needed, this will not generate any code. Then adjust and probe
10338 to PROBE_INTERVAL + SIZE. */
10339 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10340 {
10341 if (first_probe)
10342 {
10343 adjust = 2 * PROBE_INTERVAL + dope;
10344 first_probe = false;
10345 }
10346 else
10347 adjust = PROBE_INTERVAL;
10348
10349 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10350 plus_constant (Pmode, stack_pointer_rtx,
10351 -adjust)));
10352 emit_stack_probe (stack_pointer_rtx);
10353 }
10354
10355 if (first_probe)
10356 adjust = size + PROBE_INTERVAL + dope;
10357 else
10358 adjust = size + PROBE_INTERVAL - i;
10359
10360 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10361 plus_constant (Pmode, stack_pointer_rtx,
10362 -adjust)));
10363 emit_stack_probe (stack_pointer_rtx);
10364
10365 /* Adjust back to account for the additional first interval. */
10366 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10367 plus_constant (Pmode, stack_pointer_rtx,
10368 PROBE_INTERVAL + dope)));
10369 }
10370
10371 /* Otherwise, do the same as above, but in a loop. Note that we must be
10372 extra careful with variables wrapping around because we might be at
10373 the very top (or the very bottom) of the address space and we have
10374 to be able to handle this case properly; in particular, we use an
10375 equality test for the loop condition. */
10376 else
10377 {
10378 HOST_WIDE_INT rounded_size;
10379 struct scratch_reg sr;
10380
10381 get_scratch_register_on_entry (&sr);
10382
10383
10384 /* Step 1: round SIZE to the previous multiple of the interval. */
10385
10386 rounded_size = size & -PROBE_INTERVAL;
10387
10388
10389 /* Step 2: compute initial and final value of the loop counter. */
10390
10391 /* SP = SP_0 + PROBE_INTERVAL. */
10392 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10393 plus_constant (Pmode, stack_pointer_rtx,
10394 - (PROBE_INTERVAL + dope))));
10395
10396 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
10397 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
10398 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
10399 gen_rtx_PLUS (Pmode, sr.reg,
10400 stack_pointer_rtx)));
10401
10402
10403 /* Step 3: the loop
10404
10405 while (SP != LAST_ADDR)
10406 {
10407 SP = SP + PROBE_INTERVAL
10408 probe at SP
10409 }
10410
10411 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
10412 values of N from 1 until it is equal to ROUNDED_SIZE. */
10413
10414 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
10415
10416
10417 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
10418 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
10419
10420 if (size != rounded_size)
10421 {
10422 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10423 plus_constant (Pmode, stack_pointer_rtx,
10424 rounded_size - size)));
10425 emit_stack_probe (stack_pointer_rtx);
10426 }
10427
10428 /* Adjust back to account for the additional first interval. */
10429 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10430 plus_constant (Pmode, stack_pointer_rtx,
10431 PROBE_INTERVAL + dope)));
10432
10433 release_scratch_register_on_entry (&sr);
10434 }
10435
10436 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
10437
10438 /* Even if the stack pointer isn't the CFA register, we need to correctly
10439 describe the adjustments made to it, in particular differentiate the
10440 frame-related ones from the frame-unrelated ones. */
10441 if (size > 0)
10442 {
10443 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
10444 XVECEXP (expr, 0, 0)
10445 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10446 plus_constant (Pmode, stack_pointer_rtx, -size));
10447 XVECEXP (expr, 0, 1)
10448 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10449 plus_constant (Pmode, stack_pointer_rtx,
10450 PROBE_INTERVAL + dope + size));
10451 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
10452 RTX_FRAME_RELATED_P (last) = 1;
10453
10454 cfun->machine->fs.sp_offset += size;
10455 }
10456
10457 /* Make sure nothing is scheduled before we are done. */
10458 emit_insn (gen_blockage ());
10459 }
10460
10461 /* Adjust the stack pointer up to REG while probing it. */
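/* A sketch of the loop emitted below, assuming 64-bit code, a 4096-byte
   PROBE_INTERVAL, %r11 as the scratch register and label number 0
   (exact label names and operand suffixes are target dependent):

       .LPSRL0:
               cmpq    %r11, %rsp
               je      .LPSRE0
               subq    $4096, %rsp
               orq     $0, (%rsp)
               jmp     .LPSRL0
       .LPSRE0:
 */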
10462
10463 const char *
10464 output_adjust_stack_and_probe (rtx reg)
10465 {
10466 static int labelno = 0;
10467 char loop_lab[32], end_lab[32];
10468 rtx xops[2];
10469
10470 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
10471 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
10472
10473 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
10474
10475 /* Jump to END_LAB if SP == LAST_ADDR. */
10476 xops[0] = stack_pointer_rtx;
10477 xops[1] = reg;
10478 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
10479 fputs ("\tje\t", asm_out_file);
10480 assemble_name_raw (asm_out_file, end_lab);
10481 fputc ('\n', asm_out_file);
10482
10483 /* SP = SP + PROBE_INTERVAL. */
10484 xops[1] = GEN_INT (PROBE_INTERVAL);
10485 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
10486
10487 /* Probe at SP. */
10488 xops[1] = const0_rtx;
10489 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
10490
10491 fprintf (asm_out_file, "\tjmp\t");
10492 assemble_name_raw (asm_out_file, loop_lab);
10493 fputc ('\n', asm_out_file);
10494
10495 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
10496
10497 return "";
10498 }
10499
10500 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
10501 inclusive. These are offsets from the current stack pointer. */
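/* For instance, with the default 4096-byte interval, a call with
   FIRST == 0 and SIZE == 12000 takes the constant path below and emits
   probes at sp-4096, sp-8192 and sp-12000 (a sketch; the exact
   addressing forms come from emit_stack_probe).  */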
10502
10503 static void
10504 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
10505 {
10506 /* See if we have a constant small number of probes to generate. If so,
10507 that's the easy case. The run-time loop is made up of 7 insns in the
10508 generic case while the compile-time loop is made up of n insns for n #
10509 of intervals. */
10510 if (size <= 7 * PROBE_INTERVAL)
10511 {
10512 HOST_WIDE_INT i;
10513
10514 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
10515 it exceeds SIZE. If only one probe is needed, this will not
10516 generate any code. Then probe at FIRST + SIZE. */
10517 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10518 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
10519 -(first + i)));
10520
10521 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
10522 -(first + size)));
10523 }
10524
10525 /* Otherwise, do the same as above, but in a loop. Note that we must be
10526 extra careful with variables wrapping around because we might be at
10527 the very top (or the very bottom) of the address space and we have
10528 to be able to handle this case properly; in particular, we use an
10529 equality test for the loop condition. */
10530 else
10531 {
10532 HOST_WIDE_INT rounded_size, last;
10533 struct scratch_reg sr;
10534
10535 get_scratch_register_on_entry (&sr);
10536
10537
10538 /* Step 1: round SIZE to the previous multiple of the interval. */
10539
10540 rounded_size = size & -PROBE_INTERVAL;
10541
10542
10543 /* Step 2: compute initial and final value of the loop counter. */
10544
10545 /* TEST_OFFSET = FIRST. */
10546 emit_move_insn (sr.reg, GEN_INT (-first));
10547
10548 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
10549 last = first + rounded_size;
10550
10551
10552 /* Step 3: the loop
10553
10554 while (TEST_ADDR != LAST_ADDR)
10555 {
10556 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
10557 probe at TEST_ADDR
10558 }
10559
10560 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
10561 until it is equal to ROUNDED_SIZE. */
10562
10563 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
10564
10565
10566 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
10567 that SIZE is equal to ROUNDED_SIZE. */
10568
10569 if (size != rounded_size)
10570 emit_stack_probe (plus_constant (Pmode,
10571 gen_rtx_PLUS (Pmode,
10572 stack_pointer_rtx,
10573 sr.reg),
10574 rounded_size - size));
10575
10576 release_scratch_register_on_entry (&sr);
10577 }
10578
10579 /* Make sure nothing is scheduled before we are done. */
10580 emit_insn (gen_blockage ());
10581 }
10582
10583 /* Probe a range of stack addresses from REG to END, inclusive. These are
10584 offsets from the current stack pointer. */
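/* In contrast to output_adjust_stack_and_probe above, the stack pointer
   itself is not moved here: the scratch register REG starts at -FIRST,
   is decremented by PROBE_INTERVAL on each iteration until it reaches
   END, and every probe touches the stack via an "or $0, (%esp,REG)"
   style access (a sketch; the exact operand suffixes depend on the
   target word size).  */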
10585
10586 const char *
10587 output_probe_stack_range (rtx reg, rtx end)
10588 {
10589 static int labelno = 0;
10590 char loop_lab[32], end_lab[32];
10591 rtx xops[3];
10592
10593 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
10594 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
10595
10596 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
10597
10598 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
10599 xops[0] = reg;
10600 xops[1] = end;
10601 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
10602 fputs ("\tje\t", asm_out_file);
10603 assemble_name_raw (asm_out_file, end_lab);
10604 fputc ('\n', asm_out_file);
10605
10606 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
10607 xops[1] = GEN_INT (PROBE_INTERVAL);
10608 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
10609
10610 /* Probe at TEST_ADDR. */
10611 xops[0] = stack_pointer_rtx;
10612 xops[1] = reg;
10613 xops[2] = const0_rtx;
10614 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
10615
10616 fprintf (asm_out_file, "\tjmp\t");
10617 assemble_name_raw (asm_out_file, loop_lab);
10618 fputc ('\n', asm_out_file);
10619
10620 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
10621
10622 return "";
10623 }
10624
10625 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
10626 to be generated in correct form. */
10627 static void
10628 ix86_finalize_stack_realign_flags (void)
10629 {
10630 /* Check if stack realignment is really needed after reload, and
10631 store the result in cfun. */
10632 unsigned int incoming_stack_boundary
10633 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
10634 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
10635 unsigned int stack_realign = (incoming_stack_boundary
10636 < (crtl->is_leaf
10637 ? crtl->max_used_stack_slot_alignment
10638 : crtl->stack_alignment_needed));
10639
10640 if (crtl->stack_realign_finalized)
10641 {
10642 /* After stack_realign_needed is finalized, we can no longer
10643 change it. */
10644 gcc_assert (crtl->stack_realign_needed == stack_realign);
10645 return;
10646 }
10647
10648 /* If the only reason for frame_pointer_needed is that we conservatively
10649 assumed stack realignment might be needed, but in the end nothing that
10650 needed the stack alignment had been spilled, clear frame_pointer_needed
10651 and say we don't need stack realignment. */
10652 if (stack_realign
10653 && frame_pointer_needed
10654 && crtl->is_leaf
10655 && flag_omit_frame_pointer
10656 && crtl->sp_is_unchanging
10657 && !ix86_current_function_calls_tls_descriptor
10658 && !crtl->accesses_prior_frames
10659 && !cfun->calls_alloca
10660 && !crtl->calls_eh_return
10661 && !(flag_stack_check && STACK_CHECK_MOVING_SP)
10662 && !ix86_frame_pointer_required ()
10663 && get_frame_size () == 0
10664 && ix86_nsaved_sseregs () == 0
10665 && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
10666 {
10667 HARD_REG_SET set_up_by_prologue, prologue_used;
10668 basic_block bb;
10669
10670 CLEAR_HARD_REG_SET (prologue_used);
10671 CLEAR_HARD_REG_SET (set_up_by_prologue);
10672 add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
10673 add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
10674 add_to_hard_reg_set (&set_up_by_prologue, Pmode,
10675 HARD_FRAME_POINTER_REGNUM);
10676 FOR_EACH_BB_FN (bb, cfun)
10677 {
10678 rtx insn;
10679 FOR_BB_INSNS (bb, insn)
10680 if (NONDEBUG_INSN_P (insn)
10681 && requires_stack_frame_p (insn, prologue_used,
10682 set_up_by_prologue))
10683 {
10684 crtl->stack_realign_needed = stack_realign;
10685 crtl->stack_realign_finalized = true;
10686 return;
10687 }
10688 }
10689
10690 /* If drap has been set, but it actually isn't live at the start
10691 of the function, there is no reason to set it up. */
10692 if (crtl->drap_reg)
10693 {
10694 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
10695 if (! REGNO_REG_SET_P (DF_LR_IN (bb), REGNO (crtl->drap_reg)))
10696 {
10697 crtl->drap_reg = NULL_RTX;
10698 crtl->need_drap = false;
10699 }
10700 }
10701 else
10702 cfun->machine->no_drap_save_restore = true;
10703
10704 frame_pointer_needed = false;
10705 stack_realign = false;
10706 crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
10707 crtl->stack_alignment_needed = incoming_stack_boundary;
10708 crtl->stack_alignment_estimated = incoming_stack_boundary;
10709 if (crtl->preferred_stack_boundary > incoming_stack_boundary)
10710 crtl->preferred_stack_boundary = incoming_stack_boundary;
10711 df_finish_pass (true);
10712 df_scan_alloc (NULL);
10713 df_scan_blocks ();
10714 df_compute_regs_ever_live (true);
10715 df_analyze ();
10716 }
10717
10718 crtl->stack_realign_needed = stack_realign;
10719 crtl->stack_realign_finalized = true;
10720 }
10721
10722 /* Expand the prologue into a bunch of separate insns. */
10723
10724 void
10725 ix86_expand_prologue (void)
10726 {
10727 struct machine_function *m = cfun->machine;
10728 rtx insn, t;
10729 bool pic_reg_used;
10730 struct ix86_frame frame;
10731 HOST_WIDE_INT allocate;
10732 bool int_registers_saved;
10733 bool sse_registers_saved;
10734
10735 ix86_finalize_stack_realign_flags ();
10736
10737 /* DRAP should not coexist with stack_realign_fp */
10738 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
10739
10740 memset (&m->fs, 0, sizeof (m->fs));
10741
10742 /* Initialize CFA state for before the prologue. */
10743 m->fs.cfa_reg = stack_pointer_rtx;
10744 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
10745
10746 /* Track SP offset to the CFA. We continue tracking this after we've
10747 swapped the CFA register away from SP. In the case of re-alignment
10748 this is fudged; we're interested in offsets within the local frame. */
10749 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10750 m->fs.sp_valid = true;
10751
10752 ix86_compute_frame_layout (&frame);
10753
10754 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
10755 {
10756 /* We should have already generated an error for any use of
10757 ms_hook on a nested function. */
10758 gcc_checking_assert (!ix86_static_chain_on_stack);
10759
10760 /* Check if profiling is active and we shall use the profiling-before-
10761 prologue variant. If so, issue a sorry. */
10762 if (crtl->profile && flag_fentry != 0)
10763 sorry ("ms_hook_prologue attribute isn%'t compatible "
10764 "with -mfentry for 32-bit");
10765
10766 /* In ix86_asm_output_function_label we emitted:
10767 8b ff movl.s %edi,%edi
10768 55 push %ebp
10769 8b ec movl.s %esp,%ebp
10770
10771 This matches the hookable function prologue in Win32 API
10772 functions in Microsoft Windows XP Service Pack 2 and newer.
10773 Wine uses this to enable Windows apps to hook the Win32 API
10774 functions provided by Wine.
10775
10776 What that means is that we've already set up the frame pointer. */
10777
10778 if (frame_pointer_needed
10779 && !(crtl->drap_reg && crtl->stack_realign_needed))
10780 {
10781 rtx push, mov;
10782
10783 /* We've decided to use the frame pointer already set up.
10784 Describe this to the unwinder by pretending that both
10785 push and mov insns happen right here.
10786
10787 Putting the unwind info here at the end of the ms_hook
10788 is done so that we can make absolutely certain we get
10789 the required byte sequence at the start of the function,
10790 rather than relying on an assembler that can produce
10791 the exact encoding required.
10792
10793 However it does mean (in the unpatched case) that we have
10794 a 1 insn window where the asynchronous unwind info is
10795 incorrect. However, if we placed the unwind info at
10796 its correct location we would have incorrect unwind info
10797 in the patched case. Which is probably all moot since
10798 I don't expect Wine generates dwarf2 unwind info for the
10799 system libraries that use this feature. */
10800
10801 insn = emit_insn (gen_blockage ());
10802
10803 push = gen_push (hard_frame_pointer_rtx);
10804 mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
10805 stack_pointer_rtx);
10806 RTX_FRAME_RELATED_P (push) = 1;
10807 RTX_FRAME_RELATED_P (mov) = 1;
10808
10809 RTX_FRAME_RELATED_P (insn) = 1;
10810 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10811 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
10812
10813 /* Note that gen_push incremented m->fs.cfa_offset, even
10814 though we didn't emit the push insn here. */
10815 m->fs.cfa_reg = hard_frame_pointer_rtx;
10816 m->fs.fp_offset = m->fs.cfa_offset;
10817 m->fs.fp_valid = true;
10818 }
10819 else
10820 {
10821 /* The frame pointer is not needed so pop %ebp again.
10822 This leaves us with a pristine state. */
10823 emit_insn (gen_pop (hard_frame_pointer_rtx));
10824 }
10825 }
10826
10827 /* The first insn of a function that accepts its static chain on the
10828 stack is to push the register that would be filled in by a direct
10829 call. This insn will be skipped by the trampoline. */
10830 else if (ix86_static_chain_on_stack)
10831 {
10832 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
10833 emit_insn (gen_blockage ());
10834
10835 /* We don't want to interpret this push insn as a register save,
10836 only as a stack adjustment. The real copy of the register as
10837 a save will be done later, if needed. */
10838 t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
10839 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
10840 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
10841 RTX_FRAME_RELATED_P (insn) = 1;
10842 }
10843
10844 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
10845 DRAP is needed and stack realignment is really needed after reload. */
10846 if (stack_realign_drap)
10847 {
10848 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10849
10850 /* Only need to push the parameter pointer reg if it is call-saved. */
10851 if (!call_used_regs[REGNO (crtl->drap_reg)])
10852 {
10853 /* Push arg pointer reg */
10854 insn = emit_insn (gen_push (crtl->drap_reg));
10855 RTX_FRAME_RELATED_P (insn) = 1;
10856 }
10857
10858 /* Grab the argument pointer. */
10859 t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
10860 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
10861 RTX_FRAME_RELATED_P (insn) = 1;
10862 m->fs.cfa_reg = crtl->drap_reg;
10863 m->fs.cfa_offset = 0;
10864
10865 /* Align the stack. */
10866 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10867 stack_pointer_rtx,
10868 GEN_INT (-align_bytes)));
10869 RTX_FRAME_RELATED_P (insn) = 1;
10870
10871 /* Replicate the return address on the stack so that the return
10872 address can be reached via the (argp - 1) slot. This is needed
10873 to implement the macro RETURN_ADDR_RTX and the intrinsic function
10874 expand_builtin_return_addr, etc. */
10875 t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
10876 t = gen_frame_mem (word_mode, t);
10877 insn = emit_insn (gen_push (t));
10878 RTX_FRAME_RELATED_P (insn) = 1;
10879
10880 /* For the purposes of frame and register save area addressing,
10881 we've started over with a new frame. */
10882 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10883 m->fs.realigned = true;
10884 }
10885
10886 int_registers_saved = (frame.nregs == 0);
10887 sse_registers_saved = (frame.nsseregs == 0);
10888
10889 if (frame_pointer_needed && !m->fs.fp_valid)
10890 {
10891 /* Note: AT&T enter does NOT have reversed args. Enter is probably
10892 slower on all targets. Also sdb doesn't like it. */
10893 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
10894 RTX_FRAME_RELATED_P (insn) = 1;
10895
10896 /* Push registers now, before setting the frame pointer
10897 on SEH target. */
10898 if (!int_registers_saved
10899 && TARGET_SEH
10900 && !frame.save_regs_using_mov)
10901 {
10902 ix86_emit_save_regs ();
10903 int_registers_saved = true;
10904 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
10905 }
10906
10907 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
10908 {
10909 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
10910 RTX_FRAME_RELATED_P (insn) = 1;
10911
10912 if (m->fs.cfa_reg == stack_pointer_rtx)
10913 m->fs.cfa_reg = hard_frame_pointer_rtx;
10914 m->fs.fp_offset = m->fs.sp_offset;
10915 m->fs.fp_valid = true;
10916 }
10917 }
10918
10919 if (!int_registers_saved)
10920 {
10921 /* If saving registers via PUSH, do so now. */
10922 if (!frame.save_regs_using_mov)
10923 {
10924 ix86_emit_save_regs ();
10925 int_registers_saved = true;
10926 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
10927 }
10928
10929 /* When using the red zone we may start register saving before allocating
10930 the stack frame, saving one cycle of the prologue. However, avoid
10931 doing this if we have to probe the stack; at least on x86_64 the
10932 stack probe can turn into a call that clobbers a red zone location. */
10933 else if (ix86_using_red_zone ()
10934 && (! TARGET_STACK_PROBE
10935 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
10936 {
10937 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
10938 int_registers_saved = true;
10939 }
10940 }
10941
10942 if (stack_realign_fp)
10943 {
10944 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10945 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
10946
10947 /* The computation of the size of the re-aligned stack frame means
10948 that we must allocate the size of the register save area before
10949 performing the actual alignment. Otherwise we cannot guarantee
10950 that there's enough storage above the realignment point. */
10951 if (m->fs.sp_offset != frame.sse_reg_save_offset)
10952 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10953 GEN_INT (m->fs.sp_offset
10954 - frame.sse_reg_save_offset),
10955 -1, false);
10956
10957 /* Align the stack. */
10958 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10959 stack_pointer_rtx,
10960 GEN_INT (-align_bytes)));
10961
10962 /* For the purposes of register save area addressing, the stack
10963 pointer is no longer valid. As for the value of sp_offset,
10964 see ix86_compute_frame_layout, which we need to match in order
10965 to pass verification of stack_pointer_offset at the end. */
10966 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
10967 m->fs.sp_valid = false;
10968 }
10969
10970 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
10971
10972 if (flag_stack_usage_info)
10973 {
10974 /* We start to count from ARG_POINTER. */
10975 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
10976
10977 /* If it was realigned, take into account the fake frame. */
10978 if (stack_realign_drap)
10979 {
10980 if (ix86_static_chain_on_stack)
10981 stack_size += UNITS_PER_WORD;
10982
10983 if (!call_used_regs[REGNO (crtl->drap_reg)])
10984 stack_size += UNITS_PER_WORD;
10985
10986 /* This over-estimates by 1 minimal-stack-alignment-unit but
10987 mitigates that by counting in the new return address slot. */
10988 current_function_dynamic_stack_size
10989 += crtl->stack_alignment_needed / BITS_PER_UNIT;
10990 }
10991
10992 current_function_static_stack_size = stack_size;
10993 }
10994
10995 /* On SEH target with very large frame size, allocate an area to save
10996 SSE registers (as the very large allocation won't be described). */
10997 if (TARGET_SEH
10998 && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
10999 && !sse_registers_saved)
11000 {
11001 HOST_WIDE_INT sse_size =
11002 frame.sse_reg_save_offset - frame.reg_save_offset;
11003
11004 gcc_assert (int_registers_saved);
11005
11006 /* No need to do stack checking as the area will be immediately
11007 written. */
11008 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11009 GEN_INT (-sse_size), -1,
11010 m->fs.cfa_reg == stack_pointer_rtx);
11011 allocate -= sse_size;
11012 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11013 sse_registers_saved = true;
11014 }
11015
11016 /* The stack has already been decremented by the instruction calling us,
11017 so probe if the size is non-negative to preserve the protection area. */
11018 if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
11019 {
11020 /* We expect the registers to be saved when probes are used. */
11021 gcc_assert (int_registers_saved);
11022
11023 if (STACK_CHECK_MOVING_SP)
11024 {
11025 if (!(crtl->is_leaf && !cfun->calls_alloca
11026 && allocate <= PROBE_INTERVAL))
11027 {
11028 ix86_adjust_stack_and_probe (allocate);
11029 allocate = 0;
11030 }
11031 }
11032 else
11033 {
11034 HOST_WIDE_INT size = allocate;
11035
11036 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
11037 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
11038
11039 if (TARGET_STACK_PROBE)
11040 {
11041 if (crtl->is_leaf && !cfun->calls_alloca)
11042 {
11043 if (size > PROBE_INTERVAL)
11044 ix86_emit_probe_stack_range (0, size);
11045 }
11046 else
11047 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
11048 }
11049 else
11050 {
11051 if (crtl->is_leaf && !cfun->calls_alloca)
11052 {
11053 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
11054 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT,
11055 size - STACK_CHECK_PROTECT);
11056 }
11057 else
11058 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
11059 }
11060 }
11061 }
11062
11063 if (allocate == 0)
11064 ;
11065 else if (!ix86_target_stack_probe ()
11066 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
11067 {
11068 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11069 GEN_INT (-allocate), -1,
11070 m->fs.cfa_reg == stack_pointer_rtx);
11071 }
11072 else
11073 {
11074 rtx eax = gen_rtx_REG (Pmode, AX_REG);
11075 rtx r10 = NULL;
11076 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
11077 const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
11078 bool eax_live = ix86_eax_live_at_start_p ();
11079 bool r10_live = false;
11080
11081 if (TARGET_64BIT)
11082 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
11083
11084 if (eax_live)
11085 {
11086 insn = emit_insn (gen_push (eax));
11087 allocate -= UNITS_PER_WORD;
11088 /* Note that SEH directives need to continue tracking the stack
11089 pointer even after the frame pointer has been set up. */
11090 if (sp_is_cfa_reg || TARGET_SEH)
11091 {
11092 if (sp_is_cfa_reg)
11093 m->fs.cfa_offset += UNITS_PER_WORD;
11094 RTX_FRAME_RELATED_P (insn) = 1;
11095 }
11096 }
11097
11098 if (r10_live)
11099 {
11100 r10 = gen_rtx_REG (Pmode, R10_REG);
11101 insn = emit_insn (gen_push (r10));
11102 allocate -= UNITS_PER_WORD;
11103 if (sp_is_cfa_reg || TARGET_SEH)
11104 {
11105 if (sp_is_cfa_reg)
11106 m->fs.cfa_offset += UNITS_PER_WORD;
11107 RTX_FRAME_RELATED_P (insn) = 1;
11108 }
11109 }
11110
11111 emit_move_insn (eax, GEN_INT (allocate));
11112 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
11113
11114 /* Use the fact that AX still contains ALLOCATE. */
11115 adjust_stack_insn = (Pmode == DImode
11116 ? gen_pro_epilogue_adjust_stack_di_sub
11117 : gen_pro_epilogue_adjust_stack_si_sub);
11118
11119 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
11120 stack_pointer_rtx, eax));
11121
11122 if (sp_is_cfa_reg || TARGET_SEH)
11123 {
11124 if (sp_is_cfa_reg)
11125 m->fs.cfa_offset += allocate;
11126 RTX_FRAME_RELATED_P (insn) = 1;
11127 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11128 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11129 plus_constant (Pmode, stack_pointer_rtx,
11130 -allocate)));
11131 }
11132 m->fs.sp_offset += allocate;
11133
11134 /* Use stack_pointer_rtx for relative addressing so that code
11135 works for realigned stack, too. */
11136 if (r10_live && eax_live)
11137 {
11138 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11139 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
11140 gen_frame_mem (word_mode, t));
11141 t = plus_constant (Pmode, t, UNITS_PER_WORD);
11142 emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
11143 gen_frame_mem (word_mode, t));
11144 }
11145 else if (eax_live || r10_live)
11146 {
11147 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11148 emit_move_insn (gen_rtx_REG (word_mode,
11149 (eax_live ? AX_REG : R10_REG)),
11150 gen_frame_mem (word_mode, t));
11151 }
11152 }
11153 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
11154
11155 /* If we haven't already set up the frame pointer, do so now. */
11156 if (frame_pointer_needed && !m->fs.fp_valid)
11157 {
11158 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
11159 GEN_INT (frame.stack_pointer_offset
11160 - frame.hard_frame_pointer_offset));
11161 insn = emit_insn (insn);
11162 RTX_FRAME_RELATED_P (insn) = 1;
11163 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
11164
11165 if (m->fs.cfa_reg == stack_pointer_rtx)
11166 m->fs.cfa_reg = hard_frame_pointer_rtx;
11167 m->fs.fp_offset = frame.hard_frame_pointer_offset;
11168 m->fs.fp_valid = true;
11169 }
11170
11171 if (!int_registers_saved)
11172 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
11173 if (!sse_registers_saved)
11174 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11175
11176 pic_reg_used = false;
11177 /* We don't use the PIC register for PE-COFF targets. */
11178 if (pic_offset_table_rtx
11179 && !TARGET_PECOFF
11180 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
11181 || crtl->profile))
11182 {
11183 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
11184
11185 if (alt_pic_reg_used != INVALID_REGNUM)
11186 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
11187
11188 pic_reg_used = true;
11189 }
11190
11191 if (pic_reg_used)
11192 {
11193 if (TARGET_64BIT)
11194 {
11195 if (ix86_cmodel == CM_LARGE_PIC)
11196 {
11197 rtx label, tmp_reg;
11198
11199 gcc_assert (Pmode == DImode);
11200 label = gen_label_rtx ();
11201 emit_label (label);
11202 LABEL_PRESERVE_P (label) = 1;
11203 tmp_reg = gen_rtx_REG (Pmode, R11_REG);
11204 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
11205 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
11206 label));
11207 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
11208 insn = emit_insn (ix86_gen_add3 (pic_offset_table_rtx,
11209 pic_offset_table_rtx, tmp_reg));
11210 }
11211 else
11212 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
11213 }
11214 else
11215 {
11216 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
11217 RTX_FRAME_RELATED_P (insn) = 1;
11218 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
11219 }
11220 }
11221
11222 /* In the pic_reg_used case, make sure that the GOT load isn't deleted
11223 when mcount needs it. The blockage to avoid call movement across the
11224 mcount call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
11225 note. */
11226 if (crtl->profile && !flag_fentry && pic_reg_used)
11227 emit_insn (gen_prologue_use (pic_offset_table_rtx));
11228
11229 if (crtl->drap_reg && !crtl->stack_realign_needed)
11230 {
11231 /* vDRAP is set up, but after reload it turns out stack realignment
11232 isn't necessary; here we emit prologue code to set up DRAP
11233 without the stack realignment adjustment. */
11234 t = choose_baseaddr (0);
11235 emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
11236 }
11237
11238 /* Prevent instructions from being scheduled into register save push
11239 sequence when access to the redzone area is done through frame pointer.
11240 The offset between the frame pointer and the stack pointer is calculated
11241 relative to the value of the stack pointer at the end of the function
11242 prologue, and moving instructions that access redzone area via frame
11243 pointer inside push sequence violates this assumption. */
11244 if (frame_pointer_needed && frame.red_zone_size)
11245 emit_insn (gen_memory_blockage ());
11246
11247 /* Emit cld instruction if stringops are used in the function. */
11248 if (TARGET_CLD && ix86_current_function_needs_cld)
11249 emit_insn (gen_cld ());
11250
11251 /* SEH requires that the prologue end within 256 bytes of the start of
11252 the function. Prevent instruction schedules that would extend that.
11253 Further, prevent alloca modifications to the stack pointer from being
11254 combined with prologue modifications. */
11255 if (TARGET_SEH)
11256 emit_insn (gen_prologue_use (stack_pointer_rtx));
11257 }
11258
11259 /* Emit code to restore REG using a POP insn. */
11260
11261 static void
11262 ix86_emit_restore_reg_using_pop (rtx reg)
11263 {
11264 struct machine_function *m = cfun->machine;
11265 rtx insn = emit_insn (gen_pop (reg));
11266
11267 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
11268 m->fs.sp_offset -= UNITS_PER_WORD;
11269
11270 if (m->fs.cfa_reg == crtl->drap_reg
11271 && REGNO (reg) == REGNO (crtl->drap_reg))
11272 {
11273 /* Previously we'd represented the CFA as an expression
11274 like *(%ebp - 8). We've just popped that value from
11275 the stack, which means we need to reset the CFA to
11276 the drap register. This will remain until we restore
11277 the stack pointer. */
11278 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11279 RTX_FRAME_RELATED_P (insn) = 1;
11280
11281 /* This means that the DRAP register is valid for addressing too. */
11282 m->fs.drap_valid = true;
11283 return;
11284 }
11285
11286 if (m->fs.cfa_reg == stack_pointer_rtx)
11287 {
11288 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
11289 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
11290 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
11291 RTX_FRAME_RELATED_P (insn) = 1;
11292
11293 m->fs.cfa_offset -= UNITS_PER_WORD;
11294 }
11295
11296 /* When the frame pointer is the CFA, and we pop it, we are
11297 swapping back to the stack pointer as the CFA. This happens
11298 for stack frames that don't allocate other data, so we assume
11299 the stack pointer is now pointing at the return address, i.e.
11300 the function entry state, which makes the offset 1 word. */
11301 if (reg == hard_frame_pointer_rtx)
11302 {
11303 m->fs.fp_valid = false;
11304 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11305 {
11306 m->fs.cfa_reg = stack_pointer_rtx;
11307 m->fs.cfa_offset -= UNITS_PER_WORD;
11308
11309 add_reg_note (insn, REG_CFA_DEF_CFA,
11310 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11311 GEN_INT (m->fs.cfa_offset)));
11312 RTX_FRAME_RELATED_P (insn) = 1;
11313 }
11314 }
11315 }
11316
11317 /* Emit code to restore saved registers using POP insns. */
11318
11319 static void
11320 ix86_emit_restore_regs_using_pop (void)
11321 {
11322 unsigned int regno;
11323
11324 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11325 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
11326 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
11327 }
11328
11329 /* Emit code and notes for the LEAVE instruction. */
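/* leave behaves like "movl %ebp, %esp; popl %ebp" (or the 64-bit
   equivalent), which is why the frame state updates below revalidate the
   stack pointer at fp_offset minus one word and invalidate the frame
   pointer.  */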
11330
11331 static void
11332 ix86_emit_leave (void)
11333 {
11334 struct machine_function *m = cfun->machine;
11335 rtx insn = emit_insn (ix86_gen_leave ());
11336
11337 ix86_add_queued_cfa_restore_notes (insn);
11338
11339 gcc_assert (m->fs.fp_valid);
11340 m->fs.sp_valid = true;
11341 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
11342 m->fs.fp_valid = false;
11343
11344 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11345 {
11346 m->fs.cfa_reg = stack_pointer_rtx;
11347 m->fs.cfa_offset = m->fs.sp_offset;
11348
11349 add_reg_note (insn, REG_CFA_DEF_CFA,
11350 plus_constant (Pmode, stack_pointer_rtx,
11351 m->fs.sp_offset));
11352 RTX_FRAME_RELATED_P (insn) = 1;
11353 }
11354 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
11355 m->fs.fp_offset);
11356 }
11357
11358 /* Emit code to restore saved registers using MOV insns.
11359 First register is restored from CFA - CFA_OFFSET. */
11360 static void
11361 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
11362 bool maybe_eh_return)
11363 {
11364 struct machine_function *m = cfun->machine;
11365 unsigned int regno;
11366
11367 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11368 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11369 {
11370 rtx reg = gen_rtx_REG (word_mode, regno);
11371 rtx insn, mem;
11372
11373 mem = choose_baseaddr (cfa_offset);
11374 mem = gen_frame_mem (word_mode, mem);
11375 insn = emit_move_insn (reg, mem);
11376
11377 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
11378 {
11379 /* Previously we'd represented the CFA as an expression
11380 like *(%ebp - 8). We've just loaded that value from
11381 the stack, which means we need to reset the CFA to
11382 the drap register. This will remain until we restore
11383 the stack pointer. */
11384 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11385 RTX_FRAME_RELATED_P (insn) = 1;
11386
11387 /* This means that the DRAP register is valid for addressing. */
11388 m->fs.drap_valid = true;
11389 }
11390 else
11391 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
11392
11393 cfa_offset -= UNITS_PER_WORD;
11394 }
11395 }
11396
11397 /* Emit code to restore saved registers using MOV insns.
11398 First register is restored from CFA - CFA_OFFSET. */
11399 static void
11400 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
11401 bool maybe_eh_return)
11402 {
11403 unsigned int regno;
11404
11405 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11406 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11407 {
11408 rtx reg = gen_rtx_REG (V4SFmode, regno);
11409 rtx mem;
11410
11411 mem = choose_baseaddr (cfa_offset);
11412 mem = gen_rtx_MEM (V4SFmode, mem);
11413 set_mem_align (mem, 128);
11414 emit_move_insn (reg, mem);
11415
11416 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
11417
11418 cfa_offset -= 16;
11419 }
11420 }
11421
11422 /* Restore function stack, frame, and registers. */
11423
11424 void
11425 ix86_expand_epilogue (int style)
11426 {
11427 struct machine_function *m = cfun->machine;
11428 struct machine_frame_state frame_state_save = m->fs;
11429 struct ix86_frame frame;
11430 bool restore_regs_via_mov;
11431 bool using_drap;
11432
11433 ix86_finalize_stack_realign_flags ();
11434 ix86_compute_frame_layout (&frame);
11435
11436 m->fs.sp_valid = (!frame_pointer_needed
11437 || (crtl->sp_is_unchanging
11438 && !stack_realign_fp));
11439 gcc_assert (!m->fs.sp_valid
11440 || m->fs.sp_offset == frame.stack_pointer_offset);
11441
11442 /* The FP must be valid if the frame pointer is present. */
11443 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
11444 gcc_assert (!m->fs.fp_valid
11445 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
11446
11447 /* We must have *some* valid pointer to the stack frame. */
11448 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
11449
11450 /* The DRAP is never valid at this point. */
11451 gcc_assert (!m->fs.drap_valid);
11452
11453 /* See the comment about red zone and frame
11454 pointer usage in ix86_expand_prologue. */
11455 if (frame_pointer_needed && frame.red_zone_size)
11456 emit_insn (gen_memory_blockage ());
11457
11458 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
11459 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
11460
11461 /* Determine the CFA offset of the end of the red-zone. */
11462 m->fs.red_zone_offset = 0;
11463 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
11464 {
11465 /* The red-zone begins below the return address. */
11466 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
11467
11468 /* When the register save area is in the aligned portion of
11469 the stack, determine the maximum runtime displacement that
11470 matches up with the aligned frame. */
11471 if (stack_realign_drap)
11472 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
11473 + UNITS_PER_WORD);
11474 }
11475
11476 /* Special care must be taken for the normal return case of a function
11477 using eh_return: the eax and edx registers are marked as saved, but
11478 not restored along this path. Adjust the save location to match. */
11479 if (crtl->calls_eh_return && style != 2)
11480 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
11481
11482 /* EH_RETURN requires the use of moves to function properly. */
11483 if (crtl->calls_eh_return)
11484 restore_regs_via_mov = true;
11485 /* SEH requires the use of pops to identify the epilogue. */
11486 else if (TARGET_SEH)
11487 restore_regs_via_mov = false;
11488 /* If we're only restoring one register and sp is not valid, then
11489 use a move instruction to restore the register, since it's
11490 less work than reloading sp and popping the register. */
11491 else if (!m->fs.sp_valid && frame.nregs <= 1)
11492 restore_regs_via_mov = true;
11493 else if (TARGET_EPILOGUE_USING_MOVE
11494 && cfun->machine->use_fast_prologue_epilogue
11495 && (frame.nregs > 1
11496 || m->fs.sp_offset != frame.reg_save_offset))
11497 restore_regs_via_mov = true;
11498 else if (frame_pointer_needed
11499 && !frame.nregs
11500 && m->fs.sp_offset != frame.reg_save_offset)
11501 restore_regs_via_mov = true;
11502 else if (frame_pointer_needed
11503 && TARGET_USE_LEAVE
11504 && cfun->machine->use_fast_prologue_epilogue
11505 && frame.nregs == 1)
11506 restore_regs_via_mov = true;
11507 else
11508 restore_regs_via_mov = false;
11509
11510 if (restore_regs_via_mov || frame.nsseregs)
11511 {
11512 /* Ensure that the entire register save area is addressable via
11513 the stack pointer, if we will restore via sp. */
11514 if (TARGET_64BIT
11515 && m->fs.sp_offset > 0x7fffffff
11516 && !(m->fs.fp_valid || m->fs.drap_valid)
11517 && (frame.nsseregs + frame.nregs) != 0)
11518 {
11519 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11520 GEN_INT (m->fs.sp_offset
11521 - frame.sse_reg_save_offset),
11522 style,
11523 m->fs.cfa_reg == stack_pointer_rtx);
11524 }
11525 }
11526
11527 /* If there are any SSE registers to restore, then we have to do it
11528 via moves, since there's obviously no pop for SSE regs. */
11529 if (frame.nsseregs)
11530 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
11531 style == 2);
11532
11533 if (restore_regs_via_mov)
11534 {
11535 rtx t;
11536
11537 if (frame.nregs)
11538 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
11539
11540 /* eh_return epilogues need %ecx added to the stack pointer. */
11541 if (style == 2)
11542 {
11543 rtx insn, sa = EH_RETURN_STACKADJ_RTX;
11544
11545 /* Stack align doesn't work with eh_return. */
11546 gcc_assert (!stack_realign_drap);
11547 /* Neither do regparm nested functions. */
11548 gcc_assert (!ix86_static_chain_on_stack);
11549
11550 if (frame_pointer_needed)
11551 {
11552 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
11553 t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
11554 emit_insn (gen_rtx_SET (VOIDmode, sa, t));
11555
11556 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
11557 insn = emit_move_insn (hard_frame_pointer_rtx, t);
11558
11559 /* Note that we use SA as a temporary CFA, as the return
11560 address is at the proper place relative to it. We
11561 pretend this happens at the FP restore insn because
11562 prior to this insn the FP would be stored at the wrong
11563 offset relative to SA, and after this insn we have no
11564 other reasonable register to use for the CFA. We don't
11565 bother resetting the CFA to the SP for the duration of
11566 the return insn. */
11567 add_reg_note (insn, REG_CFA_DEF_CFA,
11568 plus_constant (Pmode, sa, UNITS_PER_WORD));
11569 ix86_add_queued_cfa_restore_notes (insn);
11570 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
11571 RTX_FRAME_RELATED_P (insn) = 1;
11572
11573 m->fs.cfa_reg = sa;
11574 m->fs.cfa_offset = UNITS_PER_WORD;
11575 m->fs.fp_valid = false;
11576
11577 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
11578 const0_rtx, style, false);
11579 }
11580 else
11581 {
11582 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
11583 t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
11584 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
11585 ix86_add_queued_cfa_restore_notes (insn);
11586
11587 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
11588 if (m->fs.cfa_offset != UNITS_PER_WORD)
11589 {
11590 m->fs.cfa_offset = UNITS_PER_WORD;
11591 add_reg_note (insn, REG_CFA_DEF_CFA,
11592 plus_constant (Pmode, stack_pointer_rtx,
11593 UNITS_PER_WORD));
11594 RTX_FRAME_RELATED_P (insn) = 1;
11595 }
11596 }
11597 m->fs.sp_offset = UNITS_PER_WORD;
11598 m->fs.sp_valid = true;
11599 }
11600 }
11601 else
11602 {
11603 /* SEH requires that the function end with (1) a stack adjustment
11604 if necessary, (2) a sequence of pops, and (3) a return or
11605 jump instruction. Prevent insns from the function body from
11606 being scheduled into this sequence. */
11607 if (TARGET_SEH)
11608 {
11609 /* Prevent a catch region from being adjacent to the standard
11610 epilogue sequence. Unfortunately crtl->uses_eh_lsda and
11611 several other flags that would be interesting to test are
11612 not yet set up. */
11613 if (flag_non_call_exceptions)
11614 emit_insn (gen_nops (const1_rtx));
11615 else
11616 emit_insn (gen_blockage ());
11617 }
11618
11619 /* The first step is to deallocate the stack frame so that we can
11620 pop the registers. Also do it on SEH targets for very large
11621 frames, as the emitted instructions aren't allowed by the ABI in
11622 epilogues. */
11623 if (!m->fs.sp_valid
11624 || (TARGET_SEH
11625 && (m->fs.sp_offset - frame.reg_save_offset
11626 >= SEH_MAX_FRAME_SIZE)))
11627 {
11628 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
11629 GEN_INT (m->fs.fp_offset
11630 - frame.reg_save_offset),
11631 style, false);
11632 }
11633 else if (m->fs.sp_offset != frame.reg_save_offset)
11634 {
11635 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11636 GEN_INT (m->fs.sp_offset
11637 - frame.reg_save_offset),
11638 style,
11639 m->fs.cfa_reg == stack_pointer_rtx);
11640 }
11641
11642 ix86_emit_restore_regs_using_pop ();
11643 }
11644
11645 /* If we used a frame pointer and haven't already got rid of it,
11646 then do so now. */
11647 if (m->fs.fp_valid)
11648 {
11649 /* If the stack pointer is valid and pointing at the frame
11650 pointer store address, then we only need a pop. */
11651 if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
11652 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
11653 /* The leave insn results in shorter dependency chains on CPUs that
11654 are able to grok it fast. */
11655 else if (TARGET_USE_LEAVE
11656 || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
11657 || !cfun->machine->use_fast_prologue_epilogue)
11658 ix86_emit_leave ();
11659 else
11660 {
11661 pro_epilogue_adjust_stack (stack_pointer_rtx,
11662 hard_frame_pointer_rtx,
11663 const0_rtx, style, !using_drap);
11664 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
11665 }
11666 }
11667
11668 if (using_drap)
11669 {
11670 int param_ptr_offset = UNITS_PER_WORD;
11671 rtx insn;
11672
11673 gcc_assert (stack_realign_drap);
11674
11675 if (ix86_static_chain_on_stack)
11676 param_ptr_offset += UNITS_PER_WORD;
11677 if (!call_used_regs[REGNO (crtl->drap_reg)])
11678 param_ptr_offset += UNITS_PER_WORD;
11679
11680 insn = emit_insn (gen_rtx_SET
11681 (VOIDmode, stack_pointer_rtx,
11682 gen_rtx_PLUS (Pmode,
11683 crtl->drap_reg,
11684 GEN_INT (-param_ptr_offset))));
11685 m->fs.cfa_reg = stack_pointer_rtx;
11686 m->fs.cfa_offset = param_ptr_offset;
11687 m->fs.sp_offset = param_ptr_offset;
11688 m->fs.realigned = false;
11689
11690 add_reg_note (insn, REG_CFA_DEF_CFA,
11691 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11692 GEN_INT (param_ptr_offset)));
11693 RTX_FRAME_RELATED_P (insn) = 1;
11694
11695 if (!call_used_regs[REGNO (crtl->drap_reg)])
11696 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
11697 }
11698
11699 /* At this point the stack pointer must be valid, and we must have
11700 restored all of the registers. We may not have deallocated the
11701 entire stack frame. We've delayed this until now because it may
11702 be possible to merge the local stack deallocation with the
11703 deallocation forced by ix86_static_chain_on_stack. */
11704 gcc_assert (m->fs.sp_valid);
11705 gcc_assert (!m->fs.fp_valid);
11706 gcc_assert (!m->fs.realigned);
11707 if (m->fs.sp_offset != UNITS_PER_WORD)
11708 {
11709 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11710 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
11711 style, true);
11712 }
11713 else
11714 ix86_add_queued_cfa_restore_notes (get_last_insn ());
11715
11716 /* Sibcall epilogues don't want a return instruction. */
11717 if (style == 0)
11718 {
11719 m->fs = frame_state_save;
11720 return;
11721 }
11722
11723 if (crtl->args.pops_args && crtl->args.size)
11724 {
11725 rtx popc = GEN_INT (crtl->args.pops_args);
11726
11727 /* i386 can only pop 64K bytes.  If asked to pop more, pop the return
11728 address, do an explicit add, and jump indirectly to the caller.  */
11729
11730 if (crtl->args.pops_args >= 65536)
11731 {
11732 rtx ecx = gen_rtx_REG (SImode, CX_REG);
11733 rtx insn;
11734
11735 /* There is no "pascal" calling convention in any 64bit ABI. */
11736 gcc_assert (!TARGET_64BIT);
11737
11738 insn = emit_insn (gen_pop (ecx));
11739 m->fs.cfa_offset -= UNITS_PER_WORD;
11740 m->fs.sp_offset -= UNITS_PER_WORD;
11741
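/* Record for CFI that the pop adjusted the CFA and that the return
   address is now held in %ecx.  */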
11742 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
11743 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
11744 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
11745 add_reg_note (insn, REG_CFA_REGISTER,
11746 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
11747 RTX_FRAME_RELATED_P (insn) = 1;
11748
11749 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11750 popc, -1, true);
11751 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
11752 }
11753 else
11754 emit_jump_insn (gen_simple_return_pop_internal (popc));
11755 }
11756 else
11757 emit_jump_insn (gen_simple_return_internal ());
11758
11759 /* Restore the state back to the state from the prologue,
11760 so that it's correct for the next epilogue. */
11761 m->fs = frame_state_save;
11762 }
11763
11764 /* Reset from the function's potential modifications. */
11765
11766 static void
11767 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED, HOST_WIDE_INT)
11768 {
11769 if (pic_offset_table_rtx)
11770 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
11771 #if TARGET_MACHO
11772 /* Mach-O doesn't support labels at the end of objects, so if
11773 it looks like we might want one, insert a NOP. */
11774 {
11775 rtx insn = get_last_insn ();
11776 rtx deleted_debug_label = NULL_RTX;
11777 while (insn
11778 && NOTE_P (insn)
11779 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
11780 {
11781 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
11782 notes only, instead set their CODE_LABEL_NUMBER to -1,
11783 otherwise there would be code generation differences
11784 in between -g and -g0. */
11785 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
11786 deleted_debug_label = insn;
11787 insn = PREV_INSN (insn);
11788 }
11789 if (insn
11790 && (LABEL_P (insn)
11791 || (NOTE_P (insn)
11792 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
11793 fputs ("\tnop\n", file);
11794 else if (deleted_debug_label)
11795 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
11796 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
11797 CODE_LABEL_NUMBER (insn) = -1;
11798 }
11799 #endif
11800
11801 }
11802
11803 /* Return a scratch register to use in the split stack prologue. The
11804 split stack prologue is used for -fsplit-stack.  It consists of the first
11805 instructions in the function, even before the regular prologue.
11806 The scratch register can be any caller-saved register which is not
11807 used for parameters or for the static chain. */
11808
11809 static unsigned int
11810 split_stack_prologue_scratch_regno (void)
11811 {
11812 if (TARGET_64BIT)
11813 return R11_REG;
11814 else
11815 {
11816 bool is_fastcall, is_thiscall;
11817 int regparm;
11818
11819 is_fastcall = (lookup_attribute ("fastcall",
11820 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
11821 != NULL);
11822 is_thiscall = (lookup_attribute ("thiscall",
11823 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
11824 != NULL);
11825 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
11826
11827 if (is_fastcall)
11828 {
11829 if (DECL_STATIC_CHAIN (cfun->decl))
11830 {
11831 sorry ("-fsplit-stack does not support fastcall with "
11832 "nested function");
11833 return INVALID_REGNUM;
11834 }
11835 return AX_REG;
11836 }
11837 else if (is_thiscall)
11838 {
11839 if (!DECL_STATIC_CHAIN (cfun->decl))
11840 return DX_REG;
11841 return AX_REG;
11842 }
11843 else if (regparm < 3)
11844 {
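/* Register parameters use %eax, %edx and %ecx in that order, and the
   static chain of a nested function also lives in %ecx.  */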
11845 if (!DECL_STATIC_CHAIN (cfun->decl))
11846 return CX_REG;
11847 else
11848 {
11849 if (regparm >= 2)
11850 {
11851 sorry ("-fsplit-stack does not support 2 register "
11852 "parameters for a nested function");
11853 return INVALID_REGNUM;
11854 }
11855 return DX_REG;
11856 }
11857 }
11858 else
11859 {
11860 /* FIXME: We could make this work by pushing a register
11861 around the addition and comparison. */
11862 sorry ("-fsplit-stack does not support 3 register parameters");
11863 return INVALID_REGNUM;
11864 }
11865 }
11866 }
11867
11868 /* A SYMBOL_REF for the function which allocates new stack space for
11869 -fsplit-stack. */
11870
11871 static GTY(()) rtx split_stack_fn;
11872
11873 /* A SYMBOL_REF for the variant of the __morestack function used with
11874 the large model.  */
11875
11876 static GTY(()) rtx split_stack_fn_large;
11877
11878 /* Handle -fsplit-stack. These are the first instructions in the
11879 function, even before the regular prologue. */
11880
11881 void
11882 ix86_expand_split_stack_prologue (void)
11883 {
11884 struct ix86_frame frame;
11885 HOST_WIDE_INT allocate;
11886 unsigned HOST_WIDE_INT args_size;
11887 rtx label, limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
11888 rtx scratch_reg = NULL_RTX;
11889 rtx varargs_label = NULL_RTX;
11890 rtx fn;
11891
11892 gcc_assert (flag_split_stack && reload_completed);
11893
11894 ix86_finalize_stack_realign_flags ();
11895 ix86_compute_frame_layout (&frame);
11896 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
11897
11898 /* This is the label we will branch to if we have enough stack
11899 space. We expect the basic block reordering pass to reverse this
11900 branch if optimizing, so that we branch in the unlikely case. */
11901 label = gen_label_rtx ();
11902
11903 /* We need to compare the stack pointer minus the frame size with
11904 the stack boundary in the TCB. The stack boundary always gives
11905 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
11906 can compare directly. Otherwise we need to do an addition. */
11907
11908 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
11909 UNSPEC_STACK_CHECK);
11910 limit = gen_rtx_CONST (Pmode, limit);
11911 limit = gen_rtx_MEM (Pmode, limit);
11912 if (allocate < SPLIT_STACK_AVAILABLE)
11913 current = stack_pointer_rtx;
11914 else
11915 {
11916 unsigned int scratch_regno;
11917 rtx offset;
11918
11919 /* We need a scratch register to hold the stack pointer minus
11920 the required frame size. Since this is the very start of the
11921 function, the scratch register can be any caller-saved
11922 register which is not used for parameters. */
11923 offset = GEN_INT (- allocate);
11924 scratch_regno = split_stack_prologue_scratch_regno ();
11925 if (scratch_regno == INVALID_REGNUM)
11926 return;
11927 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
11928 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
11929 {
11930 /* We don't use ix86_gen_add3 in this case because it will
11931 want to split to lea, but when not optimizing the insn
11932 will not be split after this point. */
11933 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11934 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11935 offset)));
11936 }
11937 else
11938 {
11939 emit_move_insn (scratch_reg, offset);
11940 emit_insn (ix86_gen_add3 (scratch_reg, scratch_reg,
11941 stack_pointer_rtx));
11942 }
11943 current = scratch_reg;
11944 }
11945
11946 ix86_expand_branch (GEU, current, limit, label);
11947 jump_insn = get_last_insn ();
11948 JUMP_LABEL (jump_insn) = label;
11949
11950 /* Mark the jump as very likely to be taken. */
11951 add_int_reg_note (jump_insn, REG_BR_PROB,
11952 REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100);
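/* REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100 corresponds to a 99%
   taken probability.  */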
11953
11954 if (split_stack_fn == NULL_RTX)
11955 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
11956 fn = split_stack_fn;
11957
11958 /* Get more stack space. We pass in the desired stack space and the
11959 size of the arguments to copy to the new stack. In 32-bit mode
11960 we push the parameters; __morestack will return on a new stack
11961 anyhow. In 64-bit mode we pass the parameters in r10 and
11962 r11. */
11963 allocate_rtx = GEN_INT (allocate);
11964 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
11965 call_fusage = NULL_RTX;
11966 if (TARGET_64BIT)
11967 {
11968 rtx reg10, reg11;
11969
11970 reg10 = gen_rtx_REG (Pmode, R10_REG);
11971 reg11 = gen_rtx_REG (Pmode, R11_REG);
11972
11973 /* If this function uses a static chain, it will be in %r10.
11974 Preserve it across the call to __morestack. */
11975 if (DECL_STATIC_CHAIN (cfun->decl))
11976 {
11977 rtx rax;
11978
11979 rax = gen_rtx_REG (word_mode, AX_REG);
11980 emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
11981 use_reg (&call_fusage, rax);
11982 }
11983
11984 if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
11985 && !TARGET_PECOFF)
11986 {
11987 HOST_WIDE_INT argval;
11988
11989 gcc_assert (Pmode == DImode);
11990 /* When using the large model we need to load the address
11991 into a register, and we've run out of registers. So we
11992 switch to a different calling convention, and we call a
11993 different function: __morestack_large. We pass the
11994 argument size in the upper 32 bits of r10 and pass the
11995 frame size in the lower 32 bits. */
11996 gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
11997 gcc_assert ((args_size & 0xffffffff) == args_size);
11998
11999 if (split_stack_fn_large == NULL_RTX)
12000 split_stack_fn_large =
12001 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
12002
12003 if (ix86_cmodel == CM_LARGE_PIC)
12004 {
12005 rtx label, x;
12006
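/* Materialize the GOT base into r10 (the runtime address of LABEL
   plus the link-time offset from LABEL to the GOT), then load the
   @GOT entry of __morestack_large_model into r11.  */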
12007 label = gen_label_rtx ();
12008 emit_label (label);
12009 LABEL_PRESERVE_P (label) = 1;
12010 emit_insn (gen_set_rip_rex64 (reg10, label));
12011 emit_insn (gen_set_got_offset_rex64 (reg11, label));
12012 emit_insn (ix86_gen_add3 (reg10, reg10, reg11));
12013 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
12014 UNSPEC_GOT);
12015 x = gen_rtx_CONST (Pmode, x);
12016 emit_move_insn (reg11, x);
12017 x = gen_rtx_PLUS (Pmode, reg10, reg11);
12018 x = gen_const_mem (Pmode, x);
12019 emit_move_insn (reg11, x);
12020 }
12021 else
12022 emit_move_insn (reg11, split_stack_fn_large);
12023
12024 fn = reg11;
12025
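/* Pack args_size into the upper 32 bits and the frame size into the
   lower 32 bits.  The shift is split into two 16-bit steps so the
   shift count stays well below the width of HOST_WIDE_INT.  */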
12026 argval = ((args_size << 16) << 16) + allocate;
12027 emit_move_insn (reg10, GEN_INT (argval));
12028 }
12029 else
12030 {
12031 emit_move_insn (reg10, allocate_rtx);
12032 emit_move_insn (reg11, GEN_INT (args_size));
12033 use_reg (&call_fusage, reg11);
12034 }
12035
12036 use_reg (&call_fusage, reg10);
12037 }
12038 else
12039 {
12040 emit_insn (gen_push (GEN_INT (args_size)));
12041 emit_insn (gen_push (allocate_rtx));
12042 }
12043 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
12044 GEN_INT (UNITS_PER_WORD), constm1_rtx,
12045 NULL_RTX, false);
12046 add_function_usage_to (call_insn, call_fusage);
12047
12048 /* In order to make call/return prediction work right, we now need
12049 to execute a return instruction. See
12050 libgcc/config/i386/morestack.S for the details on how this works.
12051
12052 For flow purposes gcc must not see this as a return
12053 instruction--we need control flow to continue at the subsequent
12054 label. Therefore, we use an unspec. */
12055 gcc_assert (crtl->args.pops_args < 65536);
12056 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
12057
12058 /* If we are in 64-bit mode and this function uses a static chain,
12059 we saved %r10 in %rax before calling __morestack.  */
12060 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
12061 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
12062 gen_rtx_REG (word_mode, AX_REG));
12063
12064 /* If this function calls va_start, we need to store a pointer to
12065 the arguments on the old stack, because they may not have been
12066 all copied to the new stack. At this point the old stack can be
12067 found at the frame pointer value used by __morestack, because
12068 __morestack has set that up before calling back to us. Here we
12069 store that pointer in a scratch register, and in
12070 ix86_expand_prologue we store the scratch register in a stack
12071 slot. */
12072 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12073 {
12074 unsigned int scratch_regno;
12075 rtx frame_reg;
12076 int words;
12077
12078 scratch_regno = split_stack_prologue_scratch_regno ();
12079 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
12080 frame_reg = gen_rtx_REG (Pmode, BP_REG);
12081
12082 /* 64-bit:
12083 fp -> old fp value
12084 return address within this function
12085 return address of caller of this function
12086 stack arguments
12087 So we add three words to get to the stack arguments.
12088
12089 32-bit:
12090 fp -> old fp value
12091 return address within this function
12092 first argument to __morestack
12093 second argument to __morestack
12094 return address of caller of this function
12095 stack arguments
12096 So we add five words to get to the stack arguments.
12097 */
12098 words = TARGET_64BIT ? 3 : 5;
12099 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12100 gen_rtx_PLUS (Pmode, frame_reg,
12101 GEN_INT (words * UNITS_PER_WORD))));
12102
12103 varargs_label = gen_label_rtx ();
12104 emit_jump_insn (gen_jump (varargs_label));
12105 JUMP_LABEL (get_last_insn ()) = varargs_label;
12106
12107 emit_barrier ();
12108 }
12109
12110 emit_label (label);
12111 LABEL_NUSES (label) = 1;
12112
12113 /* If this function calls va_start, we now have to set the scratch
12114 register for the case where we do not call __morestack. In this
12115 case we need to set it based on the stack pointer. */
12116 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12117 {
12118 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12119 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12120 GEN_INT (UNITS_PER_WORD))));
12121
12122 emit_label (varargs_label);
12123 LABEL_NUSES (varargs_label) = 1;
12124 }
12125 }
12126
12127 /* We may have to tell the dataflow pass that the split stack prologue
12128 is initializing a scratch register. */
12129
12130 static void
12131 ix86_live_on_entry (bitmap regs)
12132 {
12133 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12134 {
12135 gcc_assert (flag_split_stack);
12136 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
12137 }
12138 }
12139 \f
12140 /* Extract the parts of an RTL expression that is a valid memory address
12141 for an instruction. Return 0 if the structure of the address is
12142 grossly off.  Return -1 if the address contains ASHIFT, so it is not
12143 strictly valid, but is still used for computing the length of an lea instruction.  */
12144
12145 int
12146 ix86_decompose_address (rtx addr, struct ix86_address *out)
12147 {
12148 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
12149 rtx base_reg, index_reg;
12150 HOST_WIDE_INT scale = 1;
12151 rtx scale_rtx = NULL_RTX;
12152 rtx tmp;
12153 int retval = 1;
12154 enum ix86_address_seg seg = SEG_DEFAULT;
12155
12156 /* Allow zero-extended SImode addresses;
12157 they will be emitted with the addr32 prefix.  */
12158 if (TARGET_64BIT && GET_MODE (addr) == DImode)
12159 {
12160 if (GET_CODE (addr) == ZERO_EXTEND
12161 && GET_MODE (XEXP (addr, 0)) == SImode)
12162 {
12163 addr = XEXP (addr, 0);
12164 if (CONST_INT_P (addr))
12165 return 0;
12166 }
12167 else if (GET_CODE (addr) == AND
12168 && const_32bit_mask (XEXP (addr, 1), DImode))
12169 {
12170 addr = simplify_gen_subreg (SImode, XEXP (addr, 0), DImode, 0);
12171 if (addr == NULL_RTX)
12172 return 0;
12173
12174 if (CONST_INT_P (addr))
12175 return 0;
12176 }
12177 }
12178
12179 /* Allow SImode subregs of DImode addresses;
12180 they will be emitted with the addr32 prefix.  */
12181 if (TARGET_64BIT && GET_MODE (addr) == SImode)
12182 {
12183 if (GET_CODE (addr) == SUBREG
12184 && GET_MODE (SUBREG_REG (addr)) == DImode)
12185 {
12186 addr = SUBREG_REG (addr);
12187 if (CONST_INT_P (addr))
12188 return 0;
12189 }
12190 }
12191
12192 if (REG_P (addr))
12193 base = addr;
12194 else if (GET_CODE (addr) == SUBREG)
12195 {
12196 if (REG_P (SUBREG_REG (addr)))
12197 base = addr;
12198 else
12199 return 0;
12200 }
12201 else if (GET_CODE (addr) == PLUS)
12202 {
12203 rtx addends[4], op;
12204 int n = 0, i;
12205
12206 op = addr;
12207 do
12208 {
12209 if (n >= 4)
12210 return 0;
12211 addends[n++] = XEXP (op, 1);
12212 op = XEXP (op, 0);
12213 }
12214 while (GET_CODE (op) == PLUS);
12215 if (n >= 4)
12216 return 0;
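/* The innermost non-PLUS operand is the final addend.  */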
12217 addends[n] = op;
12218
12219 for (i = n; i >= 0; --i)
12220 {
12221 op = addends[i];
12222 switch (GET_CODE (op))
12223 {
12224 case MULT:
12225 if (index)
12226 return 0;
12227 index = XEXP (op, 0);
12228 scale_rtx = XEXP (op, 1);
12229 break;
12230
12231 case ASHIFT:
12232 if (index)
12233 return 0;
12234 index = XEXP (op, 0);
12235 tmp = XEXP (op, 1);
12236 if (!CONST_INT_P (tmp))
12237 return 0;
12238 scale = INTVAL (tmp);
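/* Shift counts of 0..3 map to the scale factors 1, 2, 4 and 8 that a
   SIB byte can encode.  */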
12239 if ((unsigned HOST_WIDE_INT) scale > 3)
12240 return 0;
12241 scale = 1 << scale;
12242 break;
12243
12244 case ZERO_EXTEND:
12245 op = XEXP (op, 0);
12246 if (GET_CODE (op) != UNSPEC)
12247 return 0;
12248 /* FALLTHRU */
12249
12250 case UNSPEC:
12251 if (XINT (op, 1) == UNSPEC_TP
12252 && TARGET_TLS_DIRECT_SEG_REFS
12253 && seg == SEG_DEFAULT)
12254 seg = DEFAULT_TLS_SEG_REG;
12255 else
12256 return 0;
12257 break;
12258
12259 case SUBREG:
12260 if (!REG_P (SUBREG_REG (op)))
12261 return 0;
12262 /* FALLTHRU */
12263
12264 case REG:
12265 if (!base)
12266 base = op;
12267 else if (!index)
12268 index = op;
12269 else
12270 return 0;
12271 break;
12272
12273 case CONST:
12274 case CONST_INT:
12275 case SYMBOL_REF:
12276 case LABEL_REF:
12277 if (disp)
12278 return 0;
12279 disp = op;
12280 break;
12281
12282 default:
12283 return 0;
12284 }
12285 }
12286 }
12287 else if (GET_CODE (addr) == MULT)
12288 {
12289 index = XEXP (addr, 0); /* index*scale */
12290 scale_rtx = XEXP (addr, 1);
12291 }
12292 else if (GET_CODE (addr) == ASHIFT)
12293 {
12294 /* We're called for lea too, which implements ashift on occasion. */
12295 index = XEXP (addr, 0);
12296 tmp = XEXP (addr, 1);
12297 if (!CONST_INT_P (tmp))
12298 return 0;
12299 scale = INTVAL (tmp);
12300 if ((unsigned HOST_WIDE_INT) scale > 3)
12301 return 0;
12302 scale = 1 << scale;
12303 retval = -1;
12304 }
12305 else
12306 disp = addr; /* displacement */
12307
12308 if (index)
12309 {
12310 if (REG_P (index))
12311 ;
12312 else if (GET_CODE (index) == SUBREG
12313 && REG_P (SUBREG_REG (index)))
12314 ;
12315 else
12316 return 0;
12317 }
12318
12319 /* Extract the integral value of scale. */
12320 if (scale_rtx)
12321 {
12322 if (!CONST_INT_P (scale_rtx))
12323 return 0;
12324 scale = INTVAL (scale_rtx);
12325 }
12326
12327 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
12328 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
12329
12330 /* Avoid useless 0 displacement. */
12331 if (disp == const0_rtx && (base || index))
12332 disp = NULL_RTX;
12333
12334 /* Allow the arg pointer and stack pointer as index if there is no scaling.  */
12335 if (base_reg && index_reg && scale == 1
12336 && (index_reg == arg_pointer_rtx
12337 || index_reg == frame_pointer_rtx
12338 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
12339 {
12340 rtx tmp;
12341 tmp = base, base = index, index = tmp;
12342 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
12343 }
12344
12345 /* Special case: %ebp cannot be encoded as a base without a displacement.
12346 Similarly %r13. */
12347 if (!disp
12348 && base_reg
12349 && (base_reg == hard_frame_pointer_rtx
12350 || base_reg == frame_pointer_rtx
12351 || base_reg == arg_pointer_rtx
12352 || (REG_P (base_reg)
12353 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
12354 || REGNO (base_reg) == R13_REG))))
12355 disp = const0_rtx;
12356
12357 /* Special case: on K6, [%esi] causes the instruction to be vector decoded.
12358 Avoid this by transforming it to [%esi+0].
12359 Reload calls address legitimization without cfun defined, so we need
12360 to test cfun for being non-NULL. */
12361 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
12362 && base_reg && !index_reg && !disp
12363 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
12364 disp = const0_rtx;
12365
12366 /* Special case: encode reg+reg instead of reg*2. */
12367 if (!base && index && scale == 2)
12368 base = index, base_reg = index_reg, scale = 1;
12369
12370 /* Special case: scaling cannot be encoded without base or displacement. */
12371 if (!base && !disp && index && scale != 1)
12372 disp = const0_rtx;
12373
12374 out->base = base;
12375 out->index = index;
12376 out->disp = disp;
12377 out->scale = scale;
12378 out->seg = seg;
12379
12380 return retval;
12381 }
12382 \f
12383 /* Return cost of the memory address x.
12384 For i386, it is better to use a complex address than let gcc copy
12385 the address into a reg and make a new pseudo. But not if the address
12386 requires two regs - that would mean more pseudos with longer
12387 lifetimes. */
12388 static int
12389 ix86_address_cost (rtx x, enum machine_mode, addr_space_t, bool)
12390 {
12391 struct ix86_address parts;
12392 int cost = 1;
12393 int ok = ix86_decompose_address (x, &parts);
12394
12395 gcc_assert (ok);
12396
12397 if (parts.base && GET_CODE (parts.base) == SUBREG)
12398 parts.base = SUBREG_REG (parts.base);
12399 if (parts.index && GET_CODE (parts.index) == SUBREG)
12400 parts.index = SUBREG_REG (parts.index);
12401
12402 /* Attempt to minimize the number of registers in the address.  */
12403 if ((parts.base
12404 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
12405 || (parts.index
12406 && (!REG_P (parts.index)
12407 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
12408 cost++;
12409
12410 if (parts.base
12411 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
12412 && parts.index
12413 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
12414 && parts.base != parts.index)
12415 cost++;
12416
12417 /* The AMD-K6 doesn't like addresses with the ModR/M byte set to 00_xxx_100b,
12418 since its predecode logic can't detect the length of such instructions
12419 and decoding degenerates to the vector path.  Increase the cost of such
12420 addresses here.  The penalty is at least 2 cycles.  It may be worthwhile
12421 to split such addresses or even refuse them altogether.
12422
12423 The following addressing modes are affected:
12424 [base+scale*index]
12425 [scale*index+disp]
12426 [base+index]
12427
12428 The first and last cases may be avoidable by explicitly coding a zero
12429 displacement, but I don't have an AMD-K6 machine handy to check this
12430 theory.  */
12431
12432 if (TARGET_K6
12433 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
12434 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
12435 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
12436 cost += 10;
12437
12438 return cost;
12439 }
12440 \f
12441 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O, as
12442 this is used to form addresses of local data when -fPIC is in
12443 use.  */
12444
12445 static bool
12446 darwin_local_data_pic (rtx disp)
12447 {
12448 return (GET_CODE (disp) == UNSPEC
12449 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
12450 }
12451
12452 /* Determine if a given RTX is a valid constant. We already know this
12453 satisfies CONSTANT_P. */
12454
12455 static bool
12456 ix86_legitimate_constant_p (enum machine_mode, rtx x)
12457 {
12458 switch (GET_CODE (x))
12459 {
12460 case CONST:
12461 x = XEXP (x, 0);
12462
12463 if (GET_CODE (x) == PLUS)
12464 {
12465 if (!CONST_INT_P (XEXP (x, 1)))
12466 return false;
12467 x = XEXP (x, 0);
12468 }
12469
12470 if (TARGET_MACHO && darwin_local_data_pic (x))
12471 return true;
12472
12473 /* Only some unspecs are valid as "constants". */
12474 if (GET_CODE (x) == UNSPEC)
12475 switch (XINT (x, 1))
12476 {
12477 case UNSPEC_GOT:
12478 case UNSPEC_GOTOFF:
12479 case UNSPEC_PLTOFF:
12480 return TARGET_64BIT;
12481 case UNSPEC_TPOFF:
12482 case UNSPEC_NTPOFF:
12483 x = XVECEXP (x, 0, 0);
12484 return (GET_CODE (x) == SYMBOL_REF
12485 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
12486 case UNSPEC_DTPOFF:
12487 x = XVECEXP (x, 0, 0);
12488 return (GET_CODE (x) == SYMBOL_REF
12489 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
12490 default:
12491 return false;
12492 }
12493
12494 /* We must have drilled down to a symbol. */
12495 if (GET_CODE (x) == LABEL_REF)
12496 return true;
12497 if (GET_CODE (x) != SYMBOL_REF)
12498 return false;
12499 /* FALLTHRU */
12500
12501 case SYMBOL_REF:
12502 /* TLS symbols are never valid. */
12503 if (SYMBOL_REF_TLS_MODEL (x))
12504 return false;
12505
12506 /* DLLIMPORT symbols are never valid. */
12507 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12508 && SYMBOL_REF_DLLIMPORT_P (x))
12509 return false;
12510
12511 #if TARGET_MACHO
12512 /* mdynamic-no-pic */
12513 if (MACHO_DYNAMIC_NO_PIC_P)
12514 return machopic_symbol_defined_p (x);
12515 #endif
12516 break;
12517
12518 case CONST_DOUBLE:
12519 if (GET_MODE (x) == TImode
12520 && x != CONST0_RTX (TImode)
12521 && !TARGET_64BIT)
12522 return false;
12523 break;
12524
12525 case CONST_VECTOR:
12526 if (!standard_sse_constant_p (x))
12527 return false;
12528
12529 default:
12530 break;
12531 }
12532
12533 /* Otherwise we handle everything else in the move patterns. */
12534 return true;
12535 }
12536
12537 /* Determine if it's legal to put X into the constant pool. This
12538 is not possible for the address of thread-local symbols, which
12539 is checked above. */
12540
12541 static bool
12542 ix86_cannot_force_const_mem (enum machine_mode mode, rtx x)
12543 {
12544 /* We can always put integral constants and vectors in memory. */
12545 switch (GET_CODE (x))
12546 {
12547 case CONST_INT:
12548 case CONST_DOUBLE:
12549 case CONST_VECTOR:
12550 return false;
12551
12552 default:
12553 break;
12554 }
12555 return !ix86_legitimate_constant_p (mode, x);
12556 }
12557
12558 /* Return true if the symbol is marked as dllimport or as a stub variable,
12559 otherwise false.  */
12560
12561 static bool
12562 is_imported_p (rtx x)
12563 {
12564 if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
12565 || GET_CODE (x) != SYMBOL_REF)
12566 return false;
12567
12568 return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x);
12569 }
12570
12571
12572 /* Nonzero if the constant value X is a legitimate general operand
12573 when generating PIC code. It is given that flag_pic is on and
12574 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
12575
12576 bool
12577 legitimate_pic_operand_p (rtx x)
12578 {
12579 rtx inner;
12580
12581 switch (GET_CODE (x))
12582 {
12583 case CONST:
12584 inner = XEXP (x, 0);
12585 if (GET_CODE (inner) == PLUS
12586 && CONST_INT_P (XEXP (inner, 1)))
12587 inner = XEXP (inner, 0);
12588
12589 /* Only some unspecs are valid as "constants". */
12590 if (GET_CODE (inner) == UNSPEC)
12591 switch (XINT (inner, 1))
12592 {
12593 case UNSPEC_GOT:
12594 case UNSPEC_GOTOFF:
12595 case UNSPEC_PLTOFF:
12596 return TARGET_64BIT;
12597 case UNSPEC_TPOFF:
12598 x = XVECEXP (inner, 0, 0);
12599 return (GET_CODE (x) == SYMBOL_REF
12600 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
12601 case UNSPEC_MACHOPIC_OFFSET:
12602 return legitimate_pic_address_disp_p (x);
12603 default:
12604 return false;
12605 }
12606 /* FALLTHRU */
12607
12608 case SYMBOL_REF:
12609 case LABEL_REF:
12610 return legitimate_pic_address_disp_p (x);
12611
12612 default:
12613 return true;
12614 }
12615 }
12616
12617 /* Determine if a given CONST RTX is a valid memory displacement
12618 in PIC mode. */
12619
12620 bool
12621 legitimate_pic_address_disp_p (rtx disp)
12622 {
12623 bool saw_plus;
12624
12625 /* In 64bit mode we can allow direct addresses of symbols and labels
12626 when they are not dynamic symbols. */
12627 if (TARGET_64BIT)
12628 {
12629 rtx op0 = disp, op1;
12630
12631 switch (GET_CODE (disp))
12632 {
12633 case LABEL_REF:
12634 return true;
12635
12636 case CONST:
12637 if (GET_CODE (XEXP (disp, 0)) != PLUS)
12638 break;
12639 op0 = XEXP (XEXP (disp, 0), 0);
12640 op1 = XEXP (XEXP (disp, 0), 1);
12641 if (!CONST_INT_P (op1)
12642 || INTVAL (op1) >= 16*1024*1024
12643 || INTVAL (op1) < -16*1024*1024)
12644 break;
12645 if (GET_CODE (op0) == LABEL_REF)
12646 return true;
12647 if (GET_CODE (op0) == CONST
12648 && GET_CODE (XEXP (op0, 0)) == UNSPEC
12649 && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
12650 return true;
12651 if (GET_CODE (op0) == UNSPEC
12652 && XINT (op0, 1) == UNSPEC_PCREL)
12653 return true;
12654 if (GET_CODE (op0) != SYMBOL_REF)
12655 break;
12656 /* FALLTHRU */
12657
12658 case SYMBOL_REF:
12659 /* TLS references should always be enclosed in UNSPEC.
12660 A dllimported symbol always needs to be resolved.  */
12661 if (SYMBOL_REF_TLS_MODEL (op0)
12662 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
12663 return false;
12664
12665 if (TARGET_PECOFF)
12666 {
12667 if (is_imported_p (op0))
12668 return true;
12669
12670 if (SYMBOL_REF_FAR_ADDR_P (op0)
12671 || !SYMBOL_REF_LOCAL_P (op0))
12672 break;
12673
12674 /* Function symbols need to be resolved only for
12675 the large model.
12676 For the small model we don't need to resolve anything
12677 here.  */
12678 if ((ix86_cmodel != CM_LARGE_PIC
12679 && SYMBOL_REF_FUNCTION_P (op0))
12680 || ix86_cmodel == CM_SMALL_PIC)
12681 return true;
12682 /* Non-external symbols don't need to be resolved for
12683 the large and medium models.  */
12684 if ((ix86_cmodel == CM_LARGE_PIC
12685 || ix86_cmodel == CM_MEDIUM_PIC)
12686 && !SYMBOL_REF_EXTERNAL_P (op0))
12687 return true;
12688 }
12689 else if (!SYMBOL_REF_FAR_ADDR_P (op0)
12690 && SYMBOL_REF_LOCAL_P (op0)
12691 && ix86_cmodel != CM_LARGE_PIC)
12692 return true;
12693 break;
12694
12695 default:
12696 break;
12697 }
12698 }
12699 if (GET_CODE (disp) != CONST)
12700 return false;
12701 disp = XEXP (disp, 0);
12702
12703 if (TARGET_64BIT)
12704 {
12705 /* It is unsafe to allow PLUS expressions.  This limits the allowed distance
12706 of GOT references.  We should not need these anyway.  */
12707 if (GET_CODE (disp) != UNSPEC
12708 || (XINT (disp, 1) != UNSPEC_GOTPCREL
12709 && XINT (disp, 1) != UNSPEC_GOTOFF
12710 && XINT (disp, 1) != UNSPEC_PCREL
12711 && XINT (disp, 1) != UNSPEC_PLTOFF))
12712 return false;
12713
12714 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
12715 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
12716 return false;
12717 return true;
12718 }
12719
12720 saw_plus = false;
12721 if (GET_CODE (disp) == PLUS)
12722 {
12723 if (!CONST_INT_P (XEXP (disp, 1)))
12724 return false;
12725 disp = XEXP (disp, 0);
12726 saw_plus = true;
12727 }
12728
12729 if (TARGET_MACHO && darwin_local_data_pic (disp))
12730 return true;
12731
12732 if (GET_CODE (disp) != UNSPEC)
12733 return false;
12734
12735 switch (XINT (disp, 1))
12736 {
12737 case UNSPEC_GOT:
12738 if (saw_plus)
12739 return false;
12740 /* We need to check for both symbols and labels because VxWorks loads
12741 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
12742 details. */
12743 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
12744 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
12745 case UNSPEC_GOTOFF:
12746 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
12747 The ABI also specifies a 32bit relocation, but we don't produce it in
12748 the small PIC model at all.  */
12749 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
12750 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
12751 && !TARGET_64BIT)
12752 return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
12753 return false;
12754 case UNSPEC_GOTTPOFF:
12755 case UNSPEC_GOTNTPOFF:
12756 case UNSPEC_INDNTPOFF:
12757 if (saw_plus)
12758 return false;
12759 disp = XVECEXP (disp, 0, 0);
12760 return (GET_CODE (disp) == SYMBOL_REF
12761 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
12762 case UNSPEC_NTPOFF:
12763 disp = XVECEXP (disp, 0, 0);
12764 return (GET_CODE (disp) == SYMBOL_REF
12765 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
12766 case UNSPEC_DTPOFF:
12767 disp = XVECEXP (disp, 0, 0);
12768 return (GET_CODE (disp) == SYMBOL_REF
12769 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
12770 }
12771
12772 return false;
12773 }
12774
12775 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
12776 replace the input X, or the original X if no replacement is called for.
12777 The output parameter *WIN is 1 if the calling macro should goto WIN,
12778 0 if it should not. */
12779
12780 bool
12781 ix86_legitimize_reload_address (rtx x, enum machine_mode, int opnum, int type,
12782 int)
12783 {
12784 /* Reload can generate:
12785
12786 (plus:DI (plus:DI (unspec:DI [(const_int 0 [0])] UNSPEC_TP)
12787 (reg:DI 97))
12788 (reg:DI 2 cx))
12789
12790 This RTX is rejected from ix86_legitimate_address_p due to
12791 non-strictness of base register 97. Following this rejection,
12792 reload pushes all three components into separate registers,
12793 creating an invalid memory address RTX.
12794
12795 The following code reloads only the invalid part of the
12796 memory address RTX.  */
12797
12798 if (GET_CODE (x) == PLUS
12799 && REG_P (XEXP (x, 1))
12800 && GET_CODE (XEXP (x, 0)) == PLUS
12801 && REG_P (XEXP (XEXP (x, 0), 1)))
12802 {
12803 rtx base, index;
12804 bool something_reloaded = false;
12805
12806 base = XEXP (XEXP (x, 0), 1);
12807 if (!REG_OK_FOR_BASE_STRICT_P (base))
12808 {
12809 push_reload (base, NULL_RTX, &XEXP (XEXP (x, 0), 1), NULL,
12810 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
12811 opnum, (enum reload_type) type);
12812 something_reloaded = true;
12813 }
12814
12815 index = XEXP (x, 1);
12816 if (!REG_OK_FOR_INDEX_STRICT_P (index))
12817 {
12818 push_reload (index, NULL_RTX, &XEXP (x, 1), NULL,
12819 INDEX_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
12820 opnum, (enum reload_type) type);
12821 something_reloaded = true;
12822 }
12823
12824 gcc_assert (something_reloaded);
12825 return true;
12826 }
12827
12828 return false;
12829 }
12830
12831 /* Determine if OP is a suitable RTX for an address register.
12832 Return the naked register if a register or a register subreg is
12833 found, otherwise return NULL_RTX.  */
12834
12835 static rtx
12836 ix86_validate_address_register (rtx op)
12837 {
12838 enum machine_mode mode = GET_MODE (op);
12839
12840 /* Only SImode or DImode registers can form the address. */
12841 if (mode != SImode && mode != DImode)
12842 return NULL_RTX;
12843
12844 if (REG_P (op))
12845 return op;
12846 else if (GET_CODE (op) == SUBREG)
12847 {
12848 rtx reg = SUBREG_REG (op);
12849
12850 if (!REG_P (reg))
12851 return NULL_RTX;
12852
12853 mode = GET_MODE (reg);
12854
12855 /* Don't allow SUBREGs that span more than a word. It can
12856 lead to spill failures when the register is one word out
12857 of a two word structure. */
12858 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
12859 return NULL_RTX;
12860
12861 /* Allow only SUBREGs of non-eliminable hard registers. */
12862 if (register_no_elim_operand (reg, mode))
12863 return reg;
12864 }
12865
12866 /* Op is not a register. */
12867 return NULL_RTX;
12868 }
12869
12870 /* Recognizes RTL expressions that are valid memory addresses for an
12871 instruction. The MODE argument is the machine mode for the MEM
12872 expression that wants to use this address.
12873
12874 It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS should
12875 convert common non-canonical forms to canonical form so that they will
12876 be recognized. */
12877
12878 static bool
12879 ix86_legitimate_address_p (enum machine_mode, rtx addr, bool strict)
12880 {
12881 struct ix86_address parts;
12882 rtx base, index, disp;
12883 HOST_WIDE_INT scale;
12884 enum ix86_address_seg seg;
12885
12886 if (ix86_decompose_address (addr, &parts) <= 0)
12887 /* Decomposition failed. */
12888 return false;
12889
12890 base = parts.base;
12891 index = parts.index;
12892 disp = parts.disp;
12893 scale = parts.scale;
12894 seg = parts.seg;
12895
12896 /* Validate base register. */
12897 if (base)
12898 {
12899 rtx reg = ix86_validate_address_register (base);
12900
12901 if (reg == NULL_RTX)
12902 return false;
12903
12904 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
12905 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
12906 /* Base is not valid. */
12907 return false;
12908 }
12909
12910 /* Validate index register. */
12911 if (index)
12912 {
12913 rtx reg = ix86_validate_address_register (index);
12914
12915 if (reg == NULL_RTX)
12916 return false;
12917
12918 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
12919 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
12920 /* Index is not valid. */
12921 return false;
12922 }
12923
12924 /* Index and base should have the same mode. */
12925 if (base && index
12926 && GET_MODE (base) != GET_MODE (index))
12927 return false;
12928
12929 /* Address override works only on the (%reg) part of %fs:(%reg). */
12930 if (seg != SEG_DEFAULT
12931 && ((base && GET_MODE (base) != word_mode)
12932 || (index && GET_MODE (index) != word_mode)))
12933 return false;
12934
12935 /* Validate scale factor. */
12936 if (scale != 1)
12937 {
12938 if (!index)
12939 /* Scale without index. */
12940 return false;
12941
12942 if (scale != 2 && scale != 4 && scale != 8)
12943 /* Scale is not a valid multiplier. */
12944 return false;
12945 }
12946
12947 /* Validate displacement. */
12948 if (disp)
12949 {
12950 if (GET_CODE (disp) == CONST
12951 && GET_CODE (XEXP (disp, 0)) == UNSPEC
12952 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
12953 switch (XINT (XEXP (disp, 0), 1))
12954 {
12955 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
12956 used. While ABI specify also 32bit relocations, we don't produce
12957 them at all and use IP relative instead. */
12958 case UNSPEC_GOT:
12959 case UNSPEC_GOTOFF:
12960 gcc_assert (flag_pic);
12961 if (!TARGET_64BIT)
12962 goto is_legitimate_pic;
12963
12964 /* 64bit address unspec. */
12965 return false;
12966
12967 case UNSPEC_GOTPCREL:
12968 case UNSPEC_PCREL:
12969 gcc_assert (flag_pic);
12970 goto is_legitimate_pic;
12971
12972 case UNSPEC_GOTTPOFF:
12973 case UNSPEC_GOTNTPOFF:
12974 case UNSPEC_INDNTPOFF:
12975 case UNSPEC_NTPOFF:
12976 case UNSPEC_DTPOFF:
12977 break;
12978
12979 case UNSPEC_STACK_CHECK:
12980 gcc_assert (flag_split_stack);
12981 break;
12982
12983 default:
12984 /* Invalid address unspec. */
12985 return false;
12986 }
12987
12988 else if (SYMBOLIC_CONST (disp)
12989 && (flag_pic
12990 || (TARGET_MACHO
12991 #if TARGET_MACHO
12992 && MACHOPIC_INDIRECT
12993 && !machopic_operand_p (disp)
12994 #endif
12995 )))
12996 {
12997
12998 is_legitimate_pic:
12999 if (TARGET_64BIT && (index || base))
13000 {
13001 /* foo@dtpoff(%rX) is ok. */
13002 if (GET_CODE (disp) != CONST
13003 || GET_CODE (XEXP (disp, 0)) != PLUS
13004 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
13005 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
13006 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
13007 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
13008 /* Non-constant pic memory reference. */
13009 return false;
13010 }
13011 else if ((!TARGET_MACHO || flag_pic)
13012 && ! legitimate_pic_address_disp_p (disp))
13013 /* Displacement is an invalid pic construct. */
13014 return false;
13015 #if TARGET_MACHO
13016 else if (MACHO_DYNAMIC_NO_PIC_P
13017 && !ix86_legitimate_constant_p (Pmode, disp))
13018 /* displacement must be referenced via non_lazy_pointer */
13019 return false;
13020 #endif
13021
13022 /* This code used to verify that a symbolic pic displacement
13023 includes the pic_offset_table_rtx register.
13024
13025 While this is a good idea, unfortunately these constructs may
13026 be created by the "adds using lea" optimization for incorrect
13027 code like:
13028
13029 int a;
13030 int foo(int i)
13031 {
13032 return *(&a+i);
13033 }
13034
13035 This code is nonsensical, but results in addressing the
13036 GOT table with a pic_offset_table_rtx base.  We can't
13037 easily refuse it, since it gets matched by the
13038 "addsi3" pattern, which is later split to lea when the
13039 output register differs from the input.  While this
13040 could be handled by a separate addsi pattern for this case
13041 that never results in lea, disabling this test seems to be
13042 the easier and correct fix for the crash.  */
13043 }
13044 else if (GET_CODE (disp) != LABEL_REF
13045 && !CONST_INT_P (disp)
13046 && (GET_CODE (disp) != CONST
13047 || !ix86_legitimate_constant_p (Pmode, disp))
13048 && (GET_CODE (disp) != SYMBOL_REF
13049 || !ix86_legitimate_constant_p (Pmode, disp)))
13050 /* Displacement is not constant. */
13051 return false;
13052 else if (TARGET_64BIT
13053 && !x86_64_immediate_operand (disp, VOIDmode))
13054 /* Displacement is out of range. */
13055 return false;
13056 /* In x32 mode, constant addresses are sign extended to 64bit, so
13057 we have to prevent addresses from 0x80000000 to 0xffffffff. */
13058 else if (TARGET_X32 && !(index || base)
13059 && CONST_INT_P (disp)
13060 && val_signbit_known_set_p (SImode, INTVAL (disp)))
13061 return false;
13062 }
13063
13064 /* Everything looks valid. */
13065 return true;
13066 }
13067
13068 /* Determine if a given RTX is a valid constant address. */
13069
13070 bool
13071 constant_address_p (rtx x)
13072 {
13073 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
13074 }
13075 \f
13076 /* Return a unique alias set for the GOT. */
13077
13078 static alias_set_type
13079 ix86_GOT_alias_set (void)
13080 {
13081 static alias_set_type set = -1;
13082 if (set == -1)
13083 set = new_alias_set ();
13084 return set;
13085 }
13086
13087 /* Return a legitimate reference for ORIG (an address) using the
13088 register REG. If REG is 0, a new pseudo is generated.
13089
13090 There are two types of references that must be handled:
13091
13092 1. Global data references must load the address from the GOT, via
13093 the PIC reg. An insn is emitted to do this load, and the reg is
13094 returned.
13095
13096 2. Static data references, constant pool addresses, and code labels
13097 compute the address as an offset from the GOT, whose base is in
13098 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
13099 differentiate them from global data objects. The returned
13100 address is the PIC reg + an unspec constant.
13101
13102 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
13103 reg also appears in the address. */
13104
13105 static rtx
13106 legitimize_pic_address (rtx orig, rtx reg)
13107 {
13108 rtx addr = orig;
13109 rtx new_rtx = orig;
13110
13111 #if TARGET_MACHO
13112 if (TARGET_MACHO && !TARGET_64BIT)
13113 {
13114 if (reg == 0)
13115 reg = gen_reg_rtx (Pmode);
13116 /* Use the generic Mach-O PIC machinery. */
13117 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
13118 }
13119 #endif
13120
13121 if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13122 {
13123 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13124 if (tmp)
13125 return tmp;
13126 }
13127
13128 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
13129 new_rtx = addr;
13130 else if (TARGET_64BIT && !TARGET_PECOFF
13131 && ix86_cmodel != CM_SMALL_PIC && gotoff_operand (addr, Pmode))
13132 {
13133 rtx tmpreg;
13134 /* This symbol may be referenced via a displacement from the PIC
13135 base address (@GOTOFF). */
13136
13137 if (reload_in_progress)
13138 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13139 if (GET_CODE (addr) == CONST)
13140 addr = XEXP (addr, 0);
13141 if (GET_CODE (addr) == PLUS)
13142 {
13143 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13144 UNSPEC_GOTOFF);
13145 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13146 }
13147 else
13148 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13149 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13150 if (!reg)
13151 tmpreg = gen_reg_rtx (Pmode);
13152 else
13153 tmpreg = reg;
13154 emit_move_insn (tmpreg, new_rtx);
13155
13156 if (reg != 0)
13157 {
13158 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
13159 tmpreg, 1, OPTAB_DIRECT);
13160 new_rtx = reg;
13161 }
13162 else
13163 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
13164 }
13165 else if (!TARGET_64BIT && !TARGET_PECOFF && gotoff_operand (addr, Pmode))
13166 {
13167 /* This symbol may be referenced via a displacement from the PIC
13168 base address (@GOTOFF). */
13169
13170 if (reload_in_progress)
13171 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13172 if (GET_CODE (addr) == CONST)
13173 addr = XEXP (addr, 0);
13174 if (GET_CODE (addr) == PLUS)
13175 {
13176 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13177 UNSPEC_GOTOFF);
13178 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13179 }
13180 else
13181 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13182 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13183 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13184
13185 if (reg != 0)
13186 {
13187 emit_move_insn (reg, new_rtx);
13188 new_rtx = reg;
13189 }
13190 }
13191 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
13192 /* We can't use @GOTOFF for text labels on VxWorks;
13193 see gotoff_operand. */
13194 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
13195 {
13196 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13197 if (tmp)
13198 return tmp;
13199
13200 /* For x64 PE-COFF there is no GOT table, so we use the address
13201 directly.  */
13202 if (TARGET_64BIT && TARGET_PECOFF)
13203 {
13204 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
13205 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13206
13207 if (reg == 0)
13208 reg = gen_reg_rtx (Pmode);
13209 emit_move_insn (reg, new_rtx);
13210 new_rtx = reg;
13211 }
13212 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
13213 {
13214 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
13215 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13216 new_rtx = gen_const_mem (Pmode, new_rtx);
13217 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13218
13219 if (reg == 0)
13220 reg = gen_reg_rtx (Pmode);
13221 /* Use gen_movsi directly, otherwise the address is loaded
13222 into a register for CSE.  We don't want to CSE these addresses;
13223 instead we CSE addresses from the GOT table, so skip this.  */
13224 emit_insn (gen_movsi (reg, new_rtx));
13225 new_rtx = reg;
13226 }
13227 else
13228 {
13229 /* This symbol must be referenced via a load from the
13230 Global Offset Table (@GOT). */
13231
13232 if (reload_in_progress)
13233 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13234 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
13235 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13236 if (TARGET_64BIT)
13237 new_rtx = force_reg (Pmode, new_rtx);
13238 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13239 new_rtx = gen_const_mem (Pmode, new_rtx);
13240 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13241
13242 if (reg == 0)
13243 reg = gen_reg_rtx (Pmode);
13244 emit_move_insn (reg, new_rtx);
13245 new_rtx = reg;
13246 }
13247 }
13248 else
13249 {
13250 if (CONST_INT_P (addr)
13251 && !x86_64_immediate_operand (addr, VOIDmode))
13252 {
13253 if (reg)
13254 {
13255 emit_move_insn (reg, addr);
13256 new_rtx = reg;
13257 }
13258 else
13259 new_rtx = force_reg (Pmode, addr);
13260 }
13261 else if (GET_CODE (addr) == CONST)
13262 {
13263 addr = XEXP (addr, 0);
13264
13265 /* We must match stuff we generate before. Assume the only
13266 unspecs that can get here are ours. Not that we could do
13267 anything with them anyway.... */
13268 if (GET_CODE (addr) == UNSPEC
13269 || (GET_CODE (addr) == PLUS
13270 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
13271 return orig;
13272 gcc_assert (GET_CODE (addr) == PLUS);
13273 }
13274 if (GET_CODE (addr) == PLUS)
13275 {
13276 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
13277
13278 /* Check first to see if this is a constant offset from a @GOTOFF
13279 symbol reference. */
13280 if (!TARGET_PECOFF && gotoff_operand (op0, Pmode)
13281 && CONST_INT_P (op1))
13282 {
13283 if (!TARGET_64BIT)
13284 {
13285 if (reload_in_progress)
13286 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13287 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
13288 UNSPEC_GOTOFF);
13289 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
13290 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13291 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13292
13293 if (reg != 0)
13294 {
13295 emit_move_insn (reg, new_rtx);
13296 new_rtx = reg;
13297 }
13298 }
13299 else
13300 {
13301 if (INTVAL (op1) < -16*1024*1024
13302 || INTVAL (op1) >= 16*1024*1024)
13303 {
13304 if (!x86_64_immediate_operand (op1, Pmode))
13305 op1 = force_reg (Pmode, op1);
13306 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
13307 }
13308 }
13309 }
13310 else
13311 {
13312 rtx base = legitimize_pic_address (op0, reg);
13313 enum machine_mode mode = GET_MODE (base);
13314 new_rtx
13315 = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);
13316
13317 if (CONST_INT_P (new_rtx))
13318 {
13319 if (INTVAL (new_rtx) < -16*1024*1024
13320 || INTVAL (new_rtx) >= 16*1024*1024)
13321 {
13322 if (!x86_64_immediate_operand (new_rtx, mode))
13323 new_rtx = force_reg (mode, new_rtx);
13324 new_rtx
13325 = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
13326 }
13327 else
13328 new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
13329 }
13330 else
13331 {
13332 if (GET_CODE (new_rtx) == PLUS
13333 && CONSTANT_P (XEXP (new_rtx, 1)))
13334 {
13335 base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
13336 new_rtx = XEXP (new_rtx, 1);
13337 }
13338 new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
13339 }
13340 }
13341 }
13342 }
13343 return new_rtx;
13344 }
13345 \f
13346 /* Load the thread pointer. If TO_REG is true, force it into a register. */
13347
13348 static rtx
13349 get_thread_pointer (enum machine_mode tp_mode, bool to_reg)
13350 {
13351 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
13352
13353 if (GET_MODE (tp) != tp_mode)
13354 {
13355 gcc_assert (GET_MODE (tp) == SImode);
13356 gcc_assert (tp_mode == DImode);
13357
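/* This happens for x32, where ptr_mode is SImode but a DImode
   reference is wanted; zero-extend the 32-bit thread pointer.  */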
13358 tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
13359 }
13360
13361 if (to_reg)
13362 tp = copy_to_mode_reg (tp_mode, tp);
13363
13364 return tp;
13365 }
13366
13367 /* Construct the SYMBOL_REF for the tls_get_addr function. */
13368
13369 static GTY(()) rtx ix86_tls_symbol;
13370
13371 static rtx
13372 ix86_tls_get_addr (void)
13373 {
13374 if (!ix86_tls_symbol)
13375 {
13376 const char *sym
13377 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
13378 ? "___tls_get_addr" : "__tls_get_addr");
13379
13380 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
13381 }
13382
13383 if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
13384 {
13385 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
13386 UNSPEC_PLTOFF);
13387 return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
13388 gen_rtx_CONST (Pmode, unspec));
13389 }
13390
13391 return ix86_tls_symbol;
13392 }
13393
13394 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
13395
13396 static GTY(()) rtx ix86_tls_module_base_symbol;
13397
13398 rtx
13399 ix86_tls_module_base (void)
13400 {
13401 if (!ix86_tls_module_base_symbol)
13402 {
13403 ix86_tls_module_base_symbol
13404 = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
13405
13406 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
13407 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
13408 }
13409
13410 return ix86_tls_module_base_symbol;
13411 }
13412
13413 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
13414 false if we expect this to be used for a memory address and true if
13415 we expect to load the address into a register. */
13416
13417 static rtx
13418 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
13419 {
13420 rtx dest, base, off;
13421 rtx pic = NULL_RTX, tp = NULL_RTX;
13422 enum machine_mode tp_mode = Pmode;
13423 int type;
13424
13425 /* Fall back to the global dynamic model if the tool chain cannot support
13426 local dynamic.  */
13427 if (TARGET_SUN_TLS && !TARGET_64BIT
13428 && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
13429 && model == TLS_MODEL_LOCAL_DYNAMIC)
13430 model = TLS_MODEL_GLOBAL_DYNAMIC;
13431
13432 switch (model)
13433 {
13434 case TLS_MODEL_GLOBAL_DYNAMIC:
13435 dest = gen_reg_rtx (Pmode);
13436
13437 if (!TARGET_64BIT)
13438 {
13439 if (flag_pic && !TARGET_PECOFF)
13440 pic = pic_offset_table_rtx;
13441 else
13442 {
13443 pic = gen_reg_rtx (Pmode);
13444 emit_insn (gen_set_got (pic));
13445 }
13446 }
13447
13448 if (TARGET_GNU2_TLS)
13449 {
13450 if (TARGET_64BIT)
13451 emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
13452 else
13453 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
13454
13455 tp = get_thread_pointer (Pmode, true);
13456 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
13457
13458 if (GET_MODE (x) != Pmode)
13459 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13460
13461 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
13462 }
13463 else
13464 {
13465 rtx caddr = ix86_tls_get_addr ();
13466
13467 if (TARGET_64BIT)
13468 {
13469 rtx rax = gen_rtx_REG (Pmode, AX_REG);
13470 rtx insns;
13471
13472 start_sequence ();
13473 emit_call_insn
13474 (ix86_gen_tls_global_dynamic_64 (rax, x, caddr));
13475 insns = get_insns ();
13476 end_sequence ();
13477
13478 if (GET_MODE (x) != Pmode)
13479 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13480
13481 RTL_CONST_CALL_P (insns) = 1;
13482 emit_libcall_block (insns, dest, rax, x);
13483 }
13484 else
13485 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
13486 }
13487 break;
13488
13489 case TLS_MODEL_LOCAL_DYNAMIC:
13490 base = gen_reg_rtx (Pmode);
13491
13492 if (!TARGET_64BIT)
13493 {
13494 if (flag_pic)
13495 pic = pic_offset_table_rtx;
13496 else
13497 {
13498 pic = gen_reg_rtx (Pmode);
13499 emit_insn (gen_set_got (pic));
13500 }
13501 }
13502
13503 if (TARGET_GNU2_TLS)
13504 {
13505 rtx tmp = ix86_tls_module_base ();
13506
13507 if (TARGET_64BIT)
13508 emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
13509 else
13510 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
13511
13512 tp = get_thread_pointer (Pmode, true);
13513 set_unique_reg_note (get_last_insn (), REG_EQUAL,
13514 gen_rtx_MINUS (Pmode, tmp, tp));
13515 }
13516 else
13517 {
13518 rtx caddr = ix86_tls_get_addr ();
13519
13520 if (TARGET_64BIT)
13521 {
13522 rtx rax = gen_rtx_REG (Pmode, AX_REG);
13523 rtx insns, eqv;
13524
13525 start_sequence ();
13526 emit_call_insn
13527 (ix86_gen_tls_local_dynamic_base_64 (rax, caddr));
13528 insns = get_insns ();
13529 end_sequence ();
13530
13531 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
13532 share the LD_BASE result with other LD model accesses. */
13533 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
13534 UNSPEC_TLS_LD_BASE);
13535
13536 RTL_CONST_CALL_P (insns) = 1;
13537 emit_libcall_block (insns, base, rax, eqv);
13538 }
13539 else
13540 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
13541 }
13542
13543 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
13544 off = gen_rtx_CONST (Pmode, off);
13545
13546 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
13547
13548 if (TARGET_GNU2_TLS)
13549 {
13550 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
13551
13552 if (GET_MODE (x) != Pmode)
13553 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13554
13555 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
13556 }
13557 break;
13558
13559 case TLS_MODEL_INITIAL_EXEC:
13560 if (TARGET_64BIT)
13561 {
13562 if (TARGET_SUN_TLS && !TARGET_X32)
13563 {
13564 /* The Sun linker took the AMD64 TLS spec literally
13565 and can only handle %rax as the destination of the
13566 initial-exec code sequence. */
13567
13568 dest = gen_reg_rtx (DImode);
13569 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
13570 return dest;
13571 }
13572
13573 /* Generate DImode references to avoid %fs:(%reg32)
13574 problems and the linker IE->LE relaxation bug. */
13575 tp_mode = DImode;
13576 pic = NULL;
13577 type = UNSPEC_GOTNTPOFF;
13578 }
13579 else if (flag_pic)
13580 {
13581 if (reload_in_progress)
13582 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13583 pic = pic_offset_table_rtx;
13584 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
13585 }
13586 else if (!TARGET_ANY_GNU_TLS)
13587 {
13588 pic = gen_reg_rtx (Pmode);
13589 emit_insn (gen_set_got (pic));
13590 type = UNSPEC_GOTTPOFF;
13591 }
13592 else
13593 {
13594 pic = NULL;
13595 type = UNSPEC_INDNTPOFF;
13596 }
13597
13598 off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
13599 off = gen_rtx_CONST (tp_mode, off);
13600 if (pic)
13601 off = gen_rtx_PLUS (tp_mode, pic, off);
13602 off = gen_const_mem (tp_mode, off);
13603 set_mem_alias_set (off, ix86_GOT_alias_set ());
13604
13605 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
13606 {
13607 base = get_thread_pointer (tp_mode,
13608 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
13609 off = force_reg (tp_mode, off);
13610 return gen_rtx_PLUS (tp_mode, base, off);
13611 }
13612 else
13613 {
13614 base = get_thread_pointer (Pmode, true);
13615 dest = gen_reg_rtx (Pmode);
13616 emit_insn (ix86_gen_sub3 (dest, base, off));
13617 }
13618 break;
13619
13620 case TLS_MODEL_LOCAL_EXEC:
13621 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
13622 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
13623 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
13624 off = gen_rtx_CONST (Pmode, off);
13625
13626 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
13627 {
13628 base = get_thread_pointer (Pmode,
13629 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
13630 return gen_rtx_PLUS (Pmode, base, off);
13631 }
13632 else
13633 {
13634 base = get_thread_pointer (Pmode, true);
13635 dest = gen_reg_rtx (Pmode);
13636 emit_insn (ix86_gen_sub3 (dest, base, off));
13637 }
13638 break;
13639
13640 default:
13641 gcc_unreachable ();
13642 }
13643
13644 return dest;
13645 }
13646
13647 /* Create or return the unique __imp_DECL dllimport symbol corresponding
13648 to symbol DECL if BEIMPORT is true. Otherwise create or return the
13649 unique refptr-DECL symbol corresponding to symbol DECL. */
13650
13651 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
13652 htab_t dllimport_map;
13653
13654 static tree
13655 get_dllimport_decl (tree decl, bool beimport)
13656 {
13657 struct tree_map *h, in;
13658 void **loc;
13659 const char *name;
13660 const char *prefix;
13661 size_t namelen, prefixlen;
13662 char *imp_name;
13663 tree to;
13664 rtx rtl;
13665
13666 if (!dllimport_map)
13667 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
13668
13669 in.hash = htab_hash_pointer (decl);
13670 in.base.from = decl;
13671 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
13672 h = (struct tree_map *) *loc;
13673 if (h)
13674 return h->to;
13675
13676 *loc = h = ggc_alloc<tree_map> ();
13677 h->hash = in.hash;
13678 h->base.from = decl;
13679 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
13680 VAR_DECL, NULL, ptr_type_node);
13681 DECL_ARTIFICIAL (to) = 1;
13682 DECL_IGNORED_P (to) = 1;
13683 DECL_EXTERNAL (to) = 1;
13684 TREE_READONLY (to) = 1;
13685
13686 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
13687 name = targetm.strip_name_encoding (name);
13688 if (beimport)
13689 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
13690 ? "*__imp_" : "*__imp__";
13691 else
13692 prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr.";
13693 namelen = strlen (name);
13694 prefixlen = strlen (prefix);
13695 imp_name = (char *) alloca (namelen + prefixlen + 1);
13696 memcpy (imp_name, prefix, prefixlen);
13697 memcpy (imp_name + prefixlen, name, namelen + 1);
13698
13699 name = ggc_alloc_string (imp_name, namelen + prefixlen);
13700 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
13701 SET_SYMBOL_REF_DECL (rtl, to);
13702 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR;
13703 if (!beimport)
13704 {
13705 SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL;
13706 #ifdef SUB_TARGET_RECORD_STUB
13707 SUB_TARGET_RECORD_STUB (name);
13708 #endif
13709 }
13710
13711 rtl = gen_const_mem (Pmode, rtl);
13712 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
13713
13714 SET_DECL_RTL (to, rtl);
13715 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
13716
13717 return to;
13718 }
13719
13720 /* Expand SYMBOL into its corresponding far-address symbol.
13721 WANT_REG is true if we require the result to be a register. */
13722
13723 static rtx
13724 legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg)
13725 {
13726 tree imp_decl;
13727 rtx x;
13728
13729 gcc_assert (SYMBOL_REF_DECL (symbol));
13730 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false);
13731
13732 x = DECL_RTL (imp_decl);
13733 if (want_reg)
13734 x = force_reg (Pmode, x);
13735 return x;
13736 }
13737
13738 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
13739 true if we require the result be a register. */
13740
13741 static rtx
13742 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
13743 {
13744 tree imp_decl;
13745 rtx x;
13746
13747 gcc_assert (SYMBOL_REF_DECL (symbol));
13748 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true);
13749
13750 x = DECL_RTL (imp_decl);
13751 if (want_reg)
13752 x = force_reg (Pmode, x);
13753 return x;
13754 }
13755
13756 /* Expand ADDR into its corresponding dllimport or refptr symbol. INREG
13757 is true if we require the result to be a register. */
13758
13759 static rtx
13760 legitimize_pe_coff_symbol (rtx addr, bool inreg)
13761 {
13762 if (!TARGET_PECOFF)
13763 return NULL_RTX;
13764
13765 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13766 {
13767 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
13768 return legitimize_dllimport_symbol (addr, inreg);
13769 if (GET_CODE (addr) == CONST
13770 && GET_CODE (XEXP (addr, 0)) == PLUS
13771 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
13772 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
13773 {
13774 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), inreg);
13775 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
13776 }
13777 }
13778
13779 if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC)
13780 return NULL_RTX;
13781 if (GET_CODE (addr) == SYMBOL_REF
13782 && !is_imported_p (addr)
13783 && SYMBOL_REF_EXTERNAL_P (addr)
13784 && SYMBOL_REF_DECL (addr))
13785 return legitimize_pe_coff_extern_decl (addr, inreg);
13786
13787 if (GET_CODE (addr) == CONST
13788 && GET_CODE (XEXP (addr, 0)) == PLUS
13789 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
13790 && !is_imported_p (XEXP (XEXP (addr, 0), 0))
13791 && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0))
13792 && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0)))
13793 {
13794 rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), inreg);
13795 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
13796 }
13797 return NULL_RTX;
13798 }
13799
13800 /* Try machine-dependent ways of modifying an illegitimate address
13801 to be legitimate. If we find one, return the new, valid address.
13802 This hook is used in only one place: `memory_address' in explow.c.
13803 
13804 OLDX is the address as it was before break_out_memory_refs was called.
13805 In some cases it is useful to look at this to decide what needs to be done.
13806 
13807 It is always safe for this hook to do nothing. It exists to recognize
13808 opportunities to optimize the output.
13809
13810 For the 80386, we handle X+REG by loading X into a register R and
13811 using R+REG. R will go in a general reg and indexing will be used.
13812 However, if REG is a broken-out memory address or multiplication,
13813 nothing needs to be done because REG can certainly go in a general reg.
13814
13815 When -fpic is used, special handling is needed for symbolic references.
13816 See comments by legitimize_pic_address in i386.c for details. */
13817
13818 static rtx
13819 ix86_legitimize_address (rtx x, rtx, enum machine_mode mode)
13820 {
13821 int changed = 0;
13822 unsigned log;
13823
13824 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
13825 if (log)
13826 return legitimize_tls_address (x, (enum tls_model) log, false);
13827 if (GET_CODE (x) == CONST
13828 && GET_CODE (XEXP (x, 0)) == PLUS
13829 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
13830 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
13831 {
13832 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
13833 (enum tls_model) log, false);
13834 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
13835 }
13836
13837 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13838 {
13839 rtx tmp = legitimize_pe_coff_symbol (x, true);
13840 if (tmp)
13841 return tmp;
13842 }
13843
13844 if (flag_pic && SYMBOLIC_CONST (x))
13845 return legitimize_pic_address (x, 0);
13846
13847 #if TARGET_MACHO
13848 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
13849 return machopic_indirect_data_reference (x, 0);
13850 #endif
13851
13852 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
13853 if (GET_CODE (x) == ASHIFT
13854 && CONST_INT_P (XEXP (x, 1))
13855 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
13856 {
13857 changed = 1;
13858 log = INTVAL (XEXP (x, 1));
13859 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
13860 GEN_INT (1 << log));
13861 }
13862
13863 if (GET_CODE (x) == PLUS)
13864 {
13865 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
13866
13867 if (GET_CODE (XEXP (x, 0)) == ASHIFT
13868 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
13869 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
13870 {
13871 changed = 1;
13872 log = INTVAL (XEXP (XEXP (x, 0), 1));
13873 XEXP (x, 0) = gen_rtx_MULT (Pmode,
13874 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
13875 GEN_INT (1 << log));
13876 }
13877
13878 if (GET_CODE (XEXP (x, 1)) == ASHIFT
13879 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
13880 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
13881 {
13882 changed = 1;
13883 log = INTVAL (XEXP (XEXP (x, 1), 1));
13884 XEXP (x, 1) = gen_rtx_MULT (Pmode,
13885 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
13886 GEN_INT (1 << log));
13887 }
13888
13889 /* Put multiply first if it isn't already. */
13890 if (GET_CODE (XEXP (x, 1)) == MULT)
13891 {
13892 rtx tmp = XEXP (x, 0);
13893 XEXP (x, 0) = XEXP (x, 1);
13894 XEXP (x, 1) = tmp;
13895 changed = 1;
13896 }
13897
13898 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
13899 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
13900 created by virtual register instantiation, register elimination, and
13901 similar optimizations. */
13902 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
13903 {
13904 changed = 1;
13905 x = gen_rtx_PLUS (Pmode,
13906 gen_rtx_PLUS (Pmode, XEXP (x, 0),
13907 XEXP (XEXP (x, 1), 0)),
13908 XEXP (XEXP (x, 1), 1));
13909 }
13910
13911 /* Canonicalize
13912 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
13913 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
13914 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
13915 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
13916 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
13917 && CONSTANT_P (XEXP (x, 1)))
13918 {
13919 rtx constant;
13920 rtx other = NULL_RTX;
13921
13922 if (CONST_INT_P (XEXP (x, 1)))
13923 {
13924 constant = XEXP (x, 1);
13925 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
13926 }
13927 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
13928 {
13929 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
13930 other = XEXP (x, 1);
13931 }
13932 else
13933 constant = 0;
13934
13935 if (constant)
13936 {
13937 changed = 1;
13938 x = gen_rtx_PLUS (Pmode,
13939 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
13940 XEXP (XEXP (XEXP (x, 0), 1), 0)),
13941 plus_constant (Pmode, other,
13942 INTVAL (constant)));
13943 }
13944 }
13945
13946 if (changed && ix86_legitimate_address_p (mode, x, false))
13947 return x;
13948
13949 if (GET_CODE (XEXP (x, 0)) == MULT)
13950 {
13951 changed = 1;
13952 XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
13953 }
13954
13955 if (GET_CODE (XEXP (x, 1)) == MULT)
13956 {
13957 changed = 1;
13958 XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
13959 }
13960
13961 if (changed
13962 && REG_P (XEXP (x, 1))
13963 && REG_P (XEXP (x, 0)))
13964 return x;
13965
13966 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
13967 {
13968 changed = 1;
13969 x = legitimize_pic_address (x, 0);
13970 }
13971
13972 if (changed && ix86_legitimate_address_p (mode, x, false))
13973 return x;
13974
13975 if (REG_P (XEXP (x, 0)))
13976 {
13977 rtx temp = gen_reg_rtx (Pmode);
13978 rtx val = force_operand (XEXP (x, 1), temp);
13979 if (val != temp)
13980 {
13981 val = convert_to_mode (Pmode, val, 1);
13982 emit_move_insn (temp, val);
13983 }
13984
13985 XEXP (x, 1) = temp;
13986 return x;
13987 }
13988
13989 else if (REG_P (XEXP (x, 1)))
13990 {
13991 rtx temp = gen_reg_rtx (Pmode);
13992 rtx val = force_operand (XEXP (x, 0), temp);
13993 if (val != temp)
13994 {
13995 val = convert_to_mode (Pmode, val, 1);
13996 emit_move_insn (temp, val);
13997 }
13998
13999 XEXP (x, 0) = temp;
14000 return x;
14001 }
14002 }
14003
14004 return x;
14005 }
14006 \f
14007 /* Print an integer constant expression in assembler syntax. Addition
14008 and subtraction are the only arithmetic that may appear in these
14009 expressions. FILE is the stdio stream to write to, X is the rtx, and
14010 CODE is the operand print code from the output string. */
14011
14012 static void
14013 output_pic_addr_const (FILE *file, rtx x, int code)
14014 {
14015 char buf[256];
14016
14017 switch (GET_CODE (x))
14018 {
14019 case PC:
14020 gcc_assert (flag_pic);
14021 putc ('.', file);
14022 break;
14023
14024 case SYMBOL_REF:
14025 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
14026 output_addr_const (file, x);
14027 else
14028 {
14029 const char *name = XSTR (x, 0);
14030
14031 /* Mark the decl as referenced so that cgraph will
14032 output the function. */
14033 if (SYMBOL_REF_DECL (x))
14034 mark_decl_referenced (SYMBOL_REF_DECL (x));
14035
14036 #if TARGET_MACHO
14037 if (MACHOPIC_INDIRECT
14038 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
14039 name = machopic_indirection_name (x, /*stub_p=*/true);
14040 #endif
14041 assemble_name (file, name);
14042 }
14043 if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
14044 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
14045 fputs ("@PLT", file);
14046 break;
14047
14048 case LABEL_REF:
14049 x = XEXP (x, 0);
14050 /* FALLTHRU */
14051 case CODE_LABEL:
14052 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
14053 assemble_name (asm_out_file, buf);
14054 break;
14055
14056 case CONST_INT:
14057 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14058 break;
14059
14060 case CONST:
14061 /* This used to output parentheses around the expression,
14062 but that does not work on the 386 (either ATT or BSD assembler). */
14063 output_pic_addr_const (file, XEXP (x, 0), code);
14064 break;
14065
14066 case CONST_DOUBLE:
14067 if (GET_MODE (x) == VOIDmode)
14068 {
14069 /* We can use %d if the number is <32 bits and positive. */
14070 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
14071 fprintf (file, "0x%lx%08lx",
14072 (unsigned long) CONST_DOUBLE_HIGH (x),
14073 (unsigned long) CONST_DOUBLE_LOW (x));
14074 else
14075 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
14076 }
14077 else
14078 /* We can't handle floating point constants;
14079 TARGET_PRINT_OPERAND must handle them. */
14080 output_operand_lossage ("floating constant misused");
14081 break;
14082
14083 case PLUS:
14084 /* Some assemblers need integer constants to appear first. */
14085 if (CONST_INT_P (XEXP (x, 0)))
14086 {
14087 output_pic_addr_const (file, XEXP (x, 0), code);
14088 putc ('+', file);
14089 output_pic_addr_const (file, XEXP (x, 1), code);
14090 }
14091 else
14092 {
14093 gcc_assert (CONST_INT_P (XEXP (x, 1)));
14094 output_pic_addr_const (file, XEXP (x, 1), code);
14095 putc ('+', file);
14096 output_pic_addr_const (file, XEXP (x, 0), code);
14097 }
14098 break;
14099
14100 case MINUS:
14101 if (!TARGET_MACHO)
14102 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
14103 output_pic_addr_const (file, XEXP (x, 0), code);
14104 putc ('-', file);
14105 output_pic_addr_const (file, XEXP (x, 1), code);
14106 if (!TARGET_MACHO)
14107 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
14108 break;
14109
14110 case UNSPEC:
14111 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
14112 {
14113 bool f = i386_asm_output_addr_const_extra (file, x);
14114 gcc_assert (f);
14115 break;
14116 }
14117
14118 gcc_assert (XVECLEN (x, 0) == 1);
14119 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
14120 switch (XINT (x, 1))
14121 {
14122 case UNSPEC_GOT:
14123 fputs ("@GOT", file);
14124 break;
14125 case UNSPEC_GOTOFF:
14126 fputs ("@GOTOFF", file);
14127 break;
14128 case UNSPEC_PLTOFF:
14129 fputs ("@PLTOFF", file);
14130 break;
14131 case UNSPEC_PCREL:
14132 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14133 "(%rip)" : "[rip]", file);
14134 break;
14135 case UNSPEC_GOTPCREL:
14136 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14137 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
14138 break;
14139 case UNSPEC_GOTTPOFF:
14140 /* FIXME: This might be @TPOFF in Sun ld too. */
14141 fputs ("@gottpoff", file);
14142 break;
14143 case UNSPEC_TPOFF:
14144 fputs ("@tpoff", file);
14145 break;
14146 case UNSPEC_NTPOFF:
14147 if (TARGET_64BIT)
14148 fputs ("@tpoff", file);
14149 else
14150 fputs ("@ntpoff", file);
14151 break;
14152 case UNSPEC_DTPOFF:
14153 fputs ("@dtpoff", file);
14154 break;
14155 case UNSPEC_GOTNTPOFF:
14156 if (TARGET_64BIT)
14157 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14158 "@gottpoff(%rip)": "@gottpoff[rip]", file);
14159 else
14160 fputs ("@gotntpoff", file);
14161 break;
14162 case UNSPEC_INDNTPOFF:
14163 fputs ("@indntpoff", file);
14164 break;
14165 #if TARGET_MACHO
14166 case UNSPEC_MACHOPIC_OFFSET:
14167 putc ('-', file);
14168 machopic_output_function_base_name (file);
14169 break;
14170 #endif
14171 default:
14172 output_operand_lossage ("invalid UNSPEC as operand");
14173 break;
14174 }
14175 break;
14176
14177 default:
14178 output_operand_lossage ("invalid expression as operand");
14179 }
14180 }
14181
14182 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
14183 We need to emit DTP-relative relocations. */
14184
14185 static void ATTRIBUTE_UNUSED
14186 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
14187 {
14188 fputs (ASM_LONG, file);
14189 output_addr_const (file, x);
14190 fputs ("@dtpoff", file);
14191 switch (size)
14192 {
14193 case 4:
14194 break;
14195 case 8:
14196 fputs (", 0", file);
14197 break;
14198 default:
14199 gcc_unreachable ();
14200 }
14201 }
14202
14203 /* Return true if X is a representation of the PIC register. This copes
14204 with calls from ix86_find_base_term, where the register might have
14205 been replaced by a cselib value. */
14206
14207 static bool
14208 ix86_pic_register_p (rtx x)
14209 {
14210 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
14211 return (pic_offset_table_rtx
14212 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
14213 else
14214 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
14215 }
14216
14217 /* Helper function for ix86_delegitimize_address.
14218 Attempt to delegitimize TLS local-exec accesses. */
14219
14220 static rtx
14221 ix86_delegitimize_tls_address (rtx orig_x)
14222 {
14223 rtx x = orig_x, unspec;
14224 struct ix86_address addr;
14225
14226 if (!TARGET_TLS_DIRECT_SEG_REFS)
14227 return orig_x;
14228 if (MEM_P (x))
14229 x = XEXP (x, 0);
14230 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
14231 return orig_x;
14232 if (ix86_decompose_address (x, &addr) == 0
14233 || addr.seg != DEFAULT_TLS_SEG_REG
14234 || addr.disp == NULL_RTX
14235 || GET_CODE (addr.disp) != CONST)
14236 return orig_x;
14237 unspec = XEXP (addr.disp, 0);
14238 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
14239 unspec = XEXP (unspec, 0);
14240 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
14241 return orig_x;
14242 x = XVECEXP (unspec, 0, 0);
14243 gcc_assert (GET_CODE (x) == SYMBOL_REF);
14244 if (unspec != XEXP (addr.disp, 0))
14245 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
14246 if (addr.index)
14247 {
14248 rtx idx = addr.index;
14249 if (addr.scale != 1)
14250 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
14251 x = gen_rtx_PLUS (Pmode, idx, x);
14252 }
14253 if (addr.base)
14254 x = gen_rtx_PLUS (Pmode, addr.base, x);
14255 if (MEM_P (orig_x))
14256 x = replace_equiv_address_nv (orig_x, x);
14257 return x;
14258 }
14259
14260 /* In the name of slightly smaller debug output, and to cater to
14261 general assembler lossage, recognize PIC+GOTOFF and turn it back
14262 into a direct symbol reference.
14263
14264 On Darwin, this is necessary to avoid a crash, because Darwin
14265 has a different PIC label for each routine but the DWARF debugging
14266 information is not associated with any particular routine, so it's
14267 necessary to remove references to the PIC label from RTL stored by
14268 the DWARF output code. */
14269
14270 static rtx
14271 ix86_delegitimize_address (rtx x)
14272 {
14273 rtx orig_x = delegitimize_mem_from_attrs (x);
14274 /* addend is NULL or some rtx if x is something+GOTOFF where
14275 something doesn't include the PIC register. */
14276 rtx addend = NULL_RTX;
14277 /* reg_addend is NULL or a multiple of some register. */
14278 rtx reg_addend = NULL_RTX;
14279 /* const_addend is NULL or a const_int. */
14280 rtx const_addend = NULL_RTX;
14281 /* This is the result, or NULL. */
14282 rtx result = NULL_RTX;
14283
14284 x = orig_x;
14285
14286 if (MEM_P (x))
14287 x = XEXP (x, 0);
14288
14289 if (TARGET_64BIT)
14290 {
14291 if (GET_CODE (x) == CONST
14292 && GET_CODE (XEXP (x, 0)) == PLUS
14293 && GET_MODE (XEXP (x, 0)) == Pmode
14294 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
14295 && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
14296 && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
14297 {
14298 rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
14299 x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
14300 if (MEM_P (orig_x))
14301 x = replace_equiv_address_nv (orig_x, x);
14302 return x;
14303 }
14304
14305 if (GET_CODE (x) == CONST
14306 && GET_CODE (XEXP (x, 0)) == UNSPEC
14307 && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
14308 || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
14309 && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
14310 {
14311 x = XVECEXP (XEXP (x, 0), 0, 0);
14312 if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
14313 {
14314 x = simplify_gen_subreg (GET_MODE (orig_x), x,
14315 GET_MODE (x), 0);
14316 if (x == NULL_RTX)
14317 return orig_x;
14318 }
14319 return x;
14320 }
14321
14322 if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
14323 return ix86_delegitimize_tls_address (orig_x);
14324
14325 /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
14326 and -mcmodel=medium -fpic. */
14327 }
14328
14329 if (GET_CODE (x) != PLUS
14330 || GET_CODE (XEXP (x, 1)) != CONST)
14331 return ix86_delegitimize_tls_address (orig_x);
14332
14333 if (ix86_pic_register_p (XEXP (x, 0)))
14334 /* %ebx + GOT/GOTOFF */
14335 ;
14336 else if (GET_CODE (XEXP (x, 0)) == PLUS)
14337 {
14338 /* %ebx + %reg * scale + GOT/GOTOFF */
14339 reg_addend = XEXP (x, 0);
14340 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
14341 reg_addend = XEXP (reg_addend, 1);
14342 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
14343 reg_addend = XEXP (reg_addend, 0);
14344 else
14345 {
14346 reg_addend = NULL_RTX;
14347 addend = XEXP (x, 0);
14348 }
14349 }
14350 else
14351 addend = XEXP (x, 0);
14352
14353 x = XEXP (XEXP (x, 1), 0);
14354 if (GET_CODE (x) == PLUS
14355 && CONST_INT_P (XEXP (x, 1)))
14356 {
14357 const_addend = XEXP (x, 1);
14358 x = XEXP (x, 0);
14359 }
14360
14361 if (GET_CODE (x) == UNSPEC
14362 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
14363 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
14364 || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
14365 && !MEM_P (orig_x) && !addend)))
14366 result = XVECEXP (x, 0, 0);
14367
14368 if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
14369 && !MEM_P (orig_x))
14370 result = XVECEXP (x, 0, 0);
14371
14372 if (! result)
14373 return ix86_delegitimize_tls_address (orig_x);
14374
14375 if (const_addend)
14376 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
14377 if (reg_addend)
14378 result = gen_rtx_PLUS (Pmode, reg_addend, result);
14379 if (addend)
14380 {
14381 /* If the rest of original X doesn't involve the PIC register, add
14382 addend and subtract pic_offset_table_rtx. This can happen e.g.
14383 for code like:
14384 leal (%ebx, %ecx, 4), %ecx
14385 ...
14386 movl foo@GOTOFF(%ecx), %edx
14387 in which case we return (%ecx - %ebx) + foo. */
14388 if (pic_offset_table_rtx)
14389 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
14390 pic_offset_table_rtx),
14391 result);
14392 else
14393 return orig_x;
14394 }
14395 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
14396 {
14397 result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
14398 if (result == NULL_RTX)
14399 return orig_x;
14400 }
14401 return result;
14402 }
14403
14404 /* If X is a machine specific address (i.e. a symbol or label being
14405 referenced as a displacement from the GOT implemented using an
14406 UNSPEC), then return the base term. Otherwise return X. */
14407
14408 rtx
14409 ix86_find_base_term (rtx x)
14410 {
14411 rtx term;
14412
14413 if (TARGET_64BIT)
14414 {
14415 if (GET_CODE (x) != CONST)
14416 return x;
14417 term = XEXP (x, 0);
14418 if (GET_CODE (term) == PLUS
14419 && (CONST_INT_P (XEXP (term, 1))
14420 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
14421 term = XEXP (term, 0);
14422 if (GET_CODE (term) != UNSPEC
14423 || (XINT (term, 1) != UNSPEC_GOTPCREL
14424 && XINT (term, 1) != UNSPEC_PCREL))
14425 return x;
14426
14427 return XVECEXP (term, 0, 0);
14428 }
14429
14430 return ix86_delegitimize_address (x);
14431 }
14432 \f
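/* Print to FILE the condition-code suffix for a set/cmov instruction
   corresponding to comparison CODE in mode MODE.  If REVERSE is true,
   print the suffix of the reversed condition.  FP selects the alternate
   spellings needed by fcmov (e.g. "nbe" instead of "a").  */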
14433 static void
14434 put_condition_code (enum rtx_code code, enum machine_mode mode, bool reverse,
14435 bool fp, FILE *file)
14436 {
14437 const char *suffix;
14438
14439 if (mode == CCFPmode || mode == CCFPUmode)
14440 {
14441 code = ix86_fp_compare_code_to_integer (code);
14442 mode = CCmode;
14443 }
14444 if (reverse)
14445 code = reverse_condition (code);
14446
14447 switch (code)
14448 {
14449 case EQ:
14450 switch (mode)
14451 {
14452 case CCAmode:
14453 suffix = "a";
14454 break;
14455
14456 case CCCmode:
14457 suffix = "c";
14458 break;
14459
14460 case CCOmode:
14461 suffix = "o";
14462 break;
14463
14464 case CCSmode:
14465 suffix = "s";
14466 break;
14467
14468 default:
14469 suffix = "e";
14470 }
14471 break;
14472 case NE:
14473 switch (mode)
14474 {
14475 case CCAmode:
14476 suffix = "na";
14477 break;
14478
14479 case CCCmode:
14480 suffix = "nc";
14481 break;
14482
14483 case CCOmode:
14484 suffix = "no";
14485 break;
14486
14487 case CCSmode:
14488 suffix = "ns";
14489 break;
14490
14491 default:
14492 suffix = "ne";
14493 }
14494 break;
14495 case GT:
14496 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
14497 suffix = "g";
14498 break;
14499 case GTU:
14500 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
14501 Those same assemblers have the same but opposite lossage on cmov. */
14502 if (mode == CCmode)
14503 suffix = fp ? "nbe" : "a";
14504 else
14505 gcc_unreachable ();
14506 break;
14507 case LT:
14508 switch (mode)
14509 {
14510 case CCNOmode:
14511 case CCGOCmode:
14512 suffix = "s";
14513 break;
14514
14515 case CCmode:
14516 case CCGCmode:
14517 suffix = "l";
14518 break;
14519
14520 default:
14521 gcc_unreachable ();
14522 }
14523 break;
14524 case LTU:
14525 if (mode == CCmode)
14526 suffix = "b";
14527 else if (mode == CCCmode)
14528 suffix = "c";
14529 else
14530 gcc_unreachable ();
14531 break;
14532 case GE:
14533 switch (mode)
14534 {
14535 case CCNOmode:
14536 case CCGOCmode:
14537 suffix = "ns";
14538 break;
14539
14540 case CCmode:
14541 case CCGCmode:
14542 suffix = "ge";
14543 break;
14544
14545 default:
14546 gcc_unreachable ();
14547 }
14548 break;
14549 case GEU:
14550 if (mode == CCmode)
14551 suffix = fp ? "nb" : "ae";
14552 else if (mode == CCCmode)
14553 suffix = "nc";
14554 else
14555 gcc_unreachable ();
14556 break;
14557 case LE:
14558 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
14559 suffix = "le";
14560 break;
14561 case LEU:
14562 if (mode == CCmode)
14563 suffix = "be";
14564 else
14565 gcc_unreachable ();
14566 break;
14567 case UNORDERED:
14568 suffix = fp ? "u" : "p";
14569 break;
14570 case ORDERED:
14571 suffix = fp ? "nu" : "np";
14572 break;
14573 default:
14574 gcc_unreachable ();
14575 }
14576 fputs (suffix, file);
14577 }
14578
14579 /* Print the name of register X to FILE based on its machine mode and number.
14580 If CODE is 'w', pretend the mode is HImode.
14581 If CODE is 'b', pretend the mode is QImode.
14582 If CODE is 'k', pretend the mode is SImode.
14583 If CODE is 'q', pretend the mode is DImode.
14584 If CODE is 'x', pretend the mode is V4SFmode.
14585 If CODE is 't', pretend the mode is V8SFmode.
14586 If CODE is 'g', pretend the mode is V16SFmode.
14587 If CODE is 'h', pretend the reg is the 'high' byte register.
14588 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
14589 If CODE is 'd', duplicate the operand for AVX instruction.
14590 */
14591
14592 void
14593 print_reg (rtx x, int code, FILE *file)
14594 {
14595 const char *reg;
14596 unsigned int regno;
14597 bool duplicated = code == 'd' && TARGET_AVX;
14598
14599 if (ASSEMBLER_DIALECT == ASM_ATT)
14600 putc ('%', file);
14601
14602 if (x == pc_rtx)
14603 {
14604 gcc_assert (TARGET_64BIT);
14605 fputs ("rip", file);
14606 return;
14607 }
14608
14609 regno = true_regnum (x);
14610 gcc_assert (regno != ARG_POINTER_REGNUM
14611 && regno != FRAME_POINTER_REGNUM
14612 && regno != FLAGS_REG
14613 && regno != FPSR_REG
14614 && regno != FPCR_REG);
14615
14616 if (code == 'w' || MMX_REG_P (x))
14617 code = 2;
14618 else if (code == 'b')
14619 code = 1;
14620 else if (code == 'k')
14621 code = 4;
14622 else if (code == 'q')
14623 code = 8;
14624 else if (code == 'y')
14625 code = 3;
14626 else if (code == 'h')
14627 code = 0;
14628 else if (code == 'x')
14629 code = 16;
14630 else if (code == 't')
14631 code = 32;
14632 else if (code == 'g')
14633 code = 64;
14634 else
14635 code = GET_MODE_SIZE (GET_MODE (x));
14636
14637 /* Irritatingly, AMD extended registers use a different naming convention
14638 from the normal registers: "r%d[bwd]". */
14639 if (REX_INT_REGNO_P (regno))
14640 {
14641 gcc_assert (TARGET_64BIT);
14642 putc ('r', file);
14643 fprint_ul (file, regno - FIRST_REX_INT_REG + 8);
14644 switch (code)
14645 {
14646 case 0:
14647 error ("extended registers have no high halves");
14648 break;
14649 case 1:
14650 putc ('b', file);
14651 break;
14652 case 2:
14653 putc ('w', file);
14654 break;
14655 case 4:
14656 putc ('d', file);
14657 break;
14658 case 8:
14659 /* no suffix */
14660 break;
14661 default:
14662 error ("unsupported operand size for extended register");
14663 break;
14664 }
14665 return;
14666 }
14667
14668 reg = NULL;
14669 switch (code)
14670 {
14671 case 3:
14672 if (STACK_TOP_P (x))
14673 {
14674 reg = "st(0)";
14675 break;
14676 }
14677 /* FALLTHRU */
14678 case 8:
14679 case 4:
14680 case 12:
14681 if (! ANY_FP_REG_P (x))
14682 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
14683 /* FALLTHRU */
14684 case 16:
14685 case 2:
14686 normal:
14687 reg = hi_reg_name[regno];
14688 break;
14689 case 1:
14690 if (regno >= ARRAY_SIZE (qi_reg_name))
14691 goto normal;
14692 reg = qi_reg_name[regno];
14693 break;
14694 case 0:
14695 if (regno >= ARRAY_SIZE (qi_high_reg_name))
14696 goto normal;
14697 reg = qi_high_reg_name[regno];
14698 break;
14699 case 32:
14700 if (SSE_REG_P (x))
14701 {
14702 gcc_assert (!duplicated);
14703 putc ('y', file);
14704 fputs (hi_reg_name[regno] + 1, file);
14705 return;
14706 }
14707 case 64:
14708 if (SSE_REG_P (x))
14709 {
14710 gcc_assert (!duplicated);
14711 putc ('z', file);
14712 fputs (hi_reg_name[REGNO (x)] + 1, file);
14713 return;
14714 }
14715 break;
14716 default:
14717 gcc_unreachable ();
14718 }
14719
14720 fputs (reg, file);
14721 if (duplicated)
14722 {
14723 if (ASSEMBLER_DIALECT == ASM_ATT)
14724 fprintf (file, ", %%%s", reg);
14725 else
14726 fprintf (file, ", %s", reg);
14727 }
14728 }
14729
14730 /* Locate some local-dynamic symbol still in use by this function
14731 so that we can print its name in some tls_local_dynamic_base
14732 pattern. */
14733
14734 static int
14735 get_some_local_dynamic_name_1 (rtx *px, void *)
14736 {
14737 rtx x = *px;
14738
14739 if (GET_CODE (x) == SYMBOL_REF
14740 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
14741 {
14742 cfun->machine->some_ld_name = XSTR (x, 0);
14743 return 1;
14744 }
14745
14746 return 0;
14747 }
14748
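/* Return the name of some TLS local-dynamic symbol still referenced by
   the current function, caching it in cfun->machine->some_ld_name, or
   NULL if no such symbol is found.  */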
14749 static const char *
14750 get_some_local_dynamic_name (void)
14751 {
14752 rtx insn;
14753
14754 if (cfun->machine->some_ld_name)
14755 return cfun->machine->some_ld_name;
14756
14757 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
14758 if (NONDEBUG_INSN_P (insn)
14759 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
14760 return cfun->machine->some_ld_name;
14761
14762 return NULL;
14763 }
14764
14765 /* Meaning of CODE:
14766 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
14767 C -- print opcode suffix for set/cmov insn.
14768 c -- like C, but print reversed condition
14769 F,f -- likewise, but for floating-point.
14770 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
14771 otherwise nothing
14772 R -- print embedded rounding and sae.
14773 r -- print only sae.
14774 z -- print the opcode suffix for the size of the current operand.
14775 Z -- likewise, with special suffixes for x87 instructions.
14776 * -- print a star (in certain assembler syntax)
14777 A -- print an absolute memory reference.
14778 E -- print address with DImode register names if TARGET_64BIT.
14779 w -- print the operand as if it's a "word" (HImode) even if it isn't.
14780 s -- print a shift double count, followed by the assembler's argument
14781 delimiter.
14782 b -- print the QImode name of the register for the indicated operand.
14783 %b0 would print %al if operands[0] is reg 0.
14784 w -- likewise, print the HImode name of the register.
14785 k -- likewise, print the SImode name of the register.
14786 q -- likewise, print the DImode name of the register.
14787 x -- likewise, print the V4SFmode name of the register.
14788 t -- likewise, print the V8SFmode name of the register.
14789 g -- likewise, print the V16SFmode name of the register.
14790 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
14791 y -- print "st(0)" instead of "st" as a register.
14792 d -- print duplicated register operand for AVX instruction.
14793 D -- print condition for SSE cmp instruction.
14794 P -- if PIC, print an @PLT suffix.
14795 p -- print raw symbol name.
14796 X -- don't print any sort of PIC '@' suffix for a symbol.
14797 & -- print some in-use local-dynamic symbol name.
14798 H -- print a memory address offset by 8; used for sse high-parts
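K -- print an HLE lock prefix (xacquire/xrelease) selected by the operand.
N -- print "{z}" if the operand is a zero constant (AVX512 zero-masking).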
14799 Y -- print condition for XOP pcom* instruction.
14800 + -- print a branch hint as 'cs' or 'ds' prefix
14801 ; -- print a semicolon (after prefixes due to bug in older gas).
14802 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
14803 @ -- print the segment register of a thread base pointer load
14804 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
14805 */
14806
14807 void
14808 ix86_print_operand (FILE *file, rtx x, int code)
14809 {
14810 if (code)
14811 {
14812 switch (code)
14813 {
14814 case 'A':
14815 switch (ASSEMBLER_DIALECT)
14816 {
14817 case ASM_ATT:
14818 putc ('*', file);
14819 break;
14820
14821 case ASM_INTEL:
14822 /* Intel syntax. For absolute addresses, registers should not
14823 be surrounded by braces. */
14824 if (!REG_P (x))
14825 {
14826 putc ('[', file);
14827 ix86_print_operand (file, x, 0);
14828 putc (']', file);
14829 return;
14830 }
14831 break;
14832
14833 default:
14834 gcc_unreachable ();
14835 }
14836
14837 ix86_print_operand (file, x, 0);
14838 return;
14839
14840 case 'E':
14841 /* Wrap address in an UNSPEC to declare special handling. */
14842 if (TARGET_64BIT)
14843 x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
14844
14845 output_address (x);
14846 return;
14847
14848 case 'L':
14849 if (ASSEMBLER_DIALECT == ASM_ATT)
14850 putc ('l', file);
14851 return;
14852
14853 case 'W':
14854 if (ASSEMBLER_DIALECT == ASM_ATT)
14855 putc ('w', file);
14856 return;
14857
14858 case 'B':
14859 if (ASSEMBLER_DIALECT == ASM_ATT)
14860 putc ('b', file);
14861 return;
14862
14863 case 'Q':
14864 if (ASSEMBLER_DIALECT == ASM_ATT)
14865 putc ('l', file);
14866 return;
14867
14868 case 'S':
14869 if (ASSEMBLER_DIALECT == ASM_ATT)
14870 putc ('s', file);
14871 return;
14872
14873 case 'T':
14874 if (ASSEMBLER_DIALECT == ASM_ATT)
14875 putc ('t', file);
14876 return;
14877
14878 case 'O':
14879 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14880 if (ASSEMBLER_DIALECT != ASM_ATT)
14881 return;
14882
14883 switch (GET_MODE_SIZE (GET_MODE (x)))
14884 {
14885 case 2:
14886 putc ('w', file);
14887 break;
14888
14889 case 4:
14890 putc ('l', file);
14891 break;
14892
14893 case 8:
14894 putc ('q', file);
14895 break;
14896
14897 default:
14898 output_operand_lossage
14899 ("invalid operand size for operand code 'O'");
14900 return;
14901 }
14902
14903 putc ('.', file);
14904 #endif
14905 return;
14906
14907 case 'z':
14908 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
14909 {
14910 /* Opcodes don't get size suffixes if using Intel opcodes. */
14911 if (ASSEMBLER_DIALECT == ASM_INTEL)
14912 return;
14913
14914 switch (GET_MODE_SIZE (GET_MODE (x)))
14915 {
14916 case 1:
14917 putc ('b', file);
14918 return;
14919
14920 case 2:
14921 putc ('w', file);
14922 return;
14923
14924 case 4:
14925 putc ('l', file);
14926 return;
14927
14928 case 8:
14929 putc ('q', file);
14930 return;
14931
14932 default:
14933 output_operand_lossage
14934 ("invalid operand size for operand code 'z'");
14935 return;
14936 }
14937 }
14938
14939 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14940 warning
14941 (0, "non-integer operand used with operand code 'z'");
14942 /* FALLTHRU */
14943
14944 case 'Z':
14945 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
14946 if (ASSEMBLER_DIALECT == ASM_INTEL)
14947 return;
14948
14949 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
14950 {
14951 switch (GET_MODE_SIZE (GET_MODE (x)))
14952 {
14953 case 2:
14954 #ifdef HAVE_AS_IX86_FILDS
14955 putc ('s', file);
14956 #endif
14957 return;
14958
14959 case 4:
14960 putc ('l', file);
14961 return;
14962
14963 case 8:
14964 #ifdef HAVE_AS_IX86_FILDQ
14965 putc ('q', file);
14966 #else
14967 fputs ("ll", file);
14968 #endif
14969 return;
14970
14971 default:
14972 break;
14973 }
14974 }
14975 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14976 {
14977 /* 387 opcodes don't get size suffixes
14978 if the operands are registers. */
14979 if (STACK_REG_P (x))
14980 return;
14981
14982 switch (GET_MODE_SIZE (GET_MODE (x)))
14983 {
14984 case 4:
14985 putc ('s', file);
14986 return;
14987
14988 case 8:
14989 putc ('l', file);
14990 return;
14991
14992 case 12:
14993 case 16:
14994 putc ('t', file);
14995 return;
14996
14997 default:
14998 break;
14999 }
15000 }
15001 else
15002 {
15003 output_operand_lossage
15004 ("invalid operand type used with operand code 'Z'");
15005 return;
15006 }
15007
15008 output_operand_lossage
15009 ("invalid operand size for operand code 'Z'");
15010 return;
15011
15012 case 'd':
15013 case 'b':
15014 case 'w':
15015 case 'k':
15016 case 'q':
15017 case 'h':
15018 case 't':
15019 case 'g':
15020 case 'y':
15021 case 'x':
15022 case 'X':
15023 case 'P':
15024 case 'p':
15025 break;
15026
15027 case 's':
15028 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
15029 {
15030 ix86_print_operand (file, x, 0);
15031 fputs (", ", file);
15032 }
15033 return;
15034
15035 case 'Y':
15036 switch (GET_CODE (x))
15037 {
15038 case NE:
15039 fputs ("neq", file);
15040 break;
15041 case EQ:
15042 fputs ("eq", file);
15043 break;
15044 case GE:
15045 case GEU:
15046 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
15047 break;
15048 case GT:
15049 case GTU:
15050 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
15051 break;
15052 case LE:
15053 case LEU:
15054 fputs ("le", file);
15055 break;
15056 case LT:
15057 case LTU:
15058 fputs ("lt", file);
15059 break;
15060 case UNORDERED:
15061 fputs ("unord", file);
15062 break;
15063 case ORDERED:
15064 fputs ("ord", file);
15065 break;
15066 case UNEQ:
15067 fputs ("ueq", file);
15068 break;
15069 case UNGE:
15070 fputs ("nlt", file);
15071 break;
15072 case UNGT:
15073 fputs ("nle", file);
15074 break;
15075 case UNLE:
15076 fputs ("ule", file);
15077 break;
15078 case UNLT:
15079 fputs ("ult", file);
15080 break;
15081 case LTGT:
15082 fputs ("une", file);
15083 break;
15084 default:
15085 output_operand_lossage ("operand is not a condition code, "
15086 "invalid operand code 'Y'");
15087 return;
15088 }
15089 return;
15090
15091 case 'D':
15092 /* Little bit of braindamage here. The SSE compare instructions
15093 use completely different names for the comparisons than the
15094 fp conditional moves do. */
15095 switch (GET_CODE (x))
15096 {
15097 case UNEQ:
15098 if (TARGET_AVX)
15099 {
15100 fputs ("eq_us", file);
15101 break;
15102 }
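/* FALLTHRU */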
15103 case EQ:
15104 fputs ("eq", file);
15105 break;
15106 case UNLT:
15107 if (TARGET_AVX)
15108 {
15109 fputs ("nge", file);
15110 break;
15111 }
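/* FALLTHRU */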
15112 case LT:
15113 fputs ("lt", file);
15114 break;
15115 case UNLE:
15116 if (TARGET_AVX)
15117 {
15118 fputs ("ngt", file);
15119 break;
15120 }
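/* FALLTHRU */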
15121 case LE:
15122 fputs ("le", file);
15123 break;
15124 case UNORDERED:
15125 fputs ("unord", file);
15126 break;
15127 case LTGT:
15128 if (TARGET_AVX)
15129 {
15130 fputs ("neq_oq", file);
15131 break;
15132 }
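/* FALLTHRU */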
15133 case NE:
15134 fputs ("neq", file);
15135 break;
15136 case GE:
15137 if (TARGET_AVX)
15138 {
15139 fputs ("ge", file);
15140 break;
15141 }
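/* FALLTHRU */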
15142 case UNGE:
15143 fputs ("nlt", file);
15144 break;
15145 case GT:
15146 if (TARGET_AVX)
15147 {
15148 fputs ("gt", file);
15149 break;
15150 }
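/* FALLTHRU */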
15151 case UNGT:
15152 fputs ("nle", file);
15153 break;
15154 case ORDERED:
15155 fputs ("ord", file);
15156 break;
15157 default:
15158 output_operand_lossage ("operand is not a condition code, "
15159 "invalid operand code 'D'");
15160 return;
15161 }
15162 return;
15163
15164 case 'F':
15165 case 'f':
15166 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
15167 if (ASSEMBLER_DIALECT == ASM_ATT)
15168 putc ('.', file);
15169 #endif
15170
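/* FALLTHRU */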
15171 case 'C':
15172 case 'c':
15173 if (!COMPARISON_P (x))
15174 {
15175 output_operand_lossage ("operand is not a condition code, "
15176 "invalid operand code '%c'", code);
15177 return;
15178 }
15179 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
15180 code == 'c' || code == 'f',
15181 code == 'F' || code == 'f',
15182 file);
15183 return;
15184
15185 case 'H':
15186 if (!offsettable_memref_p (x))
15187 {
15188 output_operand_lossage ("operand is not an offsettable memory "
15189 "reference, invalid operand code 'H'");
15190 return;
15191 }
15192 /* It doesn't actually matter what mode we use here, as we're
15193 only going to use this for printing. */
15194 x = adjust_address_nv (x, DImode, 8);
15195 /* Output 'qword ptr' for intel assembler dialect. */
15196 if (ASSEMBLER_DIALECT == ASM_INTEL)
15197 code = 'q';
15198 break;
15199
15200 case 'K':
15201 gcc_assert (CONST_INT_P (x));
15202
15203 if (INTVAL (x) & IX86_HLE_ACQUIRE)
15204 #ifdef HAVE_AS_IX86_HLE
15205 fputs ("xacquire ", file);
15206 #else
15207 fputs ("\n" ASM_BYTE "0xf2\n\t", file);
15208 #endif
15209 else if (INTVAL (x) & IX86_HLE_RELEASE)
15210 #ifdef HAVE_AS_IX86_HLE
15211 fputs ("xrelease ", file);
15212 #else
15213 fputs ("\n" ASM_BYTE "0xf3\n\t", file);
15214 #endif
15215 /* We do not want to print the value of the operand. */
15216 return;
15217
15218 case 'N':
15219 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
15220 fputs ("{z}", file);
15221 return;
15222
15223 case 'r':
15224 gcc_assert (CONST_INT_P (x));
15225 gcc_assert (INTVAL (x) == ROUND_SAE);
15226
15227 if (ASSEMBLER_DIALECT == ASM_INTEL)
15228 fputs (", ", file);
15229
15230 fputs ("{sae}", file);
15231
15232 if (ASSEMBLER_DIALECT == ASM_ATT)
15233 fputs (", ", file);
15234
15235 return;
15236
15237 case 'R':
15238 gcc_assert (CONST_INT_P (x));
15239
15240 if (ASSEMBLER_DIALECT == ASM_INTEL)
15241 fputs (", ", file);
15242
15243 switch (INTVAL (x))
15244 {
15245 case ROUND_NEAREST_INT | ROUND_SAE:
15246 fputs ("{rn-sae}", file);
15247 break;
15248 case ROUND_NEG_INF | ROUND_SAE:
15249 fputs ("{rd-sae}", file);
15250 break;
15251 case ROUND_POS_INF | ROUND_SAE:
15252 fputs ("{ru-sae}", file);
15253 break;
15254 case ROUND_ZERO | ROUND_SAE:
15255 fputs ("{rz-sae}", file);
15256 break;
15257 default:
15258 gcc_unreachable ();
15259 }
15260
15261 if (ASSEMBLER_DIALECT == ASM_ATT)
15262 fputs (", ", file);
15263
15264 return;
15265
15266 case '*':
15267 if (ASSEMBLER_DIALECT == ASM_ATT)
15268 putc ('*', file);
15269 return;
15270
15271 case '&':
15272 {
15273 const char *name = get_some_local_dynamic_name ();
15274 if (name == NULL)
15275 output_operand_lossage ("'%%&' used without any "
15276 "local dynamic TLS references");
15277 else
15278 assemble_name (file, name);
15279 return;
15280 }
15281
15282 case '+':
15283 {
15284 rtx x;
15285
15286 if (!optimize
15287 || optimize_function_for_size_p (cfun)
15288 || !TARGET_BRANCH_PREDICTION_HINTS)
15289 return;
15290
15291 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
15292 if (x)
15293 {
15294 int pred_val = XINT (x, 0);
15295
15296 if (pred_val < REG_BR_PROB_BASE * 45 / 100
15297 || pred_val > REG_BR_PROB_BASE * 55 / 100)
15298 {
15299 bool taken = pred_val > REG_BR_PROB_BASE / 2;
15300 bool cputaken
15301 = final_forward_branch_p (current_output_insn) == 0;
15302
15303 /* Emit hints only in the case default branch prediction
15304 heuristics would fail. */
15305 if (taken != cputaken)
15306 {
15307 /* We use the 0x3e (DS) prefix for taken branches and
15308 the 0x2e (CS) prefix for not-taken branches. */
15309 if (taken)
15310 fputs ("ds ; ", file);
15311 else
15312 fputs ("cs ; ", file);
15313 }
15314 }
15315 }
15316 return;
15317 }
15318
15319 case ';':
15320 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
15321 putc (';', file);
15322 #endif
15323 return;
15324
15325 case '@':
15326 if (ASSEMBLER_DIALECT == ASM_ATT)
15327 putc ('%', file);
15328
15329 /* The kernel uses a different segment register for performance
15330 reasons: this way a system call does not have to trash the
15331 userspace segment register, which would be expensive. */
15332 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
15333 fputs ("fs", file);
15334 else
15335 fputs ("gs", file);
15336 return;
15337
15338 case '~':
15339 putc (TARGET_AVX2 ? 'i' : 'f', file);
15340 return;
15341
15342 case '^':
15343 if (TARGET_64BIT && Pmode != word_mode)
15344 fputs ("addr32 ", file);
15345 return;
15346
15347 default:
15348 output_operand_lossage ("invalid operand code '%c'", code);
15349 }
15350 }
15351
15352 if (REG_P (x))
15353 print_reg (x, code, file);
15354
15355 else if (MEM_P (x))
15356 {
15357 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
15358 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
15359 && GET_MODE (x) != BLKmode)
15360 {
15361 const char * size;
15362 switch (GET_MODE_SIZE (GET_MODE (x)))
15363 {
15364 case 1: size = "BYTE"; break;
15365 case 2: size = "WORD"; break;
15366 case 4: size = "DWORD"; break;
15367 case 8: size = "QWORD"; break;
15368 case 12: size = "TBYTE"; break;
15369 case 16:
15370 if (GET_MODE (x) == XFmode)
15371 size = "TBYTE";
15372 else
15373 size = "XMMWORD";
15374 break;
15375 case 32: size = "YMMWORD"; break;
15376 case 64: size = "ZMMWORD"; break;
15377 default:
15378 gcc_unreachable ();
15379 }
15380
15381 /* Check for explicit size override (codes 'b', 'w', 'k',
15382 'q' and 'x') */
15383 if (code == 'b')
15384 size = "BYTE";
15385 else if (code == 'w')
15386 size = "WORD";
15387 else if (code == 'k')
15388 size = "DWORD";
15389 else if (code == 'q')
15390 size = "QWORD";
15391 else if (code == 'x')
15392 size = "XMMWORD";
15393
15394 fputs (size, file);
15395 fputs (" PTR ", file);
15396 }
15397
15398 x = XEXP (x, 0);
15399 /* Avoid (%rip) for call operands. */
15400 if (CONSTANT_ADDRESS_P (x) && code == 'P'
15401 && !CONST_INT_P (x))
15402 output_addr_const (file, x);
15403 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
15404 output_operand_lossage ("invalid constraints for operand");
15405 else
15406 output_address (x);
15407 }
15408
15409 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
15410 {
15411 REAL_VALUE_TYPE r;
15412 long l;
15413
15414 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15415 REAL_VALUE_TO_TARGET_SINGLE (r, l);
15416
15417 if (ASSEMBLER_DIALECT == ASM_ATT)
15418 putc ('$', file);
15419 /* Sign-extend the 32-bit SFmode immediate to 8 bytes. */
15420 if (code == 'q')
15421 fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
15422 (unsigned long long) (int) l);
15423 else
15424 fprintf (file, "0x%08x", (unsigned int) l);
15425 }
15426
15427 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
15428 {
15429 REAL_VALUE_TYPE r;
15430 long l[2];
15431
15432 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15433 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
15434
15435 if (ASSEMBLER_DIALECT == ASM_ATT)
15436 putc ('$', file);
15437 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
15438 }
15439
15440 /* These float cases don't actually occur as immediate operands. */
15441 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
15442 {
15443 char dstr[30];
15444
15445 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
15446 fputs (dstr, file);
15447 }
15448
15449 else
15450 {
15451 /* We have patterns that allow zero sets of memory, for instance.
15452 In 64-bit mode, we should probably support all 8-byte vectors,
15453 since we can in fact encode that into an immediate. */
15454 if (GET_CODE (x) == CONST_VECTOR)
15455 {
15456 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
15457 x = const0_rtx;
15458 }
15459
15460 if (code != 'P' && code != 'p')
15461 {
15462 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
15463 {
15464 if (ASSEMBLER_DIALECT == ASM_ATT)
15465 putc ('$', file);
15466 }
15467 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
15468 || GET_CODE (x) == LABEL_REF)
15469 {
15470 if (ASSEMBLER_DIALECT == ASM_ATT)
15471 putc ('$', file);
15472 else
15473 fputs ("OFFSET FLAT:", file);
15474 }
15475 }
15476 if (CONST_INT_P (x))
15477 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
15478 else if (flag_pic || MACHOPIC_INDIRECT)
15479 output_pic_addr_const (file, x, code);
15480 else
15481 output_addr_const (file, x);
15482 }
15483 }
15484
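/* Implementation of TARGET_PRINT_OPERAND_PUNCT_VALID_P.  Return true
   for the punctuation characters handled by ix86_print_operand.  */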
15485 static bool
15486 ix86_print_operand_punct_valid_p (unsigned char code)
15487 {
15488 return (code == '@' || code == '*' || code == '+' || code == '&'
15489 || code == ';' || code == '~' || code == '^');
15490 }
15491 \f
15492 /* Print a memory operand whose address is ADDR. */
15493
15494 static void
15495 ix86_print_operand_address (FILE *file, rtx addr)
15496 {
15497 struct ix86_address parts;
15498 rtx base, index, disp;
15499 int scale;
15500 int ok;
15501 bool vsib = false;
15502 int code = 0;
15503
15504 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
15505 {
15506 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
15507 gcc_assert (parts.index == NULL_RTX);
15508 parts.index = XVECEXP (addr, 0, 1);
15509 parts.scale = INTVAL (XVECEXP (addr, 0, 2));
15510 addr = XVECEXP (addr, 0, 0);
15511 vsib = true;
15512 }
15513 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
15514 {
15515 gcc_assert (TARGET_64BIT);
15516 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
15517 code = 'q';
15518 }
15519 else
15520 ok = ix86_decompose_address (addr, &parts);
15521
15522 gcc_assert (ok);
15523
15524 base = parts.base;
15525 index = parts.index;
15526 disp = parts.disp;
15527 scale = parts.scale;
15528
15529 switch (parts.seg)
15530 {
15531 case SEG_DEFAULT:
15532 break;
15533 case SEG_FS:
15534 case SEG_GS:
15535 if (ASSEMBLER_DIALECT == ASM_ATT)
15536 putc ('%', file);
15537 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
15538 break;
15539 default:
15540 gcc_unreachable ();
15541 }
15542
15543 /* Use the one-byte-shorter RIP-relative addressing for 64-bit mode. */
15544 if (TARGET_64BIT && !base && !index)
15545 {
15546 rtx symbol = disp;
15547
15548 if (GET_CODE (disp) == CONST
15549 && GET_CODE (XEXP (disp, 0)) == PLUS
15550 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
15551 symbol = XEXP (XEXP (disp, 0), 0);
15552
15553 if (GET_CODE (symbol) == LABEL_REF
15554 || (GET_CODE (symbol) == SYMBOL_REF
15555 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
15556 base = pc_rtx;
15557 }
15558 if (!base && !index)
15559 {
15560 /* A displacement-only address requires special attention. */
15561
15562 if (CONST_INT_P (disp))
15563 {
15564 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
15565 fputs ("ds:", file);
15566 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
15567 }
15568 else if (flag_pic)
15569 output_pic_addr_const (file, disp, 0);
15570 else
15571 output_addr_const (file, disp);
15572 }
15573 else
15574 {
15575 /* Print SImode register names to force addr32 prefix. */
15576 if (SImode_address_operand (addr, VOIDmode))
15577 {
15578 #ifdef ENABLE_CHECKING
15579 gcc_assert (TARGET_64BIT);
15580 switch (GET_CODE (addr))
15581 {
15582 case SUBREG:
15583 gcc_assert (GET_MODE (addr) == SImode);
15584 gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
15585 break;
15586 case ZERO_EXTEND:
15587 case AND:
15588 gcc_assert (GET_MODE (addr) == DImode);
15589 break;
15590 default:
15591 gcc_unreachable ();
15592 }
15593 #endif
15594 gcc_assert (!code);
15595 code = 'k';
15596 }
15597 else if (code == 0
15598 && TARGET_X32
15599 && disp
15600 && CONST_INT_P (disp)
15601 && INTVAL (disp) < -16*1024*1024)
15602 {
15603 /* X32 runs in 64-bit mode, where displacement, DISP, in
15604 address DISP(%r64), is encoded as 32-bit immediate sign-
15605 extended from 32-bit to 64-bit. For -0x40000300(%r64),
15606 address is %r64 + 0xffffffffbffffd00. When %r64 <
15607 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
15608 which is invalid for x32. The correct address is %r64
15609 - 0x40000300 == 0xf7ffdd64. To properly encode
15610 -0x40000300(%r64) for x32, we zero-extend negative
15611 displacement by forcing addr32 prefix which truncates
15612 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
15613 zero-extend all negative displacements, including -1(%rsp).
15614 However, for small negative displacements, sign-extension
15615 won't cause overflow. We only zero-extend negative
15616 displacements if they < -16*1024*1024, which is also used
15617 to check legitimate address displacements for PIC. */
15618 code = 'k';
15619 }
15620
15621 if (ASSEMBLER_DIALECT == ASM_ATT)
15622 {
15623 if (disp)
15624 {
15625 if (flag_pic)
15626 output_pic_addr_const (file, disp, 0);
15627 else if (GET_CODE (disp) == LABEL_REF)
15628 output_asm_label (disp);
15629 else
15630 output_addr_const (file, disp);
15631 }
15632
15633 putc ('(', file);
15634 if (base)
15635 print_reg (base, code, file);
15636 if (index)
15637 {
15638 putc (',', file);
15639 print_reg (index, vsib ? 0 : code, file);
15640 if (scale != 1 || vsib)
15641 fprintf (file, ",%d", scale);
15642 }
15643 putc (')', file);
15644 }
15645 else
15646 {
15647 rtx offset = NULL_RTX;
15648
15649 if (disp)
15650 {
15651 /* Pull out the offset of a symbol; print any symbol itself. */
15652 if (GET_CODE (disp) == CONST
15653 && GET_CODE (XEXP (disp, 0)) == PLUS
15654 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
15655 {
15656 offset = XEXP (XEXP (disp, 0), 1);
15657 disp = gen_rtx_CONST (VOIDmode,
15658 XEXP (XEXP (disp, 0), 0));
15659 }
15660
15661 if (flag_pic)
15662 output_pic_addr_const (file, disp, 0);
15663 else if (GET_CODE (disp) == LABEL_REF)
15664 output_asm_label (disp);
15665 else if (CONST_INT_P (disp))
15666 offset = disp;
15667 else
15668 output_addr_const (file, disp);
15669 }
15670
15671 putc ('[', file);
15672 if (base)
15673 {
15674 print_reg (base, code, file);
15675 if (offset)
15676 {
15677 if (INTVAL (offset) >= 0)
15678 putc ('+', file);
15679 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
15680 }
15681 }
15682 else if (offset)
15683 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
15684 else
15685 putc ('0', file);
15686
15687 if (index)
15688 {
15689 putc ('+', file);
15690 print_reg (index, vsib ? 0 : code, file);
15691 if (scale != 1 || vsib)
15692 fprintf (file, "*%d", scale);
15693 }
15694 putc (']', file);
15695 }
15696 }
15697 }
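
/* Illustrative example (not part of the original sources): for an address
   with base %rax, index %rbx, scale 8 and displacement 4, the code above
   prints "4(%rax,%rbx,8)" under ASM_ATT and "[rax+4+rbx*8]" under
   ASM_INTEL; a RIP-relative symbol in 64-bit mode comes out as
   "sym(%rip)" or "sym[rip]" respectively.  */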
15698
15699 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
15700
15701 static bool
15702 i386_asm_output_addr_const_extra (FILE *file, rtx x)
15703 {
15704 rtx op;
15705
15706 if (GET_CODE (x) != UNSPEC)
15707 return false;
15708
15709 op = XVECEXP (x, 0, 0);
15710 switch (XINT (x, 1))
15711 {
15712 case UNSPEC_GOTTPOFF:
15713 output_addr_const (file, op);
15714 /* FIXME: This might be @TPOFF in Sun ld. */
15715 fputs ("@gottpoff", file);
15716 break;
15717 case UNSPEC_TPOFF:
15718 output_addr_const (file, op);
15719 fputs ("@tpoff", file);
15720 break;
15721 case UNSPEC_NTPOFF:
15722 output_addr_const (file, op);
15723 if (TARGET_64BIT)
15724 fputs ("@tpoff", file);
15725 else
15726 fputs ("@ntpoff", file);
15727 break;
15728 case UNSPEC_DTPOFF:
15729 output_addr_const (file, op);
15730 fputs ("@dtpoff", file);
15731 break;
15732 case UNSPEC_GOTNTPOFF:
15733 output_addr_const (file, op);
15734 if (TARGET_64BIT)
15735 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
15736 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
15737 else
15738 fputs ("@gotntpoff", file);
15739 break;
15740 case UNSPEC_INDNTPOFF:
15741 output_addr_const (file, op);
15742 fputs ("@indntpoff", file);
15743 break;
15744 #if TARGET_MACHO
15745 case UNSPEC_MACHOPIC_OFFSET:
15746 output_addr_const (file, op);
15747 putc ('-', file);
15748 machopic_output_function_base_name (file);
15749 break;
15750 #endif
15751
15752 case UNSPEC_STACK_CHECK:
15753 {
15754 int offset;
15755
15756 gcc_assert (flag_split_stack);
15757
15758 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
15759 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
15760 #else
15761 gcc_unreachable ();
15762 #endif
15763
15764 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
15765 }
15766 break;
15767
15768 default:
15769 return false;
15770 }
15771
15772 return true;
15773 }
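
/* For illustration (not from the original sources): a UNSPEC_GOTNTPOFF
   reference to a symbol "foo" prints as "foo@gottpoff(%rip)" (AT&T) or
   "foo@gottpoff[rip]" (Intel) in 64-bit mode and as "foo@gotntpoff" in
   32-bit mode, matching the initial-exec TLS relocations the assembler
   expects.  */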
15774 \f
15775 /* Split one or more double-mode RTL references into pairs of half-mode
15776 references. The RTL can be REG, offsettable MEM, integer constant, or
15777 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
15778 split and "num" is its length. lo_half and hi_half are output arrays
15779 that parallel "operands". */
15780
15781 void
15782 split_double_mode (enum machine_mode mode, rtx operands[],
15783 int num, rtx lo_half[], rtx hi_half[])
15784 {
15785 enum machine_mode half_mode;
15786 unsigned int byte;
15787
15788 switch (mode)
15789 {
15790 case TImode:
15791 half_mode = DImode;
15792 break;
15793 case DImode:
15794 half_mode = SImode;
15795 break;
15796 default:
15797 gcc_unreachable ();
15798 }
15799
15800 byte = GET_MODE_SIZE (half_mode);
15801
15802 while (num--)
15803 {
15804 rtx op = operands[num];
15805
15806 /* simplify_subreg refuses to split volatile memory addresses,
15807 but we still have to handle them. */
15808 if (MEM_P (op))
15809 {
15810 lo_half[num] = adjust_address (op, half_mode, 0);
15811 hi_half[num] = adjust_address (op, half_mode, byte);
15812 }
15813 else
15814 {
15815 lo_half[num] = simplify_gen_subreg (half_mode, op,
15816 GET_MODE (op) == VOIDmode
15817 ? mode : GET_MODE (op), 0);
15818 hi_half[num] = simplify_gen_subreg (half_mode, op,
15819 GET_MODE (op) == VOIDmode
15820 ? mode : GET_MODE (op), byte);
15821 }
15822 }
15823 }
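
/* A small usage sketch (hypothetical caller): to split a single DImode
   operand into SImode halves one might write

     rtx lo[1], hi[1];
     split_double_mode (DImode, &operands[0], 1, lo, hi);

   after which lo[0] holds the low word and hi[0] the word at byte
   offset 4; MEM operands are split with adjust_address so volatile
   references remain valid.  */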
15824 \f
15825 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
15826 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
15827 is the expression of the binary operation. The output may either be
15828 emitted here, or returned to the caller, like all output_* functions.
15829
15830 There is no guarantee that the operands are the same mode, as they
15831 might be within FLOAT or FLOAT_EXTEND expressions. */
15832
15833 #ifndef SYSV386_COMPAT
15834 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
15835 wants to fix the assemblers because that causes incompatibility
15836 with gcc. No-one wants to fix gcc because that causes
15837 incompatibility with assemblers... You can use the option of
15838 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
15839 #define SYSV386_COMPAT 1
15840 #endif
15841
15842 const char *
15843 output_387_binary_op (rtx insn, rtx *operands)
15844 {
15845 static char buf[40];
15846 const char *p;
15847 const char *ssep;
15848 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
15849
15850 #ifdef ENABLE_CHECKING
15851 /* Even if we do not want to check the inputs, this documents input
15852 constraints, which helps in understanding the following code. */
15853 if (STACK_REG_P (operands[0])
15854 && ((REG_P (operands[1])
15855 && REGNO (operands[0]) == REGNO (operands[1])
15856 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
15857 || (REG_P (operands[2])
15858 && REGNO (operands[0]) == REGNO (operands[2])
15859 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
15860 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
15861 ; /* ok */
15862 else
15863 gcc_assert (is_sse);
15864 #endif
15865
15866 switch (GET_CODE (operands[3]))
15867 {
15868 case PLUS:
15869 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15870 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15871 p = "fiadd";
15872 else
15873 p = "fadd";
15874 ssep = "vadd";
15875 break;
15876
15877 case MINUS:
15878 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15879 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15880 p = "fisub";
15881 else
15882 p = "fsub";
15883 ssep = "vsub";
15884 break;
15885
15886 case MULT:
15887 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15888 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15889 p = "fimul";
15890 else
15891 p = "fmul";
15892 ssep = "vmul";
15893 break;
15894
15895 case DIV:
15896 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15897 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15898 p = "fidiv";
15899 else
15900 p = "fdiv";
15901 ssep = "vdiv";
15902 break;
15903
15904 default:
15905 gcc_unreachable ();
15906 }
15907
15908 if (is_sse)
15909 {
15910 if (TARGET_AVX)
15911 {
15912 strcpy (buf, ssep);
15913 if (GET_MODE (operands[0]) == SFmode)
15914 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
15915 else
15916 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
15917 }
15918 else
15919 {
15920 strcpy (buf, ssep + 1);
15921 if (GET_MODE (operands[0]) == SFmode)
15922 strcat (buf, "ss\t{%2, %0|%0, %2}");
15923 else
15924 strcat (buf, "sd\t{%2, %0|%0, %2}");
15925 }
15926 return buf;
15927 }
15928 strcpy (buf, p);
15929
15930 switch (GET_CODE (operands[3]))
15931 {
15932 case MULT:
15933 case PLUS:
15934 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
15935 {
15936 rtx temp = operands[2];
15937 operands[2] = operands[1];
15938 operands[1] = temp;
15939 }
15940
15941 /* We know operands[0] == operands[1]. */
15942
15943 if (MEM_P (operands[2]))
15944 {
15945 p = "%Z2\t%2";
15946 break;
15947 }
15948
15949 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
15950 {
15951 if (STACK_TOP_P (operands[0]))
15952 /* How is it that we are storing to a dead operand[2]?
15953 Well, presumably operands[1] is dead too. We can't
15954 store the result to st(0) as st(0) gets popped on this
15955 instruction. Instead store to operands[2] (which I
15956 think has to be st(1)). st(1) will be popped later.
15957 gcc <= 2.8.1 didn't have this check and generated
15958 assembly code that the Unixware assembler rejected. */
15959 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
15960 else
15961 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
15962 break;
15963 }
15964
15965 if (STACK_TOP_P (operands[0]))
15966 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
15967 else
15968 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
15969 break;
15970
15971 case MINUS:
15972 case DIV:
15973 if (MEM_P (operands[1]))
15974 {
15975 p = "r%Z1\t%1";
15976 break;
15977 }
15978
15979 if (MEM_P (operands[2]))
15980 {
15981 p = "%Z2\t%2";
15982 break;
15983 }
15984
15985 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
15986 {
15987 #if SYSV386_COMPAT
15988 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
15989 derived assemblers, confusingly reverse the direction of
15990 the operation for fsub{r} and fdiv{r} when the
15991 destination register is not st(0). The Intel assembler
15992 doesn't have this brain damage. Read !SYSV386_COMPAT to
15993 figure out what the hardware really does. */
15994 if (STACK_TOP_P (operands[0]))
15995 p = "{p\t%0, %2|rp\t%2, %0}";
15996 else
15997 p = "{rp\t%2, %0|p\t%0, %2}";
15998 #else
15999 if (STACK_TOP_P (operands[0]))
16000 /* As above for fmul/fadd, we can't store to st(0). */
16001 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
16002 else
16003 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
16004 #endif
16005 break;
16006 }
16007
16008 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
16009 {
16010 #if SYSV386_COMPAT
16011 if (STACK_TOP_P (operands[0]))
16012 p = "{rp\t%0, %1|p\t%1, %0}";
16013 else
16014 p = "{p\t%1, %0|rp\t%0, %1}";
16015 #else
16016 if (STACK_TOP_P (operands[0]))
16017 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
16018 else
16019 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
16020 #endif
16021 break;
16022 }
16023
16024 if (STACK_TOP_P (operands[0]))
16025 {
16026 if (STACK_TOP_P (operands[1]))
16027 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
16028 else
16029 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
16030 break;
16031 }
16032 else if (STACK_TOP_P (operands[1]))
16033 {
16034 #if SYSV386_COMPAT
16035 p = "{\t%1, %0|r\t%0, %1}";
16036 #else
16037 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
16038 #endif
16039 }
16040 else
16041 {
16042 #if SYSV386_COMPAT
16043 p = "{r\t%2, %0|\t%0, %2}";
16044 #else
16045 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
16046 #endif
16047 }
16048 break;
16049
16050 default:
16051 gcc_unreachable ();
16052 }
16053
16054 strcat (buf, p);
16055 return buf;
16056 }
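
/* As an illustration of the templates above: an SSE SFmode add returns
   "addss\t{%2, %0|%0, %2}", or "vaddss\t{%2, %1, %0|%0, %1, %2}" when
   TARGET_AVX is enabled, while an x87 add with a memory operand ends up
   as "fadd%Z2\t%2".  */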
16057
16058 /* Check if a 256bit AVX register is referenced inside of EXP. */
16059
16060 static int
16061 ix86_check_avx256_register (rtx *pexp, void *)
16062 {
16063 rtx exp = *pexp;
16064
16065 if (GET_CODE (exp) == SUBREG)
16066 exp = SUBREG_REG (exp);
16067
16068 if (REG_P (exp)
16069 && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp)))
16070 return 1;
16071
16072 return 0;
16073 }
16074
16075 /* Return needed mode for entity in optimize_mode_switching pass. */
16076
16077 static int
16078 ix86_avx_u128_mode_needed (rtx insn)
16079 {
16080 if (CALL_P (insn))
16081 {
16082 rtx link;
16083
16084 /* Needed mode is set to AVX_U128_CLEAN if there are
16085 no 256bit modes used in function arguments. */
16086 for (link = CALL_INSN_FUNCTION_USAGE (insn);
16087 link;
16088 link = XEXP (link, 1))
16089 {
16090 if (GET_CODE (XEXP (link, 0)) == USE)
16091 {
16092 rtx arg = XEXP (XEXP (link, 0), 0);
16093
16094 if (ix86_check_avx256_register (&arg, NULL))
16095 return AVX_U128_DIRTY;
16096 }
16097 }
16098
16099 return AVX_U128_CLEAN;
16100 }
16101
16102 /* Require DIRTY mode if a 256bit AVX register is referenced. Hardware
16103 changes state only when a 256bit register is written to, but we need
16104 to prevent the compiler from moving the optimal insertion point
16105 above an eventual read from a 256bit register. */
16106 if (for_each_rtx (&PATTERN (insn), ix86_check_avx256_register, NULL))
16107 return AVX_U128_DIRTY;
16108
16109 return AVX_U128_ANY;
16110 }
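
/* Example (illustrative): a call whose argument is passed in a 256bit
   %ymm register makes the needed mode AVX_U128_DIRTY, so the upper
   halves stay live across the call; a call with only scalar or 128bit
   arguments yields AVX_U128_CLEAN and permits a vzeroupper before it.  */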
16111
16112 /* Return mode that i387 must be switched into
16113 prior to the execution of insn. */
16114
16115 static int
16116 ix86_i387_mode_needed (int entity, rtx insn)
16117 {
16118 enum attr_i387_cw mode;
16119
16120 /* The mode UNINITIALIZED is used to store the control word after a
16121 function call or ASM pattern. The mode ANY specifies that the function
16122 has no requirements on the control word and makes no changes to the
16123 bits we are interested in. */
16124
16125 if (CALL_P (insn)
16126 || (NONJUMP_INSN_P (insn)
16127 && (asm_noperands (PATTERN (insn)) >= 0
16128 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
16129 return I387_CW_UNINITIALIZED;
16130
16131 if (recog_memoized (insn) < 0)
16132 return I387_CW_ANY;
16133
16134 mode = get_attr_i387_cw (insn);
16135
16136 switch (entity)
16137 {
16138 case I387_TRUNC:
16139 if (mode == I387_CW_TRUNC)
16140 return mode;
16141 break;
16142
16143 case I387_FLOOR:
16144 if (mode == I387_CW_FLOOR)
16145 return mode;
16146 break;
16147
16148 case I387_CEIL:
16149 if (mode == I387_CW_CEIL)
16150 return mode;
16151 break;
16152
16153 case I387_MASK_PM:
16154 if (mode == I387_CW_MASK_PM)
16155 return mode;
16156 break;
16157
16158 default:
16159 gcc_unreachable ();
16160 }
16161
16162 return I387_CW_ANY;
16163 }
16164
16165 /* Return mode that entity must be switched into
16166 prior to the execution of insn. */
16167
16168 static int
16169 ix86_mode_needed (int entity, rtx insn)
16170 {
16171 switch (entity)
16172 {
16173 case AVX_U128:
16174 return ix86_avx_u128_mode_needed (insn);
16175 case I387_TRUNC:
16176 case I387_FLOOR:
16177 case I387_CEIL:
16178 case I387_MASK_PM:
16179 return ix86_i387_mode_needed (entity, insn);
16180 default:
16181 gcc_unreachable ();
16182 }
16183 return 0;
16184 }
16185
16186 /* Check if a 256bit AVX register is referenced in stores. */
16187
16188 static void
16189 ix86_check_avx256_stores (rtx dest, const_rtx, void *data)
16190 {
16191 if (ix86_check_avx256_register (&dest, NULL))
16192 {
16193 bool *used = (bool *) data;
16194 *used = true;
16195 }
16196 }
16197
16198 /* Calculate mode of upper 128bit AVX registers after the insn. */
16199
16200 static int
16201 ix86_avx_u128_mode_after (int mode, rtx insn)
16202 {
16203 rtx pat = PATTERN (insn);
16204
16205 if (vzeroupper_operation (pat, VOIDmode)
16206 || vzeroall_operation (pat, VOIDmode))
16207 return AVX_U128_CLEAN;
16208
16209 /* We know that the state is clean after a CALL insn if no 256bit
16210 register is used as the function return register. */
16211 if (CALL_P (insn))
16212 {
16213 bool avx_reg256_found = false;
16214 note_stores (pat, ix86_check_avx256_stores, &avx_reg256_found);
16215
16216 return avx_reg256_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
16217 }
16218
16219 /* Otherwise, return current mode. Remember that if insn
16220 references AVX 256bit registers, the mode was already changed
16221 to DIRTY from MODE_NEEDED. */
16222 return mode;
16223 }
16224
16225 /* Return the mode that an insn results in. */
16226
16227 int
16228 ix86_mode_after (int entity, int mode, rtx insn)
16229 {
16230 switch (entity)
16231 {
16232 case AVX_U128:
16233 return ix86_avx_u128_mode_after (mode, insn);
16234 case I387_TRUNC:
16235 case I387_FLOOR:
16236 case I387_CEIL:
16237 case I387_MASK_PM:
16238 return mode;
16239 default:
16240 gcc_unreachable ();
16241 }
16242 }
16243
16244 static int
16245 ix86_avx_u128_mode_entry (void)
16246 {
16247 tree arg;
16248
16249 /* Entry mode is set to AVX_U128_DIRTY if there are
16250 256bit modes used in function arguments. */
16251 for (arg = DECL_ARGUMENTS (current_function_decl); arg;
16252 arg = TREE_CHAIN (arg))
16253 {
16254 rtx incoming = DECL_INCOMING_RTL (arg);
16255
16256 if (incoming && ix86_check_avx256_register (&incoming, NULL))
16257 return AVX_U128_DIRTY;
16258 }
16259
16260 return AVX_U128_CLEAN;
16261 }
16262
16263 /* Return a mode that ENTITY is assumed to be
16264 switched to at function entry. */
16265
16266 static int
16267 ix86_mode_entry (int entity)
16268 {
16269 switch (entity)
16270 {
16271 case AVX_U128:
16272 return ix86_avx_u128_mode_entry ();
16273 case I387_TRUNC:
16274 case I387_FLOOR:
16275 case I387_CEIL:
16276 case I387_MASK_PM:
16277 return I387_CW_ANY;
16278 default:
16279 gcc_unreachable ();
16280 }
16281 }
16282
16283 static int
16284 ix86_avx_u128_mode_exit (void)
16285 {
16286 rtx reg = crtl->return_rtx;
16287
16288 /* Exit mode is set to AVX_U128_DIRTY if there are
16289 256bit modes used in the function return register. */
16290 if (reg && ix86_check_avx256_register (&reg, NULL))
16291 return AVX_U128_DIRTY;
16292
16293 return AVX_U128_CLEAN;
16294 }
16295
16296 /* Return a mode that ENTITY is assumed to be
16297 switched to at function exit. */
16298
16299 static int
16300 ix86_mode_exit (int entity)
16301 {
16302 switch (entity)
16303 {
16304 case AVX_U128:
16305 return ix86_avx_u128_mode_exit ();
16306 case I387_TRUNC:
16307 case I387_FLOOR:
16308 case I387_CEIL:
16309 case I387_MASK_PM:
16310 return I387_CW_ANY;
16311 default:
16312 gcc_unreachable ();
16313 }
16314 }
16315
16316 static int
16317 ix86_mode_priority (int, int n)
16318 {
16319 return n;
16320 }
16321
16322 /* Output code to initialize control word copies used by trunc?f?i and
16323 rounding patterns. The current control word is saved in the STORED_MODE
16324 stack slot and the adjusted copy is written to the NEW_MODE slot. */
16325
16326 static void
16327 emit_i387_cw_initialization (int mode)
16328 {
16329 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
16330 rtx new_mode;
16331
16332 enum ix86_stack_slot slot;
16333
16334 rtx reg = gen_reg_rtx (HImode);
16335
16336 emit_insn (gen_x86_fnstcw_1 (stored_mode));
16337 emit_move_insn (reg, copy_rtx (stored_mode));
16338
16339 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
16340 || optimize_insn_for_size_p ())
16341 {
16342 switch (mode)
16343 {
16344 case I387_CW_TRUNC:
16345 /* round toward zero (truncate) */
16346 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
16347 slot = SLOT_CW_TRUNC;
16348 break;
16349
16350 case I387_CW_FLOOR:
16351 /* round down toward -oo */
16352 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16353 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
16354 slot = SLOT_CW_FLOOR;
16355 break;
16356
16357 case I387_CW_CEIL:
16358 /* round up toward +oo */
16359 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16360 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
16361 slot = SLOT_CW_CEIL;
16362 break;
16363
16364 case I387_CW_MASK_PM:
16365 /* mask precision exception for nearbyint() */
16366 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16367 slot = SLOT_CW_MASK_PM;
16368 break;
16369
16370 default:
16371 gcc_unreachable ();
16372 }
16373 }
16374 else
16375 {
16376 switch (mode)
16377 {
16378 case I387_CW_TRUNC:
16379 /* round toward zero (truncate) */
16380 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
16381 slot = SLOT_CW_TRUNC;
16382 break;
16383
16384 case I387_CW_FLOOR:
16385 /* round down toward -oo */
16386 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
16387 slot = SLOT_CW_FLOOR;
16388 break;
16389
16390 case I387_CW_CEIL:
16391 /* round up toward +oo */
16392 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
16393 slot = SLOT_CW_CEIL;
16394 break;
16395
16396 case I387_CW_MASK_PM:
16397 /* mask precision exception for nearbyint() */
16398 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16399 slot = SLOT_CW_MASK_PM;
16400 break;
16401
16402 default:
16403 gcc_unreachable ();
16404 }
16405 }
16406
16407 gcc_assert (slot < MAX_386_STACK_LOCALS);
16408
16409 new_mode = assign_386_stack_local (HImode, slot);
16410 emit_move_insn (new_mode, reg);
16411 }
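
/* For reference: the x87 control word rounding-control field is bits
   10-11, so 0x0400 selects round-down, 0x0800 round-up and 0x0c00
   truncation, while bit 5 (0x0020) masks the precision exception; these
   are the constants manipulated above.  */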
16412
16413 /* Emit vzeroupper. */
16414
16415 void
16416 ix86_avx_emit_vzeroupper (HARD_REG_SET regs_live)
16417 {
16418 int i;
16419
16420 /* Cancel automatic vzeroupper insertion if there are
16421 live call-saved SSE registers at the insertion point. */
16422
16423 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
16424 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
16425 return;
16426
16427 if (TARGET_64BIT)
16428 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
16429 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
16430 return;
16431
16432 emit_insn (gen_avx_vzeroupper ());
16433 }
16434
16437 /* Generate one or more insns to set ENTITY to MODE. REGS_LIVE
16438 is the set of hard registers live at the point where the insn(s)
16439 are to be inserted. */
16440
16441 static void
16442 ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED,
16443 HARD_REG_SET regs_live)
16444 {
16445 switch (entity)
16446 {
16447 case AVX_U128:
16448 if (mode == AVX_U128_CLEAN)
16449 ix86_avx_emit_vzeroupper (regs_live);
16450 break;
16451 case I387_TRUNC:
16452 case I387_FLOOR:
16453 case I387_CEIL:
16454 case I387_MASK_PM:
16455 if (mode != I387_CW_ANY
16456 && mode != I387_CW_UNINITIALIZED)
16457 emit_i387_cw_initialization (mode);
16458 break;
16459 default:
16460 gcc_unreachable ();
16461 }
16462 }
16463
16464 /* Output code for INSN to convert a float to a signed int. OPERANDS
16465 are the insn operands. The output may be [HSD]Imode and the input
16466 operand may be [SDX]Fmode. */
16467
16468 const char *
16469 output_fix_trunc (rtx insn, rtx *operands, bool fisttp)
16470 {
16471 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
16472 int dimode_p = GET_MODE (operands[0]) == DImode;
16473 int round_mode = get_attr_i387_cw (insn);
16474
16475 /* Jump through a hoop or two for DImode, since the hardware has no
16476 non-popping instruction. We used to do this a different way, but
16477 that was somewhat fragile and broke with post-reload splitters. */
16478 if ((dimode_p || fisttp) && !stack_top_dies)
16479 output_asm_insn ("fld\t%y1", operands);
16480
16481 gcc_assert (STACK_TOP_P (operands[1]));
16482 gcc_assert (MEM_P (operands[0]));
16483 gcc_assert (GET_MODE (operands[1]) != TFmode);
16484
16485 if (fisttp)
16486 output_asm_insn ("fisttp%Z0\t%0", operands);
16487 else
16488 {
16489 if (round_mode != I387_CW_ANY)
16490 output_asm_insn ("fldcw\t%3", operands);
16491 if (stack_top_dies || dimode_p)
16492 output_asm_insn ("fistp%Z0\t%0", operands);
16493 else
16494 output_asm_insn ("fist%Z0\t%0", operands);
16495 if (round_mode != I387_CW_ANY)
16496 output_asm_insn ("fldcw\t%2", operands);
16497 }
16498
16499 return "";
16500 }
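
/* Typical emitted sequence (illustrative), for a DImode conversion
   without fisttp:

     fldcw    %3        # switch to the truncating control word
     fistp%Z0 %0        # store the integer and pop
     fldcw    %2        # restore the original control word  */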
16501
16502 /* Output code for x87 ffreep insn. The OPNO argument, which may only
16503 have the values zero or one, indicates the ffreep insn's operand
16504 from the OPERANDS array. */
16505
16506 static const char *
16507 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
16508 {
16509 if (TARGET_USE_FFREEP)
16510 #ifdef HAVE_AS_IX86_FFREEP
16511 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
16512 #else
16513 {
16514 static char retval[32];
16515 int regno = REGNO (operands[opno]);
16516
16517 gcc_assert (STACK_REGNO_P (regno));
16518
16519 regno -= FIRST_STACK_REG;
16520
16521 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
16522 return retval;
16523 }
16524 #endif
16525
16526 return opno ? "fstp\t%y1" : "fstp\t%y0";
16527 }
16528
16529
16530 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
16531 should be used. UNORDERED_P is true when fucom should be used. */
16532
16533 const char *
16534 output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
16535 {
16536 int stack_top_dies;
16537 rtx cmp_op0, cmp_op1;
16538 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
16539
16540 if (eflags_p)
16541 {
16542 cmp_op0 = operands[0];
16543 cmp_op1 = operands[1];
16544 }
16545 else
16546 {
16547 cmp_op0 = operands[1];
16548 cmp_op1 = operands[2];
16549 }
16550
16551 if (is_sse)
16552 {
16553 if (GET_MODE (operands[0]) == SFmode)
16554 if (unordered_p)
16555 return "%vucomiss\t{%1, %0|%0, %1}";
16556 else
16557 return "%vcomiss\t{%1, %0|%0, %1}";
16558 else
16559 if (unordered_p)
16560 return "%vucomisd\t{%1, %0|%0, %1}";
16561 else
16562 return "%vcomisd\t{%1, %0|%0, %1}";
16563 }
16564
16565 gcc_assert (STACK_TOP_P (cmp_op0));
16566
16567 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
16568
16569 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
16570 {
16571 if (stack_top_dies)
16572 {
16573 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
16574 return output_387_ffreep (operands, 1);
16575 }
16576 else
16577 return "ftst\n\tfnstsw\t%0";
16578 }
16579
16580 if (STACK_REG_P (cmp_op1)
16581 && stack_top_dies
16582 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
16583 && REGNO (cmp_op1) != FIRST_STACK_REG)
16584 {
16585 /* If the top of the 387 stack dies and the other operand is
16586 also a stack register that dies, then this must be an
16587 `fcompp' float compare. */
16588
16589 if (eflags_p)
16590 {
16591 /* There is no double popping fcomi variant. Fortunately,
16592 eflags is immune from the fstp's cc clobbering. */
16593 if (unordered_p)
16594 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
16595 else
16596 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
16597 return output_387_ffreep (operands, 0);
16598 }
16599 else
16600 {
16601 if (unordered_p)
16602 return "fucompp\n\tfnstsw\t%0";
16603 else
16604 return "fcompp\n\tfnstsw\t%0";
16605 }
16606 }
16607 else
16608 {
16609 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
16610
16611 static const char * const alt[16] =
16612 {
16613 "fcom%Z2\t%y2\n\tfnstsw\t%0",
16614 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
16615 "fucom%Z2\t%y2\n\tfnstsw\t%0",
16616 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
16617
16618 "ficom%Z2\t%y2\n\tfnstsw\t%0",
16619 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
16620 NULL,
16621 NULL,
16622
16623 "fcomi\t{%y1, %0|%0, %y1}",
16624 "fcomip\t{%y1, %0|%0, %y1}",
16625 "fucomi\t{%y1, %0|%0, %y1}",
16626 "fucomip\t{%y1, %0|%0, %y1}",
16627
16628 NULL,
16629 NULL,
16630 NULL,
16631 NULL
16632 };
16633
16634 int mask;
16635 const char *ret;
16636
16637 mask = eflags_p << 3;
16638 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
16639 mask |= unordered_p << 1;
16640 mask |= stack_top_dies;
16641
16642 gcc_assert (mask < 16);
16643 ret = alt[mask];
16644 gcc_assert (ret);
16645
16646 return ret;
16647 }
16648 }
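
/* Sketch of the table lookup above: an fcomi-style unordered compare
   whose stack top dies gives MASK = (1<<3) | (1<<1) | 1 = 11, selecting
   "fucomip\t{%y1, %0|%0, %y1}" from ALT.  */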
16649
16650 void
16651 ix86_output_addr_vec_elt (FILE *file, int value)
16652 {
16653 const char *directive = ASM_LONG;
16654
16655 #ifdef ASM_QUAD
16656 if (TARGET_LP64)
16657 directive = ASM_QUAD;
16658 #else
16659 gcc_assert (!TARGET_64BIT);
16660 #endif
16661
16662 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
16663 }
16664
16665 void
16666 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
16667 {
16668 const char *directive = ASM_LONG;
16669
16670 #ifdef ASM_QUAD
16671 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
16672 directive = ASM_QUAD;
16673 #else
16674 gcc_assert (!TARGET_64BIT);
16675 #endif
16676 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
16677 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
16678 fprintf (file, "%s%s%d-%s%d\n",
16679 directive, LPREFIX, value, LPREFIX, rel);
16680 else if (HAVE_AS_GOTOFF_IN_DATA)
16681 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
16682 #if TARGET_MACHO
16683 else if (TARGET_MACHO)
16684 {
16685 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
16686 machopic_output_function_base_name (file);
16687 putc ('\n', file);
16688 }
16689 #endif
16690 else
16691 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
16692 GOT_SYMBOL_NAME, LPREFIX, value);
16693 }
16694 \f
16695 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
16696 for the target. */
16697
16698 void
16699 ix86_expand_clear (rtx dest)
16700 {
16701 rtx tmp;
16702
16703 /* We play register width games, which are only valid after reload. */
16704 gcc_assert (reload_completed);
16705
16706 /* Avoid HImode and its attendant prefix byte. */
16707 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
16708 dest = gen_rtx_REG (SImode, REGNO (dest));
16709 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
16710
16711 if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
16712 {
16713 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
16714 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
16715 }
16716
16717 emit_insn (tmp);
16718 }
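
/* In effect (illustrative): clearing %eax normally expands to
   "xorl %eax, %eax" inside a PARALLEL that clobbers the flags; only when
   the target prefers MOV0 and we are not optimizing for size is the
   plain "movl $0, %eax" SET emitted without the clobber.  */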
16719
16720 /* X is an unchanging MEM. If it is a constant pool reference, return
16721 the constant pool rtx, else NULL. */
16722
16723 rtx
16724 maybe_get_pool_constant (rtx x)
16725 {
16726 x = ix86_delegitimize_address (XEXP (x, 0));
16727
16728 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
16729 return get_pool_constant (x);
16730
16731 return NULL_RTX;
16732 }
16733
16734 void
16735 ix86_expand_move (enum machine_mode mode, rtx operands[])
16736 {
16737 rtx op0, op1;
16738 enum tls_model model;
16739
16740 op0 = operands[0];
16741 op1 = operands[1];
16742
16743 if (GET_CODE (op1) == SYMBOL_REF)
16744 {
16745 rtx tmp;
16746
16747 model = SYMBOL_REF_TLS_MODEL (op1);
16748 if (model)
16749 {
16750 op1 = legitimize_tls_address (op1, model, true);
16751 op1 = force_operand (op1, op0);
16752 if (op1 == op0)
16753 return;
16754 op1 = convert_to_mode (mode, op1, 1);
16755 }
16756 else if ((tmp = legitimize_pe_coff_symbol (op1, false)) != NULL_RTX)
16757 op1 = tmp;
16758 }
16759 else if (GET_CODE (op1) == CONST
16760 && GET_CODE (XEXP (op1, 0)) == PLUS
16761 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
16762 {
16763 rtx addend = XEXP (XEXP (op1, 0), 1);
16764 rtx symbol = XEXP (XEXP (op1, 0), 0);
16765 rtx tmp;
16766
16767 model = SYMBOL_REF_TLS_MODEL (symbol);
16768 if (model)
16769 tmp = legitimize_tls_address (symbol, model, true);
16770 else
16771 tmp = legitimize_pe_coff_symbol (symbol, true);
16772
16773 if (tmp)
16774 {
16775 tmp = force_operand (tmp, NULL);
16776 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
16777 op0, 1, OPTAB_DIRECT);
16778 if (tmp == op0)
16779 return;
16780 op1 = convert_to_mode (mode, tmp, 1);
16781 }
16782 }
16783
16784 if ((flag_pic || MACHOPIC_INDIRECT)
16785 && symbolic_operand (op1, mode))
16786 {
16787 if (TARGET_MACHO && !TARGET_64BIT)
16788 {
16789 #if TARGET_MACHO
16790 /* dynamic-no-pic */
16791 if (MACHOPIC_INDIRECT)
16792 {
16793 rtx temp = ((reload_in_progress
16794 || ((op0 && REG_P (op0))
16795 && mode == Pmode))
16796 ? op0 : gen_reg_rtx (Pmode));
16797 op1 = machopic_indirect_data_reference (op1, temp);
16798 if (MACHOPIC_PURE)
16799 op1 = machopic_legitimize_pic_address (op1, mode,
16800 temp == op1 ? 0 : temp);
16801 }
16802 if (op0 != op1 && GET_CODE (op0) != MEM)
16803 {
16804 rtx insn = gen_rtx_SET (VOIDmode, op0, op1);
16805 emit_insn (insn);
16806 return;
16807 }
16808 if (GET_CODE (op0) == MEM)
16809 op1 = force_reg (Pmode, op1);
16810 else
16811 {
16812 rtx temp = op0;
16813 if (GET_CODE (temp) != REG)
16814 temp = gen_reg_rtx (Pmode);
16815 temp = legitimize_pic_address (op1, temp);
16816 if (temp == op0)
16817 return;
16818 op1 = temp;
16819 }
16820 /* dynamic-no-pic */
16821 #endif
16822 }
16823 else
16824 {
16825 if (MEM_P (op0))
16826 op1 = force_reg (mode, op1);
16827 else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
16828 {
16829 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
16830 op1 = legitimize_pic_address (op1, reg);
16831 if (op0 == op1)
16832 return;
16833 op1 = convert_to_mode (mode, op1, 1);
16834 }
16835 }
16836 }
16837 else
16838 {
16839 if (MEM_P (op0)
16840 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
16841 || !push_operand (op0, mode))
16842 && MEM_P (op1))
16843 op1 = force_reg (mode, op1);
16844
16845 if (push_operand (op0, mode)
16846 && ! general_no_elim_operand (op1, mode))
16847 op1 = copy_to_mode_reg (mode, op1);
16848
16849 /* Force large constants in 64bit compilation into registers
16850 to get them CSEed. */
16851 if (can_create_pseudo_p ()
16852 && (mode == DImode) && TARGET_64BIT
16853 && immediate_operand (op1, mode)
16854 && !x86_64_zext_immediate_operand (op1, VOIDmode)
16855 && !register_operand (op0, mode)
16856 && optimize)
16857 op1 = copy_to_mode_reg (mode, op1);
16858
16859 if (can_create_pseudo_p ()
16860 && FLOAT_MODE_P (mode)
16861 && GET_CODE (op1) == CONST_DOUBLE)
16862 {
16863 /* If we are loading a floating point constant to a register,
16864 force the value to memory now, since we'll get better code
16865 out of the back end. */
16866
16867 op1 = validize_mem (force_const_mem (mode, op1));
16868 if (!register_operand (op0, mode))
16869 {
16870 rtx temp = gen_reg_rtx (mode);
16871 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
16872 emit_move_insn (op0, temp);
16873 return;
16874 }
16875 }
16876 }
16877
16878 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
16879 }
16880
16881 void
16882 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
16883 {
16884 rtx op0 = operands[0], op1 = operands[1];
16885 unsigned int align = GET_MODE_ALIGNMENT (mode);
16886
16887 if (push_operand (op0, VOIDmode))
16888 op0 = emit_move_resolve_push (mode, op0);
16889
16890 /* Force constants other than zero into memory. We do not know how
16891 the instructions used to build constants modify the upper 64 bits
16892 of the register; once we have that information we may be able
16893 to handle some of them more efficiently. */
16894 if (can_create_pseudo_p ()
16895 && register_operand (op0, mode)
16896 && (CONSTANT_P (op1)
16897 || (GET_CODE (op1) == SUBREG
16898 && CONSTANT_P (SUBREG_REG (op1))))
16899 && !standard_sse_constant_p (op1))
16900 op1 = validize_mem (force_const_mem (mode, op1));
16901
16902 /* We need to check memory alignment for SSE mode since attributes
16903 can make operands unaligned. */
16904 if (can_create_pseudo_p ()
16905 && SSE_REG_MODE_P (mode)
16906 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
16907 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
16908 {
16909 rtx tmp[2];
16910
16911 /* ix86_expand_vector_move_misalign() does not like constants ... */
16912 if (CONSTANT_P (op1)
16913 || (GET_CODE (op1) == SUBREG
16914 && CONSTANT_P (SUBREG_REG (op1))))
16915 op1 = validize_mem (force_const_mem (mode, op1));
16916
16917 /* ... nor both arguments in memory. */
16918 if (!register_operand (op0, mode)
16919 && !register_operand (op1, mode))
16920 op1 = force_reg (mode, op1);
16921
16922 tmp[0] = op0; tmp[1] = op1;
16923 ix86_expand_vector_move_misalign (mode, tmp);
16924 return;
16925 }
16926
16927 /* Make operand1 a register if it isn't already. */
16928 if (can_create_pseudo_p ()
16929 && !register_operand (op0, mode)
16930 && !register_operand (op1, mode))
16931 {
16932 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
16933 return;
16934 }
16935
16936 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
16937 }
16938
16939 /* Split 32-byte AVX unaligned load and store if needed. */
16940
16941 static void
16942 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
16943 {
16944 rtx m;
16945 rtx (*extract) (rtx, rtx, rtx);
16946 rtx (*load_unaligned) (rtx, rtx);
16947 rtx (*store_unaligned) (rtx, rtx);
16948 enum machine_mode mode;
16949
16950 switch (GET_MODE (op0))
16951 {
16952 default:
16953 gcc_unreachable ();
16954 case V32QImode:
16955 extract = gen_avx_vextractf128v32qi;
16956 load_unaligned = gen_avx_loaddquv32qi;
16957 store_unaligned = gen_avx_storedquv32qi;
16958 mode = V16QImode;
16959 break;
16960 case V8SFmode:
16961 extract = gen_avx_vextractf128v8sf;
16962 load_unaligned = gen_avx_loadups256;
16963 store_unaligned = gen_avx_storeups256;
16964 mode = V4SFmode;
16965 break;
16966 case V4DFmode:
16967 extract = gen_avx_vextractf128v4df;
16968 load_unaligned = gen_avx_loadupd256;
16969 store_unaligned = gen_avx_storeupd256;
16970 mode = V2DFmode;
16971 break;
16972 }
16973
16974 if (MEM_P (op1))
16975 {
16976 if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
16977 {
16978 rtx r = gen_reg_rtx (mode);
16979 m = adjust_address (op1, mode, 0);
16980 emit_move_insn (r, m);
16981 m = adjust_address (op1, mode, 16);
16982 r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
16983 emit_move_insn (op0, r);
16984 }
16985 /* Normal *mov<mode>_internal pattern will handle
16986 unaligned loads just fine if misaligned_operand
16987 is true, and without the UNSPEC it can be combined
16988 with arithmetic instructions. */
16989 else if (misaligned_operand (op1, GET_MODE (op1)))
16990 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
16991 else
16992 emit_insn (load_unaligned (op0, op1));
16993 }
16994 else if (MEM_P (op0))
16995 {
16996 if (TARGET_AVX256_SPLIT_UNALIGNED_STORE)
16997 {
16998 m = adjust_address (op0, mode, 0);
16999 emit_insn (extract (m, op1, const0_rtx));
17000 m = adjust_address (op0, mode, 16);
17001 emit_insn (extract (m, op1, const1_rtx));
17002 }
17003 else
17004 emit_insn (store_unaligned (op0, op1));
17005 }
17006 else
17007 gcc_unreachable ();
17008 }
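
/* Illustrative expansion, assuming the split-unaligned tunings are on:
   an unaligned 32-byte V8SF load becomes a 16-byte load into a V4SF
   register followed by a VEC_CONCAT with the upper half (typically
   matched as vinsertf128), and an unaligned store becomes two
   vextractf128 halves written at offsets 0 and 16.  */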
17009
17010 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
17011 straight to ix86_expand_vector_move. */
17012 /* Code generation for scalar reg-reg moves of single and double precision data:
17013 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
17014 movaps reg, reg
17015 else
17016 movss reg, reg
17017 if (x86_sse_partial_reg_dependency == true)
17018 movapd reg, reg
17019 else
17020 movsd reg, reg
17021
17022 Code generation for scalar loads of double precision data:
17023 if (x86_sse_split_regs == true)
17024 movlpd mem, reg (gas syntax)
17025 else
17026 movsd mem, reg
17027
17028 Code generation for unaligned packed loads of single precision data
17029 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
17030 if (x86_sse_unaligned_move_optimal)
17031 movups mem, reg
17032
17033 if (x86_sse_partial_reg_dependency == true)
17034 {
17035 xorps reg, reg
17036 movlps mem, reg
17037 movhps mem+8, reg
17038 }
17039 else
17040 {
17041 movlps mem, reg
17042 movhps mem+8, reg
17043 }
17044
17045 Code generation for unaligned packed loads of double precision data
17046 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
17047 if (x86_sse_unaligned_move_optimal)
17048 movupd mem, reg
17049
17050 if (x86_sse_split_regs == true)
17051 {
17052 movlpd mem, reg
17053 movhpd mem+8, reg
17054 }
17055 else
17056 {
17057 movsd mem, reg
17058 movhpd mem+8, reg
17059 }
17060 */
17061
17062 void
17063 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
17064 {
17065 rtx op0, op1, orig_op0 = NULL_RTX, m;
17066 rtx (*load_unaligned) (rtx, rtx);
17067 rtx (*store_unaligned) (rtx, rtx);
17068
17069 op0 = operands[0];
17070 op1 = operands[1];
17071
17072 if (GET_MODE_SIZE (mode) == 64)
17073 {
17074 switch (GET_MODE_CLASS (mode))
17075 {
17076 case MODE_VECTOR_INT:
17077 case MODE_INT:
17078 if (GET_MODE (op0) != V16SImode)
17079 {
17080 if (!MEM_P (op0))
17081 {
17082 orig_op0 = op0;
17083 op0 = gen_reg_rtx (V16SImode);
17084 }
17085 else
17086 op0 = gen_lowpart (V16SImode, op0);
17087 }
17088 op1 = gen_lowpart (V16SImode, op1);
17089 /* FALLTHRU */
17090
17091 case MODE_VECTOR_FLOAT:
17092 switch (GET_MODE (op0))
17093 {
17094 default:
17095 gcc_unreachable ();
17096 case V16SImode:
17097 load_unaligned = gen_avx512f_loaddquv16si;
17098 store_unaligned = gen_avx512f_storedquv16si;
17099 break;
17100 case V16SFmode:
17101 load_unaligned = gen_avx512f_loadups512;
17102 store_unaligned = gen_avx512f_storeups512;
17103 break;
17104 case V8DFmode:
17105 load_unaligned = gen_avx512f_loadupd512;
17106 store_unaligned = gen_avx512f_storeupd512;
17107 break;
17108 }
17109
17110 if (MEM_P (op1))
17111 emit_insn (load_unaligned (op0, op1));
17112 else if (MEM_P (op0))
17113 emit_insn (store_unaligned (op0, op1));
17114 else
17115 gcc_unreachable ();
17116 if (orig_op0)
17117 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17118 break;
17119
17120 default:
17121 gcc_unreachable ();
17122 }
17123
17124 return;
17125 }
17126
17127 if (TARGET_AVX
17128 && GET_MODE_SIZE (mode) == 32)
17129 {
17130 switch (GET_MODE_CLASS (mode))
17131 {
17132 case MODE_VECTOR_INT:
17133 case MODE_INT:
17134 if (GET_MODE (op0) != V32QImode)
17135 {
17136 if (!MEM_P (op0))
17137 {
17138 orig_op0 = op0;
17139 op0 = gen_reg_rtx (V32QImode);
17140 }
17141 else
17142 op0 = gen_lowpart (V32QImode, op0);
17143 }
17144 op1 = gen_lowpart (V32QImode, op1);
17145 /* FALLTHRU */
17146
17147 case MODE_VECTOR_FLOAT:
17148 ix86_avx256_split_vector_move_misalign (op0, op1);
17149 if (orig_op0)
17150 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17151 break;
17152
17153 default:
17154 gcc_unreachable ();
17155 }
17156
17157 return;
17158 }
17159
17160 if (MEM_P (op1))
17161 {
17162 /* Normal *mov<mode>_internal pattern will handle
17163 unaligned loads just fine if misaligned_operand
17164 is true, and without the UNSPEC it can be combined
17165 with arithmetic instructions. */
17166 if (TARGET_AVX
17167 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
17168 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
17169 && misaligned_operand (op1, GET_MODE (op1)))
17170 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17171 /* ??? If we have typed data, then it would appear that using
17172 movdqu is the only way to get unaligned data loaded with
17173 integer type. */
17174 else if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17175 {
17176 if (GET_MODE (op0) != V16QImode)
17177 {
17178 orig_op0 = op0;
17179 op0 = gen_reg_rtx (V16QImode);
17180 }
17181 op1 = gen_lowpart (V16QImode, op1);
17182 /* We will eventually emit movups based on insn attributes. */
17183 emit_insn (gen_sse2_loaddquv16qi (op0, op1));
17184 if (orig_op0)
17185 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17186 }
17187 else if (TARGET_SSE2 && mode == V2DFmode)
17188 {
17189 rtx zero;
17190
17191 if (TARGET_AVX
17192 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17193 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17194 || optimize_insn_for_size_p ())
17195 {
17196 /* We will eventually emit movups based on insn attributes. */
17197 emit_insn (gen_sse2_loadupd (op0, op1));
17198 return;
17199 }
17200
17201 /* When SSE registers are split into halves, we can avoid
17202 writing to the top half twice. */
17203 if (TARGET_SSE_SPLIT_REGS)
17204 {
17205 emit_clobber (op0);
17206 zero = op0;
17207 }
17208 else
17209 {
17210 /* ??? Not sure about the best option for the Intel chips.
17211 The following would seem to satisfy; the register is
17212 entirely cleared, breaking the dependency chain. We
17213 then store to the upper half, with a dependency depth
17214 of one. A rumor has it that Intel recommends two movsd
17215 followed by an unpacklpd, but this is unconfirmed. And
17216 given that the dependency depth of the unpacklpd would
17217 still be one, I'm not sure why this would be better. */
17218 zero = CONST0_RTX (V2DFmode);
17219 }
17220
17221 m = adjust_address (op1, DFmode, 0);
17222 emit_insn (gen_sse2_loadlpd (op0, zero, m));
17223 m = adjust_address (op1, DFmode, 8);
17224 emit_insn (gen_sse2_loadhpd (op0, op0, m));
17225 }
17226 else
17227 {
17228 rtx t;
17229
17230 if (TARGET_AVX
17231 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17232 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17233 || optimize_insn_for_size_p ())
17234 {
17235 if (GET_MODE (op0) != V4SFmode)
17236 {
17237 orig_op0 = op0;
17238 op0 = gen_reg_rtx (V4SFmode);
17239 }
17240 op1 = gen_lowpart (V4SFmode, op1);
17241 emit_insn (gen_sse_loadups (op0, op1));
17242 if (orig_op0)
17243 emit_move_insn (orig_op0,
17244 gen_lowpart (GET_MODE (orig_op0), op0));
17245 return;
17246 }
17247
17248 if (mode != V4SFmode)
17249 t = gen_reg_rtx (V4SFmode);
17250 else
17251 t = op0;
17252
17253 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
17254 emit_move_insn (t, CONST0_RTX (V4SFmode));
17255 else
17256 emit_clobber (t);
17257
17258 m = adjust_address (op1, V2SFmode, 0);
17259 emit_insn (gen_sse_loadlps (t, t, m));
17260 m = adjust_address (op1, V2SFmode, 8);
17261 emit_insn (gen_sse_loadhps (t, t, m));
17262 if (mode != V4SFmode)
17263 emit_move_insn (op0, gen_lowpart (mode, t));
17264 }
17265 }
17266 else if (MEM_P (op0))
17267 {
17268 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17269 {
17270 op0 = gen_lowpart (V16QImode, op0);
17271 op1 = gen_lowpart (V16QImode, op1);
17272 /* We will eventually emit movups based on insn attributes. */
17273 emit_insn (gen_sse2_storedquv16qi (op0, op1));
17274 }
17275 else if (TARGET_SSE2 && mode == V2DFmode)
17276 {
17277 if (TARGET_AVX
17278 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17279 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17280 || optimize_insn_for_size_p ())
17281 /* We will eventually emit movups based on insn attributes. */
17282 emit_insn (gen_sse2_storeupd (op0, op1));
17283 else
17284 {
17285 m = adjust_address (op0, DFmode, 0);
17286 emit_insn (gen_sse2_storelpd (m, op1));
17287 m = adjust_address (op0, DFmode, 8);
17288 emit_insn (gen_sse2_storehpd (m, op1));
17289 }
17290 }
17291 else
17292 {
17293 if (mode != V4SFmode)
17294 op1 = gen_lowpart (V4SFmode, op1);
17295
17296 if (TARGET_AVX
17297 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17298 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17299 || optimize_insn_for_size_p ())
17300 {
17301 op0 = gen_lowpart (V4SFmode, op0);
17302 emit_insn (gen_sse_storeups (op0, op1));
17303 }
17304 else
17305 {
17306 m = adjust_address (op0, V2SFmode, 0);
17307 emit_insn (gen_sse_storelps (m, op1));
17308 m = adjust_address (op0, V2SFmode, 8);
17309 emit_insn (gen_sse_storehps (m, op1));
17310 }
17311 }
17312 }
17313 else
17314 gcc_unreachable ();
17315 }
17316
17317 /* Helper function of ix86_fixup_binary_operands to canonicalize
17318 operand order. Returns true if the operands should be swapped. */
17319
17320 static bool
17321 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
17322 rtx operands[])
17323 {
17324 rtx dst = operands[0];
17325 rtx src1 = operands[1];
17326 rtx src2 = operands[2];
17327
17328 /* If the operation is not commutative, we can't do anything. */
17329 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
17330 return false;
17331
17332 /* Highest priority is that src1 should match dst. */
17333 if (rtx_equal_p (dst, src1))
17334 return false;
17335 if (rtx_equal_p (dst, src2))
17336 return true;
17337
17338 /* Next highest priority is that immediate constants come second. */
17339 if (immediate_operand (src2, mode))
17340 return false;
17341 if (immediate_operand (src1, mode))
17342 return true;
17343
17344 /* Lowest priority is that memory references should come second. */
17345 if (MEM_P (src2))
17346 return false;
17347 if (MEM_P (src1))
17348 return true;
17349
17350 return false;
17351 }
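
/* For example (illustrative): for a commutative PLUS with operands
   (dst, mem, reg) where dst matches neither source, the function returns
   true, so the register becomes src1 and the memory reference src2,
   matching the machine's "reg op= mem" form.  */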
17352
17353
17354 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
17355 destination to use for the operation. If different from the true
17356 destination in operands[0], a copy operation will be required. */
17357
17358 rtx
17359 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
17360 rtx operands[])
17361 {
17362 rtx dst = operands[0];
17363 rtx src1 = operands[1];
17364 rtx src2 = operands[2];
17365
17366 /* Canonicalize operand order. */
17367 if (ix86_swap_binary_operands_p (code, mode, operands))
17368 {
17369 rtx temp;
17370
17371 /* It is invalid to swap operands of different modes. */
17372 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
17373
17374 temp = src1;
17375 src1 = src2;
17376 src2 = temp;
17377 }
17378
17379 /* Both source operands cannot be in memory. */
17380 if (MEM_P (src1) && MEM_P (src2))
17381 {
17382 /* Optimization: Only read from memory once. */
17383 if (rtx_equal_p (src1, src2))
17384 {
17385 src2 = force_reg (mode, src2);
17386 src1 = src2;
17387 }
17388 else if (rtx_equal_p (dst, src1))
17389 src2 = force_reg (mode, src2);
17390 else
17391 src1 = force_reg (mode, src1);
17392 }
17393
17394 /* If the destination is memory, and we do not have matching source
17395 operands, do things in registers. */
17396 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
17397 dst = gen_reg_rtx (mode);
17398
17399 /* Source 1 cannot be a constant. */
17400 if (CONSTANT_P (src1))
17401 src1 = force_reg (mode, src1);
17402
17403 /* Source 1 cannot be a non-matching memory. */
17404 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
17405 src1 = force_reg (mode, src1);
17406
17407 /* Improve address combine. */
17408 if (code == PLUS
17409 && GET_MODE_CLASS (mode) == MODE_INT
17410 && MEM_P (src2))
17411 src2 = force_reg (mode, src2);
17412
17413 operands[1] = src1;
17414 operands[2] = src2;
17415 return dst;
17416 }
17417
17418 /* Similarly, but assume that the destination has already been
17419 set up properly. */
17420
17421 void
17422 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
17423 enum machine_mode mode, rtx operands[])
17424 {
17425 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
17426 gcc_assert (dst == operands[0]);
17427 }
17428
17429 /* Attempt to expand a binary operator. Make the expansion closer to the
17430 actual machine than just general_operand, which would allow 3 separate
17431 memory references (one output, two input) in a single insn. */
17432
17433 void
17434 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
17435 rtx operands[])
17436 {
17437 rtx src1, src2, dst, op, clob;
17438
17439 dst = ix86_fixup_binary_operands (code, mode, operands);
17440 src1 = operands[1];
17441 src2 = operands[2];
17442
17443 /* Emit the instruction. */
17444
17445 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
17446 if (reload_in_progress)
17447 {
17448 /* Reload doesn't know about the flags register, and doesn't know that
17449 it doesn't want to clobber it. We can only do this with PLUS. */
17450 gcc_assert (code == PLUS);
17451 emit_insn (op);
17452 }
17453 else if (reload_completed
17454 && code == PLUS
17455 && !rtx_equal_p (dst, src1))
17456 {
17457 /* This is going to be an LEA; avoid splitting it later. */
17458 emit_insn (op);
17459 }
17460 else
17461 {
17462 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17463 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
17464 }
17465
17466 /* Fix up the destination if needed. */
17467 if (dst != operands[0])
17468 emit_move_insn (operands[0], dst);
17469 }
17470
17471 /* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with
17472 the given OPERANDS. */
17473
17474 void
17475 ix86_expand_vector_logical_operator (enum rtx_code code, enum machine_mode mode,
17476 rtx operands[])
17477 {
17478 rtx op1 = NULL_RTX, op2 = NULL_RTX;
17479 if (GET_CODE (operands[1]) == SUBREG)
17480 {
17481 op1 = operands[1];
17482 op2 = operands[2];
17483 }
17484 else if (GET_CODE (operands[2]) == SUBREG)
17485 {
17486 op1 = operands[2];
17487 op2 = operands[1];
17488 }
17489 /* Optimize (__m128i) d | (__m128i) e and similar code
17490 when d and e are float vectors into a float vector logical
17491 insn. In C/C++, without using intrinsics, there is no other way
17492 to express a vector logical operation on float vectors than
17493 to cast them temporarily to integer vectors. */
17494 if (op1
17495 && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17496 && ((GET_CODE (op2) == SUBREG || GET_CODE (op2) == CONST_VECTOR))
17497 && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1))) == MODE_VECTOR_FLOAT
17498 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1))) == GET_MODE_SIZE (mode)
17499 && SUBREG_BYTE (op1) == 0
17500 && (GET_CODE (op2) == CONST_VECTOR
17501 || (GET_MODE (SUBREG_REG (op1)) == GET_MODE (SUBREG_REG (op2))
17502 && SUBREG_BYTE (op2) == 0))
17503 && can_create_pseudo_p ())
17504 {
17505 rtx dst;
17506 switch (GET_MODE (SUBREG_REG (op1)))
17507 {
17508 case V4SFmode:
17509 case V8SFmode:
17510 case V2DFmode:
17511 case V4DFmode:
17512 dst = gen_reg_rtx (GET_MODE (SUBREG_REG (op1)));
17513 if (GET_CODE (op2) == CONST_VECTOR)
17514 {
17515 op2 = gen_lowpart (GET_MODE (dst), op2);
17516 op2 = force_reg (GET_MODE (dst), op2);
17517 }
17518 else
17519 {
17520 op1 = operands[1];
17521 op2 = SUBREG_REG (operands[2]);
17522 if (!nonimmediate_operand (op2, GET_MODE (dst)))
17523 op2 = force_reg (GET_MODE (dst), op2);
17524 }
17525 op1 = SUBREG_REG (op1);
17526 if (!nonimmediate_operand (op1, GET_MODE (dst)))
17527 op1 = force_reg (GET_MODE (dst), op1);
17528 emit_insn (gen_rtx_SET (VOIDmode, dst,
17529 gen_rtx_fmt_ee (code, GET_MODE (dst),
17530 op1, op2)));
17531 emit_move_insn (operands[0], gen_lowpart (mode, dst));
17532 return;
17533 default:
17534 break;
17535 }
17536 }
17537 if (!nonimmediate_operand (operands[1], mode))
17538 operands[1] = force_reg (mode, operands[1]);
17539 if (!nonimmediate_operand (operands[2], mode))
17540 operands[2] = force_reg (mode, operands[2]);
17541 ix86_fixup_binary_operands_no_copy (code, mode, operands);
17542 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
17543 gen_rtx_fmt_ee (code, mode, operands[1],
17544 operands[2])));
17545 }
17546
17547 /* Return TRUE or FALSE depending on whether the binary operator meets the
17548 appropriate constraints. */
17549
17550 bool
17551 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
17552 rtx operands[3])
17553 {
17554 rtx dst = operands[0];
17555 rtx src1 = operands[1];
17556 rtx src2 = operands[2];
17557
17558 /* Both source operands cannot be in memory. */
17559 if (MEM_P (src1) && MEM_P (src2))
17560 return false;
17561
17562 /* Canonicalize operand order for commutative operators. */
17563 if (ix86_swap_binary_operands_p (code, mode, operands))
17564 {
17565 rtx temp = src1;
17566 src1 = src2;
17567 src2 = temp;
17568 }
17569
17570 /* If the destination is memory, we must have a matching source operand. */
17571 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
17572 return false;
17573
17574 /* Source 1 cannot be a constant. */
17575 if (CONSTANT_P (src1))
17576 return false;
17577
17578 /* Source 1 cannot be a non-matching memory. */
17579 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
17580 /* Support "andhi/andsi/anddi" as a zero-extending move. */
17581 return (code == AND
17582 && (mode == HImode
17583 || mode == SImode
17584 || (TARGET_64BIT && mode == DImode))
17585 && satisfies_constraint_L (src2));
17586
17587 return true;
17588 }
17589
17590 /* Attempt to expand a unary operator. Make the expansion closer to the
17591 actual machine than just general_operand, which would allow 2 separate
17592 memory references (one output, one input) in a single insn. */
17593
17594 void
17595 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
17596 rtx operands[])
17597 {
17598 int matching_memory;
17599 rtx src, dst, op, clob;
17600
17601 dst = operands[0];
17602 src = operands[1];
17603
17604 /* If the destination is memory, and we do not have matching source
17605 operands, do things in registers. */
17606 matching_memory = 0;
17607 if (MEM_P (dst))
17608 {
17609 if (rtx_equal_p (dst, src))
17610 matching_memory = 1;
17611 else
17612 dst = gen_reg_rtx (mode);
17613 }
17614
17615 /* When source operand is memory, destination must match. */
17616 if (MEM_P (src) && !matching_memory)
17617 src = force_reg (mode, src);
17618
17619 /* Emit the instruction. */
17620
17621 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
17622 if (reload_in_progress || code == NOT)
17623 {
17624 /* Reload doesn't know about the flags register, and doesn't know that
17625 it doesn't want to clobber it. */
17626 gcc_assert (code == NOT);
17627 emit_insn (op);
17628 }
17629 else
17630 {
17631 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17632 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
17633 }
17634
17635 /* Fix up the destination if needed. */
17636 if (dst != operands[0])
17637 emit_move_insn (operands[0], dst);
17638 }
17639
17640 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
17641 divisor are within the range [0-255]. */
17642
17643 void
17644 ix86_split_idivmod (enum machine_mode mode, rtx operands[],
17645 bool signed_p)
17646 {
17647 rtx end_label, qimode_label;
17648 rtx insn, div, mod;
17649 rtx scratch, tmp0, tmp1, tmp2;
17650 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
17651 rtx (*gen_zero_extend) (rtx, rtx);
17652 rtx (*gen_test_ccno_1) (rtx, rtx);
17653
17654 switch (mode)
17655 {
17656 case SImode:
17657 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
17658 gen_test_ccno_1 = gen_testsi_ccno_1;
17659 gen_zero_extend = gen_zero_extendqisi2;
17660 break;
17661 case DImode:
17662 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
17663 gen_test_ccno_1 = gen_testdi_ccno_1;
17664 gen_zero_extend = gen_zero_extendqidi2;
17665 break;
17666 default:
17667 gcc_unreachable ();
17668 }
17669
17670 end_label = gen_label_rtx ();
17671 qimode_label = gen_label_rtx ();
17672
17673 scratch = gen_reg_rtx (mode);
17674
17675 /* Use 8bit unsigned divmod if dividend and divisor are within
17676 the range [0-255]. */
17677 emit_move_insn (scratch, operands[2]);
17678 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
17679 scratch, 1, OPTAB_DIRECT);
17680 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
17681 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
17682 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
17683 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
17684 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
17685 pc_rtx);
17686 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
17687 predict_jump (REG_BR_PROB_BASE * 50 / 100);
17688 JUMP_LABEL (insn) = qimode_label;
17689
17690 /* Generate original signed/unsigned divmod.  */
17691 div = gen_divmod4_1 (operands[0], operands[1],
17692 operands[2], operands[3]);
17693 emit_insn (div);
17694
17695 /* Branch to the end. */
17696 emit_jump_insn (gen_jump (end_label));
17697 emit_barrier ();
17698
17699 /* Generate 8bit unsigned divide. */
17700 emit_label (qimode_label);
17701 /* Don't use operands[0] for result of 8bit divide since not all
17702 registers support QImode ZERO_EXTRACT. */
17703 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
17704 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
17705 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
17706 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
17707
17708 if (signed_p)
17709 {
17710 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
17711 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
17712 }
17713 else
17714 {
17715 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
17716 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
17717 }
17718
17719 /* Extract remainder from AH. */
17720 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
17721 if (REG_P (operands[1]))
17722 insn = emit_move_insn (operands[1], tmp1);
17723 else
17724 {
17725 /* Need a new scratch register since the old one has result
17726 of 8bit divide. */
17727 scratch = gen_reg_rtx (mode);
17728 emit_move_insn (scratch, tmp1);
17729 insn = emit_move_insn (operands[1], scratch);
17730 }
17731 set_unique_reg_note (insn, REG_EQUAL, mod);
17732
17733 /* Zero extend quotient from AL. */
17734 tmp1 = gen_lowpart (QImode, tmp0);
17735 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
17736 set_unique_reg_note (insn, REG_EQUAL, div);
17737
17738 emit_label (end_label);
17739 }
17740
17741 /* Whether it is OK to emit CFI directives when emitting asm code. */
17742
17743 bool
17744 ix86_emit_cfi ()
17745 {
17746 return dwarf2out_do_cfi_asm ();
17747 }
17748
17749 #define LEA_MAX_STALL (3)
17750 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
17751
17752 /* Increase given DISTANCE in half-cycles according to
17753 dependencies between PREV and NEXT instructions.
17754 Add 1 half-cycle if there is no dependency and
17755 go to the next cycle if there is some dependency.  */
17756
17757 static unsigned int
17758 increase_distance (rtx prev, rtx next, unsigned int distance)
17759 {
17760 df_ref def, use;
17761
17762 if (!prev || !next)
17763 return distance + (distance & 1) + 2;
17764
17765 if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
17766 return distance + 1;
17767
17768 FOR_EACH_INSN_USE (use, next)
17769 FOR_EACH_INSN_DEF (def, prev)
17770 if (!DF_REF_IS_ARTIFICIAL (def)
17771 && DF_REF_REGNO (use) == DF_REF_REGNO (def))
17772 return distance + (distance & 1) + 2;
17773
17774 return distance + 1;
17775 }
17776
17777 /* Function checks if instruction INSN defines register number
17778 REGNO1 or REGNO2. */
17779
17780 static bool
17781 insn_defines_reg (unsigned int regno1, unsigned int regno2,
17782 rtx insn)
17783 {
17784 df_ref def;
17785
17786 FOR_EACH_INSN_DEF (def, insn)
17787 if (DF_REF_REG_DEF_P (def)
17788 && !DF_REF_IS_ARTIFICIAL (def)
17789 && (regno1 == DF_REF_REGNO (def)
17790 || regno2 == DF_REF_REGNO (def)))
17791 return true;
17792
17793 return false;
17794 }
17795
17796 /* Function checks if instruction INSN uses register number
17797 REGNO as a part of address expression. */
17798
17799 static bool
17800 insn_uses_reg_mem (unsigned int regno, rtx insn)
17801 {
17802 df_ref use;
17803
17804 FOR_EACH_INSN_USE (use, insn)
17805 if (DF_REF_REG_MEM_P (use) && regno == DF_REF_REGNO (use))
17806 return true;
17807
17808 return false;
17809 }
17810
17811 /* Search backward for a non-agu definition of register number REGNO1
17812 or register number REGNO2 in the basic block, starting from instruction
17813 START up to the head of the basic block or instruction INSN.
17814
17815 The function puts true into *FOUND if a definition was found
17816 and false otherwise.
17817
17818 The distance in half-cycles between START and the found instruction or
17819 the head of the BB is added to DISTANCE and returned.  */
17820
17821 static int
17822 distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
17823 rtx insn, int distance,
17824 rtx start, bool *found)
17825 {
17826 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
17827 rtx prev = start;
17828 rtx next = NULL;
17829
17830 *found = false;
17831
17832 while (prev
17833 && prev != insn
17834 && distance < LEA_SEARCH_THRESHOLD)
17835 {
17836 if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
17837 {
17838 distance = increase_distance (prev, next, distance);
17839 if (insn_defines_reg (regno1, regno2, prev))
17840 {
17841 if (recog_memoized (prev) < 0
17842 || get_attr_type (prev) != TYPE_LEA)
17843 {
17844 *found = true;
17845 return distance;
17846 }
17847 }
17848
17849 next = prev;
17850 }
17851 if (prev == BB_HEAD (bb))
17852 break;
17853
17854 prev = PREV_INSN (prev);
17855 }
17856
17857 return distance;
17858 }
17859
17860 /* Search backward for a non-agu definition of register number REGNO1
17861 or register number REGNO2 in INSN's basic block until we either
17862 1. pass LEA_SEARCH_THRESHOLD instructions, or
17863 2. reach the boundary of a neighbouring BB, or
17864 3. reach an agu definition.
17865 Returns the distance between the non-agu definition point and INSN.
17866 If there is no definition point, returns -1.  */
17867
17868 static int
17869 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
17870 rtx insn)
17871 {
17872 basic_block bb = BLOCK_FOR_INSN (insn);
17873 int distance = 0;
17874 bool found = false;
17875
17876 if (insn != BB_HEAD (bb))
17877 distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
17878 distance, PREV_INSN (insn),
17879 &found);
17880
17881 if (!found && distance < LEA_SEARCH_THRESHOLD)
17882 {
17883 edge e;
17884 edge_iterator ei;
17885 bool simple_loop = false;
17886
17887 FOR_EACH_EDGE (e, ei, bb->preds)
17888 if (e->src == bb)
17889 {
17890 simple_loop = true;
17891 break;
17892 }
17893
17894 if (simple_loop)
17895 distance = distance_non_agu_define_in_bb (regno1, regno2,
17896 insn, distance,
17897 BB_END (bb), &found);
17898 else
17899 {
17900 int shortest_dist = -1;
17901 bool found_in_bb = false;
17902
17903 FOR_EACH_EDGE (e, ei, bb->preds)
17904 {
17905 int bb_dist
17906 = distance_non_agu_define_in_bb (regno1, regno2,
17907 insn, distance,
17908 BB_END (e->src),
17909 &found_in_bb);
17910 if (found_in_bb)
17911 {
17912 if (shortest_dist < 0)
17913 shortest_dist = bb_dist;
17914 else if (bb_dist > 0)
17915 shortest_dist = MIN (bb_dist, shortest_dist);
17916
17917 found = true;
17918 }
17919 }
17920
17921 distance = shortest_dist;
17922 }
17923 }
17924
17925 /* get_attr_type may modify recog data. We want to make sure
17926 that recog data is valid for instruction INSN, on which
17927 distance_non_agu_define is called. INSN is unchanged here. */
17928 extract_insn_cached (insn);
17929
17930 if (!found)
17931 return -1;
17932
17933 return distance >> 1;
17934 }
17935
17936 /* Return the distance in half-cycles between INSN and the next
17937 insn that uses register number REGNO in a memory address, added
17938 to DISTANCE.  Return -1 if REGNO is set.
17939
17940 Put true value into *FOUND if register usage was found and
17941 false otherwise.
17942 Put true value into *REDEFINED if register redefinition was
17943 found and false otherwise. */
17944
17945 static int
17946 distance_agu_use_in_bb (unsigned int regno,
17947 rtx insn, int distance, rtx start,
17948 bool *found, bool *redefined)
17949 {
17950 basic_block bb = NULL;
17951 rtx next = start;
17952 rtx prev = NULL;
17953
17954 *found = false;
17955 *redefined = false;
17956
17957 if (start != NULL_RTX)
17958 {
17959 bb = BLOCK_FOR_INSN (start);
17960 if (start != BB_HEAD (bb))
17961 /* If insn and start belong to the same bb, set prev to insn,
17962 so the call to increase_distance will increase the distance
17963 between insns by 1. */
17964 prev = insn;
17965 }
17966
17967 while (next
17968 && next != insn
17969 && distance < LEA_SEARCH_THRESHOLD)
17970 {
17971 if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
17972 {
17973 distance = increase_distance (prev, next, distance);
17974 if (insn_uses_reg_mem (regno, next))
17975 {
17976 /* Return DISTANCE if OP0 is used in memory
17977 address in NEXT. */
17978 *found = true;
17979 return distance;
17980 }
17981
17982 if (insn_defines_reg (regno, INVALID_REGNUM, next))
17983 {
17984 /* Return -1 if OP0 is set in NEXT. */
17985 *redefined = true;
17986 return -1;
17987 }
17988
17989 prev = next;
17990 }
17991
17992 if (next == BB_END (bb))
17993 break;
17994
17995 next = NEXT_INSN (next);
17996 }
17997
17998 return distance;
17999 }
18000
18001 /* Return the distance between INSN and the next insn that uses
18002 register number REGNO0 in a memory address.  Return -1 if no such
18003 use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set.  */
18004
18005 static int
18006 distance_agu_use (unsigned int regno0, rtx insn)
18007 {
18008 basic_block bb = BLOCK_FOR_INSN (insn);
18009 int distance = 0;
18010 bool found = false;
18011 bool redefined = false;
18012
18013 if (insn != BB_END (bb))
18014 distance = distance_agu_use_in_bb (regno0, insn, distance,
18015 NEXT_INSN (insn),
18016 &found, &redefined);
18017
18018 if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
18019 {
18020 edge e;
18021 edge_iterator ei;
18022 bool simple_loop = false;
18023
18024 FOR_EACH_EDGE (e, ei, bb->succs)
18025 if (e->dest == bb)
18026 {
18027 simple_loop = true;
18028 break;
18029 }
18030
18031 if (simple_loop)
18032 distance = distance_agu_use_in_bb (regno0, insn,
18033 distance, BB_HEAD (bb),
18034 &found, &redefined);
18035 else
18036 {
18037 int shortest_dist = -1;
18038 bool found_in_bb = false;
18039 bool redefined_in_bb = false;
18040
18041 FOR_EACH_EDGE (e, ei, bb->succs)
18042 {
18043 int bb_dist
18044 = distance_agu_use_in_bb (regno0, insn,
18045 distance, BB_HEAD (e->dest),
18046 &found_in_bb, &redefined_in_bb);
18047 if (found_in_bb)
18048 {
18049 if (shortest_dist < 0)
18050 shortest_dist = bb_dist;
18051 else if (bb_dist > 0)
18052 shortest_dist = MIN (bb_dist, shortest_dist);
18053
18054 found = true;
18055 }
18056 }
18057
18058 distance = shortest_dist;
18059 }
18060 }
18061
18062 if (!found || redefined)
18063 return -1;
18064
18065 return distance >> 1;
18066 }
18067
18068 /* Define this macro to tune LEA priority vs ADD; it takes effect when
18069 there is a dilemma of choosing LEA or ADD.
18070 Negative value: ADD is preferred over LEA.
18071 Zero: Neutral.
18072 Positive value: LEA is preferred over ADD.  */
18073 #define IX86_LEA_PRIORITY 0
18074
18075 /* Return true if usage of the lea INSN has a performance advantage
18076 over a sequence of instructions.  The instruction sequence has
18077 SPLIT_COST cycles higher latency than the lea latency.  */
18078
18079 static bool
18080 ix86_lea_outperforms (rtx insn, unsigned int regno0, unsigned int regno1,
18081 unsigned int regno2, int split_cost, bool has_scale)
18082 {
18083 int dist_define, dist_use;
18084
18085 /* For Silvermont, if we use a 2-source or 3-source LEA for a
18086 non-destructive destination, or because we want the ability
18087 to use SCALE, the use of LEA is justified.  */
18088 if (TARGET_SILVERMONT || TARGET_INTEL)
18089 {
18090 if (has_scale)
18091 return true;
18092 if (split_cost < 1)
18093 return false;
18094 if (regno0 == regno1 || regno0 == regno2)
18095 return false;
18096 return true;
18097 }
18098
18099 dist_define = distance_non_agu_define (regno1, regno2, insn);
18100 dist_use = distance_agu_use (regno0, insn);
18101
18102 if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
18103 {
18104 /* If there is no non-AGU operand definition, no AGU
18105 operand usage, and the split cost is 0, then both the lea
18106 and non-lea variants have the same priority.  Currently
18107 we prefer lea for 64-bit code and non-lea for 32-bit
18108 code.  */
18109 if (dist_use < 0 && split_cost == 0)
18110 return TARGET_64BIT || IX86_LEA_PRIORITY;
18111 else
18112 return true;
18113 }
18114
18115 /* With a longer definition distance, lea is preferable.
18116 Here we adjust the distance to take into account the splitting
18117 cost and lea priority.  */
18118 dist_define += split_cost + IX86_LEA_PRIORITY;
18119
18120 /* If there is no use in a memory address then we just check
18121 that the split cost exceeds the AGU stall.  */
18122 if (dist_use < 0)
18123 return dist_define > LEA_MAX_STALL;
18124
18125 /* If this insn has both backward non-agu dependence and forward
18126 agu dependence, the one with the shorter distance takes effect.  */
18127 return dist_define >= dist_use;
18128 }
18129
18130 /* Return true if it is legal to clobber flags by INSN and
18131 false otherwise. */
18132
18133 static bool
18134 ix86_ok_to_clobber_flags (rtx insn)
18135 {
18136 basic_block bb = BLOCK_FOR_INSN (insn);
18137 df_ref use;
18138 bitmap live;
18139
18140 while (insn)
18141 {
18142 if (NONDEBUG_INSN_P (insn))
18143 {
18144 FOR_EACH_INSN_USE (use, insn)
18145 if (DF_REF_REG_USE_P (use) && DF_REF_REGNO (use) == FLAGS_REG)
18146 return false;
18147
18148 if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
18149 return true;
18150 }
18151
18152 if (insn == BB_END (bb))
18153 break;
18154
18155 insn = NEXT_INSN (insn);
18156 }
18157
18158 live = df_get_live_out (bb);
18159 return !REGNO_REG_SET_P (live, FLAGS_REG);
18160 }
18161
18162 /* Return true if we need to split op0 = op1 + op2 into a sequence of
18163 move and add to avoid AGU stalls. */
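/* For example (illustrative operands only), instead of
	lea	(%rbx,%rcx), %rax
   we may emit
	mov	%rbx, %rax
	add	%rcx, %rax
   when the lea would otherwise stall waiting for the AGU.  */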
18164
18165 bool
18166 ix86_avoid_lea_for_add (rtx insn, rtx operands[])
18167 {
18168 unsigned int regno0, regno1, regno2;
18169
18170 /* Check if we need to optimize. */
18171 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18172 return false;
18173
18174 /* Check it is correct to split here. */
18175 if (!ix86_ok_to_clobber_flags (insn))
18176 return false;
18177
18178 regno0 = true_regnum (operands[0]);
18179 regno1 = true_regnum (operands[1]);
18180 regno2 = true_regnum (operands[2]);
18181
18182 /* We need to split only adds with a non-destructive
18183 destination operand.  */
18184 if (regno0 == regno1 || regno0 == regno2)
18185 return false;
18186 else
18187 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
18188 }
18189
18190 /* Return true if we should emit lea instruction instead of mov
18191 instruction. */
18192
18193 bool
18194 ix86_use_lea_for_mov (rtx insn, rtx operands[])
18195 {
18196 unsigned int regno0, regno1;
18197
18198 /* Check if we need to optimize. */
18199 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18200 return false;
18201
18202 /* Use lea for reg to reg moves only. */
18203 if (!REG_P (operands[0]) || !REG_P (operands[1]))
18204 return false;
18205
18206 regno0 = true_regnum (operands[0]);
18207 regno1 = true_regnum (operands[1]);
18208
18209 return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
18210 }
18211
18212 /* Return true if we need to split lea into a sequence of
18213 instructions to avoid AGU stalls. */
18214
18215 bool
18216 ix86_avoid_lea_for_addr (rtx insn, rtx operands[])
18217 {
18218 unsigned int regno0, regno1, regno2;
18219 int split_cost;
18220 struct ix86_address parts;
18221 int ok;
18222
18223 /* Check we need to optimize. */
18224 if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
18225 return false;
18226
18227 /* The "at least two components" test below might not catch simple
18228 move or zero extension insns if parts.base is non-NULL and parts.disp
18229 is const0_rtx as the only components in the address, e.g. if the
18230 register is %rbp or %r13. As this test is much cheaper and moves or
18231 zero extensions are the common case, do this check first. */
18232 if (REG_P (operands[1])
18233 || (SImode_address_operand (operands[1], VOIDmode)
18234 && REG_P (XEXP (operands[1], 0))))
18235 return false;
18236
18237 /* Check if it is OK to split here. */
18238 if (!ix86_ok_to_clobber_flags (insn))
18239 return false;
18240
18241 ok = ix86_decompose_address (operands[1], &parts);
18242 gcc_assert (ok);
18243
18244 /* There should be at least two components in the address. */
18245 if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
18246 + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
18247 return false;
18248
18249 /* We should not split into add if a non-legitimate PIC
18250 operand is used as the displacement.  */
18251 if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
18252 return false;
18253
18254 regno0 = true_regnum (operands[0]);
18255 regno1 = INVALID_REGNUM;
18256 regno2 = INVALID_REGNUM;
18257
18258 if (parts.base)
18259 regno1 = true_regnum (parts.base);
18260 if (parts.index)
18261 regno2 = true_regnum (parts.index);
18262
18263 split_cost = 0;
18264
18265 /* Compute how many cycles we will add to the execution time
18266 if we split the lea into a sequence of instructions.  */
18267 if (parts.base || parts.index)
18268 {
18269 /* Have to use a mov instruction if the non-destructive
18270 destination form is used.  */
18271 if (regno1 != regno0 && regno2 != regno0)
18272 split_cost += 1;
18273
18274 /* Have to add index to base if both exist. */
18275 if (parts.base && parts.index)
18276 split_cost += 1;
18277
18278 /* Have to use shift and adds if scale is 2 or greater. */
18279 if (parts.scale > 1)
18280 {
18281 if (regno0 != regno1)
18282 split_cost += 1;
18283 else if (regno2 == regno0)
18284 split_cost += 4;
18285 else
18286 split_cost += parts.scale;
18287 }
18288
18289 /* Have to use an add instruction with an immediate if
18290 disp is non-zero.  */
18291 if (parts.disp && parts.disp != const0_rtx)
18292 split_cost += 1;
18293
18294 /* Subtract the price of lea. */
18295 split_cost -= 1;
18296 }
18297
18298 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
18299 parts.scale > 1);
18300 }
18301
18302 /* Emit x86 binary operand CODE in mode MODE, where the first operand
18303 matches destination. RTX includes clobber of FLAGS_REG. */
18304
18305 static void
18306 ix86_emit_binop (enum rtx_code code, enum machine_mode mode,
18307 rtx dst, rtx src)
18308 {
18309 rtx op, clob;
18310
18311 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, dst, src));
18312 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18313
18314 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
18315 }
18316
18317 /* Return true if regno1 def is nearest to the insn. */
18318
18319 static bool
18320 find_nearest_reg_def (rtx insn, int regno1, int regno2)
18321 {
18322 rtx prev = insn;
18323 rtx start = BB_HEAD (BLOCK_FOR_INSN (insn));
18324
18325 if (insn == start)
18326 return false;
18327 while (prev && prev != start)
18328 {
18329 if (!INSN_P (prev) || !NONDEBUG_INSN_P (prev))
18330 {
18331 prev = PREV_INSN (prev);
18332 continue;
18333 }
18334 if (insn_defines_reg (regno1, INVALID_REGNUM, prev))
18335 return true;
18336 else if (insn_defines_reg (regno2, INVALID_REGNUM, prev))
18337 return false;
18338 prev = PREV_INSN (prev);
18339 }
18340
18341 /* None of the regs is defined in the bb. */
18342 return false;
18343 }
18344
18345 /* Split lea instructions into a sequence of instructions
18346 which are executed on the ALU to avoid AGU stalls.
18347 It is assumed that it is allowed to clobber the flags register
18348 at the lea position.  */
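/* For illustration only (hypothetical registers), a lea such as
	lea	12(%rbx,%rcx,4), %rax
   may be rewritten for the ALU roughly as
	mov	%rcx, %rax
	shl	$2, %rax
	add	%rbx, %rax
	add	$12, %rax
   The exact sequence chosen below depends on the scale and on which
   operands overlap the destination register.  */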
18349
18350 void
18351 ix86_split_lea_for_addr (rtx insn, rtx operands[], enum machine_mode mode)
18352 {
18353 unsigned int regno0, regno1, regno2;
18354 struct ix86_address parts;
18355 rtx target, tmp;
18356 int ok, adds;
18357
18358 ok = ix86_decompose_address (operands[1], &parts);
18359 gcc_assert (ok);
18360
18361 target = gen_lowpart (mode, operands[0]);
18362
18363 regno0 = true_regnum (target);
18364 regno1 = INVALID_REGNUM;
18365 regno2 = INVALID_REGNUM;
18366
18367 if (parts.base)
18368 {
18369 parts.base = gen_lowpart (mode, parts.base);
18370 regno1 = true_regnum (parts.base);
18371 }
18372
18373 if (parts.index)
18374 {
18375 parts.index = gen_lowpart (mode, parts.index);
18376 regno2 = true_regnum (parts.index);
18377 }
18378
18379 if (parts.disp)
18380 parts.disp = gen_lowpart (mode, parts.disp);
18381
18382 if (parts.scale > 1)
18383 {
18384 /* Case r1 = r1 + ... */
18385 if (regno1 == regno0)
18386 {
18387 /* If we have a case r1 = r1 + C * r2 then we
18388 would have to use multiplication, which is very
18389 expensive.  Assume the cost model is wrong if we
18390 have such a case here.  */
18391 gcc_assert (regno2 != regno0);
18392
18393 for (adds = parts.scale; adds > 0; adds--)
18394 ix86_emit_binop (PLUS, mode, target, parts.index);
18395 }
18396 else
18397 {
18398 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
18399 if (regno0 != regno2)
18400 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
18401
18402 /* Use shift for scaling. */
18403 ix86_emit_binop (ASHIFT, mode, target,
18404 GEN_INT (exact_log2 (parts.scale)));
18405
18406 if (parts.base)
18407 ix86_emit_binop (PLUS, mode, target, parts.base);
18408
18409 if (parts.disp && parts.disp != const0_rtx)
18410 ix86_emit_binop (PLUS, mode, target, parts.disp);
18411 }
18412 }
18413 else if (!parts.base && !parts.index)
18414 {
18415 gcc_assert (parts.disp);
18416 emit_insn (gen_rtx_SET (VOIDmode, target, parts.disp));
18417 }
18418 else
18419 {
18420 if (!parts.base)
18421 {
18422 if (regno0 != regno2)
18423 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
18424 }
18425 else if (!parts.index)
18426 {
18427 if (regno0 != regno1)
18428 emit_insn (gen_rtx_SET (VOIDmode, target, parts.base));
18429 }
18430 else
18431 {
18432 if (regno0 == regno1)
18433 tmp = parts.index;
18434 else if (regno0 == regno2)
18435 tmp = parts.base;
18436 else
18437 {
18438 rtx tmp1;
18439
18440 /* Find the better operand for the SET instruction, depending
18441 on which definition is farther from the insn.  */
18442 if (find_nearest_reg_def (insn, regno1, regno2))
18443 tmp = parts.index, tmp1 = parts.base;
18444 else
18445 tmp = parts.base, tmp1 = parts.index;
18446
18447 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18448
18449 if (parts.disp && parts.disp != const0_rtx)
18450 ix86_emit_binop (PLUS, mode, target, parts.disp);
18451
18452 ix86_emit_binop (PLUS, mode, target, tmp1);
18453 return;
18454 }
18455
18456 ix86_emit_binop (PLUS, mode, target, tmp);
18457 }
18458
18459 if (parts.disp && parts.disp != const0_rtx)
18460 ix86_emit_binop (PLUS, mode, target, parts.disp);
18461 }
18462 }
18463
18464 /* Return true if it is ok to optimize an ADD operation to LEA
18465 operation to avoid flag register consumption.  For most processors,
18466 ADD is faster than LEA.  For processors like BONNELL, if the
18467 destination register of LEA holds an actual address which will be
18468 used soon, LEA is better, and otherwise ADD is better.  */
18469
18470 bool
18471 ix86_lea_for_add_ok (rtx insn, rtx operands[])
18472 {
18473 unsigned int regno0 = true_regnum (operands[0]);
18474 unsigned int regno1 = true_regnum (operands[1]);
18475 unsigned int regno2 = true_regnum (operands[2]);
18476
18477 /* If a = b + c, (a!=b && a!=c), must use lea form. */
18478 if (regno0 != regno1 && regno0 != regno2)
18479 return true;
18480
18481 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18482 return false;
18483
18484 return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
18485 }
18486
18487 /* Return true if destination reg of SET_BODY is shift count of
18488 USE_BODY. */
18489
18490 static bool
18491 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
18492 {
18493 rtx set_dest;
18494 rtx shift_rtx;
18495 int i;
18496
18497 /* Retrieve destination of SET_BODY. */
18498 switch (GET_CODE (set_body))
18499 {
18500 case SET:
18501 set_dest = SET_DEST (set_body);
18502 if (!set_dest || !REG_P (set_dest))
18503 return false;
18504 break;
18505 case PARALLEL:
18506 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
18507 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
18508 use_body))
18509 return true;
18510 default:
18511 return false;
18512 break;
18513 }
18514
18515 /* Retrieve shift count of USE_BODY. */
18516 switch (GET_CODE (use_body))
18517 {
18518 case SET:
18519 shift_rtx = XEXP (use_body, 1);
18520 break;
18521 case PARALLEL:
18522 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
18523 if (ix86_dep_by_shift_count_body (set_body,
18524 XVECEXP (use_body, 0, i)))
18525 return true;
18526 default:
18527 return false;
18528 break;
18529 }
18530
18531 if (shift_rtx
18532 && (GET_CODE (shift_rtx) == ASHIFT
18533 || GET_CODE (shift_rtx) == LSHIFTRT
18534 || GET_CODE (shift_rtx) == ASHIFTRT
18535 || GET_CODE (shift_rtx) == ROTATE
18536 || GET_CODE (shift_rtx) == ROTATERT))
18537 {
18538 rtx shift_count = XEXP (shift_rtx, 1);
18539
18540 /* Return true if shift count is dest of SET_BODY. */
18541 if (REG_P (shift_count))
18542 {
18543 /* Add a check since this can be invoked before register
18544 allocation in the pre-reload scheduler.  */
18545 if (reload_completed
18546 && true_regnum (set_dest) == true_regnum (shift_count))
18547 return true;
18548 else if (REGNO (set_dest) == REGNO (shift_count))
18549 return true;
18550 }
18551 }
18552
18553 return false;
18554 }
18555
18556 /* Return true if destination reg of SET_INSN is shift count of
18557 USE_INSN. */
18558
18559 bool
18560 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
18561 {
18562 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
18563 PATTERN (use_insn));
18564 }
18565
18566 /* Return TRUE or FALSE depending on whether the unary operator meets the
18567 appropriate constraints. */
18568
18569 bool
18570 ix86_unary_operator_ok (enum rtx_code,
18571 enum machine_mode,
18572 rtx operands[2])
18573 {
18574 /* If one of operands is memory, source and destination must match. */
18575 if ((MEM_P (operands[0])
18576 || MEM_P (operands[1]))
18577 && ! rtx_equal_p (operands[0], operands[1]))
18578 return false;
18579 return true;
18580 }
18581
18582 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
18583 are ok, keeping in mind the possible movddup alternative. */
18584
18585 bool
18586 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
18587 {
18588 if (MEM_P (operands[0]))
18589 return rtx_equal_p (operands[0], operands[1 + high]);
18590 if (MEM_P (operands[1]) && MEM_P (operands[2]))
18591 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
18592 return true;
18593 }
18594
18595 /* Post-reload splitter for converting an SF or DFmode value in an
18596 SSE register into an unsigned SImode. */
18597
18598 void
18599 ix86_split_convert_uns_si_sse (rtx operands[])
18600 {
18601 enum machine_mode vecmode;
18602 rtx value, large, zero_or_two31, input, two31, x;
18603
18604 large = operands[1];
18605 zero_or_two31 = operands[2];
18606 input = operands[3];
18607 two31 = operands[4];
18608 vecmode = GET_MODE (large);
18609 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
18610
18611 /* Load up the value into the low element. We must ensure that the other
18612 elements are valid floats -- zero is the easiest such value. */
18613 if (MEM_P (input))
18614 {
18615 if (vecmode == V4SFmode)
18616 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
18617 else
18618 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
18619 }
18620 else
18621 {
18622 input = gen_rtx_REG (vecmode, REGNO (input));
18623 emit_move_insn (value, CONST0_RTX (vecmode));
18624 if (vecmode == V4SFmode)
18625 emit_insn (gen_sse_movss (value, value, input));
18626 else
18627 emit_insn (gen_sse2_movsd (value, value, input));
18628 }
18629
18630 emit_move_insn (large, two31);
18631 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
18632
18633 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
18634 emit_insn (gen_rtx_SET (VOIDmode, large, x));
18635
18636 x = gen_rtx_AND (vecmode, zero_or_two31, large);
18637 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
18638
18639 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
18640 emit_insn (gen_rtx_SET (VOIDmode, value, x));
18641
18642 large = gen_rtx_REG (V4SImode, REGNO (large));
18643 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
18644
18645 x = gen_rtx_REG (V4SImode, REGNO (value));
18646 if (vecmode == V4SFmode)
18647 emit_insn (gen_fix_truncv4sfv4si2 (x, value));
18648 else
18649 emit_insn (gen_sse2_cvttpd2dq (x, value));
18650 value = x;
18651
18652 emit_insn (gen_xorv4si3 (value, value, large));
18653 }
18654
18655 /* Convert an unsigned DImode value into a DFmode, using only SSE.
18656 Expects the 64-bit DImode to be supplied in a pair of integral
18657 registers. Requires SSE2; will use SSE3 if available. For x86_32,
18658 -mfpmath=sse, !optimize_size only. */
18659
18660 void
18661 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
18662 {
18663 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
18664 rtx int_xmm, fp_xmm;
18665 rtx biases, exponents;
18666 rtx x;
18667
18668 int_xmm = gen_reg_rtx (V4SImode);
18669 if (TARGET_INTER_UNIT_MOVES_TO_VEC)
18670 emit_insn (gen_movdi_to_sse (int_xmm, input));
18671 else if (TARGET_SSE_SPLIT_REGS)
18672 {
18673 emit_clobber (int_xmm);
18674 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
18675 }
18676 else
18677 {
18678 x = gen_reg_rtx (V2DImode);
18679 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
18680 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
18681 }
18682
18683 x = gen_rtx_CONST_VECTOR (V4SImode,
18684 gen_rtvec (4, GEN_INT (0x43300000UL),
18685 GEN_INT (0x45300000UL),
18686 const0_rtx, const0_rtx));
18687 exponents = validize_mem (force_const_mem (V4SImode, x));
18688
18689 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
18690 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
18691
18692 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
18693 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
18694 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
18695 (0x1.0p84 + double(fp_value_hi_xmm)).
18696 Note these exponents differ by 32. */
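/* A worked example (for illustration): for input 0x100000005 (2^32 + 5)
   the interleave above produces the bit patterns of the doubles
   0x1.0p52 + 5 and 0x1.0p84 + 0x1.0p32.  After the bias subtraction
   below these become exactly 5.0 and 4294967296.0, and the final add
   yields 4294967301.0.  */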
18697
18698 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
18699
18700 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
18701 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
18702 real_ldexp (&bias_lo_rvt, &dconst1, 52);
18703 real_ldexp (&bias_hi_rvt, &dconst1, 84);
18704 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
18705 x = const_double_from_real_value (bias_hi_rvt, DFmode);
18706 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
18707 biases = validize_mem (force_const_mem (V2DFmode, biases));
18708 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
18709
18710 /* Add the upper and lower DFmode values together. */
18711 if (TARGET_SSE3)
18712 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
18713 else
18714 {
18715 x = copy_to_mode_reg (V2DFmode, fp_xmm);
18716 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
18717 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
18718 }
18719
18720 ix86_expand_vector_extract (false, target, fp_xmm, 0);
18721 }
18722
18723 /* Not used, but eases macroization of patterns. */
18724 void
18725 ix86_expand_convert_uns_sixf_sse (rtx, rtx)
18726 {
18727 gcc_unreachable ();
18728 }
18729
18730 /* Convert an unsigned SImode value into a DFmode. Only currently used
18731 for SSE, but applicable anywhere. */
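/* The expansion below adds -2^31 in SImode (wrapping the value into the
   signed range), converts the signed result, and then adds 2^31.0 back.
   A worked example (for illustration): input 0xffffffff wraps to
   0x7fffffff = 2147483647, converts to 2147483647.0, and adding
   2147483648.0 restores 4294967295.0.  */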
18732
18733 void
18734 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
18735 {
18736 REAL_VALUE_TYPE TWO31r;
18737 rtx x, fp;
18738
18739 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
18740 NULL, 1, OPTAB_DIRECT);
18741
18742 fp = gen_reg_rtx (DFmode);
18743 emit_insn (gen_floatsidf2 (fp, x));
18744
18745 real_ldexp (&TWO31r, &dconst1, 31);
18746 x = const_double_from_real_value (TWO31r, DFmode);
18747
18748 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
18749 if (x != target)
18750 emit_move_insn (target, x);
18751 }
18752
18753 /* Convert a signed DImode value into a DFmode. Only used for SSE in
18754 32-bit mode; otherwise we have a direct convert instruction. */
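/* The value is reassembled as (double) (signed) high * 2^32
   + (double) (unsigned) low.  A worked example (for illustration):
   for input -5 (0xfffffffffffffffb) the high part -1 contributes
   -4294967296.0 and the unsigned low part 0xfffffffb contributes
   4294967291.0, which sum to -5.0.  */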
18755
18756 void
18757 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
18758 {
18759 REAL_VALUE_TYPE TWO32r;
18760 rtx fp_lo, fp_hi, x;
18761
18762 fp_lo = gen_reg_rtx (DFmode);
18763 fp_hi = gen_reg_rtx (DFmode);
18764
18765 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
18766
18767 real_ldexp (&TWO32r, &dconst1, 32);
18768 x = const_double_from_real_value (TWO32r, DFmode);
18769 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
18770
18771 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
18772
18773 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
18774 0, OPTAB_DIRECT);
18775 if (x != target)
18776 emit_move_insn (target, x);
18777 }
18778
18779 /* Convert an unsigned SImode value into a SFmode, using only SSE.
18780 For x86_32, -mfpmath=sse, !optimize_size only. */
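/* The value is split as input = hi * 2^16 + lo with hi = input >> 16
   and lo = input & 0xffff.  Each 16-bit half converts to SFmode exactly
   and the multiply by 2^16 is exact, so only the final add rounds; this
   sidesteps cvtsi2ss interpreting the full 32-bit value as signed.  */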
18781 void
18782 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
18783 {
18784 REAL_VALUE_TYPE ONE16r;
18785 rtx fp_hi, fp_lo, int_hi, int_lo, x;
18786
18787 real_ldexp (&ONE16r, &dconst1, 16);
18788 x = const_double_from_real_value (ONE16r, SFmode);
18789 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
18790 NULL, 0, OPTAB_DIRECT);
18791 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
18792 NULL, 0, OPTAB_DIRECT);
18793 fp_hi = gen_reg_rtx (SFmode);
18794 fp_lo = gen_reg_rtx (SFmode);
18795 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
18796 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
18797 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
18798 0, OPTAB_DIRECT);
18799 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
18800 0, OPTAB_DIRECT);
18801 if (!rtx_equal_p (target, fp_hi))
18802 emit_move_insn (target, fp_hi);
18803 }
18804
18805 /* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
18806 a vector of unsigned ints VAL to a vector of floats TARGET.  */
18807
18808 void
18809 ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
18810 {
18811 rtx tmp[8];
18812 REAL_VALUE_TYPE TWO16r;
18813 enum machine_mode intmode = GET_MODE (val);
18814 enum machine_mode fltmode = GET_MODE (target);
18815 rtx (*cvt) (rtx, rtx);
18816
18817 if (intmode == V4SImode)
18818 cvt = gen_floatv4siv4sf2;
18819 else
18820 cvt = gen_floatv8siv8sf2;
18821 tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
18822 tmp[0] = force_reg (intmode, tmp[0]);
18823 tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
18824 OPTAB_DIRECT);
18825 tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
18826 NULL_RTX, 1, OPTAB_DIRECT);
18827 tmp[3] = gen_reg_rtx (fltmode);
18828 emit_insn (cvt (tmp[3], tmp[1]));
18829 tmp[4] = gen_reg_rtx (fltmode);
18830 emit_insn (cvt (tmp[4], tmp[2]));
18831 real_ldexp (&TWO16r, &dconst1, 16);
18832 tmp[5] = const_double_from_real_value (TWO16r, SFmode);
18833 tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
18834 tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
18835 OPTAB_DIRECT);
18836 tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
18837 OPTAB_DIRECT);
18838 if (tmp[7] != target)
18839 emit_move_insn (target, tmp[7]);
18840 }
18841
18842 /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
18843 pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
18844 This is done by doing just signed conversion if < 0x1p31, and otherwise by
18845 subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */
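/* A worked example (for illustration): for a lane holding 3e9, which is
   >= 0x1p31, we compute 3e9 - 0x1p31 = 852516352.0; the signed
   conversion then gives 0x32d05e00, and xoring in 0x80000000 recovers
   0xb2d05e00 = 3000000000.  Lanes below 0x1p31 are converted directly
   and get a zero xor mask.  */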
18846
18847 rtx
18848 ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
18849 {
18850 REAL_VALUE_TYPE TWO31r;
18851 rtx two31r, tmp[4];
18852 enum machine_mode mode = GET_MODE (val);
18853 enum machine_mode scalarmode = GET_MODE_INNER (mode);
18854 enum machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
18855 rtx (*cmp) (rtx, rtx, rtx, rtx);
18856 int i;
18857
18858 for (i = 0; i < 3; i++)
18859 tmp[i] = gen_reg_rtx (mode);
18860 real_ldexp (&TWO31r, &dconst1, 31);
18861 two31r = const_double_from_real_value (TWO31r, scalarmode);
18862 two31r = ix86_build_const_vector (mode, 1, two31r);
18863 two31r = force_reg (mode, two31r);
18864 switch (mode)
18865 {
18866 case V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
18867 case V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
18868 case V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
18869 case V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
18870 default: gcc_unreachable ();
18871 }
18872 tmp[3] = gen_rtx_LE (mode, two31r, val);
18873 emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
18874 tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
18875 0, OPTAB_DIRECT);
18876 if (intmode == V4SImode || TARGET_AVX2)
18877 *xorp = expand_simple_binop (intmode, ASHIFT,
18878 gen_lowpart (intmode, tmp[0]),
18879 GEN_INT (31), NULL_RTX, 0,
18880 OPTAB_DIRECT);
18881 else
18882 {
18883 rtx two31 = GEN_INT ((unsigned HOST_WIDE_INT) 1 << 31);
18884 two31 = ix86_build_const_vector (intmode, 1, two31);
18885 *xorp = expand_simple_binop (intmode, AND,
18886 gen_lowpart (intmode, tmp[0]),
18887 two31, NULL_RTX, 0,
18888 OPTAB_DIRECT);
18889 }
18890 return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
18891 0, OPTAB_DIRECT);
18892 }
18893
18894 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
18895 then replicate the value for all elements of the vector
18896 register. */
18897
18898 rtx
18899 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
18900 {
18901 int i, n_elt;
18902 rtvec v;
18903 enum machine_mode scalar_mode;
18904
18905 switch (mode)
18906 {
18907 case V64QImode:
18908 case V32QImode:
18909 case V16QImode:
18910 case V32HImode:
18911 case V16HImode:
18912 case V8HImode:
18913 case V16SImode:
18914 case V8SImode:
18915 case V4SImode:
18916 case V8DImode:
18917 case V4DImode:
18918 case V2DImode:
18919 gcc_assert (vect);
18920 case V16SFmode:
18921 case V8SFmode:
18922 case V4SFmode:
18923 case V8DFmode:
18924 case V4DFmode:
18925 case V2DFmode:
18926 n_elt = GET_MODE_NUNITS (mode);
18927 v = rtvec_alloc (n_elt);
18928 scalar_mode = GET_MODE_INNER (mode);
18929
18930 RTVEC_ELT (v, 0) = value;
18931
18932 for (i = 1; i < n_elt; ++i)
18933 RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
18934
18935 return gen_rtx_CONST_VECTOR (mode, v);
18936
18937 default:
18938 gcc_unreachable ();
18939 }
18940 }
18941
18942 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
18943 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
18944 for an SSE register. If VECT is true, then replicate the mask for
18945 all elements of the vector register. If INVERT is true, then create
18946 a mask excluding the sign bit. */
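/* For example, for V4SFmode with VECT true this builds the constant
   { 0x80000000, 0x80000000, 0x80000000, 0x80000000 } viewed as floats,
   and with INVERT true { 0x7fffffff, ... } instead.  */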
18947
18948 rtx
18949 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
18950 {
18951 enum machine_mode vec_mode, imode;
18952 HOST_WIDE_INT hi, lo;
18953 int shift = 63;
18954 rtx v;
18955 rtx mask;
18956
18957 /* Find the sign bit, sign extended to 2*HWI. */
18958 switch (mode)
18959 {
18960 case V16SImode:
18961 case V16SFmode:
18962 case V8SImode:
18963 case V4SImode:
18964 case V8SFmode:
18965 case V4SFmode:
18966 vec_mode = mode;
18967 mode = GET_MODE_INNER (mode);
18968 imode = SImode;
18969 lo = 0x80000000, hi = lo < 0;
18970 break;
18971
18972 case V8DImode:
18973 case V4DImode:
18974 case V2DImode:
18975 case V8DFmode:
18976 case V4DFmode:
18977 case V2DFmode:
18978 vec_mode = mode;
18979 mode = GET_MODE_INNER (mode);
18980 imode = DImode;
18981 if (HOST_BITS_PER_WIDE_INT >= 64)
18982 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
18983 else
18984 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
18985 break;
18986
18987 case TImode:
18988 case TFmode:
18989 vec_mode = VOIDmode;
18990 if (HOST_BITS_PER_WIDE_INT >= 64)
18991 {
18992 imode = TImode;
18993 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
18994 }
18995 else
18996 {
18997 rtvec vec;
18998
18999 imode = DImode;
19000 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
19001
19002 if (invert)
19003 {
19004 lo = ~lo, hi = ~hi;
19005 v = constm1_rtx;
19006 }
19007 else
19008 v = const0_rtx;
19009
19010 mask = immed_double_const (lo, hi, imode);
19011
19012 vec = gen_rtvec (2, v, mask);
19013 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
19014 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
19015
19016 return v;
19017 }
19018 break;
19019
19020 default:
19021 gcc_unreachable ();
19022 }
19023
19024 if (invert)
19025 lo = ~lo, hi = ~hi;
19026
19027 /* Force this value into the low part of a fp vector constant. */
19028 mask = immed_double_const (lo, hi, imode);
19029 mask = gen_lowpart (mode, mask);
19030
19031 if (vec_mode == VOIDmode)
19032 return force_reg (mode, mask);
19033
19034 v = ix86_build_const_vector (vec_mode, vect, mask);
19035 return force_reg (vec_mode, v);
19036 }
19037
19038 /* Generate code for floating point ABS or NEG. */
19039
19040 void
19041 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
19042 rtx operands[])
19043 {
19044 rtx mask, set, dst, src;
19045 bool use_sse = false;
19046 bool vector_mode = VECTOR_MODE_P (mode);
19047 enum machine_mode vmode = mode;
19048
19049 if (vector_mode)
19050 use_sse = true;
19051 else if (mode == TFmode)
19052 use_sse = true;
19053 else if (TARGET_SSE_MATH)
19054 {
19055 use_sse = SSE_FLOAT_MODE_P (mode);
19056 if (mode == SFmode)
19057 vmode = V4SFmode;
19058 else if (mode == DFmode)
19059 vmode = V2DFmode;
19060 }
19061
19062 /* NEG and ABS performed with SSE use bitwise mask operations.
19063 Create the appropriate mask now. */
19064 if (use_sse)
19065 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
19066 else
19067 mask = NULL_RTX;
19068
19069 dst = operands[0];
19070 src = operands[1];
19071
19072 set = gen_rtx_fmt_e (code, mode, src);
19073 set = gen_rtx_SET (VOIDmode, dst, set);
19074
19075 if (mask)
19076 {
19077 rtx use, clob;
19078 rtvec par;
19079
19080 use = gen_rtx_USE (VOIDmode, mask);
19081 if (vector_mode)
19082 par = gen_rtvec (2, set, use);
19083 else
19084 {
19085 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
19086 par = gen_rtvec (3, set, use, clob);
19087 }
19088 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
19089 }
19090 else
19091 emit_insn (set);
19092 }
19093
19094 /* Expand a copysign operation. Special case operand 0 being a constant. */
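/* The expansion is purely bitwise: copysign (x, y) combines the magnitude
   bits of x with the sign bit of y, i.e. (x & ~signmask) | (y & signmask),
   where signmask has only the floating-point sign bit set (0x80000000 for
   SFmode).  */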
19095
19096 void
19097 ix86_expand_copysign (rtx operands[])
19098 {
19099 enum machine_mode mode, vmode;
19100 rtx dest, op0, op1, mask, nmask;
19101
19102 dest = operands[0];
19103 op0 = operands[1];
19104 op1 = operands[2];
19105
19106 mode = GET_MODE (dest);
19107
19108 if (mode == SFmode)
19109 vmode = V4SFmode;
19110 else if (mode == DFmode)
19111 vmode = V2DFmode;
19112 else
19113 vmode = mode;
19114
19115 if (GET_CODE (op0) == CONST_DOUBLE)
19116 {
19117 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
19118
19119 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
19120 op0 = simplify_unary_operation (ABS, mode, op0, mode);
19121
19122 if (mode == SFmode || mode == DFmode)
19123 {
19124 if (op0 == CONST0_RTX (mode))
19125 op0 = CONST0_RTX (vmode);
19126 else
19127 {
19128 rtx v = ix86_build_const_vector (vmode, false, op0);
19129
19130 op0 = force_reg (vmode, v);
19131 }
19132 }
19133 else if (op0 != CONST0_RTX (mode))
19134 op0 = force_reg (mode, op0);
19135
19136 mask = ix86_build_signbit_mask (vmode, 0, 0);
19137
19138 if (mode == SFmode)
19139 copysign_insn = gen_copysignsf3_const;
19140 else if (mode == DFmode)
19141 copysign_insn = gen_copysigndf3_const;
19142 else
19143 copysign_insn = gen_copysigntf3_const;
19144
19145 emit_insn (copysign_insn (dest, op0, op1, mask));
19146 }
19147 else
19148 {
19149 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
19150
19151 nmask = ix86_build_signbit_mask (vmode, 0, 1);
19152 mask = ix86_build_signbit_mask (vmode, 0, 0);
19153
19154 if (mode == SFmode)
19155 copysign_insn = gen_copysignsf3_var;
19156 else if (mode == DFmode)
19157 copysign_insn = gen_copysigndf3_var;
19158 else
19159 copysign_insn = gen_copysigntf3_var;
19160
19161 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
19162 }
19163 }
19164
19165 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
19166 be a constant, and so has already been expanded into a vector constant. */
19167
19168 void
19169 ix86_split_copysign_const (rtx operands[])
19170 {
19171 enum machine_mode mode, vmode;
19172 rtx dest, op0, mask, x;
19173
19174 dest = operands[0];
19175 op0 = operands[1];
19176 mask = operands[3];
19177
19178 mode = GET_MODE (dest);
19179 vmode = GET_MODE (mask);
19180
19181 dest = simplify_gen_subreg (vmode, dest, mode, 0);
19182 x = gen_rtx_AND (vmode, dest, mask);
19183 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19184
19185 if (op0 != CONST0_RTX (vmode))
19186 {
19187 x = gen_rtx_IOR (vmode, dest, op0);
19188 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19189 }
19190 }
19191
19192 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
19193 so we have to do two masks. */
19194
19195 void
19196 ix86_split_copysign_var (rtx operands[])
19197 {
19198 enum machine_mode mode, vmode;
19199 rtx dest, scratch, op0, op1, mask, nmask, x;
19200
19201 dest = operands[0];
19202 scratch = operands[1];
19203 op0 = operands[2];
19204 op1 = operands[3];
19205 nmask = operands[4];
19206 mask = operands[5];
19207
19208 mode = GET_MODE (dest);
19209 vmode = GET_MODE (mask);
19210
19211 if (rtx_equal_p (op0, op1))
19212 {
19213 /* Shouldn't happen often (it's useless, obviously), but when it does
19214 we'd generate incorrect code if we continue below. */
19215 emit_move_insn (dest, op0);
19216 return;
19217 }
19218
19219 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
19220 {
19221 gcc_assert (REGNO (op1) == REGNO (scratch));
19222
19223 x = gen_rtx_AND (vmode, scratch, mask);
19224 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
19225
19226 dest = mask;
19227 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19228 x = gen_rtx_NOT (vmode, dest);
19229 x = gen_rtx_AND (vmode, x, op0);
19230 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19231 }
19232 else
19233 {
19234 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
19235 {
19236 x = gen_rtx_AND (vmode, scratch, mask);
19237 }
19238 else /* alternative 2,4 */
19239 {
19240 gcc_assert (REGNO (mask) == REGNO (scratch));
19241 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
19242 x = gen_rtx_AND (vmode, scratch, op1);
19243 }
19244 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
19245
19246 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
19247 {
19248 dest = simplify_gen_subreg (vmode, op0, mode, 0);
19249 x = gen_rtx_AND (vmode, dest, nmask);
19250 }
19251 else /* alternative 3,4 */
19252 {
19253 gcc_assert (REGNO (nmask) == REGNO (dest));
19254 dest = nmask;
19255 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19256 x = gen_rtx_AND (vmode, dest, op0);
19257 }
19258 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19259 }
19260
19261 x = gen_rtx_IOR (vmode, dest, scratch);
19262 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19263 }
19264
19265 /* Return TRUE or FALSE depending on whether the first SET in INSN
19266 has source and destination with matching CC modes, and that the
19267 CC mode is at least as constrained as REQ_MODE. */
19268
19269 bool
19270 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
19271 {
19272 rtx set;
19273 enum machine_mode set_mode;
19274
19275 set = PATTERN (insn);
19276 if (GET_CODE (set) == PARALLEL)
19277 set = XVECEXP (set, 0, 0);
19278 gcc_assert (GET_CODE (set) == SET);
19279 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
19280
19281 set_mode = GET_MODE (SET_DEST (set));
19282 switch (set_mode)
19283 {
19284 case CCNOmode:
19285 if (req_mode != CCNOmode
19286 && (req_mode != CCmode
19287 || XEXP (SET_SRC (set), 1) != const0_rtx))
19288 return false;
19289 break;
19290 case CCmode:
19291 if (req_mode == CCGCmode)
19292 return false;
19293 /* FALLTHRU */
19294 case CCGCmode:
19295 if (req_mode == CCGOCmode || req_mode == CCNOmode)
19296 return false;
19297 /* FALLTHRU */
19298 case CCGOCmode:
19299 if (req_mode == CCZmode)
19300 return false;
19301 /* FALLTHRU */
19302 case CCZmode:
19303 break;
19304
19305 case CCAmode:
19306 case CCCmode:
19307 case CCOmode:
19308 case CCSmode:
19309 if (set_mode != req_mode)
19310 return false;
19311 break;
19312
19313 default:
19314 gcc_unreachable ();
19315 }
19316
19317 return GET_MODE (SET_SRC (set)) == set_mode;
19318 }
19319
19320 /* Generate insn patterns to do an integer compare of OPERANDS. */
19321
19322 static rtx
19323 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
19324 {
19325 enum machine_mode cmpmode;
19326 rtx tmp, flags;
19327
19328 cmpmode = SELECT_CC_MODE (code, op0, op1);
19329 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
19330
19331 /* This is very simple, but making the interface the same as in the
19332 FP case makes the rest of the code easier. */
19333 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
19334 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
19335
19336 /* Return the test that should be put into the flags user, i.e.
19337 the bcc, scc, or cmov instruction. */
19338 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
19339 }
19340
19341 /* Figure out whether to use ordered or unordered fp comparisons.
19342 Return the appropriate mode to use. */
19343
19344 enum machine_mode
19345 ix86_fp_compare_mode (enum rtx_code)
19346 {
19347 /* ??? In order to make all comparisons reversible, we do all comparisons
19348 non-trapping when compiling for IEEE. Once gcc is able to distinguish
19349 all forms of trapping and nontrapping comparisons, we can make inequality
19350 comparisons trapping again, since it results in better code when using
19351 FCOM based compares. */
19352 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
19353 }
19354
19355 enum machine_mode
19356 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
19357 {
19358 enum machine_mode mode = GET_MODE (op0);
19359
19360 if (SCALAR_FLOAT_MODE_P (mode))
19361 {
19362 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
19363 return ix86_fp_compare_mode (code);
19364 }
19365
19366 switch (code)
19367 {
19368 /* Only zero flag is needed. */
19369 case EQ: /* ZF=0 */
19370 case NE: /* ZF!=0 */
19371 return CCZmode;
19372 /* Codes needing carry flag. */
19373 case GEU: /* CF=0 */
19374 case LTU: /* CF=1 */
19375 /* Detect overflow checks. They need just the carry flag. */
19376 if (GET_CODE (op0) == PLUS
19377 && rtx_equal_p (op1, XEXP (op0, 0)))
19378 return CCCmode;
19379 else
19380 return CCmode;
19381 case GTU: /* CF=0 & ZF=0 */
19382 case LEU: /* CF=1 | ZF=1 */
19383 return CCmode;
19384 /* Codes possibly doable only with sign flag when
19385 comparing against zero. */
19386 case GE: /* SF=OF or SF=0 */
19387 case LT: /* SF<>OF or SF=1 */
19388 if (op1 == const0_rtx)
19389 return CCGOCmode;
19390 else
19391 /* For other cases the Carry flag is not required.  */
19392 return CCGCmode;
19393 /* Codes doable only with the sign flag when comparing
19394 against zero, but we lack a jump instruction for it,
19395 so we need to use relational tests against overflow,
19396 which thus needs to be zero.  */
19397 case GT: /* ZF=0 & SF=OF */
19398 case LE: /* ZF=1 | SF<>OF */
19399 if (op1 == const0_rtx)
19400 return CCNOmode;
19401 else
19402 return CCGCmode;
19403 /* The strcmp pattern does (use flags), and combine may ask us for a proper
19404 mode.  */
19405 case USE:
19406 return CCmode;
19407 default:
19408 gcc_unreachable ();
19409 }
19410 }
19411
19412 /* Return the fixed registers used for condition codes. */
19413
19414 static bool
19415 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
19416 {
19417 *p1 = FLAGS_REG;
19418 *p2 = FPSR_REG;
19419 return true;
19420 }
19421
19422 /* If two condition code modes are compatible, return a condition code
19423 mode which is compatible with both. Otherwise, return
19424 VOIDmode. */
19425
19426 static enum machine_mode
19427 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
19428 {
19429 if (m1 == m2)
19430 return m1;
19431
19432 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
19433 return VOIDmode;
19434
19435 if ((m1 == CCGCmode && m2 == CCGOCmode)
19436 || (m1 == CCGOCmode && m2 == CCGCmode))
19437 return CCGCmode;
19438
19439 if (m1 == CCZmode && (m2 == CCGCmode || m2 == CCGOCmode))
19440 return m2;
19441 else if (m2 == CCZmode && (m1 == CCGCmode || m1 == CCGOCmode))
19442 return m1;
19443
19444 switch (m1)
19445 {
19446 default:
19447 gcc_unreachable ();
19448
19449 case CCmode:
19450 case CCGCmode:
19451 case CCGOCmode:
19452 case CCNOmode:
19453 case CCAmode:
19454 case CCCmode:
19455 case CCOmode:
19456 case CCSmode:
19457 case CCZmode:
19458 switch (m2)
19459 {
19460 default:
19461 return VOIDmode;
19462
19463 case CCmode:
19464 case CCGCmode:
19465 case CCGOCmode:
19466 case CCNOmode:
19467 case CCAmode:
19468 case CCCmode:
19469 case CCOmode:
19470 case CCSmode:
19471 case CCZmode:
19472 return CCmode;
19473 }
19474
19475 case CCFPmode:
19476 case CCFPUmode:
19477 /* These are only compatible with themselves, which we already
19478 checked above. */
19479 return VOIDmode;
19480 }
19481 }
19482
19483
19484 /* Return a comparison we can do that is equivalent to
19485 swap_condition (code), apart possibly from orderedness.
19486 But never change orderedness if TARGET_IEEE_FP, returning
19487 UNKNOWN in that case if necessary.  */
19488
19489 static enum rtx_code
19490 ix86_fp_swap_condition (enum rtx_code code)
19491 {
19492 switch (code)
19493 {
19494 case GT: /* GTU - CF=0 & ZF=0 */
19495 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
19496 case GE: /* GEU - CF=0 */
19497 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
19498 case UNLT: /* LTU - CF=1 */
19499 return TARGET_IEEE_FP ? UNKNOWN : GT;
19500 case UNLE: /* LEU - CF=1 | ZF=1 */
19501 return TARGET_IEEE_FP ? UNKNOWN : GE;
19502 default:
19503 return swap_condition (code);
19504 }
19505 }
19506
19507 /* Return the cost of comparison CODE using the best strategy for performance.
19508 All following functions use the number of instructions as a cost metric.
19509 In the future this should be tweaked to compute bytes for optimize_size and
19510 take into account the performance of various instructions on various CPUs.  */
19511
19512 static int
19513 ix86_fp_comparison_cost (enum rtx_code code)
19514 {
19515 int arith_cost;
19516
19517 /* The cost of code using bit-twiddling on %ah. */
19518 switch (code)
19519 {
19520 case UNLE:
19521 case UNLT:
19522 case LTGT:
19523 case GT:
19524 case GE:
19525 case UNORDERED:
19526 case ORDERED:
19527 case UNEQ:
19528 arith_cost = 4;
19529 break;
19530 case LT:
19531 case NE:
19532 case EQ:
19533 case UNGE:
19534 arith_cost = TARGET_IEEE_FP ? 5 : 4;
19535 break;
19536 case LE:
19537 case UNGT:
19538 arith_cost = TARGET_IEEE_FP ? 6 : 4;
19539 break;
19540 default:
19541 gcc_unreachable ();
19542 }
19543
19544 switch (ix86_fp_comparison_strategy (code))
19545 {
19546 case IX86_FPCMP_COMI:
19547 return arith_cost > 4 ? 3 : 2;
19548 case IX86_FPCMP_SAHF:
19549 return arith_cost > 4 ? 4 : 3;
19550 default:
19551 return arith_cost;
19552 }
19553 }
19554
19555 /* Return the strategy to use for a floating-point comparison. We assume that
19556 fcomi is always preferable where available, since that is also true when looking
19557 at size (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
19558
19559 enum ix86_fpcmp_strategy
19560 ix86_fp_comparison_strategy (enum rtx_code)
19561 {
19562 /* Do fcomi/sahf based test when profitable. */
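/* Roughly: fcomi/fucomi write the result directly into EFLAGS (and imply a
   CMOV-capable CPU, hence the TARGET_CMOVE test below); fnstsw+sahf copies the
   x87 status word into EFLAGS via %ah; the arithmetic fallback tests bits of
   %ah with test/and/cmp sequences. */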
19563
19564 if (TARGET_CMOVE)
19565 return IX86_FPCMP_COMI;
19566
19567 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
19568 return IX86_FPCMP_SAHF;
19569
19570 return IX86_FPCMP_ARITH;
19571 }
19572
19573 /* Swap, force into registers, or otherwise massage the two operands
19574 to a fp comparison. The operands are updated in place; the new
19575 comparison code is returned. */
19576
19577 static enum rtx_code
19578 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
19579 {
19580 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
19581 rtx op0 = *pop0, op1 = *pop1;
19582 enum machine_mode op_mode = GET_MODE (op0);
19583 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
19584
19585 /* All of the unordered compare instructions only work on registers.
19586 The same is true of the fcomi compare instructions. The XFmode
19587 compare instructions require registers except when comparing
19588 against zero or when converting operand 1 from fixed point to
19589 floating point. */
19590
19591 if (!is_sse
19592 && (fpcmp_mode == CCFPUmode
19593 || (op_mode == XFmode
19594 && ! (standard_80387_constant_p (op0) == 1
19595 || standard_80387_constant_p (op1) == 1)
19596 && GET_CODE (op1) != FLOAT)
19597 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
19598 {
19599 op0 = force_reg (op_mode, op0);
19600 op1 = force_reg (op_mode, op1);
19601 }
19602 else
19603 {
19604 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
19605 things around if they appear profitable, otherwise force op0
19606 into a register. */
19607
19608 if (standard_80387_constant_p (op0) == 0
19609 || (MEM_P (op0)
19610 && ! (standard_80387_constant_p (op1) == 0
19611 || MEM_P (op1))))
19612 {
19613 enum rtx_code new_code = ix86_fp_swap_condition (code);
19614 if (new_code != UNKNOWN)
19615 {
19616 rtx tmp;
19617 tmp = op0, op0 = op1, op1 = tmp;
19618 code = new_code;
19619 }
19620 }
19621
19622 if (!REG_P (op0))
19623 op0 = force_reg (op_mode, op0);
19624
19625 if (CONSTANT_P (op1))
19626 {
19627 int tmp = standard_80387_constant_p (op1);
19628 if (tmp == 0)
19629 op1 = validize_mem (force_const_mem (op_mode, op1));
19630 else if (tmp == 1)
19631 {
19632 if (TARGET_CMOVE)
19633 op1 = force_reg (op_mode, op1);
19634 }
19635 else
19636 op1 = force_reg (op_mode, op1);
19637 }
19638 }
19639
19640 /* Try to rearrange the comparison to make it cheaper. */
19641 if (ix86_fp_comparison_cost (code)
19642 > ix86_fp_comparison_cost (swap_condition (code))
19643 && (REG_P (op1) || can_create_pseudo_p ()))
19644 {
19645 rtx tmp;
19646 tmp = op0, op0 = op1, op1 = tmp;
19647 code = swap_condition (code);
19648 if (!REG_P (op0))
19649 op0 = force_reg (op_mode, op0);
19650 }
19651
19652 *pop0 = op0;
19653 *pop1 = op1;
19654 return code;
19655 }
19656
19657 /* Convert the comparison codes we use to represent an FP comparison into the
19658 integer code that will result in a proper branch. Return UNKNOWN if no such
19659 code is available. */
19660
19661 enum rtx_code
19662 ix86_fp_compare_code_to_integer (enum rtx_code code)
19663 {
19664 switch (code)
19665 {
19666 case GT:
19667 return GTU;
19668 case GE:
19669 return GEU;
19670 case ORDERED:
19671 case UNORDERED:
19672 return code;
19673 break;
19674 case UNEQ:
19675 return EQ;
19676 break;
19677 case UNLT:
19678 return LTU;
19679 break;
19680 case UNLE:
19681 return LEU;
19682 break;
19683 case LTGT:
19684 return NE;
19685 break;
19686 default:
19687 return UNKNOWN;
19688 }
19689 }
19690
19691 /* Generate insn patterns to do a floating point compare of OPERANDS. */
19692
19693 static rtx
19694 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
19695 {
19696 enum machine_mode fpcmp_mode, intcmp_mode;
19697 rtx tmp, tmp2;
19698
19699 fpcmp_mode = ix86_fp_compare_mode (code);
19700 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
19701
19702 /* Do fcomi/sahf based test when profitable. */
19703 switch (ix86_fp_comparison_strategy (code))
19704 {
19705 case IX86_FPCMP_COMI:
19706 intcmp_mode = fpcmp_mode;
19707 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
19708 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
19709 tmp);
19710 emit_insn (tmp);
19711 break;
19712
19713 case IX86_FPCMP_SAHF:
19714 intcmp_mode = fpcmp_mode;
19715 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
19716 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
19717 tmp);
19718
19719 if (!scratch)
19720 scratch = gen_reg_rtx (HImode);
19721 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
19722 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
19723 break;
19724
19725 case IX86_FPCMP_ARITH:
19726 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
19727 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
19728 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
19729 if (!scratch)
19730 scratch = gen_reg_rtx (HImode);
19731 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
19732
19733 /* In the unordered case, we have to check C2 for NaNs, which
19734 doesn't work out to any nice combination.
19735 So do some bit twiddling on the value we've got in AH to come
19736 up with an appropriate set of condition codes. */
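/* Note: after fnstsw, %ah holds the x87 condition codes as C0 = 0x01,
   C2 = 0x04 and C3 = 0x40, so the magic masks below are combinations of
   those bits (e.g. 0x45 = C3|C2|C0, 0x05 = C2|C0). */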
19737
19738 intcmp_mode = CCNOmode;
19739 switch (code)
19740 {
19741 case GT:
19742 case UNGT:
19743 if (code == GT || !TARGET_IEEE_FP)
19744 {
19745 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
19746 code = EQ;
19747 }
19748 else
19749 {
19750 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19751 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
19752 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
19753 intcmp_mode = CCmode;
19754 code = GEU;
19755 }
19756 break;
19757 case LT:
19758 case UNLT:
19759 if (code == LT && TARGET_IEEE_FP)
19760 {
19761 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19762 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
19763 intcmp_mode = CCmode;
19764 code = EQ;
19765 }
19766 else
19767 {
19768 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
19769 code = NE;
19770 }
19771 break;
19772 case GE:
19773 case UNGE:
19774 if (code == GE || !TARGET_IEEE_FP)
19775 {
19776 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
19777 code = EQ;
19778 }
19779 else
19780 {
19781 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19782 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
19783 code = NE;
19784 }
19785 break;
19786 case LE:
19787 case UNLE:
19788 if (code == LE && TARGET_IEEE_FP)
19789 {
19790 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19791 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
19792 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
19793 intcmp_mode = CCmode;
19794 code = LTU;
19795 }
19796 else
19797 {
19798 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
19799 code = NE;
19800 }
19801 break;
19802 case EQ:
19803 case UNEQ:
19804 if (code == EQ && TARGET_IEEE_FP)
19805 {
19806 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19807 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
19808 intcmp_mode = CCmode;
19809 code = EQ;
19810 }
19811 else
19812 {
19813 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
19814 code = NE;
19815 }
19816 break;
19817 case NE:
19818 case LTGT:
19819 if (code == NE && TARGET_IEEE_FP)
19820 {
19821 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19822 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
19823 GEN_INT (0x40)));
19824 code = NE;
19825 }
19826 else
19827 {
19828 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
19829 code = EQ;
19830 }
19831 break;
19832
19833 case UNORDERED:
19834 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
19835 code = NE;
19836 break;
19837 case ORDERED:
19838 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
19839 code = EQ;
19840 break;
19841
19842 default:
19843 gcc_unreachable ();
19844 }
19845 break;
19846
19847 default:
19848 gcc_unreachable();
19849 }
19850
19851 /* Return the test that should be put into the flags user, i.e.
19852 the bcc, scc, or cmov instruction. */
19853 return gen_rtx_fmt_ee (code, VOIDmode,
19854 gen_rtx_REG (intcmp_mode, FLAGS_REG),
19855 const0_rtx);
19856 }
19857
19858 static rtx
19859 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
19860 {
19861 rtx ret;
19862
19863 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
19864 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
19865
19866 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
19867 {
19868 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
19869 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
19870 }
19871 else
19872 ret = ix86_expand_int_compare (code, op0, op1);
19873
19874 return ret;
19875 }
19876
19877 void
19878 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
19879 {
19880 enum machine_mode mode = GET_MODE (op0);
19881 rtx tmp;
19882
19883 switch (mode)
19884 {
19885 case SFmode:
19886 case DFmode:
19887 case XFmode:
19888 case QImode:
19889 case HImode:
19890 case SImode:
19891 simple:
19892 tmp = ix86_expand_compare (code, op0, op1);
19893 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
19894 gen_rtx_LABEL_REF (VOIDmode, label),
19895 pc_rtx);
19896 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
19897 return;
19898
19899 case DImode:
19900 if (TARGET_64BIT)
19901 goto simple;
19902 case TImode:
19903 /* Expand a double-word (DImode or TImode) branch into multiple compare+branch. */
19904 {
19905 rtx lo[2], hi[2], label2;
19906 enum rtx_code code1, code2, code3;
19907 enum machine_mode submode;
19908
19909 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
19910 {
19911 tmp = op0, op0 = op1, op1 = tmp;
19912 code = swap_condition (code);
19913 }
19914
19915 split_double_mode (mode, &op0, 1, lo+0, hi+0);
19916 split_double_mode (mode, &op1, 1, lo+1, hi+1);
19917
19918 submode = mode == DImode ? SImode : DImode;
19919
19920 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
19921 avoid two branches. This costs one extra insn, so disable when
19922 optimizing for size. */
19923
19924 if ((code == EQ || code == NE)
19925 && (!optimize_insn_for_size_p ()
19926 || hi[1] == const0_rtx || lo[1] == const0_rtx))
19927 {
19928 rtx xor0, xor1;
19929
19930 xor1 = hi[0];
19931 if (hi[1] != const0_rtx)
19932 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
19933 NULL_RTX, 0, OPTAB_WIDEN);
19934
19935 xor0 = lo[0];
19936 if (lo[1] != const0_rtx)
19937 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
19938 NULL_RTX, 0, OPTAB_WIDEN);
19939
19940 tmp = expand_binop (submode, ior_optab, xor1, xor0,
19941 NULL_RTX, 0, OPTAB_WIDEN);
19942
19943 ix86_expand_branch (code, tmp, const0_rtx, label);
19944 return;
19945 }
19946
19947 /* Otherwise, if we are doing a less-than or greater-or-equal-than
19948 comparison, op1 is a constant and its low word is zero, then we can just
19949 examine the high word. Similarly for a low word of -1 and
19950 less-or-equal-than or greater-than. */
19951
19952 if (CONST_INT_P (hi[1]))
19953 switch (code)
19954 {
19955 case LT: case LTU: case GE: case GEU:
19956 if (lo[1] == const0_rtx)
19957 {
19958 ix86_expand_branch (code, hi[0], hi[1], label);
19959 return;
19960 }
19961 break;
19962 case LE: case LEU: case GT: case GTU:
19963 if (lo[1] == constm1_rtx)
19964 {
19965 ix86_expand_branch (code, hi[0], hi[1], label);
19966 return;
19967 }
19968 break;
19969 default:
19970 break;
19971 }
19972
19973 /* Otherwise, we need two or three jumps. */
19974
19975 label2 = gen_label_rtx ();
19976
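/* The high words are compared with the original signedness (code1/code2);
   the low words contribute magnitude only, so they are always compared
   unsigned (code3). */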
19977 code1 = code;
19978 code2 = swap_condition (code);
19979 code3 = unsigned_condition (code);
19980
19981 switch (code)
19982 {
19983 case LT: case GT: case LTU: case GTU:
19984 break;
19985
19986 case LE: code1 = LT; code2 = GT; break;
19987 case GE: code1 = GT; code2 = LT; break;
19988 case LEU: code1 = LTU; code2 = GTU; break;
19989 case GEU: code1 = GTU; code2 = LTU; break;
19990
19991 case EQ: code1 = UNKNOWN; code2 = NE; break;
19992 case NE: code2 = UNKNOWN; break;
19993
19994 default:
19995 gcc_unreachable ();
19996 }
19997
19998 /*
19999 * a < b =>
20000 * if (hi(a) < hi(b)) goto true;
20001 * if (hi(a) > hi(b)) goto false;
20002 * if (lo(a) < lo(b)) goto true;
20003 * false:
20004 */
20005
20006 if (code1 != UNKNOWN)
20007 ix86_expand_branch (code1, hi[0], hi[1], label);
20008 if (code2 != UNKNOWN)
20009 ix86_expand_branch (code2, hi[0], hi[1], label2);
20010
20011 ix86_expand_branch (code3, lo[0], lo[1], label);
20012
20013 if (code2 != UNKNOWN)
20014 emit_label (label2);
20015 return;
20016 }
20017
20018 default:
20019 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
20020 goto simple;
20021 }
20022 }
20023
20024 /* Split branch based on floating point condition. */
20025 void
20026 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
20027 rtx target1, rtx target2, rtx tmp)
20028 {
20029 rtx condition;
20030 rtx i;
20031
20032 if (target2 != pc_rtx)
20033 {
20034 rtx tmp = target2;
20035 code = reverse_condition_maybe_unordered (code);
20036 target2 = target1;
20037 target1 = tmp;
20038 }
20039
20040 condition = ix86_expand_fp_compare (code, op1, op2,
20041 tmp);
20042
20043 i = emit_jump_insn (gen_rtx_SET
20044 (VOIDmode, pc_rtx,
20045 gen_rtx_IF_THEN_ELSE (VOIDmode,
20046 condition, target1, target2)));
20047 if (split_branch_probability >= 0)
20048 add_int_reg_note (i, REG_BR_PROB, split_branch_probability);
20049 }
20050
20051 void
20052 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
20053 {
20054 rtx ret;
20055
20056 gcc_assert (GET_MODE (dest) == QImode);
20057
20058 ret = ix86_expand_compare (code, op0, op1);
20059 PUT_MODE (ret, QImode);
20060 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
20061 }
20062
20063 /* Expand a comparison into one that sets or clears the carry flag. Return true
20064 when successful and set *POP to the comparison operation. */
20065 static bool
20066 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
20067 {
20068 enum machine_mode mode =
20069 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
20070
20071 /* Do not handle double-word compares that go through the special path. */
20072 if (mode == (TARGET_64BIT ? TImode : DImode))
20073 return false;
20074
20075 if (SCALAR_FLOAT_MODE_P (mode))
20076 {
20077 rtx compare_op, compare_seq;
20078
20079 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
20080
20081 /* Shortcut: the following common codes never translate
20082 into carry-flag compares. */
20083 if (code == EQ || code == NE || code == UNEQ || code == LTGT
20084 || code == ORDERED || code == UNORDERED)
20085 return false;
20086
20087 /* These comparisons require the zero flag; swap the operands so they won't need it. */
20088 if ((code == GT || code == UNLE || code == LE || code == UNGT)
20089 && !TARGET_IEEE_FP)
20090 {
20091 rtx tmp = op0;
20092 op0 = op1;
20093 op1 = tmp;
20094 code = swap_condition (code);
20095 }
20096
20097 /* Try to expand the comparison and verify that we end up with
20098 a carry-flag-based comparison. This fails only when
20099 we decide to expand the comparison using arithmetic, which is
20100 not a common scenario. */
20101 start_sequence ();
20102 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
20103 compare_seq = get_insns ();
20104 end_sequence ();
20105
20106 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
20107 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
20108 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
20109 else
20110 code = GET_CODE (compare_op);
20111
20112 if (code != LTU && code != GEU)
20113 return false;
20114
20115 emit_insn (compare_seq);
20116 *pop = compare_op;
20117 return true;
20118 }
20119
20120 if (!INTEGRAL_MODE_P (mode))
20121 return false;
20122
20123 switch (code)
20124 {
20125 case LTU:
20126 case GEU:
20127 break;
20128
20129 /* Convert a==0 into (unsigned)a<1. */
20130 case EQ:
20131 case NE:
20132 if (op1 != const0_rtx)
20133 return false;
20134 op1 = const1_rtx;
20135 code = (code == EQ ? LTU : GEU);
20136 break;
20137
20138 /* Convert a>b into b<a or a>=b+1. */
20139 case GTU:
20140 case LEU:
20141 if (CONST_INT_P (op1))
20142 {
20143 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
20144 /* Bail out on overflow. We could still swap the operands, but that
20145 would force loading the constant into a register. */
20146 if (op1 == const0_rtx
20147 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
20148 return false;
20149 code = (code == GTU ? GEU : LTU);
20150 }
20151 else
20152 {
20153 rtx tmp = op1;
20154 op1 = op0;
20155 op0 = tmp;
20156 code = (code == GTU ? LTU : GEU);
20157 }
20158 break;
20159
20160 /* Convert a>=0 into (unsigned)a<0x80000000. */
20161 case LT:
20162 case GE:
20163 if (mode == DImode || op1 != const0_rtx)
20164 return false;
20165 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20166 code = (code == LT ? GEU : LTU);
20167 break;
20168 case LE:
20169 case GT:
20170 if (mode == DImode || op1 != constm1_rtx)
20171 return false;
20172 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20173 code = (code == LE ? GEU : LTU);
20174 break;
20175
20176 default:
20177 return false;
20178 }
20179 /* Swapping operands may cause a constant to appear as the first operand. */
20180 if (!nonimmediate_operand (op0, VOIDmode))
20181 {
20182 if (!can_create_pseudo_p ())
20183 return false;
20184 op0 = force_reg (mode, op0);
20185 }
20186 *pop = ix86_expand_compare (code, op0, op1);
20187 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
20188 return true;
20189 }
20190
20191 bool
20192 ix86_expand_int_movcc (rtx operands[])
20193 {
20194 enum rtx_code code = GET_CODE (operands[1]), compare_code;
20195 rtx compare_seq, compare_op;
20196 enum machine_mode mode = GET_MODE (operands[0]);
20197 bool sign_bit_compare_p = false;
20198 rtx op0 = XEXP (operands[1], 0);
20199 rtx op1 = XEXP (operands[1], 1);
20200
20201 if (GET_MODE (op0) == TImode
20202 || (GET_MODE (op0) == DImode
20203 && !TARGET_64BIT))
20204 return false;
20205
20206 start_sequence ();
20207 compare_op = ix86_expand_compare (code, op0, op1);
20208 compare_seq = get_insns ();
20209 end_sequence ();
20210
20211 compare_code = GET_CODE (compare_op);
20212
20213 if ((op1 == const0_rtx && (code == GE || code == LT))
20214 || (op1 == constm1_rtx && (code == GT || code == LE)))
20215 sign_bit_compare_p = true;
20216
20217 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
20218 HImode insns, we'd be swallowed in word prefix ops. */
20219
20220 if ((mode != HImode || TARGET_FAST_PREFIX)
20221 && (mode != (TARGET_64BIT ? TImode : DImode))
20222 && CONST_INT_P (operands[2])
20223 && CONST_INT_P (operands[3]))
20224 {
20225 rtx out = operands[0];
20226 HOST_WIDE_INT ct = INTVAL (operands[2]);
20227 HOST_WIDE_INT cf = INTVAL (operands[3]);
20228 HOST_WIDE_INT diff;
20229
20230 diff = ct - cf;
20231 /* Sign-bit compares are better done using shifts than by using
20232 sbb. */
20233 if (sign_bit_compare_p
20234 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
20235 {
20236 /* Detect overlap between destination and compare sources. */
20237 rtx tmp = out;
20238
20239 if (!sign_bit_compare_p)
20240 {
20241 rtx flags;
20242 bool fpcmp = false;
20243
20244 compare_code = GET_CODE (compare_op);
20245
20246 flags = XEXP (compare_op, 0);
20247
20248 if (GET_MODE (flags) == CCFPmode
20249 || GET_MODE (flags) == CCFPUmode)
20250 {
20251 fpcmp = true;
20252 compare_code
20253 = ix86_fp_compare_code_to_integer (compare_code);
20254 }
20255
20256 /* To simplify the rest of the code, restrict to the GEU case. */
20257 if (compare_code == LTU)
20258 {
20259 HOST_WIDE_INT tmp = ct;
20260 ct = cf;
20261 cf = tmp;
20262 compare_code = reverse_condition (compare_code);
20263 code = reverse_condition (code);
20264 }
20265 else
20266 {
20267 if (fpcmp)
20268 PUT_CODE (compare_op,
20269 reverse_condition_maybe_unordered
20270 (GET_CODE (compare_op)));
20271 else
20272 PUT_CODE (compare_op,
20273 reverse_condition (GET_CODE (compare_op)));
20274 }
20275 diff = ct - cf;
20276
20277 if (reg_overlap_mentioned_p (out, op0)
20278 || reg_overlap_mentioned_p (out, op1))
20279 tmp = gen_reg_rtx (mode);
20280
20281 if (mode == DImode)
20282 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
20283 else
20284 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
20285 flags, compare_op));
20286 }
20287 else
20288 {
20289 if (code == GT || code == GE)
20290 code = reverse_condition (code);
20291 else
20292 {
20293 HOST_WIDE_INT tmp = ct;
20294 ct = cf;
20295 cf = tmp;
20296 diff = ct - cf;
20297 }
20298 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
20299 }
20300
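/* At this point TMP holds an all-ones or all-zeros mask (from the sbb
   pattern or from emit_store_flag with normalizep == -1); the arithmetic
   below turns that mask into CT or CF. */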
20301 if (diff == 1)
20302 {
20303 /*
20304 * cmpl op0,op1
20305 * sbbl dest,dest
20306 * [addl dest, ct]
20307 *
20308 * Size 5 - 8.
20309 */
20310 if (ct)
20311 tmp = expand_simple_binop (mode, PLUS,
20312 tmp, GEN_INT (ct),
20313 copy_rtx (tmp), 1, OPTAB_DIRECT);
20314 }
20315 else if (cf == -1)
20316 {
20317 /*
20318 * cmpl op0,op1
20319 * sbbl dest,dest
20320 * orl $ct, dest
20321 *
20322 * Size 8.
20323 */
20324 tmp = expand_simple_binop (mode, IOR,
20325 tmp, GEN_INT (ct),
20326 copy_rtx (tmp), 1, OPTAB_DIRECT);
20327 }
20328 else if (diff == -1 && ct)
20329 {
20330 /*
20331 * cmpl op0,op1
20332 * sbbl dest,dest
20333 * notl dest
20334 * [addl dest, cf]
20335 *
20336 * Size 8 - 11.
20337 */
20338 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20339 if (cf)
20340 tmp = expand_simple_binop (mode, PLUS,
20341 copy_rtx (tmp), GEN_INT (cf),
20342 copy_rtx (tmp), 1, OPTAB_DIRECT);
20343 }
20344 else
20345 {
20346 /*
20347 * cmpl op0,op1
20348 * sbbl dest,dest
20349 * [notl dest]
20350 * andl cf - ct, dest
20351 * [addl dest, ct]
20352 *
20353 * Size 8 - 11.
20354 */
20355
20356 if (cf == 0)
20357 {
20358 cf = ct;
20359 ct = 0;
20360 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20361 }
20362
20363 tmp = expand_simple_binop (mode, AND,
20364 copy_rtx (tmp),
20365 gen_int_mode (cf - ct, mode),
20366 copy_rtx (tmp), 1, OPTAB_DIRECT);
20367 if (ct)
20368 tmp = expand_simple_binop (mode, PLUS,
20369 copy_rtx (tmp), GEN_INT (ct),
20370 copy_rtx (tmp), 1, OPTAB_DIRECT);
20371 }
20372
20373 if (!rtx_equal_p (tmp, out))
20374 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
20375
20376 return true;
20377 }
20378
20379 if (diff < 0)
20380 {
20381 enum machine_mode cmp_mode = GET_MODE (op0);
20382
20383 HOST_WIDE_INT tmp;
20384 tmp = ct, ct = cf, cf = tmp;
20385 diff = -diff;
20386
20387 if (SCALAR_FLOAT_MODE_P (cmp_mode))
20388 {
20389 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
20390
20391 /* We may be reversing an unordered compare to a normal compare, which
20392 is not valid in general (we may convert a non-trapping condition
20393 to a trapping one); however, on i386 we currently emit all
20394 comparisons unordered. */
20395 compare_code = reverse_condition_maybe_unordered (compare_code);
20396 code = reverse_condition_maybe_unordered (code);
20397 }
20398 else
20399 {
20400 compare_code = reverse_condition (compare_code);
20401 code = reverse_condition (code);
20402 }
20403 }
20404
20405 compare_code = UNKNOWN;
20406 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
20407 && CONST_INT_P (op1))
20408 {
20409 if (op1 == const0_rtx
20410 && (code == LT || code == GE))
20411 compare_code = code;
20412 else if (op1 == constm1_rtx)
20413 {
20414 if (code == LE)
20415 compare_code = LT;
20416 else if (code == GT)
20417 compare_code = GE;
20418 }
20419 }
20420
20421 /* Optimize dest = (op0 < 0) ? -1 : cf. */
20422 if (compare_code != UNKNOWN
20423 && GET_MODE (op0) == GET_MODE (out)
20424 && (cf == -1 || ct == -1))
20425 {
20426 /* If the lea code below could be used, only optimize
20427 if it results in a 2-insn sequence. */
20428
20429 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
20430 || diff == 3 || diff == 5 || diff == 9)
20431 || (compare_code == LT && ct == -1)
20432 || (compare_code == GE && cf == -1))
20433 {
20434 /*
20435 * notl op1 (if necessary)
20436 * sarl $31, op1
20437 * orl cf, op1
20438 */
20439 if (ct != -1)
20440 {
20441 cf = ct;
20442 ct = -1;
20443 code = reverse_condition (code);
20444 }
20445
20446 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
20447
20448 out = expand_simple_binop (mode, IOR,
20449 out, GEN_INT (cf),
20450 out, 1, OPTAB_DIRECT);
20451 if (out != operands[0])
20452 emit_move_insn (operands[0], out);
20453
20454 return true;
20455 }
20456 }
20457
20458
20459 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
20460 || diff == 3 || diff == 5 || diff == 9)
20461 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
20462 && (mode != DImode
20463 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
20464 {
20465 /*
20466 * xorl dest,dest
20467 * cmpl op1,op2
20468 * setcc dest
20469 * lea cf(dest*(ct-cf)),dest
20470 *
20471 * Size 14.
20472 *
20473 * This also catches the degenerate setcc-only case.
20474 */
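/* emit_store_flag with normalizep == 1 leaves OUT equal to 0 or 1, so the
   lea computes cf + out * (ct - cf): CF when the condition is false and CT
   when it is true. */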
20475
20476 rtx tmp;
20477 int nops;
20478
20479 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
20480
20481 nops = 0;
20482 /* On x86_64 the lea instruction operates on Pmode, so we need
20483 to get the arithmetic done in the proper mode to match. */
20484 if (diff == 1)
20485 tmp = copy_rtx (out);
20486 else
20487 {
20488 rtx out1;
20489 out1 = copy_rtx (out);
20490 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
20491 nops++;
20492 if (diff & 1)
20493 {
20494 tmp = gen_rtx_PLUS (mode, tmp, out1);
20495 nops++;
20496 }
20497 }
20498 if (cf != 0)
20499 {
20500 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
20501 nops++;
20502 }
20503 if (!rtx_equal_p (tmp, out))
20504 {
20505 if (nops == 1)
20506 out = force_operand (tmp, copy_rtx (out));
20507 else
20508 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
20509 }
20510 if (!rtx_equal_p (out, operands[0]))
20511 emit_move_insn (operands[0], copy_rtx (out));
20512
20513 return true;
20514 }
20515
20516 /*
20517 * General case: Jumpful:
20518 * xorl dest,dest cmpl op1, op2
20519 * cmpl op1, op2 movl ct, dest
20520 * setcc dest jcc 1f
20521 * decl dest movl cf, dest
20522 * andl (cf-ct),dest 1:
20523 * addl ct,dest
20524 *
20525 * Size 20. Size 14.
20526 *
20527 * This is reasonably steep, but branch mispredict costs are
20528 * high on modern cpus, so consider failing only if optimizing
20529 * for space.
20530 */
20531
20532 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
20533 && BRANCH_COST (optimize_insn_for_speed_p (),
20534 false) >= 2)
20535 {
20536 if (cf == 0)
20537 {
20538 enum machine_mode cmp_mode = GET_MODE (op0);
20539
20540 cf = ct;
20541 ct = 0;
20542
20543 if (SCALAR_FLOAT_MODE_P (cmp_mode))
20544 {
20545 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
20546
20547 /* We may be reversing an unordered compare to a normal compare,
20548 which is not valid in general (we may convert a non-trapping
20549 condition to a trapping one); however, on i386 we currently
20550 emit all comparisons unordered. */
20551 code = reverse_condition_maybe_unordered (code);
20552 }
20553 else
20554 {
20555 code = reverse_condition (code);
20556 if (compare_code != UNKNOWN)
20557 compare_code = reverse_condition (compare_code);
20558 }
20559 }
20560
20561 if (compare_code != UNKNOWN)
20562 {
20563 /* notl op1 (if needed)
20564 sarl $31, op1
20565 andl (cf-ct), op1
20566 addl ct, op1
20567
20568 For x < 0 (resp. x <= -1) there will be no notl,
20569 so if possible swap the constants to get rid of the
20570 complement.
20571 True/false will be -1/0 while the code below (store flag
20572 followed by decrement) produces 0/-1, so the constants need
20573 to be exchanged once more. */
20574
20575 if (compare_code == GE || !cf)
20576 {
20577 code = reverse_condition (code);
20578 compare_code = LT;
20579 }
20580 else
20581 {
20582 HOST_WIDE_INT tmp = cf;
20583 cf = ct;
20584 ct = tmp;
20585 }
20586
20587 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
20588 }
20589 else
20590 {
20591 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
20592
20593 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
20594 constm1_rtx,
20595 copy_rtx (out), 1, OPTAB_DIRECT);
20596 }
20597
20598 out = expand_simple_binop (mode, AND, copy_rtx (out),
20599 gen_int_mode (cf - ct, mode),
20600 copy_rtx (out), 1, OPTAB_DIRECT);
20601 if (ct)
20602 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
20603 copy_rtx (out), 1, OPTAB_DIRECT);
20604 if (!rtx_equal_p (out, operands[0]))
20605 emit_move_insn (operands[0], copy_rtx (out));
20606
20607 return true;
20608 }
20609 }
20610
20611 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
20612 {
20613 /* Try a few things more with specific constants and a variable. */
20614
20615 optab op;
20616 rtx var, orig_out, out, tmp;
20617
20618 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
20619 return false;
20620
20621 /* If one of the two operands is an interesting constant, load a 0/-1 constant
20622 via the recursive call below and mask the variable in with a logical operation. */
20623
20624 if (CONST_INT_P (operands[2]))
20625 {
20626 var = operands[3];
20627 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
20628 operands[3] = constm1_rtx, op = and_optab;
20629 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
20630 operands[3] = const0_rtx, op = ior_optab;
20631 else
20632 return false;
20633 }
20634 else if (CONST_INT_P (operands[3]))
20635 {
20636 var = operands[2];
20637 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
20638 operands[2] = constm1_rtx, op = and_optab;
20639 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
20640 operands[2] = const0_rtx, op = ior_optab;
20641 else
20642 return false;
20643 }
20644 else
20645 return false;
20646
20647 orig_out = operands[0];
20648 tmp = gen_reg_rtx (mode);
20649 operands[0] = tmp;
20650
20651 /* Recurse to get the constant loaded. */
20652 if (ix86_expand_int_movcc (operands) == 0)
20653 return false;
20654
20655 /* Mask in the interesting variable. */
20656 out = expand_binop (mode, op, var, tmp, orig_out, 0,
20657 OPTAB_WIDEN);
20658 if (!rtx_equal_p (out, orig_out))
20659 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
20660
20661 return true;
20662 }
20663
20664 /*
20665 * For comparison with above,
20666 *
20667 * movl cf,dest
20668 * movl ct,tmp
20669 * cmpl op1,op2
20670 * cmovcc tmp,dest
20671 *
20672 * Size 15.
20673 */
20674
20675 if (! nonimmediate_operand (operands[2], mode))
20676 operands[2] = force_reg (mode, operands[2]);
20677 if (! nonimmediate_operand (operands[3], mode))
20678 operands[3] = force_reg (mode, operands[3]);
20679
20680 if (! register_operand (operands[2], VOIDmode)
20681 && (mode == QImode
20682 || ! register_operand (operands[3], VOIDmode)))
20683 operands[2] = force_reg (mode, operands[2]);
20684
20685 if (mode == QImode
20686 && ! register_operand (operands[3], VOIDmode))
20687 operands[3] = force_reg (mode, operands[3]);
20688
20689 emit_insn (compare_seq);
20690 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
20691 gen_rtx_IF_THEN_ELSE (mode,
20692 compare_op, operands[2],
20693 operands[3])));
20694 return true;
20695 }
20696
20697 /* Swap, force into registers, or otherwise massage the two operands
20698 to an sse comparison with a mask result. Thus we differ a bit from
20699 ix86_prepare_fp_compare_args which expects to produce a flags result.
20700
20701 The DEST operand exists to help determine whether to commute commutative
20702 operators. The POP0/POP1 operands are updated in place. The new
20703 comparison code is returned, or UNKNOWN if not implementable. */
20704
20705 static enum rtx_code
20706 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
20707 rtx *pop0, rtx *pop1)
20708 {
20709 rtx tmp;
20710
20711 switch (code)
20712 {
20713 case LTGT:
20714 case UNEQ:
20715 /* AVX supports all the needed comparisons. */
20716 if (TARGET_AVX)
20717 break;
20718 /* We have no LTGT as an operator. We could implement it with
20719 NE & ORDERED, but this requires an extra temporary. It's
20720 not clear that it's worth it. */
20721 return UNKNOWN;
20722
20723 case LT:
20724 case LE:
20725 case UNGT:
20726 case UNGE:
20727 /* These are supported directly. */
20728 break;
20729
20730 case EQ:
20731 case NE:
20732 case UNORDERED:
20733 case ORDERED:
20734 /* AVX has 3 operand comparisons, no need to swap anything. */
20735 if (TARGET_AVX)
20736 break;
20737 /* For commutative operators, try to canonicalize the destination
20738 operand to be first in the comparison - this helps reload to
20739 avoid extra moves. */
20740 if (!dest || !rtx_equal_p (dest, *pop1))
20741 break;
20742 /* FALLTHRU */
20743
20744 case GE:
20745 case GT:
20746 case UNLE:
20747 case UNLT:
20748 /* These are not supported directly before AVX, and furthermore
20749 ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
20750 comparison operands to transform into something that is
20751 supported. */
20752 tmp = *pop0;
20753 *pop0 = *pop1;
20754 *pop1 = tmp;
20755 code = swap_condition (code);
20756 break;
20757
20758 default:
20759 gcc_unreachable ();
20760 }
20761
20762 return code;
20763 }
20764
20765 /* Detect conditional moves that exactly match min/max operational
20766 semantics. Note that this is IEEE safe, as long as we don't
20767 interchange the operands.
20768
20769 Returns FALSE if this conditional move doesn't match a MIN/MAX,
20770 and TRUE if the operation is successful and instructions are emitted. */
20771
20772 static bool
20773 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
20774 rtx cmp_op1, rtx if_true, rtx if_false)
20775 {
20776 enum machine_mode mode;
20777 bool is_min;
20778 rtx tmp;
20779
20780 if (code == LT)
20781 ;
20782 else if (code == UNGE)
20783 {
20784 tmp = if_true;
20785 if_true = if_false;
20786 if_false = tmp;
20787 }
20788 else
20789 return false;
20790
20791 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
20792 is_min = true;
20793 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
20794 is_min = false;
20795 else
20796 return false;
20797
20798 mode = GET_MODE (dest);
20799
20800 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
20801 but MODE may be a vector mode and thus not appropriate. */
20802 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
20803 {
20804 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
20805 rtvec v;
20806
20807 if_true = force_reg (mode, if_true);
20808 v = gen_rtvec (2, if_true, if_false);
20809 tmp = gen_rtx_UNSPEC (mode, v, u);
20810 }
20811 else
20812 {
20813 code = is_min ? SMIN : SMAX;
20814 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
20815 }
20816
20817 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
20818 return true;
20819 }
20820
20821 /* Expand an sse vector comparison. Return the register with the result. */
20822
20823 static rtx
20824 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
20825 rtx op_true, rtx op_false)
20826 {
20827 enum machine_mode mode = GET_MODE (dest);
20828 enum machine_mode cmp_ops_mode = GET_MODE (cmp_op0);
20829
20830 /* In the general case the result of the comparison can have a different mode from the operands'. */
20831 enum machine_mode cmp_mode;
20832
20833 /* In AVX512F the result of comparison is an integer mask. */
20834 bool maskcmp = false;
20835 rtx x;
20836
20837 if (GET_MODE_SIZE (cmp_ops_mode) == 64)
20838 {
20839 cmp_mode = mode_for_size (GET_MODE_NUNITS (cmp_ops_mode), MODE_INT, 0);
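/* One mask bit per vector element; e.g. a V16SImode compare yields an
   HImode (16-bit) mask. */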
20840 gcc_assert (cmp_mode != BLKmode);
20841
20842 maskcmp = true;
20843 }
20844 else
20845 cmp_mode = cmp_ops_mode;
20846
20847
20848 cmp_op0 = force_reg (cmp_ops_mode, cmp_op0);
20849 if (!nonimmediate_operand (cmp_op1, cmp_ops_mode))
20850 cmp_op1 = force_reg (cmp_ops_mode, cmp_op1);
20851
20852 if (optimize
20853 || reg_overlap_mentioned_p (dest, op_true)
20854 || reg_overlap_mentioned_p (dest, op_false))
20855 dest = gen_reg_rtx (maskcmp ? cmp_mode : mode);
20856
20857 /* Compare patterns for int modes are unspec in AVX512F only. */
20858 if (maskcmp && (code == GT || code == EQ))
20859 {
20860 rtx (*gen)(rtx, rtx, rtx);
20861
20862 switch (cmp_ops_mode)
20863 {
20864 case V16SImode:
20865 gen = code == GT ? gen_avx512f_gtv16si3 : gen_avx512f_eqv16si3_1;
20866 break;
20867 case V8DImode:
20868 gen = code == GT ? gen_avx512f_gtv8di3 : gen_avx512f_eqv8di3_1;
20869 break;
20870 default:
20871 gen = NULL;
20872 }
20873
20874 if (gen)
20875 {
20876 emit_insn (gen (dest, cmp_op0, cmp_op1));
20877 return dest;
20878 }
20879 }
20880 x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
20881
20882 if (cmp_mode != mode && !maskcmp)
20883 {
20884 x = force_reg (cmp_ops_mode, x);
20885 convert_move (dest, x, false);
20886 }
20887 else
20888 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20889
20890 return dest;
20891 }
20892
20893 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
20894 operations. This is used for both scalar and vector conditional moves. */
20895
20896 static void
20897 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
20898 {
20899 enum machine_mode mode = GET_MODE (dest);
20900 enum machine_mode cmpmode = GET_MODE (cmp);
20901
20902 /* In AVX512F the result of comparison is an integer mask. */
20903 bool maskcmp = (mode != cmpmode && TARGET_AVX512F);
20904
20905 rtx t2, t3, x;
20906
20907 if (vector_all_ones_operand (op_true, mode)
20908 && rtx_equal_p (op_false, CONST0_RTX (mode))
20909 && !maskcmp)
20910 {
20911 emit_insn (gen_rtx_SET (VOIDmode, dest, cmp));
20912 }
20913 else if (op_false == CONST0_RTX (mode)
20914 && !maskcmp)
20915 {
20916 op_true = force_reg (mode, op_true);
20917 x = gen_rtx_AND (mode, cmp, op_true);
20918 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20919 }
20920 else if (op_true == CONST0_RTX (mode)
20921 && !maskcmp)
20922 {
20923 op_false = force_reg (mode, op_false);
20924 x = gen_rtx_NOT (mode, cmp);
20925 x = gen_rtx_AND (mode, x, op_false);
20926 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20927 }
20928 else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode)
20929 && !maskcmp)
20930 {
20931 op_false = force_reg (mode, op_false);
20932 x = gen_rtx_IOR (mode, cmp, op_false);
20933 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20934 }
20935 else if (TARGET_XOP
20936 && !maskcmp)
20937 {
20938 op_true = force_reg (mode, op_true);
20939
20940 if (!nonimmediate_operand (op_false, mode))
20941 op_false = force_reg (mode, op_false);
20942
20943 emit_insn (gen_rtx_SET (mode, dest,
20944 gen_rtx_IF_THEN_ELSE (mode, cmp,
20945 op_true,
20946 op_false)));
20947 }
20948 else
20949 {
20950 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
20951 rtx d = dest;
20952
20953 if (!nonimmediate_operand (op_true, mode))
20954 op_true = force_reg (mode, op_true);
20955
20956 op_false = force_reg (mode, op_false);
20957
20958 switch (mode)
20959 {
20960 case V4SFmode:
20961 if (TARGET_SSE4_1)
20962 gen = gen_sse4_1_blendvps;
20963 break;
20964 case V2DFmode:
20965 if (TARGET_SSE4_1)
20966 gen = gen_sse4_1_blendvpd;
20967 break;
20968 case V16QImode:
20969 case V8HImode:
20970 case V4SImode:
20971 case V2DImode:
20972 if (TARGET_SSE4_1)
20973 {
20974 gen = gen_sse4_1_pblendvb;
20975 if (mode != V16QImode)
20976 d = gen_reg_rtx (V16QImode);
20977 op_false = gen_lowpart (V16QImode, op_false);
20978 op_true = gen_lowpart (V16QImode, op_true);
20979 cmp = gen_lowpart (V16QImode, cmp);
20980 }
20981 break;
20982 case V8SFmode:
20983 if (TARGET_AVX)
20984 gen = gen_avx_blendvps256;
20985 break;
20986 case V4DFmode:
20987 if (TARGET_AVX)
20988 gen = gen_avx_blendvpd256;
20989 break;
20990 case V32QImode:
20991 case V16HImode:
20992 case V8SImode:
20993 case V4DImode:
20994 if (TARGET_AVX2)
20995 {
20996 gen = gen_avx2_pblendvb;
20997 if (mode != V32QImode)
20998 d = gen_reg_rtx (V32QImode);
20999 op_false = gen_lowpart (V32QImode, op_false);
21000 op_true = gen_lowpart (V32QImode, op_true);
21001 cmp = gen_lowpart (V32QImode, cmp);
21002 }
21003 break;
21004
21005 case V16SImode:
21006 gen = gen_avx512f_blendmv16si;
21007 break;
21008 case V8DImode:
21009 gen = gen_avx512f_blendmv8di;
21010 break;
21011 case V8DFmode:
21012 gen = gen_avx512f_blendmv8df;
21013 break;
21014 case V16SFmode:
21015 gen = gen_avx512f_blendmv16sf;
21016 break;
21017
21018 default:
21019 break;
21020 }
21021
21022 if (gen != NULL)
21023 {
21024 emit_insn (gen (d, op_false, op_true, cmp));
21025 if (d != dest)
21026 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
21027 }
21028 else
21029 {
21030 op_true = force_reg (mode, op_true);
21031
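/* No blend instruction is available for this mode; emulate the select as
   dest = (op_true & cmp) | (op_false & ~cmp). */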
21032 t2 = gen_reg_rtx (mode);
21033 if (optimize)
21034 t3 = gen_reg_rtx (mode);
21035 else
21036 t3 = dest;
21037
21038 x = gen_rtx_AND (mode, op_true, cmp);
21039 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
21040
21041 x = gen_rtx_NOT (mode, cmp);
21042 x = gen_rtx_AND (mode, x, op_false);
21043 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
21044
21045 x = gen_rtx_IOR (mode, t3, t2);
21046 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21047 }
21048 }
21049 }
21050
21051 /* Expand a floating-point conditional move. Return true if successful. */
21052
21053 bool
21054 ix86_expand_fp_movcc (rtx operands[])
21055 {
21056 enum machine_mode mode = GET_MODE (operands[0]);
21057 enum rtx_code code = GET_CODE (operands[1]);
21058 rtx tmp, compare_op;
21059 rtx op0 = XEXP (operands[1], 0);
21060 rtx op1 = XEXP (operands[1], 1);
21061
21062 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
21063 {
21064 enum machine_mode cmode;
21065
21066 /* Since we have no cmove for SSE registers, don't force bad register
21067 allocation just to gain access to it. Deny movcc when the
21068 comparison mode doesn't match the move mode. */
21069 cmode = GET_MODE (op0);
21070 if (cmode == VOIDmode)
21071 cmode = GET_MODE (op1);
21072 if (cmode != mode)
21073 return false;
21074
21075 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
21076 if (code == UNKNOWN)
21077 return false;
21078
21079 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
21080 operands[2], operands[3]))
21081 return true;
21082
21083 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
21084 operands[2], operands[3]);
21085 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
21086 return true;
21087 }
21088
21089 if (GET_MODE (op0) == TImode
21090 || (GET_MODE (op0) == DImode
21091 && !TARGET_64BIT))
21092 return false;
21093
21094 /* The floating point conditional move instructions don't directly
21095 support conditions resulting from a signed integer comparison. */
21096
21097 compare_op = ix86_expand_compare (code, op0, op1);
21098 if (!fcmov_comparison_operator (compare_op, VOIDmode))
21099 {
21100 tmp = gen_reg_rtx (QImode);
21101 ix86_expand_setcc (tmp, code, op0, op1);
21102
21103 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
21104 }
21105
21106 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
21107 gen_rtx_IF_THEN_ELSE (mode, compare_op,
21108 operands[2], operands[3])));
21109
21110 return true;
21111 }
21112
21113 /* Expand a floating-point vector conditional move; a vcond operation
21114 rather than a movcc operation. */
21115
21116 bool
21117 ix86_expand_fp_vcond (rtx operands[])
21118 {
21119 enum rtx_code code = GET_CODE (operands[3]);
21120 rtx cmp;
21121
21122 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
21123 &operands[4], &operands[5]);
21124 if (code == UNKNOWN)
21125 {
21126 rtx temp;
21127 switch (GET_CODE (operands[3]))
21128 {
21129 case LTGT:
21130 temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
21131 operands[5], operands[0], operands[0]);
21132 cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
21133 operands[5], operands[1], operands[2]);
21134 code = AND;
21135 break;
21136 case UNEQ:
21137 temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
21138 operands[5], operands[0], operands[0]);
21139 cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
21140 operands[5], operands[1], operands[2]);
21141 code = IOR;
21142 break;
21143 default:
21144 gcc_unreachable ();
21145 }
21146 cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
21147 OPTAB_DIRECT);
21148 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21149 return true;
21150 }
21151
21152 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
21153 operands[5], operands[1], operands[2]))
21154 return true;
21155
21156 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
21157 operands[1], operands[2]);
21158 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21159 return true;
21160 }
21161
21162 /* Expand a signed/unsigned integral vector conditional move. */
21163
21164 bool
21165 ix86_expand_int_vcond (rtx operands[])
21166 {
21167 enum machine_mode data_mode = GET_MODE (operands[0]);
21168 enum machine_mode mode = GET_MODE (operands[4]);
21169 enum rtx_code code = GET_CODE (operands[3]);
21170 bool negate = false;
21171 rtx x, cop0, cop1;
21172
21173 cop0 = operands[4];
21174 cop1 = operands[5];
21175
21176 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
21177 and x < 0 ? 1 : 0 into (unsigned) x >> 31. */
21178 if ((code == LT || code == GE)
21179 && data_mode == mode
21180 && cop1 == CONST0_RTX (mode)
21181 && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
21182 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) > 1
21183 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) <= 8
21184 && (GET_MODE_SIZE (data_mode) == 16
21185 || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
21186 {
21187 rtx negop = operands[2 - (code == LT)];
21188 int shift = GET_MODE_BITSIZE (GET_MODE_INNER (data_mode)) - 1;
21189 if (negop == CONST1_RTX (data_mode))
21190 {
21191 rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
21192 operands[0], 1, OPTAB_DIRECT);
21193 if (res != operands[0])
21194 emit_move_insn (operands[0], res);
21195 return true;
21196 }
21197 else if (GET_MODE_INNER (data_mode) != DImode
21198 && vector_all_ones_operand (negop, data_mode))
21199 {
21200 rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
21201 operands[0], 0, OPTAB_DIRECT);
21202 if (res != operands[0])
21203 emit_move_insn (operands[0], res);
21204 return true;
21205 }
21206 }
21207
21208 if (!nonimmediate_operand (cop1, mode))
21209 cop1 = force_reg (mode, cop1);
21210 if (!general_operand (operands[1], data_mode))
21211 operands[1] = force_reg (data_mode, operands[1]);
21212 if (!general_operand (operands[2], data_mode))
21213 operands[2] = force_reg (data_mode, operands[2]);
21214
21215 /* XOP supports all of the comparisons on all 128-bit vector int types. */
21216 if (TARGET_XOP
21217 && (mode == V16QImode || mode == V8HImode
21218 || mode == V4SImode || mode == V2DImode))
21219 ;
21220 else
21221 {
21222 /* Canonicalize the comparison to EQ, GT, GTU. */
21223 switch (code)
21224 {
21225 case EQ:
21226 case GT:
21227 case GTU:
21228 break;
21229
21230 case NE:
21231 case LE:
21232 case LEU:
21233 code = reverse_condition (code);
21234 negate = true;
21235 break;
21236
21237 case GE:
21238 case GEU:
21239 code = reverse_condition (code);
21240 negate = true;
21241 /* FALLTHRU */
21242
21243 case LT:
21244 case LTU:
21245 code = swap_condition (code);
21246 x = cop0, cop0 = cop1, cop1 = x;
21247 break;
21248
21249 default:
21250 gcc_unreachable ();
21251 }
21252
21253 /* Only SSE4.1/SSE4.2 supports V2DImode. */
21254 if (mode == V2DImode)
21255 {
21256 switch (code)
21257 {
21258 case EQ:
21259 /* SSE4.1 supports EQ. */
21260 if (!TARGET_SSE4_1)
21261 return false;
21262 break;
21263
21264 case GT:
21265 case GTU:
21266 /* SSE4.2 supports GT/GTU. */
21267 if (!TARGET_SSE4_2)
21268 return false;
21269 break;
21270
21271 default:
21272 gcc_unreachable ();
21273 }
21274 }
21275
21276 /* Unsigned parallel compares are not supported by the hardware.
21277 Play some tricks to turn this into a signed comparison or, for
21278 the narrow modes, a comparison against 0. */
21279 if (code == GTU)
21280 {
21281 cop0 = force_reg (mode, cop0);
21282
21283 switch (mode)
21284 {
21285 case V16SImode:
21286 case V8DImode:
21287 case V8SImode:
21288 case V4DImode:
21289 case V4SImode:
21290 case V2DImode:
21291 {
21292 rtx t1, t2, mask;
21293 rtx (*gen_sub3) (rtx, rtx, rtx);
21294
21295 switch (mode)
21296 {
21297 case V16SImode: gen_sub3 = gen_subv16si3; break;
21298 case V8DImode: gen_sub3 = gen_subv8di3; break;
21299 case V8SImode: gen_sub3 = gen_subv8si3; break;
21300 case V4DImode: gen_sub3 = gen_subv4di3; break;
21301 case V4SImode: gen_sub3 = gen_subv4si3; break;
21302 case V2DImode: gen_sub3 = gen_subv2di3; break;
21303 default:
21304 gcc_unreachable ();
21305 }
21306 /* Subtract (-(INT MAX) - 1) from both operands to make
21307 them signed. */
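/* Subtracting the sign-bit constant is equivalent to flipping the sign bit
   of each element, so the unsigned ordering of the original operands equals
   the signed ordering of the biased operands and a signed GT can be used. */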
21308 mask = ix86_build_signbit_mask (mode, true, false);
21309 t1 = gen_reg_rtx (mode);
21310 emit_insn (gen_sub3 (t1, cop0, mask));
21311
21312 t2 = gen_reg_rtx (mode);
21313 emit_insn (gen_sub3 (t2, cop1, mask));
21314
21315 cop0 = t1;
21316 cop1 = t2;
21317 code = GT;
21318 }
21319 break;
21320
21321 case V32QImode:
21322 case V16HImode:
21323 case V16QImode:
21324 case V8HImode:
21325 /* Perform a parallel unsigned saturating subtraction. */
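/* The saturating difference cop0 -us cop1 is zero exactly when
   cop0 <=u cop1, so comparing the result against zero (and flipping
   NEGATE) implements the GTU test. */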
21326 x = gen_reg_rtx (mode);
21327 emit_insn (gen_rtx_SET (VOIDmode, x,
21328 gen_rtx_US_MINUS (mode, cop0, cop1)));
21329
21330 cop0 = x;
21331 cop1 = CONST0_RTX (mode);
21332 code = EQ;
21333 negate = !negate;
21334 break;
21335
21336 default:
21337 gcc_unreachable ();
21338 }
21339 }
21340 }
21341
21342 /* Allow the comparison to be done in one mode, but the movcc to
21343 happen in another mode. */
21344 if (data_mode == mode)
21345 {
21346 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
21347 operands[1+negate], operands[2-negate]);
21348 }
21349 else
21350 {
21351 gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
21352 x = ix86_expand_sse_cmp (gen_reg_rtx (mode), code, cop0, cop1,
21353 operands[1+negate], operands[2-negate]);
21354 if (GET_MODE (x) == mode)
21355 x = gen_lowpart (data_mode, x);
21356 }
21357
21358 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
21359 operands[2-negate]);
21360 return true;
21361 }
21362
21363 static bool
21364 ix86_expand_vec_perm_vpermi2 (rtx target, rtx op0, rtx mask, rtx op1)
21365 {
21366 enum machine_mode mode = GET_MODE (op0);
21367 switch (mode)
21368 {
21369 case V16SImode:
21370 emit_insn (gen_avx512f_vpermi2varv16si3 (target, op0,
21371 force_reg (V16SImode, mask),
21372 op1));
21373 return true;
21374 case V16SFmode:
21375 emit_insn (gen_avx512f_vpermi2varv16sf3 (target, op0,
21376 force_reg (V16SImode, mask),
21377 op1));
21378 return true;
21379 case V8DImode:
21380 emit_insn (gen_avx512f_vpermi2varv8di3 (target, op0,
21381 force_reg (V8DImode, mask), op1));
21382 return true;
21383 case V8DFmode:
21384 emit_insn (gen_avx512f_vpermi2varv8df3 (target, op0,
21385 force_reg (V8DImode, mask), op1));
21386 return true;
21387 default:
21388 return false;
21389 }
21390 }
21391
21392 /* Expand a variable vector permutation. */
21393
21394 void
21395 ix86_expand_vec_perm (rtx operands[])
21396 {
21397 rtx target = operands[0];
21398 rtx op0 = operands[1];
21399 rtx op1 = operands[2];
21400 rtx mask = operands[3];
21401 rtx t1, t2, t3, t4, t5, t6, t7, t8, vt, vt2, vec[32];
21402 enum machine_mode mode = GET_MODE (op0);
21403 enum machine_mode maskmode = GET_MODE (mask);
21404 int w, e, i;
21405 bool one_operand_shuffle = rtx_equal_p (op0, op1);
21406
21407 /* Number of elements in the vector. */
21408 w = GET_MODE_NUNITS (mode);
21409 e = GET_MODE_UNIT_SIZE (mode);
21410 gcc_assert (w <= 64);
21411
21412 if (ix86_expand_vec_perm_vpermi2 (target, op0, mask, op1))
21413 return;
21414
21415 if (TARGET_AVX2)
21416 {
21417 if (mode == V4DImode || mode == V4DFmode || mode == V16HImode)
21418 {
21419 /* Unfortunately, the VPERMQ and VPERMPD instructions only support
21420 a constant shuffle operand. With a tiny bit of effort we can
21421 use VPERMD instead. A re-interpretation stall for V4DFmode is
21422 unfortunate but there's no avoiding it.
21423 Similarly, for V16HImode we don't have instructions for variable
21424 shuffling, while for V32QImode we can, after preparing suitable
21425 masks, use vpshufb; vpshufb; vpermq; vpor. */
21426
21427 if (mode == V16HImode)
21428 {
21429 maskmode = mode = V32QImode;
21430 w = 32;
21431 e = 1;
21432 }
21433 else
21434 {
21435 maskmode = mode = V8SImode;
21436 w = 8;
21437 e = 4;
21438 }
21439 t1 = gen_reg_rtx (maskmode);
21440
21441 /* Replicate the low bits of the V4DImode mask into V8SImode:
21442 mask = { A B C D }
21443 t1 = { A A B B C C D D }. */
21444 for (i = 0; i < w / 2; ++i)
21445 vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
21446 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21447 vt = force_reg (maskmode, vt);
21448 mask = gen_lowpart (maskmode, mask);
21449 if (maskmode == V8SImode)
21450 emit_insn (gen_avx2_permvarv8si (t1, mask, vt));
21451 else
21452 emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));
21453
21454 /* Multiply the shuffle indices by two. */
21455 t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
21456 OPTAB_DIRECT);
21457
21458 /* Add one to the odd shuffle indices:
21459 t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
21460 for (i = 0; i < w / 2; ++i)
21461 {
21462 vec[i * 2] = const0_rtx;
21463 vec[i * 2 + 1] = const1_rtx;
21464 }
21465 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21466 vt = validize_mem (force_const_mem (maskmode, vt));
21467 t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
21468 OPTAB_DIRECT);
21469
21470 /* Continue as if V8SImode (resp. V32QImode) was used initially. */
21471 operands[3] = mask = t1;
21472 target = gen_reg_rtx (mode);
21473 op0 = gen_lowpart (mode, op0);
21474 op1 = gen_lowpart (mode, op1);
21475 }
21476
21477 switch (mode)
21478 {
21479 case V8SImode:
21480 /* The VPERMD and VPERMPS instructions already properly ignore
21481 the high bits of the shuffle elements. No need for us to
21482 perform an AND ourselves. */
21483 if (one_operand_shuffle)
21484 {
21485 emit_insn (gen_avx2_permvarv8si (target, op0, mask));
21486 if (target != operands[0])
21487 emit_move_insn (operands[0],
21488 gen_lowpart (GET_MODE (operands[0]), target));
21489 }
21490 else
21491 {
21492 t1 = gen_reg_rtx (V8SImode);
21493 t2 = gen_reg_rtx (V8SImode);
21494 emit_insn (gen_avx2_permvarv8si (t1, op0, mask));
21495 emit_insn (gen_avx2_permvarv8si (t2, op1, mask));
21496 goto merge_two;
21497 }
21498 return;
21499
21500 case V8SFmode:
21501 mask = gen_lowpart (V8SImode, mask);
21502 if (one_operand_shuffle)
21503 emit_insn (gen_avx2_permvarv8sf (target, op0, mask));
21504 else
21505 {
21506 t1 = gen_reg_rtx (V8SFmode);
21507 t2 = gen_reg_rtx (V8SFmode);
21508 emit_insn (gen_avx2_permvarv8sf (t1, op0, mask));
21509 emit_insn (gen_avx2_permvarv8sf (t2, op1, mask));
21510 goto merge_two;
21511 }
21512 return;
21513
21514 case V4SImode:
21515 /* By combining the two 128-bit input vectors into one 256-bit
21516 input vector, we can use VPERMD and VPERMPS for the full
21517 two-operand shuffle. */
21518 t1 = gen_reg_rtx (V8SImode);
21519 t2 = gen_reg_rtx (V8SImode);
21520 emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
21521 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
21522 emit_insn (gen_avx2_permvarv8si (t1, t1, t2));
21523 emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
21524 return;
21525
21526 case V4SFmode:
21527 t1 = gen_reg_rtx (V8SFmode);
21528 t2 = gen_reg_rtx (V8SImode);
21529 mask = gen_lowpart (V4SImode, mask);
21530 emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
21531 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
21532 emit_insn (gen_avx2_permvarv8sf (t1, t1, t2));
21533 emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
21534 return;
21535
21536 case V32QImode:
21537 t1 = gen_reg_rtx (V32QImode);
21538 t2 = gen_reg_rtx (V32QImode);
21539 t3 = gen_reg_rtx (V32QImode);
21540 vt2 = GEN_INT (-128);
21541 for (i = 0; i < 32; i++)
21542 vec[i] = vt2;
21543 vt = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
21544 vt = force_reg (V32QImode, vt);
21545 for (i = 0; i < 32; i++)
21546 vec[i] = i < 16 ? vt2 : const0_rtx;
21547 vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
21548 vt2 = force_reg (V32QImode, vt2);
21549 /* From mask create two adjusted masks, which contain the same
21550 bits as mask in the low 7 bits of each vector element.
21551 The first mask will have the most significant bit clear
21552 if it requests element from the same 128-bit lane
21553 and MSB set if it requests element from the other 128-bit lane.
21554 The second mask will have the opposite values of the MSB,
21555 and additionally will have its 128-bit lanes swapped.
21556 E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
21557 t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
21558 t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
21559 stands for the other 12 bytes. */
21560 /* The bit telling whether an element comes from the same lane or the other
21561 lane is bit 4, so shift it up by 3 to the MSB position. */
21562 t5 = gen_reg_rtx (V4DImode);
21563 emit_insn (gen_ashlv4di3 (t5, gen_lowpart (V4DImode, mask),
21564 GEN_INT (3)));
21565 /* Clear MSB bits from the mask just in case it had them set. */
21566 emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
21567 /* After this t1 will have the MSB set for elements from the other lane. */
21568 emit_insn (gen_xorv32qi3 (t1, gen_lowpart (V32QImode, t5), vt2));
21569 /* Clear bits other than MSB. */
21570 emit_insn (gen_andv32qi3 (t1, t1, vt));
21571 /* Or in the lower bits from mask into t3. */
21572 emit_insn (gen_iorv32qi3 (t3, t1, t2));
21573 /* And invert MSB bits in t1, so MSB is set for elements from the same
21574 lane. */
21575 emit_insn (gen_xorv32qi3 (t1, t1, vt));
21576 /* Swap 128-bit lanes in t3. */
21577 t6 = gen_reg_rtx (V4DImode);
21578 emit_insn (gen_avx2_permv4di_1 (t6, gen_lowpart (V4DImode, t3),
21579 const2_rtx, GEN_INT (3),
21580 const0_rtx, const1_rtx));
21581 /* And or in the lower bits from mask into t1. */
21582 emit_insn (gen_iorv32qi3 (t1, t1, t2));
21583 if (one_operand_shuffle)
21584 {
21585 /* Each of these shuffles will put 0s in places where
21586 an element from the other 128-bit lane is needed; otherwise it
21587 will shuffle in the requested value. */
21588 emit_insn (gen_avx2_pshufbv32qi3 (t3, op0,
21589 gen_lowpart (V32QImode, t6)));
21590 emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
21591 /* For t3 the 128-bit lanes are swapped again. */
21592 t7 = gen_reg_rtx (V4DImode);
21593 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t3),
21594 const2_rtx, GEN_INT (3),
21595 const0_rtx, const1_rtx));
21596 /* And oring both together leads to the result. */
21597 emit_insn (gen_iorv32qi3 (target, t1,
21598 gen_lowpart (V32QImode, t7)));
21599 if (target != operands[0])
21600 emit_move_insn (operands[0],
21601 gen_lowpart (GET_MODE (operands[0]), target));
21602 return;
21603 }
21604
21605 t4 = gen_reg_rtx (V32QImode);
21606 /* Similarly to the above one_operand_shuffle code,
21607 just repeated twice for each operand. The merge_two:
21608 code will merge the two results together. */
21609 emit_insn (gen_avx2_pshufbv32qi3 (t4, op0,
21610 gen_lowpart (V32QImode, t6)));
21611 emit_insn (gen_avx2_pshufbv32qi3 (t3, op1,
21612 gen_lowpart (V32QImode, t6)));
21613 emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
21614 emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
21615 t7 = gen_reg_rtx (V4DImode);
21616 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t4),
21617 const2_rtx, GEN_INT (3),
21618 const0_rtx, const1_rtx));
21619 t8 = gen_reg_rtx (V4DImode);
21620 emit_insn (gen_avx2_permv4di_1 (t8, gen_lowpart (V4DImode, t3),
21621 const2_rtx, GEN_INT (3),
21622 const0_rtx, const1_rtx));
21623 emit_insn (gen_iorv32qi3 (t4, t2, gen_lowpart (V32QImode, t7)));
21624 emit_insn (gen_iorv32qi3 (t3, t1, gen_lowpart (V32QImode, t8)));
21625 t1 = t4;
21626 t2 = t3;
21627 goto merge_two;
21628
21629 default:
21630 gcc_assert (GET_MODE_SIZE (mode) <= 16);
21631 break;
21632 }
21633 }
21634
21635 if (TARGET_XOP)
21636 {
21637 /* The XOP VPPERM insn supports three inputs. By ignoring the
21638 one_operand_shuffle special case, we avoid creating another
21639 set of constant vectors in memory. */
21640 one_operand_shuffle = false;
21641
21642 /* mask = mask & {2*w-1, ...} */
21643 vt = GEN_INT (2*w - 1);
21644 }
21645 else
21646 {
21647 /* mask = mask & {w-1, ...} */
21648 vt = GEN_INT (w - 1);
21649 }
21650
21651 for (i = 0; i < w; i++)
21652 vec[i] = vt;
21653 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21654 mask = expand_simple_binop (maskmode, AND, mask, vt,
21655 NULL_RTX, 0, OPTAB_DIRECT);
21656
21657 /* For non-QImode operations, convert the word permutation control
21658 into a byte permutation control. */
21659 if (mode != V16QImode)
21660 {
21661 mask = expand_simple_binop (maskmode, ASHIFT, mask,
21662 GEN_INT (exact_log2 (e)),
21663 NULL_RTX, 0, OPTAB_DIRECT);
21664
21665 /* Convert mask to vector of chars. */
21666 mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));
21667
21668 /* Replicate each of the input bytes into byte positions:
21669 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
21670 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
21671 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
21672 for (i = 0; i < 16; ++i)
21673 vec[i] = GEN_INT (i/e * e);
21674 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
21675 vt = validize_mem (force_const_mem (V16QImode, vt));
21676 if (TARGET_XOP)
21677 emit_insn (gen_xop_pperm (mask, mask, mask, vt));
21678 else
21679 emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));
21680
21681 /* Convert it into the byte positions by doing
21682 mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...} */
21683 for (i = 0; i < 16; ++i)
21684 vec[i] = GEN_INT (i % e);
21685 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
21686 vt = validize_mem (force_const_mem (V16QImode, vt));
21687 emit_insn (gen_addv16qi3 (mask, mask, vt));
21688 }
21689
21690 /* The actual shuffle operations all operate on V16QImode. */
21691 op0 = gen_lowpart (V16QImode, op0);
21692 op1 = gen_lowpart (V16QImode, op1);
21693
21694 if (TARGET_XOP)
21695 {
21696 if (GET_MODE (target) != V16QImode)
21697 target = gen_reg_rtx (V16QImode);
21698 emit_insn (gen_xop_pperm (target, op0, op1, mask));
21699 if (target != operands[0])
21700 emit_move_insn (operands[0],
21701 gen_lowpart (GET_MODE (operands[0]), target));
21702 }
21703 else if (one_operand_shuffle)
21704 {
21705 if (GET_MODE (target) != V16QImode)
21706 target = gen_reg_rtx (V16QImode);
21707 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
21708 if (target != operands[0])
21709 emit_move_insn (operands[0],
21710 gen_lowpart (GET_MODE (operands[0]), target));
21711 }
21712 else
21713 {
21714 rtx xops[6];
21715 bool ok;
21716
21717 /* Shuffle the two input vectors independently. */
21718 t1 = gen_reg_rtx (V16QImode);
21719 t2 = gen_reg_rtx (V16QImode);
21720 emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
21721 emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));
21722
21723 merge_two:
21724 /* Then merge them together. The key is whether any given control
21725 element contained a bit set that indicates the second word. */
21726 mask = operands[3];
21727 vt = GEN_INT (w);
21728 if (maskmode == V2DImode && !TARGET_SSE4_1)
21729 {
21730 /* Without SSE4.1, we don't have V2DImode EQ. Perform one
21731 more shuffle to convert the V2DI input mask into a V4SI
21732 input mask. At which point the masking that expand_int_vcond
21733 will work as desired. */
21734 rtx t3 = gen_reg_rtx (V4SImode);
21735 emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
21736 const0_rtx, const0_rtx,
21737 const2_rtx, const2_rtx));
21738 mask = t3;
21739 maskmode = V4SImode;
21740 e = w = 4;
21741 }
21742
21743 for (i = 0; i < w; i++)
21744 vec[i] = vt;
21745 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21746 vt = force_reg (maskmode, vt);
21747 mask = expand_simple_binop (maskmode, AND, mask, vt,
21748 NULL_RTX, 0, OPTAB_DIRECT);
21749
21750 if (GET_MODE (target) != mode)
21751 target = gen_reg_rtx (mode);
21752 xops[0] = target;
21753 xops[1] = gen_lowpart (mode, t2);
21754 xops[2] = gen_lowpart (mode, t1);
21755 xops[3] = gen_rtx_EQ (maskmode, mask, vt);
21756 xops[4] = mask;
21757 xops[5] = vt;
21758 ok = ix86_expand_int_vcond (xops);
21759 gcc_assert (ok);
21760 if (target != operands[0])
21761 emit_move_insn (operands[0],
21762 gen_lowpart (GET_MODE (operands[0]), target));
21763 }
21764 }
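
/* Illustrative sketch (editor's addition, excluded from compilation; the
   helper name is hypothetical and not part of the i386 port): how the
   word-level permutation control computed in the expander above is turned
   into the byte-level control that PSHUFB consumes, shown for the V8HImode
   case where the element size E is 2.  Each word index is scaled by E and
   the per-byte offsets 0..E-1 are added, mirroring the shift, pshufb and
   add steps above.  */
#if 0
static void
example_word_to_byte_control (const unsigned char word_sel[8],
			      unsigned char byte_sel[16])
{
  int i;
  for (i = 0; i < 16; i++)
    /* Which word to fetch, times 2, plus which byte inside that word.  */
    byte_sel[i] = (unsigned char) (word_sel[i / 2] * 2 + i % 2);
}
#endif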
21765
21766 /* Unpack SRC into DEST as the next wider integer vector type. UNSIGNED_P is
21767 true if we should do zero extension, else sign extension. HIGH_P is
21768 true if we want the N/2 high elements, else the low elements. */
21769
21770 void
21771 ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
21772 {
21773 enum machine_mode imode = GET_MODE (src);
21774 rtx tmp;
21775
21776 if (TARGET_SSE4_1)
21777 {
21778 rtx (*unpack)(rtx, rtx);
21779 rtx (*extract)(rtx, rtx) = NULL;
21780 enum machine_mode halfmode = BLKmode;
21781
21782 switch (imode)
21783 {
21784 case V32QImode:
21785 if (unsigned_p)
21786 unpack = gen_avx2_zero_extendv16qiv16hi2;
21787 else
21788 unpack = gen_avx2_sign_extendv16qiv16hi2;
21789 halfmode = V16QImode;
21790 extract
21791 = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
21792 break;
21793 case V32HImode:
21794 if (unsigned_p)
21795 unpack = gen_avx512f_zero_extendv16hiv16si2;
21796 else
21797 unpack = gen_avx512f_sign_extendv16hiv16si2;
21798 halfmode = V16HImode;
21799 extract
21800 = high_p ? gen_vec_extract_hi_v32hi : gen_vec_extract_lo_v32hi;
21801 break;
21802 case V16HImode:
21803 if (unsigned_p)
21804 unpack = gen_avx2_zero_extendv8hiv8si2;
21805 else
21806 unpack = gen_avx2_sign_extendv8hiv8si2;
21807 halfmode = V8HImode;
21808 extract
21809 = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
21810 break;
21811 case V16SImode:
21812 if (unsigned_p)
21813 unpack = gen_avx512f_zero_extendv8siv8di2;
21814 else
21815 unpack = gen_avx512f_sign_extendv8siv8di2;
21816 halfmode = V8SImode;
21817 extract
21818 = high_p ? gen_vec_extract_hi_v16si : gen_vec_extract_lo_v16si;
21819 break;
21820 case V8SImode:
21821 if (unsigned_p)
21822 unpack = gen_avx2_zero_extendv4siv4di2;
21823 else
21824 unpack = gen_avx2_sign_extendv4siv4di2;
21825 halfmode = V4SImode;
21826 extract
21827 = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
21828 break;
21829 case V16QImode:
21830 if (unsigned_p)
21831 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
21832 else
21833 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
21834 break;
21835 case V8HImode:
21836 if (unsigned_p)
21837 unpack = gen_sse4_1_zero_extendv4hiv4si2;
21838 else
21839 unpack = gen_sse4_1_sign_extendv4hiv4si2;
21840 break;
21841 case V4SImode:
21842 if (unsigned_p)
21843 unpack = gen_sse4_1_zero_extendv2siv2di2;
21844 else
21845 unpack = gen_sse4_1_sign_extendv2siv2di2;
21846 break;
21847 default:
21848 gcc_unreachable ();
21849 }
21850
21851 if (GET_MODE_SIZE (imode) >= 32)
21852 {
21853 tmp = gen_reg_rtx (halfmode);
21854 emit_insn (extract (tmp, src));
21855 }
21856 else if (high_p)
21857 {
21858 /* Shift higher 8 bytes to lower 8 bytes. */
21859 tmp = gen_reg_rtx (V1TImode);
21860 emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, src),
21861 GEN_INT (64)));
21862 tmp = gen_lowpart (imode, tmp);
21863 }
21864 else
21865 tmp = src;
21866
21867 emit_insn (unpack (dest, tmp));
21868 }
21869 else
21870 {
21871 rtx (*unpack)(rtx, rtx, rtx);
21872
21873 switch (imode)
21874 {
21875 case V16QImode:
21876 if (high_p)
21877 unpack = gen_vec_interleave_highv16qi;
21878 else
21879 unpack = gen_vec_interleave_lowv16qi;
21880 break;
21881 case V8HImode:
21882 if (high_p)
21883 unpack = gen_vec_interleave_highv8hi;
21884 else
21885 unpack = gen_vec_interleave_lowv8hi;
21886 break;
21887 case V4SImode:
21888 if (high_p)
21889 unpack = gen_vec_interleave_highv4si;
21890 else
21891 unpack = gen_vec_interleave_lowv4si;
21892 break;
21893 default:
21894 gcc_unreachable ();
21895 }
21896
21897 if (unsigned_p)
21898 tmp = force_reg (imode, CONST0_RTX (imode));
21899 else
21900 tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
21901 src, pc_rtx, pc_rtx);
21902
21903 rtx tmp2 = gen_reg_rtx (imode);
21904 emit_insn (unpack (tmp2, src, tmp));
21905 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), tmp2));
21906 }
21907 }
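
/* Illustrative sketch (editor's addition, excluded from compilation; the
   helper name is hypothetical): the pre-SSE4.1 widening scheme used by
   ix86_expand_sse_unpack above.  For sign extension each element is
   interleaved with a companion lane that is all-ones when the element is
   negative and zero otherwise; for zero extension the companion lane is
   simply zero.  Shown for the low half of a 16-bit to 32-bit unpack.  */
#if 0
static void
example_sse2_sign_unpack_lo (const short src[8], int dst[4])
{
  int i;
  for (i = 0; i < 4; i++)
    {
      /* The "high" half of each result comes from the compare 0 > src[i],
	 i.e. all-ones for negative elements.  */
      unsigned int hi = (0 > src[i]) ? 0xffffu : 0u;
      dst[i] = (int) ((unsigned int) (unsigned short) src[i] | (hi << 16));
    }
}
#endif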
21908
21909 /* Expand conditional increment or decrement using adc/sbb instructions.
21910 The default case using setcc followed by the conditional move can be
21911 done by generic code. */
21912 bool
21913 ix86_expand_int_addcc (rtx operands[])
21914 {
21915 enum rtx_code code = GET_CODE (operands[1]);
21916 rtx flags;
21917 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
21918 rtx compare_op;
21919 rtx val = const0_rtx;
21920 bool fpcmp = false;
21921 enum machine_mode mode;
21922 rtx op0 = XEXP (operands[1], 0);
21923 rtx op1 = XEXP (operands[1], 1);
21924
21925 if (operands[3] != const1_rtx
21926 && operands[3] != constm1_rtx)
21927 return false;
21928 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
21929 return false;
21930 code = GET_CODE (compare_op);
21931
21932 flags = XEXP (compare_op, 0);
21933
21934 if (GET_MODE (flags) == CCFPmode
21935 || GET_MODE (flags) == CCFPUmode)
21936 {
21937 fpcmp = true;
21938 code = ix86_fp_compare_code_to_integer (code);
21939 }
21940
21941 if (code != LTU)
21942 {
21943 val = constm1_rtx;
21944 if (fpcmp)
21945 PUT_CODE (compare_op,
21946 reverse_condition_maybe_unordered
21947 (GET_CODE (compare_op)));
21948 else
21949 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
21950 }
21951
21952 mode = GET_MODE (operands[0]);
21953
21954 /* Construct either adc or sbb insn. */
21955 if ((code == LTU) == (operands[3] == constm1_rtx))
21956 {
21957 switch (mode)
21958 {
21959 case QImode:
21960 insn = gen_subqi3_carry;
21961 break;
21962 case HImode:
21963 insn = gen_subhi3_carry;
21964 break;
21965 case SImode:
21966 insn = gen_subsi3_carry;
21967 break;
21968 case DImode:
21969 insn = gen_subdi3_carry;
21970 break;
21971 default:
21972 gcc_unreachable ();
21973 }
21974 }
21975 else
21976 {
21977 switch (mode)
21978 {
21979 case QImode:
21980 insn = gen_addqi3_carry;
21981 break;
21982 case HImode:
21983 insn = gen_addhi3_carry;
21984 break;
21985 case SImode:
21986 insn = gen_addsi3_carry;
21987 break;
21988 case DImode:
21989 insn = gen_adddi3_carry;
21990 break;
21991 default:
21992 gcc_unreachable ();
21993 }
21994 }
21995 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
21996
21997 return true;
21998 }
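
/* Illustrative sketch (editor's addition, excluded from compilation; the
   function name is hypothetical): the source-level idiom that
   ix86_expand_int_addcc above targets.  With an unsigned comparison the
   carry flag produced by CMP feeds ADC/SBB directly, so no SETCC or
   conditional move is needed.  */
#if 0
static unsigned int
example_conditional_increment (unsigned int x, unsigned int a, unsigned int b)
{
  /* Expected to expand to roughly:  cmp a, b ; adc x, 0.  */
  return x + (a < b);
}
#endif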
21999
22000
22001 /* Split operands 0 and 1 into half-mode parts. Similar to split_double_mode,
22002 but works for floating point parameters and non-offsettable memories.
22003 For pushes, it returns just stack offsets; the values will be saved
22004 in the right order. At most four parts are generated. */
22005
22006 static int
22007 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
22008 {
22009 int size;
22010
22011 if (!TARGET_64BIT)
22012 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
22013 else
22014 size = (GET_MODE_SIZE (mode) + 4) / 8;
22015
22016 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
22017 gcc_assert (size >= 2 && size <= 4);
22018
22019 /* Optimize constant pool reference to immediates. This is used by fp
22020 moves, that force all constants to memory to allow combining. */
22021 if (MEM_P (operand) && MEM_READONLY_P (operand))
22022 {
22023 rtx tmp = maybe_get_pool_constant (operand);
22024 if (tmp)
22025 operand = tmp;
22026 }
22027
22028 if (MEM_P (operand) && !offsettable_memref_p (operand))
22029 {
22030 /* The only non-offsettable memories we handle are pushes. */
22031 int ok = push_operand (operand, VOIDmode);
22032
22033 gcc_assert (ok);
22034
22035 operand = copy_rtx (operand);
22036 PUT_MODE (operand, word_mode);
22037 parts[0] = parts[1] = parts[2] = parts[3] = operand;
22038 return size;
22039 }
22040
22041 if (GET_CODE (operand) == CONST_VECTOR)
22042 {
22043 enum machine_mode imode = int_mode_for_mode (mode);
22044 /* Caution: if we looked through a constant pool memory above,
22045 the operand may actually have a different mode now. That's
22046 ok, since we want to pun this all the way back to an integer. */
22047 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
22048 gcc_assert (operand != NULL);
22049 mode = imode;
22050 }
22051
22052 if (!TARGET_64BIT)
22053 {
22054 if (mode == DImode)
22055 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22056 else
22057 {
22058 int i;
22059
22060 if (REG_P (operand))
22061 {
22062 gcc_assert (reload_completed);
22063 for (i = 0; i < size; i++)
22064 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
22065 }
22066 else if (offsettable_memref_p (operand))
22067 {
22068 operand = adjust_address (operand, SImode, 0);
22069 parts[0] = operand;
22070 for (i = 1; i < size; i++)
22071 parts[i] = adjust_address (operand, SImode, 4 * i);
22072 }
22073 else if (GET_CODE (operand) == CONST_DOUBLE)
22074 {
22075 REAL_VALUE_TYPE r;
22076 long l[4];
22077
22078 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22079 switch (mode)
22080 {
22081 case TFmode:
22082 real_to_target (l, &r, mode);
22083 parts[3] = gen_int_mode (l[3], SImode);
22084 parts[2] = gen_int_mode (l[2], SImode);
22085 break;
22086 case XFmode:
22087 /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since
22088 long double may not be 80-bit. */
22089 real_to_target (l, &r, mode);
22090 parts[2] = gen_int_mode (l[2], SImode);
22091 break;
22092 case DFmode:
22093 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
22094 break;
22095 default:
22096 gcc_unreachable ();
22097 }
22098 parts[1] = gen_int_mode (l[1], SImode);
22099 parts[0] = gen_int_mode (l[0], SImode);
22100 }
22101 else
22102 gcc_unreachable ();
22103 }
22104 }
22105 else
22106 {
22107 if (mode == TImode)
22108 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22109 if (mode == XFmode || mode == TFmode)
22110 {
22111 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
22112 if (REG_P (operand))
22113 {
22114 gcc_assert (reload_completed);
22115 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
22116 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
22117 }
22118 else if (offsettable_memref_p (operand))
22119 {
22120 operand = adjust_address (operand, DImode, 0);
22121 parts[0] = operand;
22122 parts[1] = adjust_address (operand, upper_mode, 8);
22123 }
22124 else if (GET_CODE (operand) == CONST_DOUBLE)
22125 {
22126 REAL_VALUE_TYPE r;
22127 long l[4];
22128
22129 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22130 real_to_target (l, &r, mode);
22131
22132 /* Do not use shift by 32 to avoid warning on 32bit systems. */
22133 if (HOST_BITS_PER_WIDE_INT >= 64)
22134 parts[0]
22135 = gen_int_mode
22136 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
22137 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
22138 DImode);
22139 else
22140 parts[0] = immed_double_const (l[0], l[1], DImode);
22141
22142 if (upper_mode == SImode)
22143 parts[1] = gen_int_mode (l[2], SImode);
22144 else if (HOST_BITS_PER_WIDE_INT >= 64)
22145 parts[1]
22146 = gen_int_mode
22147 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
22148 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
22149 DImode);
22150 else
22151 parts[1] = immed_double_const (l[2], l[3], DImode);
22152 }
22153 else
22154 gcc_unreachable ();
22155 }
22156 }
22157
22158 return size;
22159 }
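
/* Illustrative sketch (editor's addition, excluded from compilation; the
   helper name is hypothetical, and an 8-byte little-endian double is
   assumed): what splitting a DFmode constant into two SImode parts, as
   ix86_split_to_parts does for !TARGET_64BIT, amounts to - parts[0]
   receives the low 32 bits of the IEEE image and parts[1] the high 32
   bits.  */
#if 0
static void
example_split_double_constant (double value, unsigned int parts[2])
{
  unsigned char bytes[8];
  int i;
  __builtin_memcpy (bytes, &value, 8);
  parts[0] = parts[1] = 0;
  for (i = 0; i < 4; i++)
    {
      parts[0] |= (unsigned int) bytes[i] << (8 * i);
      parts[1] |= (unsigned int) bytes[i + 4] << (8 * i);
    }
}
#endif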
22160
22161 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
22162 All required insns are emitted by this function; there is no
22163 return value. Operands 2-4 contain the input values
22164 in the correct order; operands 5-7 contain the output values. */
22165
22166 void
22167 ix86_split_long_move (rtx operands[])
22168 {
22169 rtx part[2][4];
22170 int nparts, i, j;
22171 int push = 0;
22172 int collisions = 0;
22173 enum machine_mode mode = GET_MODE (operands[0]);
22174 bool collisionparts[4];
22175
22176 /* The DFmode expanders may ask us to move a double.
22177 For a 64-bit target this is a single move. By hiding the fact
22178 here we simplify the i386.md splitters. */
22179 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
22180 {
22181 /* Optimize constant pool reference to immediates. This is used by
22182 fp moves, that force all constants to memory to allow combining. */
22183
22184 if (MEM_P (operands[1])
22185 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
22186 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
22187 operands[1] = get_pool_constant (XEXP (operands[1], 0));
22188 if (push_operand (operands[0], VOIDmode))
22189 {
22190 operands[0] = copy_rtx (operands[0]);
22191 PUT_MODE (operands[0], word_mode);
22192 }
22193 else
22194 operands[0] = gen_lowpart (DImode, operands[0]);
22195 operands[1] = gen_lowpart (DImode, operands[1]);
22196 emit_move_insn (operands[0], operands[1]);
22197 return;
22198 }
22199
22200 /* The only non-offsettable memory we handle is push. */
22201 if (push_operand (operands[0], VOIDmode))
22202 push = 1;
22203 else
22204 gcc_assert (!MEM_P (operands[0])
22205 || offsettable_memref_p (operands[0]));
22206
22207 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
22208 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
22209
22210 /* When emitting a push, take care of source operands on the stack. */
22211 if (push && MEM_P (operands[1])
22212 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
22213 {
22214 rtx src_base = XEXP (part[1][nparts - 1], 0);
22215
22216 /* Compensate for the stack decrement by 4. */
22217 if (!TARGET_64BIT && nparts == 3
22218 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
22219 src_base = plus_constant (Pmode, src_base, 4);
22220
22221 /* src_base refers to the stack pointer and is
22222 automatically decreased by emitted push. */
22223 for (i = 0; i < nparts; i++)
22224 part[1][i] = change_address (part[1][i],
22225 GET_MODE (part[1][i]), src_base);
22226 }
22227
22228 /* We need to do the copy in the right order in case an address register
22229 of the source overlaps the destination. */
22230 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
22231 {
22232 rtx tmp;
22233
22234 for (i = 0; i < nparts; i++)
22235 {
22236 collisionparts[i]
22237 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
22238 if (collisionparts[i])
22239 collisions++;
22240 }
22241
22242 /* Collision in the middle part can be handled by reordering. */
22243 if (collisions == 1 && nparts == 3 && collisionparts [1])
22244 {
22245 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
22246 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
22247 }
22248 else if (collisions == 1
22249 && nparts == 4
22250 && (collisionparts [1] || collisionparts [2]))
22251 {
22252 if (collisionparts [1])
22253 {
22254 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
22255 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
22256 }
22257 else
22258 {
22259 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
22260 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
22261 }
22262 }
22263
22264 /* If there are more collisions, we can't handle it by reordering.
22265 Do an lea to the last part and use only one colliding move. */
22266 else if (collisions > 1)
22267 {
22268 rtx base;
22269
22270 collisions = 1;
22271
22272 base = part[0][nparts - 1];
22273
22274 /* Handle the case when the last part isn't valid for lea.
22275 Happens in 64-bit mode storing the 12-byte XFmode. */
22276 if (GET_MODE (base) != Pmode)
22277 base = gen_rtx_REG (Pmode, REGNO (base));
22278
22279 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
22280 part[1][0] = replace_equiv_address (part[1][0], base);
22281 for (i = 1; i < nparts; i++)
22282 {
22283 tmp = plus_constant (Pmode, base, UNITS_PER_WORD * i);
22284 part[1][i] = replace_equiv_address (part[1][i], tmp);
22285 }
22286 }
22287 }
22288
22289 if (push)
22290 {
22291 if (!TARGET_64BIT)
22292 {
22293 if (nparts == 3)
22294 {
22295 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
22296 emit_insn (ix86_gen_add3 (stack_pointer_rtx,
22297 stack_pointer_rtx, GEN_INT (-4)));
22298 emit_move_insn (part[0][2], part[1][2]);
22299 }
22300 else if (nparts == 4)
22301 {
22302 emit_move_insn (part[0][3], part[1][3]);
22303 emit_move_insn (part[0][2], part[1][2]);
22304 }
22305 }
22306 else
22307 {
22308 /* In 64-bit mode we don't have a 32-bit push available. In case this is
22309 a register, it is OK - we will just use the larger counterpart. We also
22310 retype memory - this comes from an attempt to avoid a REX prefix on
22311 moving the second half of a TFmode value. */
22312 if (GET_MODE (part[1][1]) == SImode)
22313 {
22314 switch (GET_CODE (part[1][1]))
22315 {
22316 case MEM:
22317 part[1][1] = adjust_address (part[1][1], DImode, 0);
22318 break;
22319
22320 case REG:
22321 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
22322 break;
22323
22324 default:
22325 gcc_unreachable ();
22326 }
22327
22328 if (GET_MODE (part[1][0]) == SImode)
22329 part[1][0] = part[1][1];
22330 }
22331 }
22332 emit_move_insn (part[0][1], part[1][1]);
22333 emit_move_insn (part[0][0], part[1][0]);
22334 return;
22335 }
22336
22337 /* Choose correct order to not overwrite the source before it is copied. */
22338 if ((REG_P (part[0][0])
22339 && REG_P (part[1][1])
22340 && (REGNO (part[0][0]) == REGNO (part[1][1])
22341 || (nparts == 3
22342 && REGNO (part[0][0]) == REGNO (part[1][2]))
22343 || (nparts == 4
22344 && REGNO (part[0][0]) == REGNO (part[1][3]))))
22345 || (collisions > 0
22346 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
22347 {
22348 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
22349 {
22350 operands[2 + i] = part[0][j];
22351 operands[6 + i] = part[1][j];
22352 }
22353 }
22354 else
22355 {
22356 for (i = 0; i < nparts; i++)
22357 {
22358 operands[2 + i] = part[0][i];
22359 operands[6 + i] = part[1][i];
22360 }
22361 }
22362
22363 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
22364 if (optimize_insn_for_size_p ())
22365 {
22366 for (j = 0; j < nparts - 1; j++)
22367 if (CONST_INT_P (operands[6 + j])
22368 && operands[6 + j] != const0_rtx
22369 && REG_P (operands[2 + j]))
22370 for (i = j; i < nparts - 1; i++)
22371 if (CONST_INT_P (operands[7 + i])
22372 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
22373 operands[7 + i] = operands[2 + j];
22374 }
22375
22376 for (i = 0; i < nparts; i++)
22377 emit_move_insn (operands[2 + i], operands[6 + i]);
22378
22379 return;
22380 }
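
/* Illustrative sketch (editor's addition, excluded from compilation; the
   helper name is hypothetical): the overlap problem the ordering logic in
   ix86_split_long_move solves.  If the register that will receive the low
   half of the destination is also used to address the memory source, the
   halves must be copied high part first so the address is still intact
   when it is last used.  */
#if 0
static void
example_overlap_safe_copy (unsigned int dest[2], const unsigned int *src_addr)
{
  /* Imagine dest[0] shares a register with src_addr: copying the high
     half first keeps the address alive for both loads.  */
  dest[1] = src_addr[1];
  dest[0] = src_addr[0];
}
#endif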
22381
22382 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
22383 left shift by a constant, either using a single shift or
22384 a sequence of add instructions. */
22385
22386 static void
22387 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
22388 {
22389 rtx (*insn)(rtx, rtx, rtx);
22390
22391 if (count == 1
22392 || (count * ix86_cost->add <= ix86_cost->shift_const
22393 && !optimize_insn_for_size_p ()))
22394 {
22395 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
22396 while (count-- > 0)
22397 emit_insn (insn (operand, operand, operand));
22398 }
22399 else
22400 {
22401 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
22402 emit_insn (insn (operand, operand, GEN_INT (count)));
22403 }
22404 }
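
/* Illustrative sketch (editor's addition, excluded from compilation; the
   function name is hypothetical): the trade-off made by
   ix86_expand_ashl_const above.  A left shift by a small constant may be
   emitted as repeated additions when COUNT times the add cost does not
   exceed the constant-shift cost; a shift by 1 is always a single add.  */
#if 0
static unsigned int
example_shift_left_by_two_via_adds (unsigned int x)
{
  x += x;	/* x << 1 */
  x += x;	/* x << 2 */
  return x;
}
#endif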
22405
22406 void
22407 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
22408 {
22409 rtx (*gen_ashl3)(rtx, rtx, rtx);
22410 rtx (*gen_shld)(rtx, rtx, rtx);
22411 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22412
22413 rtx low[2], high[2];
22414 int count;
22415
22416 if (CONST_INT_P (operands[2]))
22417 {
22418 split_double_mode (mode, operands, 2, low, high);
22419 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22420
22421 if (count >= half_width)
22422 {
22423 emit_move_insn (high[0], low[1]);
22424 emit_move_insn (low[0], const0_rtx);
22425
22426 if (count > half_width)
22427 ix86_expand_ashl_const (high[0], count - half_width, mode);
22428 }
22429 else
22430 {
22431 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
22432
22433 if (!rtx_equal_p (operands[0], operands[1]))
22434 emit_move_insn (operands[0], operands[1]);
22435
22436 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
22437 ix86_expand_ashl_const (low[0], count, mode);
22438 }
22439 return;
22440 }
22441
22442 split_double_mode (mode, operands, 1, low, high);
22443
22444 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
22445
22446 if (operands[1] == const1_rtx)
22447 {
22448 /* Assuming we've chosen QImode-capable registers, then 1 << N
22449 can be done with two 32/64-bit shifts, no branches, no cmoves. */
22450 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
22451 {
22452 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
22453
22454 ix86_expand_clear (low[0]);
22455 ix86_expand_clear (high[0]);
22456 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
22457
22458 d = gen_lowpart (QImode, low[0]);
22459 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
22460 s = gen_rtx_EQ (QImode, flags, const0_rtx);
22461 emit_insn (gen_rtx_SET (VOIDmode, d, s));
22462
22463 d = gen_lowpart (QImode, high[0]);
22464 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
22465 s = gen_rtx_NE (QImode, flags, const0_rtx);
22466 emit_insn (gen_rtx_SET (VOIDmode, d, s));
22467 }
22468
22469 /* Otherwise, we can get the same results by manually performing
22470 a bit extract operation on bit 5/6, and then performing the two
22471 shifts. The two methods of getting 0/1 into low/high are exactly
22472 the same size. Avoiding the shift in the bit extract case helps
22473 pentium4 a bit; no one else seems to care much either way. */
22474 else
22475 {
22476 enum machine_mode half_mode;
22477 rtx (*gen_lshr3)(rtx, rtx, rtx);
22478 rtx (*gen_and3)(rtx, rtx, rtx);
22479 rtx (*gen_xor3)(rtx, rtx, rtx);
22480 HOST_WIDE_INT bits;
22481 rtx x;
22482
22483 if (mode == DImode)
22484 {
22485 half_mode = SImode;
22486 gen_lshr3 = gen_lshrsi3;
22487 gen_and3 = gen_andsi3;
22488 gen_xor3 = gen_xorsi3;
22489 bits = 5;
22490 }
22491 else
22492 {
22493 half_mode = DImode;
22494 gen_lshr3 = gen_lshrdi3;
22495 gen_and3 = gen_anddi3;
22496 gen_xor3 = gen_xordi3;
22497 bits = 6;
22498 }
22499
22500 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
22501 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
22502 else
22503 x = gen_lowpart (half_mode, operands[2]);
22504 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
22505
22506 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
22507 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
22508 emit_move_insn (low[0], high[0]);
22509 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
22510 }
22511
22512 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
22513 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
22514 return;
22515 }
22516
22517 if (operands[1] == constm1_rtx)
22518 {
22519 /* For -1 << N, we can avoid the shld instruction, because we
22520 know that we're shifting 0...31/63 ones into a -1. */
22521 emit_move_insn (low[0], constm1_rtx);
22522 if (optimize_insn_for_size_p ())
22523 emit_move_insn (high[0], low[0]);
22524 else
22525 emit_move_insn (high[0], constm1_rtx);
22526 }
22527 else
22528 {
22529 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
22530
22531 if (!rtx_equal_p (operands[0], operands[1]))
22532 emit_move_insn (operands[0], operands[1]);
22533
22534 split_double_mode (mode, operands, 1, low, high);
22535 emit_insn (gen_shld (high[0], low[0], operands[2]));
22536 }
22537
22538 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
22539
22540 if (TARGET_CMOVE && scratch)
22541 {
22542 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
22543 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
22544
22545 ix86_expand_clear (scratch);
22546 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
22547 }
22548 else
22549 {
22550 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
22551 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
22552
22553 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
22554 }
22555 }
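
/* Illustrative sketch (editor's addition, excluded from compilation; the
   helper name is hypothetical): the constant-count case of ix86_split_ashl
   above, for a 64-bit shift split into 32-bit halves.  For COUNT >= 32 the
   low word becomes zero and the high word is the old low word shifted by
   COUNT - 32; for smaller counts SHLD stitches the two words together.  */
#if 0
static void
example_split_shl_const (unsigned int lo_in, unsigned int hi_in, int count,
			 unsigned int *lo_out, unsigned int *hi_out)
{
  if (count >= 32)
    {
      *hi_out = lo_in << (count - 32);
      *lo_out = 0;
    }
  else
    {
      /* The count == 0 case is guarded to keep the sketch total.  */
      *hi_out = (hi_in << count) | (count ? lo_in >> (32 - count) : 0);
      *lo_out = lo_in << count;
    }
}
#endif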
22556
22557 void
22558 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
22559 {
22560 rtx (*gen_ashr3)(rtx, rtx, rtx)
22561 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
22562 rtx (*gen_shrd)(rtx, rtx, rtx);
22563 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22564
22565 rtx low[2], high[2];
22566 int count;
22567
22568 if (CONST_INT_P (operands[2]))
22569 {
22570 split_double_mode (mode, operands, 2, low, high);
22571 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22572
22573 if (count == GET_MODE_BITSIZE (mode) - 1)
22574 {
22575 emit_move_insn (high[0], high[1]);
22576 emit_insn (gen_ashr3 (high[0], high[0],
22577 GEN_INT (half_width - 1)));
22578 emit_move_insn (low[0], high[0]);
22579
22580 }
22581 else if (count >= half_width)
22582 {
22583 emit_move_insn (low[0], high[1]);
22584 emit_move_insn (high[0], low[0]);
22585 emit_insn (gen_ashr3 (high[0], high[0],
22586 GEN_INT (half_width - 1)));
22587
22588 if (count > half_width)
22589 emit_insn (gen_ashr3 (low[0], low[0],
22590 GEN_INT (count - half_width)));
22591 }
22592 else
22593 {
22594 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22595
22596 if (!rtx_equal_p (operands[0], operands[1]))
22597 emit_move_insn (operands[0], operands[1]);
22598
22599 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
22600 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
22601 }
22602 }
22603 else
22604 {
22605 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22606
22607 if (!rtx_equal_p (operands[0], operands[1]))
22608 emit_move_insn (operands[0], operands[1]);
22609
22610 split_double_mode (mode, operands, 1, low, high);
22611
22612 emit_insn (gen_shrd (low[0], high[0], operands[2]));
22613 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
22614
22615 if (TARGET_CMOVE && scratch)
22616 {
22617 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
22618 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
22619
22620 emit_move_insn (scratch, high[0]);
22621 emit_insn (gen_ashr3 (scratch, scratch,
22622 GEN_INT (half_width - 1)));
22623 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
22624 scratch));
22625 }
22626 else
22627 {
22628 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
22629 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
22630
22631 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
22632 }
22633 }
22634 }
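
/* Illustrative sketch (editor's addition, excluded from compilation; the
   helper name is hypothetical, COUNT is assumed to be in the range 1..63,
   and >> on negative values is assumed to behave arithmetically): the
   constant-count case of ix86_split_ashr above for a 64-bit value split
   into 32-bit halves.  A shift by 63 leaves only the sign, so both halves
   become a copy of the sign word shifted by 31.  */
#if 0
static void
example_split_ashr_const (int lo_in, int hi_in, int count,
			  int *lo_out, int *hi_out)
{
  if (count == 63)
    *lo_out = *hi_out = hi_in >> 31;
  else if (count >= 32)
    {
      *lo_out = hi_in >> (count - 32);
      *hi_out = hi_in >> 31;
    }
  else
    {
      *lo_out = (int) (((unsigned int) lo_in >> count)
		       | ((unsigned int) hi_in << (32 - count)));
      *hi_out = hi_in >> count;
    }
}
#endif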
22635
22636 void
22637 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
22638 {
22639 rtx (*gen_lshr3)(rtx, rtx, rtx)
22640 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
22641 rtx (*gen_shrd)(rtx, rtx, rtx);
22642 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22643
22644 rtx low[2], high[2];
22645 int count;
22646
22647 if (CONST_INT_P (operands[2]))
22648 {
22649 split_double_mode (mode, operands, 2, low, high);
22650 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22651
22652 if (count >= half_width)
22653 {
22654 emit_move_insn (low[0], high[1]);
22655 ix86_expand_clear (high[0]);
22656
22657 if (count > half_width)
22658 emit_insn (gen_lshr3 (low[0], low[0],
22659 GEN_INT (count - half_width)));
22660 }
22661 else
22662 {
22663 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22664
22665 if (!rtx_equal_p (operands[0], operands[1]))
22666 emit_move_insn (operands[0], operands[1]);
22667
22668 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
22669 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
22670 }
22671 }
22672 else
22673 {
22674 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22675
22676 if (!rtx_equal_p (operands[0], operands[1]))
22677 emit_move_insn (operands[0], operands[1]);
22678
22679 split_double_mode (mode, operands, 1, low, high);
22680
22681 emit_insn (gen_shrd (low[0], high[0], operands[2]));
22682 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
22683
22684 if (TARGET_CMOVE && scratch)
22685 {
22686 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
22687 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
22688
22689 ix86_expand_clear (scratch);
22690 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
22691 scratch));
22692 }
22693 else
22694 {
22695 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
22696 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
22697
22698 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
22699 }
22700 }
22701 }
22702
22703 /* Predict just emitted jump instruction to be taken with probability PROB. */
22704 static void
22705 predict_jump (int prob)
22706 {
22707 rtx insn = get_last_insn ();
22708 gcc_assert (JUMP_P (insn));
22709 add_int_reg_note (insn, REG_BR_PROB, prob);
22710 }
22711
22712 /* Helper function for the string operations below. Test VARIABLE whether
22713 it is aligned to VALUE bytes. If true, jump to the label. */
22714 static rtx
22715 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
22716 {
22717 rtx label = gen_label_rtx ();
22718 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
22719 if (GET_MODE (variable) == DImode)
22720 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
22721 else
22722 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
22723 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
22724 1, label);
22725 if (epilogue)
22726 predict_jump (REG_BR_PROB_BASE * 50 / 100);
22727 else
22728 predict_jump (REG_BR_PROB_BASE * 90 / 100);
22729 return label;
22730 }
22731
22732 /* Adjust COUNTER by the VALUE. */
22733 static void
22734 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
22735 {
22736 rtx (*gen_add)(rtx, rtx, rtx)
22737 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
22738
22739 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
22740 }
22741
22742 /* Zero extend possibly SImode EXP to Pmode register. */
22743 rtx
22744 ix86_zero_extend_to_Pmode (rtx exp)
22745 {
22746 return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
22747 }
22748
22749 /* Divide COUNTREG by SCALE. */
22750 static rtx
22751 scale_counter (rtx countreg, int scale)
22752 {
22753 rtx sc;
22754
22755 if (scale == 1)
22756 return countreg;
22757 if (CONST_INT_P (countreg))
22758 return GEN_INT (INTVAL (countreg) / scale);
22759 gcc_assert (REG_P (countreg));
22760
22761 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
22762 GEN_INT (exact_log2 (scale)),
22763 NULL, 1, OPTAB_DIRECT);
22764 return sc;
22765 }
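
/* Illustrative sketch (editor's addition, excluded from compilation; the
   helper name is hypothetical): what scale_counter above does for a
   power-of-two chunk size - the byte count is converted into a chunk
   count with a logical right shift by log2 of the chunk size.  */
#if 0
static unsigned long
example_scale_counter (unsigned long byte_count, unsigned int chunk_size)
{
  /* chunk_size is assumed to be a power of two, e.g. 4 -> shift by 2.  */
  return byte_count >> __builtin_ctz (chunk_size);
}
#endif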
22766
22767 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
22768 DImode for constant loop counts. */
22769
22770 static enum machine_mode
22771 counter_mode (rtx count_exp)
22772 {
22773 if (GET_MODE (count_exp) != VOIDmode)
22774 return GET_MODE (count_exp);
22775 if (!CONST_INT_P (count_exp))
22776 return Pmode;
22777 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
22778 return DImode;
22779 return SImode;
22780 }
22781
22782 /* Copy the address to a Pmode register. This is used for x32 to
22783 truncate DImode TLS address to a SImode register. */
22784
22785 static rtx
22786 ix86_copy_addr_to_reg (rtx addr)
22787 {
22788 if (GET_MODE (addr) == Pmode || GET_MODE (addr) == VOIDmode)
22789 return copy_addr_to_reg (addr);
22790 else
22791 {
22792 gcc_assert (GET_MODE (addr) == DImode && Pmode == SImode);
22793 return gen_rtx_SUBREG (SImode, copy_to_mode_reg (DImode, addr), 0);
22794 }
22795 }
22796
22797 /* When ISSETMEM is FALSE, output simple loop to move memory pointed to by SRCPTR
22798 to DESTPTR via chunks of MODE unrolled UNROLL times, overall size is COUNT
22799 specified in bytes. When ISSETMEM is TRUE, output the equivalent loop to set
22800 memory by VALUE (supposed to be in MODE).
22801
22802 The size is rounded down to whole number of chunk size moved at once.
22803 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
22804
22805
22806 static void
22807 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
22808 rtx destptr, rtx srcptr, rtx value,
22809 rtx count, enum machine_mode mode, int unroll,
22810 int expected_size, bool issetmem)
22811 {
22812 rtx out_label, top_label, iter, tmp;
22813 enum machine_mode iter_mode = counter_mode (count);
22814 int piece_size_n = GET_MODE_SIZE (mode) * unroll;
22815 rtx piece_size = GEN_INT (piece_size_n);
22816 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
22817 rtx size;
22818 int i;
22819
22820 top_label = gen_label_rtx ();
22821 out_label = gen_label_rtx ();
22822 iter = gen_reg_rtx (iter_mode);
22823
22824 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
22825 NULL, 1, OPTAB_DIRECT);
22826 /* Those two should combine. */
22827 if (piece_size == const1_rtx)
22828 {
22829 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
22830 true, out_label);
22831 predict_jump (REG_BR_PROB_BASE * 10 / 100);
22832 }
22833 emit_move_insn (iter, const0_rtx);
22834
22835 emit_label (top_label);
22836
22837 tmp = convert_modes (Pmode, iter_mode, iter, true);
22838
22839 /* This assert could be relaxed - in that case we'll need to compute
22840 the smallest power of two containing PIECE_SIZE_N and pass it to
22841 offset_address. */
22842 gcc_assert ((piece_size_n & (piece_size_n - 1)) == 0);
22843 destmem = offset_address (destmem, tmp, piece_size_n);
22844 destmem = adjust_address (destmem, mode, 0);
22845
22846 if (!issetmem)
22847 {
22848 srcmem = offset_address (srcmem, copy_rtx (tmp), piece_size_n);
22849 srcmem = adjust_address (srcmem, mode, 0);
22850
22851 /* When unrolling for chips that reorder memory reads and writes,
22852 we can save registers by using a single temporary.
22853 Also, using 4 temporaries is overkill in 32-bit mode. */
22854 if (!TARGET_64BIT && 0)
22855 {
22856 for (i = 0; i < unroll; i++)
22857 {
22858 if (i)
22859 {
22860 destmem =
22861 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
22862 srcmem =
22863 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
22864 }
22865 emit_move_insn (destmem, srcmem);
22866 }
22867 }
22868 else
22869 {
22870 rtx tmpreg[4];
22871 gcc_assert (unroll <= 4);
22872 for (i = 0; i < unroll; i++)
22873 {
22874 tmpreg[i] = gen_reg_rtx (mode);
22875 if (i)
22876 {
22877 srcmem =
22878 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
22879 }
22880 emit_move_insn (tmpreg[i], srcmem);
22881 }
22882 for (i = 0; i < unroll; i++)
22883 {
22884 if (i)
22885 {
22886 destmem =
22887 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
22888 }
22889 emit_move_insn (destmem, tmpreg[i]);
22890 }
22891 }
22892 }
22893 else
22894 for (i = 0; i < unroll; i++)
22895 {
22896 if (i)
22897 destmem =
22898 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
22899 emit_move_insn (destmem, value);
22900 }
22901
22902 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
22903 true, OPTAB_LIB_WIDEN);
22904 if (tmp != iter)
22905 emit_move_insn (iter, tmp);
22906
22907 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
22908 true, top_label);
22909 if (expected_size != -1)
22910 {
22911 expected_size /= GET_MODE_SIZE (mode) * unroll;
22912 if (expected_size == 0)
22913 predict_jump (0);
22914 else if (expected_size > REG_BR_PROB_BASE)
22915 predict_jump (REG_BR_PROB_BASE - 1);
22916 else
22917 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
22918 }
22919 else
22920 predict_jump (REG_BR_PROB_BASE * 80 / 100);
22921 iter = ix86_zero_extend_to_Pmode (iter);
22922 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
22923 true, OPTAB_LIB_WIDEN);
22924 if (tmp != destptr)
22925 emit_move_insn (destptr, tmp);
22926 if (!issetmem)
22927 {
22928 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
22929 true, OPTAB_LIB_WIDEN);
22930 if (tmp != srcptr)
22931 emit_move_insn (srcptr, tmp);
22932 }
22933 emit_label (out_label);
22934 }
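
/* Illustrative sketch (editor's addition, excluded from compilation; the
   helper name is hypothetical): the shape of the copy loop emitted by
   expand_set_or_movmem_via_loop above, with SImode chunks unrolled four
   times (16-byte iterations).  The byte count is first rounded down to a
   multiple of the chunk size; all loads of an iteration are issued before
   the stores, matching the temporary-register scheme above.  */
#if 0
static void
example_unrolled_copy_loop (unsigned char *dest, const unsigned char *src,
			    unsigned long count)
{
  unsigned long size = count & ~15UL;	/* count & piece_size_mask */
  unsigned long iter;
  for (iter = 0; iter < size; iter += 16)
    {
      unsigned int t0, t1, t2, t3;
      __builtin_memcpy (&t0, src + iter, 4);
      __builtin_memcpy (&t1, src + iter + 4, 4);
      __builtin_memcpy (&t2, src + iter + 8, 4);
      __builtin_memcpy (&t3, src + iter + 12, 4);
      __builtin_memcpy (dest + iter, &t0, 4);
      __builtin_memcpy (dest + iter + 4, &t1, 4);
      __builtin_memcpy (dest + iter + 8, &t2, 4);
      __builtin_memcpy (dest + iter + 12, &t3, 4);
    }
  /* The pointers are then advanced past the copied portion, leaving the
     tail for the epilogue code.  */
}
#endif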
22935
22936 /* Output "rep; mov" or "rep; stos" instruction depending on ISSETMEM argument.
22937 When ISSETMEM is true, arguments SRCMEM and SRCPTR are ignored.
22938 When ISSETMEM is false, arguments VALUE and ORIG_VALUE are ignored.
22939 For the setmem case, VALUE is ORIG_VALUE promoted to a wider size.
22940 ORIG_VALUE is the original value passed to memset to fill the memory with.
22941 Other arguments have the same meaning as for the previous function. */
22942
22943 static void
22944 expand_set_or_movmem_via_rep (rtx destmem, rtx srcmem,
22945 rtx destptr, rtx srcptr, rtx value, rtx orig_value,
22946 rtx count,
22947 enum machine_mode mode, bool issetmem)
22948 {
22949 rtx destexp;
22950 rtx srcexp;
22951 rtx countreg;
22952 HOST_WIDE_INT rounded_count;
22953
22954 /* If possible, it is shorter to use rep movs.
22955 TODO: Maybe it is better to move this logic to decide_alg. */
22956 if (mode == QImode && CONST_INT_P (count) && !(INTVAL (count) & 3)
22957 && (!issetmem || orig_value == const0_rtx))
22958 mode = SImode;
22959
22960 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
22961 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
22962
22963 countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
22964 GET_MODE_SIZE (mode)));
22965 if (mode != QImode)
22966 {
22967 destexp = gen_rtx_ASHIFT (Pmode, countreg,
22968 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
22969 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
22970 }
22971 else
22972 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
22973 if ((!issetmem || orig_value == const0_rtx) && CONST_INT_P (count))
22974 {
22975 rounded_count = (INTVAL (count)
22976 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
22977 destmem = shallow_copy_rtx (destmem);
22978 set_mem_size (destmem, rounded_count);
22979 }
22980 else if (MEM_SIZE_KNOWN_P (destmem))
22981 clear_mem_size (destmem);
22982
22983 if (issetmem)
22984 {
22985 value = force_reg (mode, gen_lowpart (mode, value));
22986 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
22987 }
22988 else
22989 {
22990 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
22991 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
22992 if (mode != QImode)
22993 {
22994 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
22995 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
22996 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
22997 }
22998 else
22999 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
23000 if (CONST_INT_P (count))
23001 {
23002 rounded_count = (INTVAL (count)
23003 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
23004 srcmem = shallow_copy_rtx (srcmem);
23005 set_mem_size (srcmem, rounded_count);
23006 }
23007 else
23008 {
23009 if (MEM_SIZE_KNOWN_P (srcmem))
23010 clear_mem_size (srcmem);
23011 }
23012 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
23013 destexp, srcexp));
23014 }
23015 }
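
/* Illustrative sketch (editor's addition, excluded from compilation; the
   helper name is hypothetical): what the "rep" expansion above amounts to
   for a copy with SImode chunks.  The byte count is scaled down to a
   dword count and any trailing count % 4 bytes are left for the epilogue
   code.  */
#if 0
static void
example_rep_movsd (unsigned char *dest, const unsigned char *src,
		   unsigned long count)
{
  unsigned long chunks = count / 4;	/* scale_counter (count, 4) */
  while (chunks--)
    {
      __builtin_memcpy (dest, src, 4);
      dest += 4;
      src += 4;
    }
}
#endif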
23016
23017 /* This function emits moves to copy SIZE_TO_MOVE bytes from SRCMEM to
23018 DESTMEM.
23019 SRCMEM is passed by pointer so it can be updated on return.
23020 The return value is the updated DESTMEM. */
23021 static rtx
23022 emit_memmov (rtx destmem, rtx *srcmem, rtx destptr, rtx srcptr,
23023 HOST_WIDE_INT size_to_move)
23024 {
23025 rtx dst = destmem, src = *srcmem, adjust, tempreg;
23026 enum insn_code code;
23027 enum machine_mode move_mode;
23028 int piece_size, i;
23029
23030 /* Find the widest mode in which we could perform moves.
23031 Start with the biggest power of 2 not greater than SIZE_TO_MOVE and halve
23032 it until a move of such size is supported. */
23033 piece_size = 1 << floor_log2 (size_to_move);
23034 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23035 code = optab_handler (mov_optab, move_mode);
23036 while (code == CODE_FOR_nothing && piece_size > 1)
23037 {
23038 piece_size >>= 1;
23039 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23040 code = optab_handler (mov_optab, move_mode);
23041 }
23042
23043 /* Find the corresponding vector mode with the same size as MOVE_MODE.
23044 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
23045 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
23046 {
23047 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
23048 move_mode = mode_for_vector (word_mode, nunits);
23049 code = optab_handler (mov_optab, move_mode);
23050 if (code == CODE_FOR_nothing)
23051 {
23052 move_mode = word_mode;
23053 piece_size = GET_MODE_SIZE (move_mode);
23054 code = optab_handler (mov_optab, move_mode);
23055 }
23056 }
23057 gcc_assert (code != CODE_FOR_nothing);
23058
23059 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23060 src = adjust_automodify_address_nv (src, move_mode, srcptr, 0);
23061
23062 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZE moves. */
23063 gcc_assert (size_to_move % piece_size == 0);
23064 adjust = GEN_INT (piece_size);
23065 for (i = 0; i < size_to_move; i += piece_size)
23066 {
23067 /* We move from memory to memory, so we'll need to do it via
23068 a temporary register. */
23069 tempreg = gen_reg_rtx (move_mode);
23070 emit_insn (GEN_FCN (code) (tempreg, src));
23071 emit_insn (GEN_FCN (code) (dst, tempreg));
23072
23073 emit_move_insn (destptr,
23074 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23075 emit_move_insn (srcptr,
23076 gen_rtx_PLUS (Pmode, copy_rtx (srcptr), adjust));
23077
23078 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23079 piece_size);
23080 src = adjust_automodify_address_nv (src, move_mode, srcptr,
23081 piece_size);
23082 }
23083
23084 /* Update DST and SRC rtx. */
23085 *srcmem = src;
23086 return dst;
23087 }
23088
23089 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
23090 static void
23091 expand_movmem_epilogue (rtx destmem, rtx srcmem,
23092 rtx destptr, rtx srcptr, rtx count, int max_size)
23093 {
23094 rtx src, dest;
23095 if (CONST_INT_P (count))
23096 {
23097 HOST_WIDE_INT countval = INTVAL (count);
23098 HOST_WIDE_INT epilogue_size = countval % max_size;
23099 int i;
23100
23101 /* For now MAX_SIZE should be a power of 2. This assert could be
23102 relaxed, but it'll require a bit more complicated epilogue
23103 expanding. */
23104 gcc_assert ((max_size & (max_size - 1)) == 0);
23105 for (i = max_size; i >= 1; i >>= 1)
23106 {
23107 if (epilogue_size & i)
23108 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
23109 }
23110 return;
23111 }
23112 if (max_size > 8)
23113 {
23114 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
23115 count, 1, OPTAB_DIRECT);
23116 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
23117 count, QImode, 1, 4, false);
23118 return;
23119 }
23120
23121 /* When there are stringops, we can cheaply increase dest and src pointers.
23122 Otherwise we save code size by maintaining offset (zero is readily
23123 available from preceding rep operation) and using x86 addressing modes.
23124 */
23125 if (TARGET_SINGLE_STRINGOP)
23126 {
23127 if (max_size > 4)
23128 {
23129 rtx label = ix86_expand_aligntest (count, 4, true);
23130 src = change_address (srcmem, SImode, srcptr);
23131 dest = change_address (destmem, SImode, destptr);
23132 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23133 emit_label (label);
23134 LABEL_NUSES (label) = 1;
23135 }
23136 if (max_size > 2)
23137 {
23138 rtx label = ix86_expand_aligntest (count, 2, true);
23139 src = change_address (srcmem, HImode, srcptr);
23140 dest = change_address (destmem, HImode, destptr);
23141 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23142 emit_label (label);
23143 LABEL_NUSES (label) = 1;
23144 }
23145 if (max_size > 1)
23146 {
23147 rtx label = ix86_expand_aligntest (count, 1, true);
23148 src = change_address (srcmem, QImode, srcptr);
23149 dest = change_address (destmem, QImode, destptr);
23150 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23151 emit_label (label);
23152 LABEL_NUSES (label) = 1;
23153 }
23154 }
23155 else
23156 {
23157 rtx offset = force_reg (Pmode, const0_rtx);
23158 rtx tmp;
23159
23160 if (max_size > 4)
23161 {
23162 rtx label = ix86_expand_aligntest (count, 4, true);
23163 src = change_address (srcmem, SImode, srcptr);
23164 dest = change_address (destmem, SImode, destptr);
23165 emit_move_insn (dest, src);
23166 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
23167 true, OPTAB_LIB_WIDEN);
23168 if (tmp != offset)
23169 emit_move_insn (offset, tmp);
23170 emit_label (label);
23171 LABEL_NUSES (label) = 1;
23172 }
23173 if (max_size > 2)
23174 {
23175 rtx label = ix86_expand_aligntest (count, 2, true);
23176 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23177 src = change_address (srcmem, HImode, tmp);
23178 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23179 dest = change_address (destmem, HImode, tmp);
23180 emit_move_insn (dest, src);
23181 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
23182 true, OPTAB_LIB_WIDEN);
23183 if (tmp != offset)
23184 emit_move_insn (offset, tmp);
23185 emit_label (label);
23186 LABEL_NUSES (label) = 1;
23187 }
23188 if (max_size > 1)
23189 {
23190 rtx label = ix86_expand_aligntest (count, 1, true);
23191 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23192 src = change_address (srcmem, QImode, tmp);
23193 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23194 dest = change_address (destmem, QImode, tmp);
23195 emit_move_insn (dest, src);
23196 emit_label (label);
23197 LABEL_NUSES (label) = 1;
23198 }
23199 }
23200 }
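
/* Illustrative sketch (editor's addition, excluded from compilation; the
   helper name is hypothetical): the constant-count branch of
   expand_movmem_epilogue above.  The leftover count % max_size bytes are
   handled with one move per set bit, largest piece first, so e.g. 13
   leftover bytes with max_size == 16 become an 8-byte, a 4-byte and a
   1-byte move.  */
#if 0
static void
example_constant_epilogue (unsigned char *dest, const unsigned char *src,
			   unsigned long countval, unsigned int max_size)
{
  unsigned long epilogue_size = countval % max_size;
  unsigned int i;
  for (i = max_size; i >= 1; i >>= 1)
    if (epilogue_size & i)
      {
	__builtin_memcpy (dest, src, i);
	dest += i;
	src += i;
      }
}
#endif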
23201
23202 /* This function emits moves to fill SIZE_TO_MOVE bytes starting from DESTMEM
23203 with value PROMOTED_VAL.
23204 DESTPTR is advanced as the stores are emitted.
23205 The return value is the updated DESTMEM. */
23206 static rtx
23207 emit_memset (rtx destmem, rtx destptr, rtx promoted_val,
23208 HOST_WIDE_INT size_to_move)
23209 {
23210 rtx dst = destmem, adjust;
23211 enum insn_code code;
23212 enum machine_mode move_mode;
23213 int piece_size, i;
23214
23215 /* Choose the move mode. It is taken from PROMOTED_VAL, defaulting to
23216 QImode, and shrunk when SIZE_TO_MOVE is smaller than the size of
23217 that mode. */
23218 move_mode = GET_MODE (promoted_val);
23219 if (move_mode == VOIDmode)
23220 move_mode = QImode;
23221 if (size_to_move < GET_MODE_SIZE (move_mode))
23222 {
23223 move_mode = mode_for_size (size_to_move * BITS_PER_UNIT, MODE_INT, 0);
23224 promoted_val = gen_lowpart (move_mode, promoted_val);
23225 }
23226 piece_size = GET_MODE_SIZE (move_mode);
23227 code = optab_handler (mov_optab, move_mode);
23228 gcc_assert (code != CODE_FOR_nothing && promoted_val != NULL_RTX);
23229
23230 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23231
23232 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZE moves. */
23233 gcc_assert (size_to_move % piece_size == 0);
23234 adjust = GEN_INT (piece_size);
23235 for (i = 0; i < size_to_move; i += piece_size)
23236 {
23237 if (piece_size <= GET_MODE_SIZE (word_mode))
23238 {
23239 emit_insn (gen_strset (destptr, dst, promoted_val));
23240 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23241 piece_size);
23242 continue;
23243 }
23244
23245 emit_insn (GEN_FCN (code) (dst, promoted_val));
23246
23247 emit_move_insn (destptr,
23248 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23249
23250 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23251 piece_size);
23252 }
23253
23254 /* Update DST rtx. */
23255 return dst;
23256 }
23257 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
23258 static void
23259 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
23260 rtx count, int max_size)
23261 {
23262 count =
23263 expand_simple_binop (counter_mode (count), AND, count,
23264 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
23265 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
23266 gen_lowpart (QImode, value), count, QImode,
23267 1, max_size / 2, true);
23268 }
23269
23270 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
23271 static void
23272 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx vec_value,
23273 rtx count, int max_size)
23274 {
23275 rtx dest;
23276
23277 if (CONST_INT_P (count))
23278 {
23279 HOST_WIDE_INT countval = INTVAL (count);
23280 HOST_WIDE_INT epilogue_size = countval % max_size;
23281 int i;
23282
23283 /* For now MAX_SIZE should be a power of 2. This assert could be
23284 relaxed, but it'll require a bit more complicated epilogue
23285 expanding. */
23286 gcc_assert ((max_size & (max_size - 1)) == 0);
23287 for (i = max_size; i >= 1; i >>= 1)
23288 {
23289 if (epilogue_size & i)
23290 {
23291 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
23292 destmem = emit_memset (destmem, destptr, vec_value, i);
23293 else
23294 destmem = emit_memset (destmem, destptr, value, i);
23295 }
23296 }
23297 return;
23298 }
23299 if (max_size > 32)
23300 {
23301 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
23302 return;
23303 }
23304 if (max_size > 16)
23305 {
23306 rtx label = ix86_expand_aligntest (count, 16, true);
23307 if (TARGET_64BIT)
23308 {
23309 dest = change_address (destmem, DImode, destptr);
23310 emit_insn (gen_strset (destptr, dest, value));
23311 dest = adjust_automodify_address_nv (dest, DImode, destptr, 8);
23312 emit_insn (gen_strset (destptr, dest, value));
23313 }
23314 else
23315 {
23316 dest = change_address (destmem, SImode, destptr);
23317 emit_insn (gen_strset (destptr, dest, value));
23318 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
23319 emit_insn (gen_strset (destptr, dest, value));
23320 dest = adjust_automodify_address_nv (dest, SImode, destptr, 8);
23321 emit_insn (gen_strset (destptr, dest, value));
23322 dest = adjust_automodify_address_nv (dest, SImode, destptr, 12);
23323 emit_insn (gen_strset (destptr, dest, value));
23324 }
23325 emit_label (label);
23326 LABEL_NUSES (label) = 1;
23327 }
23328 if (max_size > 8)
23329 {
23330 rtx label = ix86_expand_aligntest (count, 8, true);
23331 if (TARGET_64BIT)
23332 {
23333 dest = change_address (destmem, DImode, destptr);
23334 emit_insn (gen_strset (destptr, dest, value));
23335 }
23336 else
23337 {
23338 dest = change_address (destmem, SImode, destptr);
23339 emit_insn (gen_strset (destptr, dest, value));
23340 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
23341 emit_insn (gen_strset (destptr, dest, value));
23342 }
23343 emit_label (label);
23344 LABEL_NUSES (label) = 1;
23345 }
23346 if (max_size > 4)
23347 {
23348 rtx label = ix86_expand_aligntest (count, 4, true);
23349 dest = change_address (destmem, SImode, destptr);
23350 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
23351 emit_label (label);
23352 LABEL_NUSES (label) = 1;
23353 }
23354 if (max_size > 2)
23355 {
23356 rtx label = ix86_expand_aligntest (count, 2, true);
23357 dest = change_address (destmem, HImode, destptr);
23358 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
23359 emit_label (label);
23360 LABEL_NUSES (label) = 1;
23361 }
23362 if (max_size > 1)
23363 {
23364 rtx label = ix86_expand_aligntest (count, 1, true);
23365 dest = change_address (destmem, QImode, destptr);
23366 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
23367 emit_label (label);
23368 LABEL_NUSES (label) = 1;
23369 }
23370 }
23371
23372 /* Depending on ISSETMEM, either copy enough bytes from SRCMEM to DESTMEM, or
23373 store enough bytes into DESTMEM, to align it to DESIRED_ALIGNMENT. The
23374 original alignment is ALIGN. Depending on ISSETMEM, either the arguments
23375 SRCMEM/SRCPTR or VALUE/VEC_VALUE are ignored.
23376 Return value is the updated DESTMEM. */
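/* A minimal sketch of what the loop below generates, assuming ALIGN == 1 and
DESIRED_ALIGNMENT == 8 (illustration only, written as C-like pseudocode rather
than the actual RTL):

if (destptr & 1) { copy or store 1 byte; count -= 1; }
if (destptr & 2) { copy or store 2 bytes; count -= 2; }
if (destptr & 4) { copy or store 4 bytes; count -= 4; }

after which the destination pointer is 8-byte aligned. */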
23377 static rtx
23378 expand_set_or_movmem_prologue (rtx destmem, rtx srcmem,
23379 rtx destptr, rtx srcptr, rtx value,
23380 rtx vec_value, rtx count, int align,
23381 int desired_alignment, bool issetmem)
23382 {
23383 int i;
23384 for (i = 1; i < desired_alignment; i <<= 1)
23385 {
23386 if (align <= i)
23387 {
23388 rtx label = ix86_expand_aligntest (destptr, i, false);
23389 if (issetmem)
23390 {
23391 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
23392 destmem = emit_memset (destmem, destptr, vec_value, i);
23393 else
23394 destmem = emit_memset (destmem, destptr, value, i);
23395 }
23396 else
23397 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
23398 ix86_adjust_counter (count, i);
23399 emit_label (label);
23400 LABEL_NUSES (label) = 1;
23401 set_mem_align (destmem, i * 2 * BITS_PER_UNIT);
23402 }
23403 }
23404 return destmem;
23405 }
23406
23407 /* Test if COUNT & SIZE is nonzero and if so, expand a movmem
23408 or setmem sequence that is valid for SIZE..2*SIZE-1 bytes
23409 and jump to DONE_LABEL. */
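/* Worked example (illustrative only, assuming a 4-byte move mode): with
SIZE == 4 and COUNT == 6 the sequence below emits one 4-byte move at offset 0
and one 4-byte move at offset COUNT - 4 == 2; the two moves overlap by two
bytes and together cover all six bytes, which is why one pair of moves handles
every length in SIZE..2*SIZE-1. */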
23410 static void
23411 expand_small_movmem_or_setmem (rtx destmem, rtx srcmem,
23412 rtx destptr, rtx srcptr,
23413 rtx value, rtx vec_value,
23414 rtx count, int size,
23415 rtx done_label, bool issetmem)
23416 {
23417 rtx label = ix86_expand_aligntest (count, size, false);
23418 enum machine_mode mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 1);
23419 rtx modesize;
23420 int n;
23421
23422 /* If we do not have a vector value to copy, we must reduce the size. */
23423 if (issetmem)
23424 {
23425 if (!vec_value)
23426 {
23427 if (GET_MODE (value) == VOIDmode && size > 8)
23428 mode = Pmode;
23429 else if (GET_MODE_SIZE (mode) > GET_MODE_SIZE (GET_MODE (value)))
23430 mode = GET_MODE (value);
23431 }
23432 else
23433 mode = GET_MODE (vec_value), value = vec_value;
23434 }
23435 else
23436 {
23437 /* Choose appropriate vector mode. */
23438 if (size >= 32)
23439 mode = TARGET_AVX ? V32QImode : TARGET_SSE ? V16QImode : DImode;
23440 else if (size >= 16)
23441 mode = TARGET_SSE ? V16QImode : DImode;
23442 srcmem = change_address (srcmem, mode, srcptr);
23443 }
23444 destmem = change_address (destmem, mode, destptr);
23445 modesize = GEN_INT (GET_MODE_SIZE (mode));
23446 gcc_assert (GET_MODE_SIZE (mode) <= size);
23447 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
23448 {
23449 if (issetmem)
23450 emit_move_insn (destmem, gen_lowpart (mode, value));
23451 else
23452 {
23453 emit_move_insn (destmem, srcmem);
23454 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
23455 }
23456 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
23457 }
23458
23459 destmem = offset_address (destmem, count, 1);
23460 destmem = offset_address (destmem, GEN_INT (-2 * size),
23461 GET_MODE_SIZE (mode));
23462 if (!issetmem)
23463 {
23464 srcmem = offset_address (srcmem, count, 1);
23465 srcmem = offset_address (srcmem, GEN_INT (-2 * size),
23466 GET_MODE_SIZE (mode));
23467 }
23468 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
23469 {
23470 if (issetmem)
23471 emit_move_insn (destmem, gen_lowpart (mode, value));
23472 else
23473 {
23474 emit_move_insn (destmem, srcmem);
23475 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
23476 }
23477 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
23478 }
23479 emit_jump_insn (gen_jump (done_label));
23480 emit_barrier ();
23481
23482 emit_label (label);
23483 LABEL_NUSES (label) = 1;
23484 }
23485
23486 /* Handle a small memcpy (up to SIZE, which is supposed to be a small power of 2)
23487 and get ready for the main memcpy loop by copying the initial DESIRED_ALIGN-ALIGN
23488 bytes and the last SIZE bytes, adjusting DESTPTR/SRCPTR/COUNT so that we can
23489 proceed with a loop copying SIZE bytes at once. Do moves in MODE.
23490 DONE_LABEL is a label after the whole copying sequence. The label is created
23491 on demand if *DONE_LABEL is NULL.
23492 MIN_SIZE is the minimal size of the block copied. This value gets adjusted for
23493 new bounds after the initial copies.
23494
23495 DESTMEM/SRCMEM are memory expressions pointing to the copied block,
23496 DESTPTR/SRCPTR are pointers to the block. DYNAMIC_CHECK indicates whether
23497 we will dispatch to a library call for large blocks.
23498
23499 In pseudocode we do:
23500
23501 if (COUNT < SIZE)
23502 {
23503 Assume that SIZE is 4. Bigger sizes are handled analogously
23504 if (COUNT & 4)
23505 {
23506 copy 4 bytes from SRCPTR to DESTPTR
23507 copy 4 bytes from SRCPTR + COUNT - 4 to DESTPTR + COUNT - 4
23508 goto done_label
23509 }
23510 if (!COUNT)
23511 goto done_label;
23512 copy 1 byte from SRCPTR to DESTPTR
23513 if (COUNT & 2)
23514 {
23515 copy 2 bytes from SRCPTR to DESTPTR
23516 copy 2 bytes from SRCPTR + COUNT - 2 to DESTPTR + COUNT - 2
23517 }
23518 }
23519 else
23520 {
23521 copy at least DESIRED_ALIGN-ALIGN bytes from SRCPTR to DESTPTR
23522 copy SIZE bytes from SRCPTR + COUNT - SIZE to DESTPTR + COUNT -SIZE
23523
23524 OLD_DESTPTR = DESTPTR;
23525 Align DESTPTR up to DESIRED_ALIGN
23526 SRCPTR += DESTPTR - OLD_DESTPTR
23527 COUNT -= DESTPTR - OLD_DESTPTR
23528 if (DYNAMIC_CHECK)
23529 Round COUNT down to multiple of SIZE
23530 << optional caller supplied zero size guard is here >>
23531 << optional caller supplied dynamic check is here >>
23532 << caller supplied main copy loop is here >>
23533 }
23534 done_label:
23535 */
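/* Worked example of the pseudocode above (illustrative, assuming SIZE == 16
and a non-constant COUNT with *MIN_SIZE == 0): blocks of 8..15 bytes are
handled by one overlapping pair of 8-byte moves, 4..7 bytes by a pair of
4-byte moves, and 0..3 bytes by the byte/2-byte fixup, each path jumping to
DONE_LABEL. Blocks of 16 bytes or more copy the first DESIRED_ALIGN-ALIGN
bytes and the last 16 bytes with possibly misaligned moves, round DESTPTR up
to DESIRED_ALIGN, adjust SRCPTR and COUNT, and fall through to the
caller-supplied main loop. */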
23536 static void
23537 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves (rtx destmem, rtx srcmem,
23538 rtx *destptr, rtx *srcptr,
23539 enum machine_mode mode,
23540 rtx value, rtx vec_value,
23541 rtx *count,
23542 rtx *done_label,
23543 int size,
23544 int desired_align,
23545 int align,
23546 unsigned HOST_WIDE_INT *min_size,
23547 bool dynamic_check,
23548 bool issetmem)
23549 {
23550 rtx loop_label = NULL, label;
23551 int n;
23552 rtx modesize;
23553 int prolog_size = 0;
23554 rtx mode_value;
23555
23556 /* Choose the proper value to copy. */
23557 if (issetmem && VECTOR_MODE_P (mode))
23558 mode_value = vec_value;
23559 else
23560 mode_value = value;
23561 gcc_assert (GET_MODE_SIZE (mode) <= size);
23562
23563 /* See if the block is big or small; handle small blocks here. */
23564 if (!CONST_INT_P (*count) && *min_size < (unsigned HOST_WIDE_INT)size)
23565 {
23566 int size2 = size;
23567 loop_label = gen_label_rtx ();
23568
23569 if (!*done_label)
23570 *done_label = gen_label_rtx ();
23571
23572 emit_cmp_and_jump_insns (*count, GEN_INT (size2), GE, 0, GET_MODE (*count),
23573 1, loop_label);
23574 size2 >>= 1;
23575
23576 /* Handle sizes > 3. */
23577 for (;size2 > 2; size2 >>= 1)
23578 expand_small_movmem_or_setmem (destmem, srcmem,
23579 *destptr, *srcptr,
23580 value, vec_value,
23581 *count,
23582 size2, *done_label, issetmem);
23583 /* Nothing to copy? Jump to DONE_LABEL if so */
23584 emit_cmp_and_jump_insns (*count, const0_rtx, EQ, 0, GET_MODE (*count),
23585 1, *done_label);
23586
23587 /* Do a byte copy. */
23588 destmem = change_address (destmem, QImode, *destptr);
23589 if (issetmem)
23590 emit_move_insn (destmem, gen_lowpart (QImode, value));
23591 else
23592 {
23593 srcmem = change_address (srcmem, QImode, *srcptr);
23594 emit_move_insn (destmem, srcmem);
23595 }
23596
23597 /* Handle sizes 2 and 3. */
23598 label = ix86_expand_aligntest (*count, 2, false);
23599 destmem = change_address (destmem, HImode, *destptr);
23600 destmem = offset_address (destmem, *count, 1);
23601 destmem = offset_address (destmem, GEN_INT (-2), 2);
23602 if (issetmem)
23603 emit_move_insn (destmem, gen_lowpart (HImode, value));
23604 else
23605 {
23606 srcmem = change_address (srcmem, HImode, *srcptr);
23607 srcmem = offset_address (srcmem, *count, 1);
23608 srcmem = offset_address (srcmem, GEN_INT (-2), 2);
23609 emit_move_insn (destmem, srcmem);
23610 }
23611
23612 emit_label (label);
23613 LABEL_NUSES (label) = 1;
23614 emit_jump_insn (gen_jump (*done_label));
23615 emit_barrier ();
23616 }
23617 else
23618 gcc_assert (*min_size >= (unsigned HOST_WIDE_INT)size
23619 || UINTVAL (*count) >= (unsigned HOST_WIDE_INT)size);
23620
23621 /* Start memcpy for COUNT >= SIZE. */
23622 if (loop_label)
23623 {
23624 emit_label (loop_label);
23625 LABEL_NUSES (loop_label) = 1;
23626 }
23627
23628 /* Copy the first DESIRED_ALIGN - ALIGN bytes (rounded up to whole moves). */
23629 if (!issetmem)
23630 srcmem = change_address (srcmem, mode, *srcptr);
23631 destmem = change_address (destmem, mode, *destptr);
23632 modesize = GEN_INT (GET_MODE_SIZE (mode));
23633 for (n = 0; prolog_size < desired_align - align; n++)
23634 {
23635 if (issetmem)
23636 emit_move_insn (destmem, mode_value);
23637 else
23638 {
23639 emit_move_insn (destmem, srcmem);
23640 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
23641 }
23642 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
23643 prolog_size += GET_MODE_SIZE (mode);
23644 }
23645
23646
23647 /* Copy last SIZE bytes. */
23648 destmem = offset_address (destmem, *count, 1);
23649 destmem = offset_address (destmem,
23650 GEN_INT (-size - prolog_size),
23651 1);
23652 if (issetmem)
23653 emit_move_insn (destmem, mode_value);
23654 else
23655 {
23656 srcmem = offset_address (srcmem, *count, 1);
23657 srcmem = offset_address (srcmem,
23658 GEN_INT (-size - prolog_size),
23659 1);
23660 emit_move_insn (destmem, srcmem);
23661 }
23662 for (n = 1; n * GET_MODE_SIZE (mode) < size; n++)
23663 {
23664 destmem = offset_address (destmem, modesize, 1);
23665 if (issetmem)
23666 emit_move_insn (destmem, mode_value);
23667 else
23668 {
23669 srcmem = offset_address (srcmem, modesize, 1);
23670 emit_move_insn (destmem, srcmem);
23671 }
23672 }
23673
23674 /* Align destination. */
23675 if (desired_align > 1 && desired_align > align)
23676 {
23677 rtx saveddest = *destptr;
23678
23679 gcc_assert (desired_align <= size);
23680 /* Align destptr up, placing the result in a new register. */
23681 *destptr = expand_simple_binop (GET_MODE (*destptr), PLUS, *destptr,
23682 GEN_INT (prolog_size),
23683 NULL_RTX, 1, OPTAB_DIRECT);
23684 *destptr = expand_simple_binop (GET_MODE (*destptr), AND, *destptr,
23685 GEN_INT (-desired_align),
23686 *destptr, 1, OPTAB_DIRECT);
23687 /* See how many bytes we skipped. */
23688 saveddest = expand_simple_binop (GET_MODE (*destptr), MINUS, saveddest,
23689 *destptr,
23690 saveddest, 1, OPTAB_DIRECT);
23691 /* Adjust srcptr and count. */
23692 if (!issetmem)
23693 *srcptr = expand_simple_binop (GET_MODE (*srcptr), MINUS, *srcptr, saveddest,
23694 *srcptr, 1, OPTAB_DIRECT);
23695 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
23696 saveddest, *count, 1, OPTAB_DIRECT);
23697 /* We copied at most size + prolog_size. */
23698 if (*min_size > (unsigned HOST_WIDE_INT)(size + prolog_size))
23699 *min_size = (*min_size - size) & ~(unsigned HOST_WIDE_INT)(size - 1);
23700 else
23701 *min_size = 0;
23702
23703 /* Our loops always round down the block size, but for dispatch to the library
23704 we need the precise value. */
23705 if (dynamic_check)
23706 *count = expand_simple_binop (GET_MODE (*count), AND, *count,
23707 GEN_INT (-size), *count, 1, OPTAB_DIRECT);
23708 }
23709 else
23710 {
23711 gcc_assert (prolog_size == 0);
23712 /* Decrease count, so we won't end up copying the last word twice. */
23713 if (!CONST_INT_P (*count))
23714 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
23715 constm1_rtx, *count, 1, OPTAB_DIRECT);
23716 else
23717 *count = GEN_INT ((UINTVAL (*count) - 1) & ~(unsigned HOST_WIDE_INT)(size - 1));
23718 if (*min_size)
23719 *min_size = (*min_size - 1) & ~(unsigned HOST_WIDE_INT)(size - 1);
23720 }
23721 }
23722
23723
23724 /* This function is like the previous one, except here we know how many bytes
23725 need to be copied. That allows us to update alignment not only of DST, which
23726 is returned, but also of SRC, which is passed as a pointer for that
23727 reason. */
23728 static rtx
23729 expand_set_or_movmem_constant_prologue (rtx dst, rtx *srcp, rtx destreg,
23730 rtx srcreg, rtx value, rtx vec_value,
23731 int desired_align, int align_bytes,
23732 bool issetmem)
23733 {
23734 rtx src = NULL;
23735 rtx orig_dst = dst;
23736 rtx orig_src = NULL;
23737 int piece_size = 1;
23738 int copied_bytes = 0;
23739
23740 if (!issetmem)
23741 {
23742 gcc_assert (srcp != NULL);
23743 src = *srcp;
23744 orig_src = src;
23745 }
23746
23747 for (piece_size = 1;
23748 piece_size <= desired_align && copied_bytes < align_bytes;
23749 piece_size <<= 1)
23750 {
23751 if (align_bytes & piece_size)
23752 {
23753 if (issetmem)
23754 {
23755 if (vec_value && piece_size > GET_MODE_SIZE (GET_MODE (value)))
23756 dst = emit_memset (dst, destreg, vec_value, piece_size);
23757 else
23758 dst = emit_memset (dst, destreg, value, piece_size);
23759 }
23760 else
23761 dst = emit_memmov (dst, &src, destreg, srcreg, piece_size);
23762 copied_bytes += piece_size;
23763 }
23764 }
23765 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
23766 set_mem_align (dst, desired_align * BITS_PER_UNIT);
23767 if (MEM_SIZE_KNOWN_P (orig_dst))
23768 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
23769
23770 if (!issetmem)
23771 {
23772 int src_align_bytes = get_mem_align_offset (src, desired_align
23773 * BITS_PER_UNIT);
23774 if (src_align_bytes >= 0)
23775 src_align_bytes = desired_align - src_align_bytes;
23776 if (src_align_bytes >= 0)
23777 {
23778 unsigned int src_align;
23779 for (src_align = desired_align; src_align >= 2; src_align >>= 1)
23780 {
23781 if ((src_align_bytes & (src_align - 1))
23782 == (align_bytes & (src_align - 1)))
23783 break;
23784 }
23785 if (src_align > (unsigned int) desired_align)
23786 src_align = desired_align;
23787 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
23788 set_mem_align (src, src_align * BITS_PER_UNIT);
23789 }
23790 if (MEM_SIZE_KNOWN_P (orig_src))
23791 set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
23792 *srcp = src;
23793 }
23794
23795 return dst;
23796 }
23797
23798 /* Return true if ALG can be used in current context.
23799 Assume we expand memset if MEMSET is true. */
23800 static bool
23801 alg_usable_p (enum stringop_alg alg, bool memset)
23802 {
23803 if (alg == no_stringop)
23804 return false;
23805 if (alg == vector_loop)
23806 return TARGET_SSE || TARGET_AVX;
23807 /* Algorithms using the rep prefix want at least edi and ecx;
23808 additionally, memset wants eax and memcpy wants esi. Don't
23809 consider such algorithms if the user has appropriated those
23810 registers for their own purposes. */
23811 if (alg == rep_prefix_1_byte
23812 || alg == rep_prefix_4_byte
23813 || alg == rep_prefix_8_byte)
23814 return !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
23815 || (memset ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
23816 return true;
23817 }
23818
23819 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
23820 static enum stringop_alg
23821 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
23822 unsigned HOST_WIDE_INT min_size, unsigned HOST_WIDE_INT max_size,
23823 bool memset, bool zero_memset, int *dynamic_check, bool *noalign)
23824 {
23825 const struct stringop_algs * algs;
23826 bool optimize_for_speed;
23827 int max = 0;
23828 const struct processor_costs *cost;
23829 int i;
23830 bool any_alg_usable_p = false;
23831
23832 *noalign = false;
23833 *dynamic_check = -1;
23834
23835 /* Even if the string operation call is cold, we still might spend a lot
23836 of time processing large blocks. */
23837 if (optimize_function_for_size_p (cfun)
23838 || (optimize_insn_for_size_p ()
23839 && (max_size < 256
23840 || (expected_size != -1 && expected_size < 256))))
23841 optimize_for_speed = false;
23842 else
23843 optimize_for_speed = true;
23844
23845 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
23846 if (memset)
23847 algs = &cost->memset[TARGET_64BIT != 0];
23848 else
23849 algs = &cost->memcpy[TARGET_64BIT != 0];
23850
23851 /* See maximal size for user defined algorithm. */
23852 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
23853 {
23854 enum stringop_alg candidate = algs->size[i].alg;
23855 bool usable = alg_usable_p (candidate, memset);
23856 any_alg_usable_p |= usable;
23857
23858 if (candidate != libcall && candidate && usable)
23859 max = algs->size[i].max;
23860 }
23861
23862 /* If the expected size is not known but the max size is small enough
23863 so that the inline version is a win, set the expected size into
23864 the range. */
23865 if (((max > 1 && (unsigned HOST_WIDE_INT) max >= max_size) || max == -1)
23866 && expected_size == -1)
23867 expected_size = min_size / 2 + max_size / 2;
23868
23869 /* If the user specified the algorithm, honor it if possible. */
23870 if (ix86_stringop_alg != no_stringop
23871 && alg_usable_p (ix86_stringop_alg, memset))
23872 return ix86_stringop_alg;
23873 /* rep; movq or rep; movl is the smallest variant. */
23874 else if (!optimize_for_speed)
23875 {
23876 *noalign = true;
23877 if (!count || (count & 3) || (memset && !zero_memset))
23878 return alg_usable_p (rep_prefix_1_byte, memset)
23879 ? rep_prefix_1_byte : loop_1_byte;
23880 else
23881 return alg_usable_p (rep_prefix_4_byte, memset)
23882 ? rep_prefix_4_byte : loop;
23883 }
23884 /* Very tiny blocks are best handled via the loop; REP is expensive to
23885 set up. */
23886 else if (expected_size != -1 && expected_size < 4)
23887 return loop_1_byte;
23888 else if (expected_size != -1)
23889 {
23890 enum stringop_alg alg = libcall;
23891 bool alg_noalign = false;
23892 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
23893 {
23894 /* We get here if the algorithms that were not libcall-based
23895 were rep-prefix based and we are unable to use rep prefixes
23896 based on global register usage. Break out of the loop and
23897 use the heuristic below. */
23898 if (algs->size[i].max == 0)
23899 break;
23900 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
23901 {
23902 enum stringop_alg candidate = algs->size[i].alg;
23903
23904 if (candidate != libcall && alg_usable_p (candidate, memset))
23905 {
23906 alg = candidate;
23907 alg_noalign = algs->size[i].noalign;
23908 }
23909 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
23910 last non-libcall inline algorithm. */
23911 if (TARGET_INLINE_ALL_STRINGOPS)
23912 {
23913 /* When the current size is best copied by a libcall,
23914 but we are still forced to inline, run the heuristic below
23915 that will pick code for medium-sized blocks. */
23916 if (alg != libcall)
23917 {
23918 *noalign = alg_noalign;
23919 return alg;
23920 }
23921 break;
23922 }
23923 else if (alg_usable_p (candidate, memset))
23924 {
23925 *noalign = algs->size[i].noalign;
23926 return candidate;
23927 }
23928 }
23929 }
23930 }
23931 /* When asked to inline the call anyway, try to pick a meaningful choice.
23932 We look for the maximal size of block that is faster to copy by hand and
23933 take blocks of at most that size, guessing that the average size will
23934 be roughly half of the maximum.
23935
23936 If this turns out to be bad, we might simply specify the preferred
23937 choice in ix86_costs. */
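/* Hypothetical illustration: if the active cost table only covers blocks up
to 256 bytes with an inline algorithm (say unrolled_loop) and leaves
everything larger to a libcall, then MAX is 256 and the recursive call below
runs with an expected size of 128, typically picking that inline algorithm;
with -minline-stringops-dynamically, *DYNAMIC_CHECK is then set to 256 so
larger blocks are still dispatched to the library call. */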
23938 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
23939 && (algs->unknown_size == libcall
23940 || !alg_usable_p (algs->unknown_size, memset)))
23941 {
23942 enum stringop_alg alg;
23943
23944 /* If there aren't any usable algorithms, then recursing on
23945 smaller sizes isn't going to find anything. Just return the
23946 simple byte-at-a-time copy loop. */
23947 if (!any_alg_usable_p)
23948 {
23949 /* Pick something reasonable. */
23950 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
23951 *dynamic_check = 128;
23952 return loop_1_byte;
23953 }
23954 if (max <= 0)
23955 max = 4096;
23956 alg = decide_alg (count, max / 2, min_size, max_size, memset,
23957 zero_memset, dynamic_check, noalign);
23958 gcc_assert (*dynamic_check == -1);
23959 gcc_assert (alg != libcall);
23960 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
23961 *dynamic_check = max;
23962 return alg;
23963 }
23964 return (alg_usable_p (algs->unknown_size, memset)
23965 ? algs->unknown_size : libcall);
23966 }
23967
23968 /* Decide on alignment. We know that the operand is already aligned to ALIGN
23969 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
23970 static int
23971 decide_alignment (int align,
23972 enum stringop_alg alg,
23973 int expected_size,
23974 enum machine_mode move_mode)
23975 {
23976 int desired_align = 0;
23977
23978 gcc_assert (alg != no_stringop);
23979
23980 if (alg == libcall)
23981 return 0;
23982 if (move_mode == VOIDmode)
23983 return 0;
23984
23985 desired_align = GET_MODE_SIZE (move_mode);
23986 /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
23987 copying a whole cache line at once. */
23988 if (TARGET_PENTIUMPRO
23989 && (alg == rep_prefix_4_byte || alg == rep_prefix_1_byte))
23990 desired_align = 8;
23991
23992 if (optimize_size)
23993 desired_align = 1;
23994 if (desired_align < align)
23995 desired_align = align;
23996 if (expected_size != -1 && expected_size < 4)
23997 desired_align = align;
23998
23999 return desired_align;
24000 }
24001
24002
24003 /* Helper function for memset. For a QImode value 0xXY produce
24004 0xXYXYXYXY of the width specified by MODE. This is essentially
24005 a * 0x01010101, but we can do slightly better than
24006 synth_mult by unwinding the sequence by hand on CPUs with
24007 a slow multiply. */
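/* Worked example (illustration only): for VAL == 0x41 and MODE == SImode the
result is 0x41414141. With a fast multiplier this is just 0x41 * 0x01010101;
the hand-unwound variant below instead builds it as reg |= reg << 8 (or an
insv of the low byte), then reg |= reg << 16, and for DImode additionally
reg |= reg << 32. */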
24008 static rtx
24009 promote_duplicated_reg (enum machine_mode mode, rtx val)
24010 {
24011 enum machine_mode valmode = GET_MODE (val);
24012 rtx tmp;
24013 int nops = mode == DImode ? 3 : 2;
24014
24015 gcc_assert (mode == SImode || mode == DImode || val == const0_rtx);
24016 if (val == const0_rtx)
24017 return copy_to_mode_reg (mode, CONST0_RTX (mode));
24018 if (CONST_INT_P (val))
24019 {
24020 HOST_WIDE_INT v = INTVAL (val) & 255;
24021
24022 v |= v << 8;
24023 v |= v << 16;
24024 if (mode == DImode)
24025 v |= (v << 16) << 16;
24026 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
24027 }
24028
24029 if (valmode == VOIDmode)
24030 valmode = QImode;
24031 if (valmode != QImode)
24032 val = gen_lowpart (QImode, val);
24033 if (mode == QImode)
24034 return val;
24035 if (!TARGET_PARTIAL_REG_STALL)
24036 nops--;
24037 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
24038 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
24039 <= (ix86_cost->shift_const + ix86_cost->add) * nops
24040 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
24041 {
24042 rtx reg = convert_modes (mode, QImode, val, true);
24043 tmp = promote_duplicated_reg (mode, const1_rtx);
24044 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
24045 OPTAB_DIRECT);
24046 }
24047 else
24048 {
24049 rtx reg = convert_modes (mode, QImode, val, true);
24050
24051 if (!TARGET_PARTIAL_REG_STALL)
24052 if (mode == SImode)
24053 emit_insn (gen_movsi_insv_1 (reg, reg));
24054 else
24055 emit_insn (gen_movdi_insv_1 (reg, reg));
24056 else
24057 {
24058 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
24059 NULL, 1, OPTAB_DIRECT);
24060 reg =
24061 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24062 }
24063 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
24064 NULL, 1, OPTAB_DIRECT);
24065 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24066 if (mode == SImode)
24067 return reg;
24068 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
24069 NULL, 1, OPTAB_DIRECT);
24070 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24071 return reg;
24072 }
24073 }
24074
24075 /* Duplicate value VAL using promote_duplicated_reg into the maximal size that
24076 will be needed by the main loop copying SIZE_NEEDED chunks and by the prologue
24077 getting alignment from ALIGN to DESIRED_ALIGN. */
24078 static rtx
24079 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align,
24080 int align)
24081 {
24082 rtx promoted_val;
24083
24084 if (TARGET_64BIT
24085 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
24086 promoted_val = promote_duplicated_reg (DImode, val);
24087 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
24088 promoted_val = promote_duplicated_reg (SImode, val);
24089 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
24090 promoted_val = promote_duplicated_reg (HImode, val);
24091 else
24092 promoted_val = val;
24093
24094 return promoted_val;
24095 }
24096
24097 /* Expand string move (memcpy) or store (memset) operation. Use i386 string
24098 operations when profitable. The code depends upon architecture, block size
24099 and alignment, but always has one of the following overall structures:
24100
24101 Aligned move sequence:
24102
24103 1) Prologue guard: Conditional that jumps up to epilogues for small
24104 blocks that can be handled by epilogue alone. This is faster
24105 but also needed for correctness, since the prologue assumes the block
24106 is larger than the desired alignment.
24107
24108 Optional dynamic check for size and libcall for large
24109 blocks is emitted here too, with -minline-stringops-dynamically.
24110
24111 2) Prologue: copy first few bytes in order to get destination
24112 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
24113 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
24114 copied. We emit either a jump tree on power of two sized
24115 blocks, or a byte loop.
24116
24117 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24118 with specified algorithm.
24119
24120 4) Epilogue: code copying tail of the block that is too small to be
24121 handled by main body (or up to size guarded by prologue guard).
24122
24123 Misaligned move sequence
24124
24125 1) misaligned move prologue/epilogue containing:
24126 a) Prologue handling small memory blocks and jumping to done_label
24127 (skipped if blocks are known to be large enough)
24128 b) Single move copying the first DESIRED_ALIGN-ALIGN bytes, if alignment is
24129 needed, done as a single possibly misaligned move
24130 (skipped if alignment is not needed)
24131 c) Copy of last SIZE_NEEDED bytes by possibly misaligned moves
24132
24133 2) Zero size guard dispatching to done_label, if needed
24134
24135 3) dispatch to library call, if needed,
24136
24137 4) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24138 with specified algorithm. */
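/* Rough shape of the aligned sequence, shown as C-like pseudocode for an
illustrative memcpy expanded with a loop algorithm (a sketch only; the real
expansion below varies with target, algorithm and known sizes):

if (count < epilogue_size_needed) goto epilogue; // 1) prologue guard
while (dest not aligned) copy a few bytes; // 2) prologue
while (count >= size_needed) copy size_needed bytes; // 3) main body
epilogue: copy the remaining count % size_needed bytes; // 4) epilogue */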
24139 bool
24140 ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
24141 rtx align_exp, rtx expected_align_exp,
24142 rtx expected_size_exp, rtx min_size_exp,
24143 rtx max_size_exp, rtx probable_max_size_exp,
24144 bool issetmem)
24145 {
24146 rtx destreg;
24147 rtx srcreg = NULL;
24148 rtx label = NULL;
24149 rtx tmp;
24150 rtx jump_around_label = NULL;
24151 HOST_WIDE_INT align = 1;
24152 unsigned HOST_WIDE_INT count = 0;
24153 HOST_WIDE_INT expected_size = -1;
24154 int size_needed = 0, epilogue_size_needed;
24155 int desired_align = 0, align_bytes = 0;
24156 enum stringop_alg alg;
24157 rtx promoted_val = NULL;
24158 rtx vec_promoted_val = NULL;
24159 bool force_loopy_epilogue = false;
24160 int dynamic_check;
24161 bool need_zero_guard = false;
24162 bool noalign;
24163 enum machine_mode move_mode = VOIDmode;
24164 int unroll_factor = 1;
24165 /* TODO: Once value ranges are available, fill in proper data. */
24166 unsigned HOST_WIDE_INT min_size = 0;
24167 unsigned HOST_WIDE_INT max_size = -1;
24168 unsigned HOST_WIDE_INT probable_max_size = -1;
24169 bool misaligned_prologue_used = false;
24170
24171 if (CONST_INT_P (align_exp))
24172 align = INTVAL (align_exp);
24173 /* i386 can do misaligned access at a reasonably increased cost. */
24174 if (CONST_INT_P (expected_align_exp)
24175 && INTVAL (expected_align_exp) > align)
24176 align = INTVAL (expected_align_exp);
24177 /* ALIGN is the minimum of destination and source alignment, but we care here
24178 just about destination alignment. */
24179 else if (!issetmem
24180 && MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
24181 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
24182
24183 if (CONST_INT_P (count_exp))
24184 {
24185 min_size = max_size = probable_max_size = count = expected_size
24186 = INTVAL (count_exp);
24187 /* When COUNT is 0, there is nothing to do. */
24188 if (!count)
24189 return true;
24190 }
24191 else
24192 {
24193 if (min_size_exp)
24194 min_size = INTVAL (min_size_exp);
24195 if (max_size_exp)
24196 max_size = INTVAL (max_size_exp);
24197 if (probable_max_size_exp)
24198 probable_max_size = INTVAL (probable_max_size_exp);
24199 if (CONST_INT_P (expected_size_exp))
24200 expected_size = INTVAL (expected_size_exp);
24201 }
24202
24203 /* Make sure we don't need to care about overflow later on. */
24204 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
24205 return false;
24206
24207 /* Step 0: Decide on preferred algorithm, desired alignment and
24208 size of chunks to be copied by main loop. */
24209 alg = decide_alg (count, expected_size, min_size, probable_max_size,
24210 issetmem,
24211 issetmem && val_exp == const0_rtx,
24212 &dynamic_check, &noalign);
24213 if (alg == libcall)
24214 return false;
24215 gcc_assert (alg != no_stringop);
24216
24217 /* For now the vector version of memset is generated only for memory zeroing, as
24218 creating the promoted vector value is very cheap in this case. */
24219 if (issetmem && alg == vector_loop && val_exp != const0_rtx)
24220 alg = unrolled_loop;
24221
24222 if (!count)
24223 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
24224 destreg = ix86_copy_addr_to_reg (XEXP (dst, 0));
24225 if (!issetmem)
24226 srcreg = ix86_copy_addr_to_reg (XEXP (src, 0));
24227
24228 unroll_factor = 1;
24229 move_mode = word_mode;
24230 switch (alg)
24231 {
24232 case libcall:
24233 case no_stringop:
24234 case last_alg:
24235 gcc_unreachable ();
24236 case loop_1_byte:
24237 need_zero_guard = true;
24238 move_mode = QImode;
24239 break;
24240 case loop:
24241 need_zero_guard = true;
24242 break;
24243 case unrolled_loop:
24244 need_zero_guard = true;
24245 unroll_factor = (TARGET_64BIT ? 4 : 2);
24246 break;
24247 case vector_loop:
24248 need_zero_guard = true;
24249 unroll_factor = 4;
24250 /* Find the widest supported mode. */
24251 move_mode = word_mode;
24252 while (optab_handler (mov_optab, GET_MODE_WIDER_MODE (move_mode))
24253 != CODE_FOR_nothing)
24254 move_mode = GET_MODE_WIDER_MODE (move_mode);
24255
24256 /* Find the corresponding vector mode with the same size as MOVE_MODE.
24257 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
24258 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
24259 {
24260 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
24261 move_mode = mode_for_vector (word_mode, nunits);
24262 if (optab_handler (mov_optab, move_mode) == CODE_FOR_nothing)
24263 move_mode = word_mode;
24264 }
24265 gcc_assert (optab_handler (mov_optab, move_mode) != CODE_FOR_nothing);
24266 break;
24267 case rep_prefix_8_byte:
24268 move_mode = DImode;
24269 break;
24270 case rep_prefix_4_byte:
24271 move_mode = SImode;
24272 break;
24273 case rep_prefix_1_byte:
24274 move_mode = QImode;
24275 break;
24276 }
24277 size_needed = GET_MODE_SIZE (move_mode) * unroll_factor;
24278 epilogue_size_needed = size_needed;
24279
24280 desired_align = decide_alignment (align, alg, expected_size, move_mode);
24281 if (!TARGET_ALIGN_STRINGOPS || noalign)
24282 align = desired_align;
24283
24284 /* Step 1: Prologue guard. */
24285
24286 /* Alignment code needs count to be in register. */
24287 if (CONST_INT_P (count_exp) && desired_align > align)
24288 {
24289 if (INTVAL (count_exp) > desired_align
24290 && INTVAL (count_exp) > size_needed)
24291 {
24292 align_bytes
24293 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
24294 if (align_bytes <= 0)
24295 align_bytes = 0;
24296 else
24297 align_bytes = desired_align - align_bytes;
24298 }
24299 if (align_bytes == 0)
24300 count_exp = force_reg (counter_mode (count_exp), count_exp);
24301 }
24302 gcc_assert (desired_align >= 1 && align >= 1);
24303
24304 /* Misaligned move sequences handle both prologue and epilogue at once.
24305 Default code generation results in smaller code for large alignments
24306 and also avoids redundant work when sizes are known precisely. */
24307 misaligned_prologue_used
24308 = (TARGET_MISALIGNED_MOVE_STRING_PRO_EPILOGUES
24309 && MAX (desired_align, epilogue_size_needed) <= 32
24310 && desired_align <= epilogue_size_needed
24311 && ((desired_align > align && !align_bytes)
24312 || (!count && epilogue_size_needed > 1)));
24313
24314 /* Do the cheap promotion to allow better CSE across the
24315 main loop and epilogue (i.e. one load of the big constant at the
24316 front of all the code).
24317 For now the misaligned move sequences do not have a fast path
24318 without broadcasting. */
24319 if (issetmem && ((CONST_INT_P (val_exp) || misaligned_prologue_used)))
24320 {
24321 if (alg == vector_loop)
24322 {
24323 gcc_assert (val_exp == const0_rtx);
24324 vec_promoted_val = promote_duplicated_reg (move_mode, val_exp);
24325 promoted_val = promote_duplicated_reg_to_size (val_exp,
24326 GET_MODE_SIZE (word_mode),
24327 desired_align, align);
24328 }
24329 else
24330 {
24331 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
24332 desired_align, align);
24333 }
24334 }
24335 /* Misaligned move sequences handle both prologues and epilogues at once.
24336 Default code generation results in smaller code for large alignments and
24337 also avoids redundant work when sizes are known precisely. */
24338 if (misaligned_prologue_used)
24339 {
24340 /* Misaligned move prologue handles small blocks by itself. */
24341 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves
24342 (dst, src, &destreg, &srcreg,
24343 move_mode, promoted_val, vec_promoted_val,
24344 &count_exp,
24345 &jump_around_label,
24346 desired_align < align
24347 ? MAX (desired_align, epilogue_size_needed) : epilogue_size_needed,
24348 desired_align, align, &min_size, dynamic_check, issetmem);
24349 if (!issetmem)
24350 src = change_address (src, BLKmode, srcreg);
24351 dst = change_address (dst, BLKmode, destreg);
24352 set_mem_align (dst, desired_align * BITS_PER_UNIT);
24353 epilogue_size_needed = 0;
24354 if (need_zero_guard && !min_size)
24355 {
24356 /* It is possible that we copied enough so the main loop will not
24357 execute. */
24358 gcc_assert (size_needed > 1);
24359 if (jump_around_label == NULL_RTX)
24360 jump_around_label = gen_label_rtx ();
24361 emit_cmp_and_jump_insns (count_exp,
24362 GEN_INT (size_needed),
24363 LTU, 0, counter_mode (count_exp), 1, jump_around_label);
24364 if (expected_size == -1
24365 || expected_size < (desired_align - align) / 2 + size_needed)
24366 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24367 else
24368 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24369 }
24370 }
24371 /* Ensure that alignment prologue won't copy past end of block. */
24372 else if (size_needed > 1 || (desired_align > 1 && desired_align > align))
24373 {
24374 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
24375 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
24376 Make sure it is power of 2. */
24377 epilogue_size_needed = 1 << (floor_log2 (epilogue_size_needed) + 1);
24378
24379 /* To improve performance on small blocks, we jump around the code
24380 promoting VAL. This means that if the promoted VAL is not constant,
24381 we might not use it in the epilogue and have to use the byte
24382 loop variant. */
24383 if (issetmem && epilogue_size_needed > 2 && !promoted_val)
24384 force_loopy_epilogue = true;
24385 if ((count && count < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24386 || max_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24387 {
24388 /* If main algorithm works on QImode, no epilogue is needed.
24389 For small sizes just don't align anything. */
24390 if (size_needed == 1)
24391 desired_align = align;
24392 else
24393 goto epilogue;
24394 }
24395 else if (!count
24396 && min_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24397 {
24398 label = gen_label_rtx ();
24399 emit_cmp_and_jump_insns (count_exp,
24400 GEN_INT (epilogue_size_needed),
24401 LTU, 0, counter_mode (count_exp), 1, label);
24402 if (expected_size == -1 || expected_size < epilogue_size_needed)
24403 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24404 else
24405 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24406 }
24407 }
24408
24409 /* Emit code to decide on runtime whether library call or inline should be
24410 used. */
24411 if (dynamic_check != -1)
24412 {
24413 if (!issetmem && CONST_INT_P (count_exp))
24414 {
24415 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
24416 {
24417 emit_block_move_via_libcall (dst, src, count_exp, false);
24418 count_exp = const0_rtx;
24419 goto epilogue;
24420 }
24421 }
24422 else
24423 {
24424 rtx hot_label = gen_label_rtx ();
24425 if (jump_around_label == NULL_RTX)
24426 jump_around_label = gen_label_rtx ();
24427 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
24428 LEU, 0, counter_mode (count_exp),
24429 1, hot_label);
24430 predict_jump (REG_BR_PROB_BASE * 90 / 100);
24431 if (issetmem)
24432 set_storage_via_libcall (dst, count_exp, val_exp, false);
24433 else
24434 emit_block_move_via_libcall (dst, src, count_exp, false);
24435 emit_jump (jump_around_label);
24436 emit_label (hot_label);
24437 }
24438 }
24439
24440 /* Step 2: Alignment prologue. */
24441 /* Do the expensive promotion once we branched off the small blocks. */
24442 if (issetmem && !promoted_val)
24443 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
24444 desired_align, align);
24445
24446 if (desired_align > align && !misaligned_prologue_used)
24447 {
24448 if (align_bytes == 0)
24449 {
24450 /* Except for the first move in the prologue, we no longer know
24451 the constant offset in aliasing info. It doesn't seem worth
24452 the pain to maintain it for the first move, so throw away
24453 the info early. */
24454 dst = change_address (dst, BLKmode, destreg);
24455 if (!issetmem)
24456 src = change_address (src, BLKmode, srcreg);
24457 dst = expand_set_or_movmem_prologue (dst, src, destreg, srcreg,
24458 promoted_val, vec_promoted_val,
24459 count_exp, align, desired_align,
24460 issetmem);
24461 /* At most desired_align - align bytes are copied. */
24462 if (min_size < (unsigned)(desired_align - align))
24463 min_size = 0;
24464 else
24465 min_size -= desired_align - align;
24466 }
24467 else
24468 {
24469 /* If we know how many bytes need to be stored before dst is
24470 sufficiently aligned, maintain aliasing info accurately. */
24471 dst = expand_set_or_movmem_constant_prologue (dst, &src, destreg,
24472 srcreg,
24473 promoted_val,
24474 vec_promoted_val,
24475 desired_align,
24476 align_bytes,
24477 issetmem);
24478
24479 count_exp = plus_constant (counter_mode (count_exp),
24480 count_exp, -align_bytes);
24481 count -= align_bytes;
24482 min_size -= align_bytes;
24483 max_size -= align_bytes;
24484 }
24485 if (need_zero_guard
24486 && !min_size
24487 && (count < (unsigned HOST_WIDE_INT) size_needed
24488 || (align_bytes == 0
24489 && count < ((unsigned HOST_WIDE_INT) size_needed
24490 + desired_align - align))))
24491 {
24492 /* It is possible that we copied enough so the main loop will not
24493 execute. */
24494 gcc_assert (size_needed > 1);
24495 if (label == NULL_RTX)
24496 label = gen_label_rtx ();
24497 emit_cmp_and_jump_insns (count_exp,
24498 GEN_INT (size_needed),
24499 LTU, 0, counter_mode (count_exp), 1, label);
24500 if (expected_size == -1
24501 || expected_size < (desired_align - align) / 2 + size_needed)
24502 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24503 else
24504 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24505 }
24506 }
24507 if (label && size_needed == 1)
24508 {
24509 emit_label (label);
24510 LABEL_NUSES (label) = 1;
24511 label = NULL;
24512 epilogue_size_needed = 1;
24513 if (issetmem)
24514 promoted_val = val_exp;
24515 }
24516 else if (label == NULL_RTX && !misaligned_prologue_used)
24517 epilogue_size_needed = size_needed;
24518
24519 /* Step 3: Main loop. */
24520
24521 switch (alg)
24522 {
24523 case libcall:
24524 case no_stringop:
24525 case last_alg:
24526 gcc_unreachable ();
24527 case loop_1_byte:
24528 case loop:
24529 case unrolled_loop:
24530 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, promoted_val,
24531 count_exp, move_mode, unroll_factor,
24532 expected_size, issetmem);
24533 break;
24534 case vector_loop:
24535 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg,
24536 vec_promoted_val, count_exp, move_mode,
24537 unroll_factor, expected_size, issetmem);
24538 break;
24539 case rep_prefix_8_byte:
24540 case rep_prefix_4_byte:
24541 case rep_prefix_1_byte:
24542 expand_set_or_movmem_via_rep (dst, src, destreg, srcreg, promoted_val,
24543 val_exp, count_exp, move_mode, issetmem);
24544 break;
24545 }
24546 /* Adjust properly the offset of src and dest memory for aliasing. */
24547 if (CONST_INT_P (count_exp))
24548 {
24549 if (!issetmem)
24550 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
24551 (count / size_needed) * size_needed);
24552 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
24553 (count / size_needed) * size_needed);
24554 }
24555 else
24556 {
24557 if (!issetmem)
24558 src = change_address (src, BLKmode, srcreg);
24559 dst = change_address (dst, BLKmode, destreg);
24560 }
24561
24562 /* Step 4: Epilogue to copy the remaining bytes. */
24563 epilogue:
24564 if (label)
24565 {
24566 /* When the main loop is done, COUNT_EXP might hold original count,
24567 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
24568 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
24569 bytes. Compensate if needed. */
24570
24571 if (size_needed < epilogue_size_needed)
24572 {
24573 tmp =
24574 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
24575 GEN_INT (size_needed - 1), count_exp, 1,
24576 OPTAB_DIRECT);
24577 if (tmp != count_exp)
24578 emit_move_insn (count_exp, tmp);
24579 }
24580 emit_label (label);
24581 LABEL_NUSES (label) = 1;
24582 }
24583
24584 if (count_exp != const0_rtx && epilogue_size_needed > 1)
24585 {
24586 if (force_loopy_epilogue)
24587 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
24588 epilogue_size_needed);
24589 else
24590 {
24591 if (issetmem)
24592 expand_setmem_epilogue (dst, destreg, promoted_val,
24593 vec_promoted_val, count_exp,
24594 epilogue_size_needed);
24595 else
24596 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
24597 epilogue_size_needed);
24598 }
24599 }
24600 if (jump_around_label)
24601 emit_label (jump_around_label);
24602 return true;
24603 }
24604
24605
24606 /* Expand the appropriate insns for doing strlen if not just doing
24607 repnz; scasb
24608
24609 out = result, initialized with the start address
24610 align_rtx = alignment of the address.
24611 scratch = scratch register, initialized with the start address when
24612 not aligned, otherwise undefined
24613
24614 This is just the body. It needs the initializations mentioned above and
24615 some address computation at the end. These things are done in i386.md. */
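/* Illustrative C-level sketch of what this emits (names invented; a sketch
only, the real expansion is unrolled and branch-optimized):

while (((uintptr_t) out & 3) != 0) // check 1..3 bytes until aligned
{
if (*out == 0) goto end;
out++;
}
for (;;) // then scan 4 bytes at a time
{
unsigned int w = *(unsigned int *) out;
out += 4;
if (((w - 0x01010101) & ~w & 0x80808080) != 0)
break; // some byte of w is zero
}
// out is now 4 past the word with the zero; the tail code below
// steps it back onto the terminating zero without a branch.
end: ; */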
24616
24617 static void
24618 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
24619 {
24620 int align;
24621 rtx tmp;
24622 rtx align_2_label = NULL_RTX;
24623 rtx align_3_label = NULL_RTX;
24624 rtx align_4_label = gen_label_rtx ();
24625 rtx end_0_label = gen_label_rtx ();
24626 rtx mem;
24627 rtx tmpreg = gen_reg_rtx (SImode);
24628 rtx scratch = gen_reg_rtx (SImode);
24629 rtx cmp;
24630
24631 align = 0;
24632 if (CONST_INT_P (align_rtx))
24633 align = INTVAL (align_rtx);
24634
24635 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
24636
24637 /* Is there a known alignment and is it less than 4? */
24638 if (align < 4)
24639 {
24640 rtx scratch1 = gen_reg_rtx (Pmode);
24641 emit_move_insn (scratch1, out);
24642 /* Is there a known alignment and is it not 2? */
24643 if (align != 2)
24644 {
24645 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
24646 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
24647
24648 /* Leave just the 3 lower bits. */
24649 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
24650 NULL_RTX, 0, OPTAB_WIDEN);
24651
24652 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
24653 Pmode, 1, align_4_label);
24654 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
24655 Pmode, 1, align_2_label);
24656 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
24657 Pmode, 1, align_3_label);
24658 }
24659 else
24660 {
24661 /* Since the alignment is 2, we have to check 2 or 0 bytes;
24662 check whether it is aligned to 4 bytes. */
24663
24664 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
24665 NULL_RTX, 0, OPTAB_WIDEN);
24666
24667 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
24668 Pmode, 1, align_4_label);
24669 }
24670
24671 mem = change_address (src, QImode, out);
24672
24673 /* Now compare the bytes. */
24674
24675 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
24676 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
24677 QImode, 1, end_0_label);
24678
24679 /* Increment the address. */
24680 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
24681
24682 /* Not needed with an alignment of 2 */
24683 if (align != 2)
24684 {
24685 emit_label (align_2_label);
24686
24687 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
24688 end_0_label);
24689
24690 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
24691
24692 emit_label (align_3_label);
24693 }
24694
24695 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
24696 end_0_label);
24697
24698 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
24699 }
24700
24701 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
24702 align this loop: it only makes the program larger and does not help
24703 to speed it up. */
24704 emit_label (align_4_label);
24705
24706 mem = change_address (src, SImode, out);
24707 emit_move_insn (scratch, mem);
24708 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
24709
24710 /* This formula yields a nonzero result iff one of the bytes is zero.
24711 This saves three branches inside the loop and many cycles. */
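/* Concretely (illustrative numbers): for scratch == 0x11002233 the code
below computes (x - 0x01010101) & ~x & 0x80808080
= 0x0FFF2132 & 0xEEFFDDCC & 0x80808080 = 0x00800000, nonzero precisely
because the second-highest byte of x is zero. When no byte is zero, no
borrow propagates between bytes, so each byte either keeps its top bit
clear after the subtraction or has it cleared by ~x, giving 0. */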
24712
24713 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
24714 emit_insn (gen_one_cmplsi2 (scratch, scratch));
24715 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
24716 emit_insn (gen_andsi3 (tmpreg, tmpreg,
24717 gen_int_mode (0x80808080, SImode)));
24718 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
24719 align_4_label);
24720
24721 if (TARGET_CMOVE)
24722 {
24723 rtx reg = gen_reg_rtx (SImode);
24724 rtx reg2 = gen_reg_rtx (Pmode);
24725 emit_move_insn (reg, tmpreg);
24726 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
24727
24728 /* If zero is not in the first two bytes, move two bytes forward. */
24729 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
24730 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24731 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
24732 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
24733 gen_rtx_IF_THEN_ELSE (SImode, tmp,
24734 reg,
24735 tmpreg)));
24736 /* Emit lea manually to avoid clobbering of flags. */
24737 emit_insn (gen_rtx_SET (SImode, reg2,
24738 gen_rtx_PLUS (Pmode, out, const2_rtx)));
24739
24740 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24741 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
24742 emit_insn (gen_rtx_SET (VOIDmode, out,
24743 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
24744 reg2,
24745 out)));
24746 }
24747 else
24748 {
24749 rtx end_2_label = gen_label_rtx ();
24750 /* Is zero in the first two bytes? */
24751
24752 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
24753 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24754 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
24755 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
24756 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
24757 pc_rtx);
24758 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
24759 JUMP_LABEL (tmp) = end_2_label;
24760
24761 /* Not in the first two. Move two bytes forward. */
24762 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
24763 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
24764
24765 emit_label (end_2_label);
24766
24767 }
24768
24769 /* Avoid branch in fixing the byte. */
24770 tmpreg = gen_lowpart (QImode, tmpreg);
24771 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
24772 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
24773 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
24774 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
24775
24776 emit_label (end_0_label);
24777 }
24778
24779 /* Expand strlen. */
24780
24781 bool
24782 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
24783 {
24784 rtx addr, scratch1, scratch2, scratch3, scratch4;
24785
24786 /* The generic case of the strlen expander is long. Avoid
24787 expanding it unless TARGET_INLINE_ALL_STRINGOPS. */
24788
24789 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
24790 && !TARGET_INLINE_ALL_STRINGOPS
24791 && !optimize_insn_for_size_p ()
24792 && (!CONST_INT_P (align) || INTVAL (align) < 4))
24793 return false;
24794
24795 addr = force_reg (Pmode, XEXP (src, 0));
24796 scratch1 = gen_reg_rtx (Pmode);
24797
24798 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
24799 && !optimize_insn_for_size_p ())
24800 {
24801 /* Well, it seems that some optimizer does not combine a call like
24802 foo(strlen(bar), strlen(bar));
24803 when the move and the subtraction are done here. It does calculate
24804 the length just once when these instructions are done inside
24805 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
24806 often used and I use one fewer register for the lifetime of
24807 output_strlen_unroll(), this is better. */
24808
24809 emit_move_insn (out, addr);
24810
24811 ix86_expand_strlensi_unroll_1 (out, src, align);
24812
24813 /* strlensi_unroll_1 returns the address of the zero at the end of
24814 the string, like memchr(), so compute the length by subtracting
24815 the start address. */
24816 emit_insn (ix86_gen_sub3 (out, out, addr));
24817 }
24818 else
24819 {
24820 rtx unspec;
24821
24822 /* Can't use this if the user has appropriated eax, ecx, or edi. */
24823 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
24824 return false;
24825
24826 scratch2 = gen_reg_rtx (Pmode);
24827 scratch3 = gen_reg_rtx (Pmode);
24828 scratch4 = force_reg (Pmode, constm1_rtx);
24829
24830 emit_move_insn (scratch3, addr);
24831 eoschar = force_reg (QImode, eoschar);
24832
24833 src = replace_equiv_address_nv (src, scratch3);
24834
24835 /* If .md starts supporting :P, this can be done in .md. */
24836 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
24837 scratch4), UNSPEC_SCAS);
24838 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
24839 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
24840 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
24841 }
24842 return true;
24843 }
24844
24845 /* For a given symbol (function), construct code to compute the address of its
24846 PLT entry in the large x86-64 PIC model. */
24847 static rtx
24848 construct_plt_address (rtx symbol)
24849 {
24850 rtx tmp, unspec;
24851
24852 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
24853 gcc_assert (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF);
24854 gcc_assert (Pmode == DImode);
24855
24856 tmp = gen_reg_rtx (Pmode);
24857 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
24858
24859 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
24860 emit_insn (ix86_gen_add3 (tmp, tmp, pic_offset_table_rtx));
24861 return tmp;
24862 }
24863
24864 rtx
24865 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
24866 rtx callarg2,
24867 rtx pop, bool sibcall)
24868 {
24869 unsigned int const cregs_size
24870 = ARRAY_SIZE (x86_64_ms_sysv_extra_clobbered_registers);
24871 rtx vec[3 + cregs_size];
24872 rtx use = NULL, call;
24873 unsigned int vec_len = 0;
24874
24875 if (pop == const0_rtx)
24876 pop = NULL;
24877 gcc_assert (!TARGET_64BIT || !pop);
24878
24879 if (TARGET_MACHO && !TARGET_64BIT)
24880 {
24881 #if TARGET_MACHO
24882 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
24883 fnaddr = machopic_indirect_call_target (fnaddr);
24884 #endif
24885 }
24886 else
24887 {
24888 /* Static functions and indirect calls don't need the pic register. */
24889 if (flag_pic
24890 && (!TARGET_64BIT
24891 || (ix86_cmodel == CM_LARGE_PIC
24892 && DEFAULT_ABI != MS_ABI))
24893 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
24894 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
24895 use_reg (&use, pic_offset_table_rtx);
24896 }
24897
24898 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
24899 {
24900 rtx al = gen_rtx_REG (QImode, AX_REG);
24901 emit_move_insn (al, callarg2);
24902 use_reg (&use, al);
24903 }
24904
24905 if (ix86_cmodel == CM_LARGE_PIC
24906 && !TARGET_PECOFF
24907 && MEM_P (fnaddr)
24908 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
24909 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
24910 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
24911 else if (sibcall
24912 ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
24913 : !call_insn_operand (XEXP (fnaddr, 0), word_mode))
24914 {
24915 fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1);
24916 fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr));
24917 }
24918
24919 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
24920 if (retval)
24921 call = gen_rtx_SET (VOIDmode, retval, call);
24922 vec[vec_len++] = call;
24923
24924 if (pop)
24925 {
24926 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
24927 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
24928 vec[vec_len++] = pop;
24929 }
24930
24931 if (TARGET_64BIT_MS_ABI
24932 && (!callarg2 || INTVAL (callarg2) != -2))
24933 {
24934 unsigned i;
24935
24936 vec[vec_len++] = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
24937 UNSPEC_MS_TO_SYSV_CALL);
24938
24939 for (i = 0; i < cregs_size; i++)
24940 {
24941 int regno = x86_64_ms_sysv_extra_clobbered_registers[i];
24942 enum machine_mode mode = SSE_REGNO_P (regno) ? TImode : DImode;
24943
24944 vec[vec_len++]
24945 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (mode, regno));
24946 }
24947 }
24948
24949 if (vec_len > 1)
24950 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
24951 call = emit_call_insn (call);
24952 if (use)
24953 CALL_INSN_FUNCTION_USAGE (call) = use;
24954
24955 return call;
24956 }
24957
24958 /* Output the assembly for a call instruction. */
24959
24960 const char *
24961 ix86_output_call_insn (rtx insn, rtx call_op)
24962 {
24963 bool direct_p = constant_call_address_operand (call_op, VOIDmode);
24964 bool seh_nop_p = false;
24965 const char *xasm;
24966
24967 if (SIBLING_CALL_P (insn))
24968 {
24969 if (direct_p)
24970 xasm = "jmp\t%P0";
24971 /* SEH epilogue detection requires the indirect branch case
24972 to include REX.W. */
24973 else if (TARGET_SEH)
24974 xasm = "rex.W jmp %A0";
24975 else
24976 xasm = "jmp\t%A0";
24977
24978 output_asm_insn (xasm, &call_op);
24979 return "";
24980 }
24981
24982 /* SEH unwinding can require an extra nop to be emitted in several
24983 circumstances. Determine if we have one of those. */
24984 if (TARGET_SEH)
24985 {
24986 rtx i;
24987
24988 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
24989 {
24990 /* If we get to another real insn, we don't need the nop. */
24991 if (INSN_P (i))
24992 break;
24993
24994 /* If we get to the epilogue note, prevent a catch region from
24995 being adjacent to the standard epilogue sequence. With
24996 -fnon-call-exceptions, we'll have done this during epilogue emission. */
24997 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
24998 && !flag_non_call_exceptions
24999 && !can_throw_internal (insn))
25000 {
25001 seh_nop_p = true;
25002 break;
25003 }
25004 }
25005
25006 /* If we didn't find a real insn following the call, prevent the
25007 unwinder from looking into the next function. */
25008 if (i == NULL)
25009 seh_nop_p = true;
25010 }
25011
25012 if (direct_p)
25013 xasm = "call\t%P0";
25014 else
25015 xasm = "call\t%A0";
25016
25017 output_asm_insn (xasm, &call_op);
25018
25019 if (seh_nop_p)
25020 return "nop";
25021
25022 return "";
25023 }
25024 \f
25025 /* Clear stack slot assignments remembered from previous functions.
25026 This is called from INIT_EXPANDERS once before RTL is emitted for each
25027 function. */
25028
25029 static struct machine_function *
25030 ix86_init_machine_status (void)
25031 {
25032 struct machine_function *f;
25033
25034 f = ggc_cleared_alloc<machine_function> ();
25035 f->use_fast_prologue_epilogue_nregs = -1;
25036 f->call_abi = ix86_abi;
25037
25038 return f;
25039 }
25040
25041 /* Return a MEM corresponding to a stack slot with mode MODE.
25042 Allocate a new slot if necessary.
25043
25044 The RTL for a function can have several slots available: N is
25045 which slot to use. */
25046
25047 rtx
25048 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
25049 {
25050 struct stack_local_entry *s;
25051
25052 gcc_assert (n < MAX_386_STACK_LOCALS);
25053
25054 for (s = ix86_stack_locals; s; s = s->next)
25055 if (s->mode == mode && s->n == n)
25056 return validize_mem (copy_rtx (s->rtl));
25057
25058 s = ggc_alloc<stack_local_entry> ();
25059 s->n = n;
25060 s->mode = mode;
25061 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
25062
25063 s->next = ix86_stack_locals;
25064 ix86_stack_locals = s;
25065 return validize_mem (copy_rtx (s->rtl));
25066 }
25067
25068 static void
25069 ix86_instantiate_decls (void)
25070 {
25071 struct stack_local_entry *s;
25072
25073 for (s = ix86_stack_locals; s; s = s->next)
25074 if (s->rtl != NULL_RTX)
25075 instantiate_decl_rtl (s->rtl);
25076 }
25077 \f
25078 /* Check whether x86 address PARTS is a pc-relative address. */
25079
25080 static bool
25081 rip_relative_addr_p (struct ix86_address *parts)
25082 {
25083 rtx base, index, disp;
25084
25085 base = parts->base;
25086 index = parts->index;
25087 disp = parts->disp;
25088
25089 if (disp && !base && !index)
25090 {
25091 if (TARGET_64BIT)
25092 {
25093 rtx symbol = disp;
25094
25095 if (GET_CODE (disp) == CONST)
25096 symbol = XEXP (disp, 0);
25097 if (GET_CODE (symbol) == PLUS
25098 && CONST_INT_P (XEXP (symbol, 1)))
25099 symbol = XEXP (symbol, 0);
25100
25101 if (GET_CODE (symbol) == LABEL_REF
25102 || (GET_CODE (symbol) == SYMBOL_REF
25103 && SYMBOL_REF_TLS_MODEL (symbol) == 0)
25104 || (GET_CODE (symbol) == UNSPEC
25105 && (XINT (symbol, 1) == UNSPEC_GOTPCREL
25106 || XINT (symbol, 1) == UNSPEC_PCREL
25107 || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
25108 return true;
25109 }
25110 }
25111 return false;
25112 }
25113
25114 /* Calculate the length of the memory address in the instruction encoding.
25115 Includes addr32 prefix, does not include the one-byte modrm, opcode,
25116 or other prefixes. We never generate addr32 prefix for LEA insn. */
25117
25118 int
25119 memory_address_length (rtx addr, bool lea)
25120 {
25121 struct ix86_address parts;
25122 rtx base, index, disp;
25123 int len;
25124 int ok;
25125
25126 if (GET_CODE (addr) == PRE_DEC
25127 || GET_CODE (addr) == POST_INC
25128 || GET_CODE (addr) == PRE_MODIFY
25129 || GET_CODE (addr) == POST_MODIFY)
25130 return 0;
25131
25132 ok = ix86_decompose_address (addr, &parts);
25133 gcc_assert (ok);
25134
25135 len = (parts.seg == SEG_DEFAULT) ? 0 : 1;
25136
25137 /* If this is not an LEA instruction, add the length of the addr32 prefix. */
25138 if (TARGET_64BIT && !lea
25139 && (SImode_address_operand (addr, VOIDmode)
25140 || (parts.base && GET_MODE (parts.base) == SImode)
25141 || (parts.index && GET_MODE (parts.index) == SImode)))
25142 len++;
25143
25144 base = parts.base;
25145 index = parts.index;
25146 disp = parts.disp;
25147
25148 if (base && GET_CODE (base) == SUBREG)
25149 base = SUBREG_REG (base);
25150 if (index && GET_CODE (index) == SUBREG)
25151 index = SUBREG_REG (index);
25152
25153 gcc_assert (base == NULL_RTX || REG_P (base));
25154 gcc_assert (index == NULL_RTX || REG_P (index));
25155
25156 /* Rule of thumb:
25157 - esp as the base always wants an index,
25158 - ebp as the base always wants a displacement,
25159 - r12 as the base always wants an index,
25160 - r13 as the base always wants a displacement. */
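/* For illustration (extra bytes beyond the modrm byte, 32-bit registers
   assumed): "(%eax)" needs nothing extra, "(%esp)" needs a SIB byte,
   "(%ebp)" needs a zero disp8, and "8(%eax,%ebx,4)" needs a SIB byte
   plus a disp8 - which is what the checks below count. */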
25161
25162 /* Register Indirect. */
25163 if (base && !index && !disp)
25164 {
25165 /* esp (for its index) and ebp (for its displacement) need
25166 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
25167 code. */
25168 if (base == arg_pointer_rtx
25169 || base == frame_pointer_rtx
25170 || REGNO (base) == SP_REG
25171 || REGNO (base) == BP_REG
25172 || REGNO (base) == R12_REG
25173 || REGNO (base) == R13_REG)
25174 len++;
25175 }
25176
25177 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
25178 is not disp32, but disp32(%rip), so for disp32
25179 SIB byte is needed, unless print_operand_address
25180 optimizes it into disp32(%rip) or (%rip) is implied
25181 by UNSPEC. */
25182 else if (disp && !base && !index)
25183 {
25184 len += 4;
25185 if (rip_relative_addr_p (&parts))
25186 len++;
25187 }
25188 else
25189 {
25190 /* Find the length of the displacement constant. */
25191 if (disp)
25192 {
25193 if (base && satisfies_constraint_K (disp))
25194 len += 1;
25195 else
25196 len += 4;
25197 }
25198 /* ebp always wants a displacement. Similarly r13. */
25199 else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
25200 len++;
25201
25202 /* An index requires the two-byte modrm form.... */
25203 if (index
25204 /* ...like esp (or r12), which always wants an index. */
25205 || base == arg_pointer_rtx
25206 || base == frame_pointer_rtx
25207 || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
25208 len++;
25209 }
25210
25211 return len;
25212 }
25213
25214 /* Compute the default value for the "length_immediate" attribute. When
25215 SHORTFORM is set, expect that the insn has an 8-bit immediate alternative. */
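/* E.g. "add $3, %eax" fits the sign-extended imm8 form (1 byte of
   immediate), while "add $1000, %eax" needs a full imm32 (4 bytes);
   DImode immediates still count as 4 bytes since they are encoded as
   32-bit sign-extended values. */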
25216 int
25217 ix86_attr_length_immediate_default (rtx insn, bool shortform)
25218 {
25219 int len = 0;
25220 int i;
25221 extract_insn_cached (insn);
25222 for (i = recog_data.n_operands - 1; i >= 0; --i)
25223 if (CONSTANT_P (recog_data.operand[i]))
25224 {
25225 enum attr_mode mode = get_attr_mode (insn);
25226
25227 gcc_assert (!len);
25228 if (shortform && CONST_INT_P (recog_data.operand[i]))
25229 {
25230 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
25231 switch (mode)
25232 {
25233 case MODE_QI:
25234 len = 1;
25235 continue;
25236 case MODE_HI:
25237 ival = trunc_int_for_mode (ival, HImode);
25238 break;
25239 case MODE_SI:
25240 ival = trunc_int_for_mode (ival, SImode);
25241 break;
25242 default:
25243 break;
25244 }
25245 if (IN_RANGE (ival, -128, 127))
25246 {
25247 len = 1;
25248 continue;
25249 }
25250 }
25251 switch (mode)
25252 {
25253 case MODE_QI:
25254 len = 1;
25255 break;
25256 case MODE_HI:
25257 len = 2;
25258 break;
25259 case MODE_SI:
25260 len = 4;
25261 break;
25262 /* Immediates for DImode instructions are encoded
25263 as 32-bit sign-extended values. */
25264 case MODE_DI:
25265 len = 4;
25266 break;
25267 default:
25268 fatal_insn ("unknown insn mode", insn);
25269 }
25270 }
25271 return len;
25272 }
25273
25274 /* Compute default value for "length_address" attribute. */
25275 int
25276 ix86_attr_length_address_default (rtx insn)
25277 {
25278 int i;
25279
25280 if (get_attr_type (insn) == TYPE_LEA)
25281 {
25282 rtx set = PATTERN (insn), addr;
25283
25284 if (GET_CODE (set) == PARALLEL)
25285 set = XVECEXP (set, 0, 0);
25286
25287 gcc_assert (GET_CODE (set) == SET);
25288
25289 addr = SET_SRC (set);
25290
25291 return memory_address_length (addr, true);
25292 }
25293
25294 extract_insn_cached (insn);
25295 for (i = recog_data.n_operands - 1; i >= 0; --i)
25296 if (MEM_P (recog_data.operand[i]))
25297 {
25298 constrain_operands_cached (reload_completed);
25299 if (which_alternative != -1)
25300 {
25301 const char *constraints = recog_data.constraints[i];
25302 int alt = which_alternative;
25303
25304 while (*constraints == '=' || *constraints == '+')
25305 constraints++;
25306 while (alt-- > 0)
25307 while (*constraints++ != ',')
25308 ;
25309 /* Skip ignored operands. */
25310 if (*constraints == 'X')
25311 continue;
25312 }
25313 return memory_address_length (XEXP (recog_data.operand[i], 0), false);
25314 }
25315 return 0;
25316 }
25317
25318 /* Compute the default value for the "length_vex" attribute. It includes
25319 the 2- or 3-byte VEX prefix and 1 opcode byte. */
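/* E.g. "vaddps %xmm1, %xmm2, %xmm3" fits the 2-byte VEX prefix, so the
   value returned is 2 + 1 = 3; a DImode general-register operand (VEX.W)
   or a memory operand mentioning %r8-%r15 (REX.X/REX.B) forces the
   3-byte prefix, giving 3 + 1 = 4. */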
25320
25321 int
25322 ix86_attr_length_vex_default (rtx insn, bool has_0f_opcode, bool has_vex_w)
25323 {
25324 int i;
25325
25326 /* Only the 0f opcode map can use the 2-byte VEX prefix; the VEX.W bit
25327 requires the 3-byte VEX prefix. */
25328 if (!has_0f_opcode || has_vex_w)
25329 return 3 + 1;
25330
25331 /* We can always use the 2-byte VEX prefix in 32-bit mode. */
25332 if (!TARGET_64BIT)
25333 return 2 + 1;
25334
25335 extract_insn_cached (insn);
25336
25337 for (i = recog_data.n_operands - 1; i >= 0; --i)
25338 if (REG_P (recog_data.operand[i]))
25339 {
25340 /* REX.W bit uses 3 byte VEX prefix. */
25341 if (GET_MODE (recog_data.operand[i]) == DImode
25342 && GENERAL_REG_P (recog_data.operand[i]))
25343 return 3 + 1;
25344 }
25345 else
25346 {
25347 /* REX.X or REX.B bits use 3 byte VEX prefix. */
25348 if (MEM_P (recog_data.operand[i])
25349 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
25350 return 3 + 1;
25351 }
25352
25353 return 2 + 1;
25354 }
25355 \f
25356 /* Return the maximum number of instructions a cpu can issue. */
25357
25358 static int
25359 ix86_issue_rate (void)
25360 {
25361 switch (ix86_tune)
25362 {
25363 case PROCESSOR_PENTIUM:
25364 case PROCESSOR_BONNELL:
25365 case PROCESSOR_SILVERMONT:
25366 case PROCESSOR_INTEL:
25367 case PROCESSOR_K6:
25368 case PROCESSOR_BTVER2:
25369 case PROCESSOR_PENTIUM4:
25370 case PROCESSOR_NOCONA:
25371 return 2;
25372
25373 case PROCESSOR_PENTIUMPRO:
25374 case PROCESSOR_ATHLON:
25375 case PROCESSOR_K8:
25376 case PROCESSOR_AMDFAM10:
25377 case PROCESSOR_GENERIC:
25378 case PROCESSOR_BTVER1:
25379 return 3;
25380
25381 case PROCESSOR_BDVER1:
25382 case PROCESSOR_BDVER2:
25383 case PROCESSOR_BDVER3:
25384 case PROCESSOR_BDVER4:
25385 case PROCESSOR_CORE2:
25386 case PROCESSOR_NEHALEM:
25387 case PROCESSOR_SANDYBRIDGE:
25388 case PROCESSOR_HASWELL:
25389 return 4;
25390
25391 default:
25392 return 1;
25393 }
25394 }
25395
25396 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads the flags
25397 set by DEP_INSN and nothing else set by DEP_INSN. */
25398
25399 static bool
25400 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
25401 {
25402 rtx set, set2;
25403
25404 /* Simplify the test for uninteresting insns. */
25405 if (insn_type != TYPE_SETCC
25406 && insn_type != TYPE_ICMOV
25407 && insn_type != TYPE_FCMOV
25408 && insn_type != TYPE_IBR)
25409 return false;
25410
25411 if ((set = single_set (dep_insn)) != 0)
25412 {
25413 set = SET_DEST (set);
25414 set2 = NULL_RTX;
25415 }
25416 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
25417 && XVECLEN (PATTERN (dep_insn), 0) == 2
25418 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
25419 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
25420 {
25421 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
25422 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
25423 }
25424 else
25425 return false;
25426
25427 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
25428 return false;
25429
25430 /* This test is true if the dependent insn reads the flags but
25431 not any other potentially set register. */
25432 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
25433 return false;
25434
25435 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
25436 return false;
25437
25438 return true;
25439 }
25440
25441 /* Return true iff USE_INSN has a memory address with operands set by
25442 SET_INSN. */
25443
25444 bool
25445 ix86_agi_dependent (rtx set_insn, rtx use_insn)
25446 {
25447 int i;
25448 extract_insn_cached (use_insn);
25449 for (i = recog_data.n_operands - 1; i >= 0; --i)
25450 if (MEM_P (recog_data.operand[i]))
25451 {
25452 rtx addr = XEXP (recog_data.operand[i], 0);
25453 return modified_in_p (addr, set_insn) != 0;
25454 }
25455 return false;
25456 }
25457
25458 /* Helper function for exact_store_load_dependency.
25459 Return true if addr is found in insn. */
25460 static bool
25461 exact_dependency_1 (rtx addr, rtx insn)
25462 {
25463 enum rtx_code code;
25464 const char *format_ptr;
25465 int i, j;
25466
25467 code = GET_CODE (insn);
25468 switch (code)
25469 {
25470 case MEM:
25471 if (rtx_equal_p (addr, insn))
25472 return true;
25473 break;
25474 case REG:
25475 CASE_CONST_ANY:
25476 case SYMBOL_REF:
25477 case CODE_LABEL:
25478 case PC:
25479 case CC0:
25480 case EXPR_LIST:
25481 return false;
25482 default:
25483 break;
25484 }
25485
25486 format_ptr = GET_RTX_FORMAT (code);
25487 for (i = 0; i < GET_RTX_LENGTH (code); i++)
25488 {
25489 switch (*format_ptr++)
25490 {
25491 case 'e':
25492 if (exact_dependency_1 (addr, XEXP (insn, i)))
25493 return true;
25494 break;
25495 case 'E':
25496 for (j = 0; j < XVECLEN (insn, i); j++)
25497 if (exact_dependency_1 (addr, XVECEXP (insn, i, j)))
25498 return true;
25499 break;
25500 }
25501 }
25502 return false;
25503 }
25504
25505 /* Return true if there exists exact dependency for store & load, i.e.
25506 the same memory address is used in them. */
25507 static bool
25508 exact_store_load_dependency (rtx store, rtx load)
25509 {
25510 rtx set1, set2;
25511
25512 set1 = single_set (store);
25513 if (!set1)
25514 return false;
25515 if (!MEM_P (SET_DEST (set1)))
25516 return false;
25517 set2 = single_set (load);
25518 if (!set2)
25519 return false;
25520 if (exact_dependency_1 (SET_DEST (set1), SET_SRC (set2)))
25521 return true;
25522 return false;
25523 }
25524
25525 static int
25526 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
25527 {
25528 enum attr_type insn_type, dep_insn_type;
25529 enum attr_memory memory;
25530 rtx set, set2;
25531 int dep_insn_code_number;
25532
25533 /* Anti and output dependencies have zero cost on all CPUs. */
25534 if (REG_NOTE_KIND (link) != 0)
25535 return 0;
25536
25537 dep_insn_code_number = recog_memoized (dep_insn);
25538
25539 /* If we can't recognize the insns, we can't really do anything. */
25540 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
25541 return cost;
25542
25543 insn_type = get_attr_type (insn);
25544 dep_insn_type = get_attr_type (dep_insn);
25545
25546 switch (ix86_tune)
25547 {
25548 case PROCESSOR_PENTIUM:
25549 /* Address Generation Interlock adds a cycle of latency. */
25550 if (insn_type == TYPE_LEA)
25551 {
25552 rtx addr = PATTERN (insn);
25553
25554 if (GET_CODE (addr) == PARALLEL)
25555 addr = XVECEXP (addr, 0, 0);
25556
25557 gcc_assert (GET_CODE (addr) == SET);
25558
25559 addr = SET_SRC (addr);
25560 if (modified_in_p (addr, dep_insn))
25561 cost += 1;
25562 }
25563 else if (ix86_agi_dependent (dep_insn, insn))
25564 cost += 1;
25565
25566 /* ??? Compares pair with jump/setcc. */
25567 if (ix86_flags_dependent (insn, dep_insn, insn_type))
25568 cost = 0;
25569
25570 /* Floating point stores require value to be ready one cycle earlier. */
25571 if (insn_type == TYPE_FMOV
25572 && get_attr_memory (insn) == MEMORY_STORE
25573 && !ix86_agi_dependent (dep_insn, insn))
25574 cost += 1;
25575 break;
25576
25577 case PROCESSOR_PENTIUMPRO:
25578 /* INT->FP conversion is expensive. */
25579 if (get_attr_fp_int_src (dep_insn))
25580 cost += 5;
25581
25582 /* There is one cycle extra latency between an FP op and a store. */
25583 if (insn_type == TYPE_FMOV
25584 && (set = single_set (dep_insn)) != NULL_RTX
25585 && (set2 = single_set (insn)) != NULL_RTX
25586 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
25587 && MEM_P (SET_DEST (set2)))
25588 cost += 1;
25589
25590 memory = get_attr_memory (insn);
25591
25592 /* Show ability of reorder buffer to hide latency of load by executing
25593 in parallel with previous instruction in case
25594 previous instruction is not needed to compute the address. */
25595 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25596 && !ix86_agi_dependent (dep_insn, insn))
25597 {
25598 /* Claim moves to take one cycle, as the core can issue one load
25599 at a time and the next load can start a cycle later. */
25600 if (dep_insn_type == TYPE_IMOV
25601 || dep_insn_type == TYPE_FMOV)
25602 cost = 1;
25603 else if (cost > 1)
25604 cost--;
25605 }
25606 break;
25607
25608 case PROCESSOR_K6:
25609 /* The esp dependency is resolved before
25610 the instruction is really finished. */
25611 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
25612 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
25613 return 1;
25614
25615 /* INT->FP conversion is expensive. */
25616 if (get_attr_fp_int_src (dep_insn))
25617 cost += 5;
25618
25619 memory = get_attr_memory (insn);
25620
25621 /* Show ability of reorder buffer to hide latency of load by executing
25622 in parallel with previous instruction in case
25623 previous instruction is not needed to compute the address. */
25624 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25625 && !ix86_agi_dependent (dep_insn, insn))
25626 {
25627 /* Claim moves to take one cycle, as the core can issue one load
25628 at a time and the next load can start a cycle later. */
25629 if (dep_insn_type == TYPE_IMOV
25630 || dep_insn_type == TYPE_FMOV)
25631 cost = 1;
25632 else if (cost > 2)
25633 cost -= 2;
25634 else
25635 cost = 1;
25636 }
25637 break;
25638
25639 case PROCESSOR_AMDFAM10:
25640 case PROCESSOR_BDVER1:
25641 case PROCESSOR_BDVER2:
25642 case PROCESSOR_BDVER3:
25643 case PROCESSOR_BDVER4:
25644 case PROCESSOR_BTVER1:
25645 case PROCESSOR_BTVER2:
25646 case PROCESSOR_GENERIC:
25647 /* The stack engine allows push&pop instructions to execute in parallel. */
25648 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
25649 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
25650 return 0;
25651 /* FALLTHRU */
25652
25653 case PROCESSOR_ATHLON:
25654 case PROCESSOR_K8:
25655 memory = get_attr_memory (insn);
25656
25657 /* Show ability of reorder buffer to hide latency of load by executing
25658 in parallel with previous instruction in case
25659 previous instruction is not needed to compute the address. */
25660 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25661 && !ix86_agi_dependent (dep_insn, insn))
25662 {
25663 enum attr_unit unit = get_attr_unit (insn);
25664 int loadcost = 3;
25665
25666 /* Because of the difference between the length of integer and
25667 floating unit pipeline preparation stages, the memory operands
25668 for floating point are cheaper.
25669
25670 ??? For Athlon the difference is most probably 2. */
25671 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
25672 loadcost = 3;
25673 else
25674 loadcost = TARGET_ATHLON ? 2 : 0;
25675
25676 if (cost >= loadcost)
25677 cost -= loadcost;
25678 else
25679 cost = 0;
25680 }
25681 break;
25682
25683 case PROCESSOR_CORE2:
25684 case PROCESSOR_NEHALEM:
25685 case PROCESSOR_SANDYBRIDGE:
25686 case PROCESSOR_HASWELL:
25687 /* The stack engine allows push&pop instructions to execute in parallel. */
25688 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
25689 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
25690 return 0;
25691
25692 memory = get_attr_memory (insn);
25693
25694 /* Show ability of reorder buffer to hide latency of load by executing
25695 in parallel with previous instruction in case
25696 previous instruction is not needed to compute the address. */
25697 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25698 && !ix86_agi_dependent (dep_insn, insn))
25699 {
25700 if (cost >= 4)
25701 cost -= 4;
25702 else
25703 cost = 0;
25704 }
25705 break;
25706
25707 case PROCESSOR_SILVERMONT:
25708 case PROCESSOR_INTEL:
25709 if (!reload_completed)
25710 return cost;
25711
25712 /* Increase cost of integer loads. */
25713 memory = get_attr_memory (dep_insn);
25714 if (memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25715 {
25716 enum attr_unit unit = get_attr_unit (dep_insn);
25717 if (unit == UNIT_INTEGER && cost == 1)
25718 {
25719 if (memory == MEMORY_LOAD)
25720 cost = 3;
25721 else
25722 {
25723 /* Increase the cost of ld/st for short int types only
25724 because of the store-forwarding issue. */
25725 rtx set = single_set (dep_insn);
25726 if (set && (GET_MODE (SET_DEST (set)) == QImode
25727 || GET_MODE (SET_DEST (set)) == HImode))
25728 {
25729 /* Increase cost of store/load insn if exact
25730 dependence exists and it is load insn. */
25731 enum attr_memory insn_memory = get_attr_memory (insn);
25732 if (insn_memory == MEMORY_LOAD
25733 && exact_store_load_dependency (dep_insn, insn))
25734 cost = 3;
25735 }
25736 }
25737 }
25738 }
25739
25740 default:
25741 break;
25742 }
25743
25744 return cost;
25745 }
25746
25747 /* How many alternative schedules to try. This should be as wide as the
25748 scheduling freedom in the DFA, but no wider. Making this value too
25749 large results in extra work for the scheduler. */
25750
25751 static int
25752 ia32_multipass_dfa_lookahead (void)
25753 {
25754 switch (ix86_tune)
25755 {
25756 case PROCESSOR_PENTIUM:
25757 return 2;
25758
25759 case PROCESSOR_PENTIUMPRO:
25760 case PROCESSOR_K6:
25761 return 1;
25762
25763 case PROCESSOR_BDVER1:
25764 case PROCESSOR_BDVER2:
25765 case PROCESSOR_BDVER3:
25766 case PROCESSOR_BDVER4:
25767 /* We use a lookahead value of 4 for BD both before and after reload
25768 scheduling. The plan is to include a value of 8 for -O3. */
25769 return 4;
25770
25771 case PROCESSOR_CORE2:
25772 case PROCESSOR_NEHALEM:
25773 case PROCESSOR_SANDYBRIDGE:
25774 case PROCESSOR_HASWELL:
25775 case PROCESSOR_BONNELL:
25776 case PROCESSOR_SILVERMONT:
25777 case PROCESSOR_INTEL:
25778 /* Generally, we want haifa-sched:max_issue() to look ahead as far
25779 as the number of instructions that can be executed in a cycle, i.e.,
25780 issue_rate. I wonder why tuning for many CPUs does not do this. */
25781 if (reload_completed)
25782 return ix86_issue_rate ();
25783 /* Don't use lookahead for pre-reload schedule to save compile time. */
25784 return 0;
25785
25786 default:
25787 return 0;
25788 }
25789 }
25790
25791 /* Return true if target platform supports macro-fusion. */
25792
25793 static bool
25794 ix86_macro_fusion_p ()
25795 {
25796 return TARGET_FUSE_CMP_AND_BRANCH;
25797 }
25798
25799 /* Check whether the current microarchitecture supports macro fusion
25800 for the insn pair "CONDGEN + CONDJMP". Refer to the
25801 "Intel Architectures Optimization Reference Manual". */
25802
25803 static bool
25804 ix86_macro_fusion_pair_p (rtx condgen, rtx condjmp)
25805 {
25806 rtx src, dest;
25807 rtx condgen_set = single_set (condgen);
25808 enum rtx_code ccode;
25809 rtx compare_set = NULL_RTX, test_if, cond;
25810 rtx alu_set = NULL_RTX, addr = NULL_RTX;
25811
25812 if (!any_condjump_p (condjmp))
25813 return false;
25814
25815 if (get_attr_type (condgen) != TYPE_TEST
25816 && get_attr_type (condgen) != TYPE_ICMP
25817 && get_attr_type (condgen) != TYPE_INCDEC
25818 && get_attr_type (condgen) != TYPE_ALU)
25819 return false;
25820
25821 if (condgen_set == NULL_RTX
25822 && !TARGET_FUSE_ALU_AND_BRANCH)
25823 return false;
25824
25825 if (condgen_set != NULL_RTX)
25826 compare_set = condgen_set;
25827 else
25828 {
25829 int i;
25830 rtx pat = PATTERN (condgen);
25831 for (i = 0; i < XVECLEN (pat, 0); i++)
25832 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
25833 {
25834 rtx set_src = SET_SRC (XVECEXP (pat, 0, i));
25835 if (GET_CODE (set_src) == COMPARE)
25836 compare_set = XVECEXP (pat, 0, i);
25837 else
25838 alu_set = XVECEXP (pat, 0, i);
25839 }
25840 }
25841 if (compare_set == NULL_RTX)
25842 return false;
25843 src = SET_SRC (compare_set);
25844 if (GET_CODE (src) != COMPARE)
25845 return false;
25846
25847 /* Macro-fusion for cmp/test MEM-IMM + conditional jmp is not
25848 supported. */
25849 if ((MEM_P (XEXP (src, 0))
25850 && CONST_INT_P (XEXP (src, 1)))
25851 || (MEM_P (XEXP (src, 1))
25852 && CONST_INT_P (XEXP (src, 0))))
25853 return false;
25854
25855 /* No fusion for RIP-relative address. */
25856 if (MEM_P (XEXP (src, 0)))
25857 addr = XEXP (XEXP (src, 0), 0);
25858 else if (MEM_P (XEXP (src, 1)))
25859 addr = XEXP (XEXP (src, 1), 0);
25860
25861 if (addr) {
25862 ix86_address parts;
25863 int ok = ix86_decompose_address (addr, &parts);
25864 gcc_assert (ok);
25865
25866 if (rip_relative_addr_p (&parts))
25867 return false;
25868 }
25869
25870 test_if = SET_SRC (pc_set (condjmp));
25871 cond = XEXP (test_if, 0);
25872 ccode = GET_CODE (cond);
25873 /* Check whether the conditional jump uses the Sign or Overflow flags. */
25874 if (!TARGET_FUSE_CMP_AND_BRANCH_SOFLAGS
25875 && (ccode == GE
25876 || ccode == GT
25877 || ccode == LE
25878 || ccode == LT))
25879 return false;
25880
25881 /* Return true for TYPE_TEST and TYPE_ICMP. */
25882 if (get_attr_type (condgen) == TYPE_TEST
25883 || get_attr_type (condgen) == TYPE_ICMP)
25884 return true;
25885
25886 /* The following handles the case of macro-fusion for alu + jmp. */
25887 if (!TARGET_FUSE_ALU_AND_BRANCH || !alu_set)
25888 return false;
25889
25890 /* No fusion for alu op with memory destination operand. */
25891 dest = SET_DEST (alu_set);
25892 if (MEM_P (dest))
25893 return false;
25894
25895 /* Macro-fusion for inc/dec + unsigned conditional jump is not
25896 supported. */
25897 if (get_attr_type (condgen) == TYPE_INCDEC
25898 && (ccode == GEU
25899 || ccode == GTU
25900 || ccode == LEU
25901 || ccode == LTU))
25902 return false;
25903
25904 return true;
25905 }
25906
25907 /* Try to reorder the ready list to take advantage of Atom's pipelined IMUL
25908 execution. It is applied if
25909 (1) an IMUL instruction is at the top of the list;
25910 (2) there is exactly one producer of an independent IMUL instruction in
25911 the ready list.
25912 Return the index of the IMUL producer if it was found, and -1 otherwise. */
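/* Illustrative scenario: if the ready list ends with { ..., load r3,
   imul r1 } and the load feeds a second, independent IMUL that is not
   yet ready, moving the load to the top lets the two IMULs overlap in
   Atom's pipelined multiplier. */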
25913 static int
25914 do_reorder_for_imul (rtx *ready, int n_ready)
25915 {
25916 rtx insn, set, insn1, insn2;
25917 sd_iterator_def sd_it;
25918 dep_t dep;
25919 int index = -1;
25920 int i;
25921
25922 if (!TARGET_BONNELL)
25923 return index;
25924
25925 /* Check that IMUL instruction is on the top of ready list. */
25926 insn = ready[n_ready - 1];
25927 set = single_set (insn);
25928 if (!set)
25929 return index;
25930 if (!(GET_CODE (SET_SRC (set)) == MULT
25931 && GET_MODE (SET_SRC (set)) == SImode))
25932 return index;
25933
25934 /* Search for producer of independent IMUL instruction. */
25935 for (i = n_ready - 2; i >= 0; i--)
25936 {
25937 insn = ready[i];
25938 if (!NONDEBUG_INSN_P (insn))
25939 continue;
25940 /* Skip IMUL instruction. */
25941 insn2 = PATTERN (insn);
25942 if (GET_CODE (insn2) == PARALLEL)
25943 insn2 = XVECEXP (insn2, 0, 0);
25944 if (GET_CODE (insn2) == SET
25945 && GET_CODE (SET_SRC (insn2)) == MULT
25946 && GET_MODE (SET_SRC (insn2)) == SImode)
25947 continue;
25948
25949 FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
25950 {
25951 rtx con;
25952 con = DEP_CON (dep);
25953 if (!NONDEBUG_INSN_P (con))
25954 continue;
25955 insn1 = PATTERN (con);
25956 if (GET_CODE (insn1) == PARALLEL)
25957 insn1 = XVECEXP (insn1, 0, 0);
25958
25959 if (GET_CODE (insn1) == SET
25960 && GET_CODE (SET_SRC (insn1)) == MULT
25961 && GET_MODE (SET_SRC (insn1)) == SImode)
25962 {
25963 sd_iterator_def sd_it1;
25964 dep_t dep1;
25965 /* Check if there is no other dependee for IMUL. */
25966 index = i;
25967 FOR_EACH_DEP (con, SD_LIST_BACK, sd_it1, dep1)
25968 {
25969 rtx pro;
25970 pro = DEP_PRO (dep1);
25971 if (!NONDEBUG_INSN_P (pro))
25972 continue;
25973 if (pro != insn)
25974 index = -1;
25975 }
25976 if (index >= 0)
25977 break;
25978 }
25979 }
25980 if (index >= 0)
25981 break;
25982 }
25983 return index;
25984 }
25985
25986 /* Try to find the best candidate for the top of the ready list if two insns
25987 have the same priority - a candidate is best if its dependees were
25988 scheduled earlier. Applied to Silvermont only.
25989 Return true if the top 2 insns must be interchanged. */
25990 static bool
25991 swap_top_of_ready_list (rtx *ready, int n_ready)
25992 {
25993 rtx top = ready[n_ready - 1];
25994 rtx next = ready[n_ready - 2];
25995 rtx set;
25996 sd_iterator_def sd_it;
25997 dep_t dep;
25998 int clock1 = -1;
25999 int clock2 = -1;
26000 #define INSN_TICK(INSN) (HID (INSN)->tick)
26001
26002 if (!TARGET_SILVERMONT && !TARGET_INTEL)
26003 return false;
26004
26005 if (!NONDEBUG_INSN_P (top))
26006 return false;
26007 if (!NONJUMP_INSN_P (top))
26008 return false;
26009 if (!NONDEBUG_INSN_P (next))
26010 return false;
26011 if (!NONJUMP_INSN_P (next))
26012 return false;
26013 set = single_set (top);
26014 if (!set)
26015 return false;
26016 set = single_set (next);
26017 if (!set)
26018 return false;
26019
26020 if (INSN_PRIORITY_KNOWN (top) && INSN_PRIORITY_KNOWN (next))
26021 {
26022 if (INSN_PRIORITY (top) != INSN_PRIORITY (next))
26023 return false;
26024 /* Determine the winner more precisely. */
26025 FOR_EACH_DEP (top, SD_LIST_RES_BACK, sd_it, dep)
26026 {
26027 rtx pro;
26028 pro = DEP_PRO (dep);
26029 if (!NONDEBUG_INSN_P (pro))
26030 continue;
26031 if (INSN_TICK (pro) > clock1)
26032 clock1 = INSN_TICK (pro);
26033 }
26034 FOR_EACH_DEP (next, SD_LIST_RES_BACK, sd_it, dep)
26035 {
26036 rtx pro;
26037 pro = DEP_PRO (dep);
26038 if (!NONDEBUG_INSN_P (pro))
26039 continue;
26040 if (INSN_TICK (pro) > clock2)
26041 clock2 = INSN_TICK (pro);
26042 }
26043
26044 if (clock1 == clock2)
26045 {
26046 /* Determine winner - load must win. */
26047 enum attr_memory memory1, memory2;
26048 memory1 = get_attr_memory (top);
26049 memory2 = get_attr_memory (next);
26050 if (memory2 == MEMORY_LOAD && memory1 != MEMORY_LOAD)
26051 return true;
26052 }
26053 return (bool) (clock2 < clock1);
26054 }
26055 return false;
26056 #undef INSN_TICK
26057 }
26058
26059 /* Perform possible reordering of the ready list, for Atom/Silvermont only.
26060 Return the issue rate. */
26061 static int
26062 ix86_sched_reorder (FILE *dump, int sched_verbose, rtx *ready, int *pn_ready,
26063 int clock_var)
26064 {
26065 int issue_rate = -1;
26066 int n_ready = *pn_ready;
26067 int i;
26068 rtx insn;
26069 int index = -1;
26070
26071 /* Set up issue rate. */
26072 issue_rate = ix86_issue_rate ();
26073
26074 /* Do reordering for BONNELL/SILVERMONT only. */
26075 if (!TARGET_BONNELL && !TARGET_SILVERMONT && !TARGET_INTEL)
26076 return issue_rate;
26077
26078 /* Nothing to do if ready list contains only 1 instruction. */
26079 if (n_ready <= 1)
26080 return issue_rate;
26081
26082 /* Do reordering for the post-reload scheduler only. */
26083 if (!reload_completed)
26084 return issue_rate;
26085
26086 if ((index = do_reorder_for_imul (ready, n_ready)) >= 0)
26087 {
26088 if (sched_verbose > 1)
26089 fprintf (dump, ";;\tatom sched_reorder: put %d insn on top\n",
26090 INSN_UID (ready[index]));
26091
26092 /* Put IMUL producer (ready[index]) at the top of ready list. */
26093 insn = ready[index];
26094 for (i = index; i < n_ready - 1; i++)
26095 ready[i] = ready[i + 1];
26096 ready[n_ready - 1] = insn;
26097 return issue_rate;
26098 }
26099 if (clock_var != 0 && swap_top_of_ready_list (ready, n_ready))
26100 {
26101 if (sched_verbose > 1)
26102 fprintf (dump, ";;\tslm sched_reorder: swap %d and %d insns\n",
26103 INSN_UID (ready[n_ready - 1]), INSN_UID (ready[n_ready - 2]));
26104 /* Swap 2 top elements of ready list. */
26105 insn = ready[n_ready - 1];
26106 ready[n_ready - 1] = ready[n_ready - 2];
26107 ready[n_ready - 2] = insn;
26108 }
26109 return issue_rate;
26110 }
26111
26112 static bool
26113 ix86_class_likely_spilled_p (reg_class_t);
26114
26115 /* Return true if the LHS of INSN is a HW function argument register; set
26116 IS_SPILLED to true if it is a likely spilled HW register. */
26117 static bool
26118 insn_is_function_arg (rtx insn, bool* is_spilled)
26119 {
26120 rtx dst;
26121
26122 if (!NONDEBUG_INSN_P (insn))
26123 return false;
26124 /* Call instructions are not movable; ignore them. */
26125 if (CALL_P (insn))
26126 return false;
26127 insn = PATTERN (insn);
26128 if (GET_CODE (insn) == PARALLEL)
26129 insn = XVECEXP (insn, 0, 0);
26130 if (GET_CODE (insn) != SET)
26131 return false;
26132 dst = SET_DEST (insn);
26133 if (REG_P (dst) && HARD_REGISTER_P (dst)
26134 && ix86_function_arg_regno_p (REGNO (dst)))
26135 {
26136 /* Is it likely spilled HW register? */
26137 if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
26138 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
26139 *is_spilled = true;
26140 return true;
26141 }
26142 return false;
26143 }
26144
26145 /* Add output dependencies for the chain of adjacent function arguments, but
26146 only if there is a move to a likely spilled HW register. Return the first
26147 argument if at least one dependence was added, or NULL otherwise. */
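/* Hypothetical sketch: for "mov $1, %edi; mov $2, %esi; call foo" the
   argument moves target likely spilled hard registers, so output
   dependencies are added between them (and later from other insns to
   the first argument) to keep the scheduler from moving unrelated code
   in between. */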
26148 static rtx
26149 add_parameter_dependencies (rtx call, rtx head)
26150 {
26151 rtx insn;
26152 rtx last = call;
26153 rtx first_arg = NULL;
26154 bool is_spilled = false;
26155
26156 head = PREV_INSN (head);
26157
26158 /* Find the argument-passing instruction nearest to the call. */
26159 while (true)
26160 {
26161 last = PREV_INSN (last);
26162 if (last == head)
26163 return NULL;
26164 if (!NONDEBUG_INSN_P (last))
26165 continue;
26166 if (insn_is_function_arg (last, &is_spilled))
26167 break;
26168 return NULL;
26169 }
26170
26171 first_arg = last;
26172 while (true)
26173 {
26174 insn = PREV_INSN (last);
26175 if (!INSN_P (insn))
26176 break;
26177 if (insn == head)
26178 break;
26179 if (!NONDEBUG_INSN_P (insn))
26180 {
26181 last = insn;
26182 continue;
26183 }
26184 if (insn_is_function_arg (insn, &is_spilled))
26185 {
26186 /* Add an output dependence between two function arguments if the chain
26187 of output arguments contains likely spilled HW registers. */
26188 if (is_spilled)
26189 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26190 first_arg = last = insn;
26191 }
26192 else
26193 break;
26194 }
26195 if (!is_spilled)
26196 return NULL;
26197 return first_arg;
26198 }
26199
26200 /* Add output or anti dependency from insn to first_arg to restrict its code
26201 motion. */
26202 static void
26203 avoid_func_arg_motion (rtx first_arg, rtx insn)
26204 {
26205 rtx set;
26206 rtx tmp;
26207
26208 set = single_set (insn);
26209 if (!set)
26210 return;
26211 tmp = SET_DEST (set);
26212 if (REG_P (tmp))
26213 {
26214 /* Add output dependency to the first function argument. */
26215 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26216 return;
26217 }
26218 /* Add anti dependency. */
26219 add_dependence (first_arg, insn, REG_DEP_ANTI);
26220 }
26221
26222 /* Avoid cross-block motion of a function argument by adding a dependency
26223 from the first non-jump instruction in BB. */
26224 static void
26225 add_dependee_for_func_arg (rtx arg, basic_block bb)
26226 {
26227 rtx insn = BB_END (bb);
26228
26229 while (insn)
26230 {
26231 if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
26232 {
26233 rtx set = single_set (insn);
26234 if (set)
26235 {
26236 avoid_func_arg_motion (arg, insn);
26237 return;
26238 }
26239 }
26240 if (insn == BB_HEAD (bb))
26241 return;
26242 insn = PREV_INSN (insn);
26243 }
26244 }
26245
26246 /* Hook for pre-reload schedule - avoid motion of function arguments
26247 passed in likely spilled HW registers. */
26248 static void
26249 ix86_dependencies_evaluation_hook (rtx head, rtx tail)
26250 {
26251 rtx insn;
26252 rtx first_arg = NULL;
26253 if (reload_completed)
26254 return;
26255 while (head != tail && DEBUG_INSN_P (head))
26256 head = NEXT_INSN (head);
26257 for (insn = tail; insn != head; insn = PREV_INSN (insn))
26258 if (INSN_P (insn) && CALL_P (insn))
26259 {
26260 first_arg = add_parameter_dependencies (insn, head);
26261 if (first_arg)
26262 {
26263 /* Add a dependee for the first argument to predecessors, but only
26264 if the region contains more than one block. */
26265 basic_block bb = BLOCK_FOR_INSN (insn);
26266 int rgn = CONTAINING_RGN (bb->index);
26267 int nr_blks = RGN_NR_BLOCKS (rgn);
26268 /* Skip trivial regions and region head blocks that can have
26269 predecessors outside of region. */
26270 if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
26271 {
26272 edge e;
26273 edge_iterator ei;
26274
26275 /* Regions are SCCs with the exception of selective
26276 scheduling with pipelining of outer blocks enabled.
26277 So also check that immediate predecessors of a non-head
26278 block are in the same region. */
26279 FOR_EACH_EDGE (e, ei, bb->preds)
26280 {
26281 /* Avoid creating loop-carried dependencies by
26282 using the topological ordering in the region. */
26283 if (rgn == CONTAINING_RGN (e->src->index)
26284 && BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
26285 add_dependee_for_func_arg (first_arg, e->src);
26286 }
26287 }
26288 insn = first_arg;
26289 if (insn == head)
26290 break;
26291 }
26292 }
26293 else if (first_arg)
26294 avoid_func_arg_motion (first_arg, insn);
26295 }
26296
26297 /* Hook for the pre-reload scheduler - set the priority of moves from likely
26298 spilled HW registers to the maximum, to schedule them as soon as possible.
26299 These are moves from function argument registers at the top of the function
26300 entry and moves from function return value registers after a call. */
26301 static int
26302 ix86_adjust_priority (rtx insn, int priority)
26303 {
26304 rtx set;
26305
26306 if (reload_completed)
26307 return priority;
26308
26309 if (!NONDEBUG_INSN_P (insn))
26310 return priority;
26311
26312 set = single_set (insn);
26313 if (set)
26314 {
26315 rtx tmp = SET_SRC (set);
26316 if (REG_P (tmp)
26317 && HARD_REGISTER_P (tmp)
26318 && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
26319 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
26320 return current_sched_info->sched_max_insns_priority;
26321 }
26322
26323 return priority;
26324 }
26325
26326 /* Model decoder of Core 2/i7.
26327 Below hooks for multipass scheduling (see haifa-sched.c:max_issue)
26328 track the instruction fetch block boundaries and make sure that long
26329 (9+ bytes) instructions are assigned to D0. */
26330
26331 /* Maximum length of an insn that can be handled by
26332 a secondary decoder unit. '8' for Core 2/i7. */
26333 static int core2i7_secondary_decoder_max_insn_size;
26334
26335 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
26336 '16' for Core 2/i7. */
26337 static int core2i7_ifetch_block_size;
26338
26339 /* Maximum number of instructions decoder can handle per cycle.
26340 '6' for Core 2/i7. */
26341 static int core2i7_ifetch_block_max_insns;
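/* Illustrative arithmetic with the Core 2/i7 values set below (16-byte
   fetch block, at most 6 insns, 8-byte limit for the secondary
   decoders): four insns of 5 + 5 + 4 + 3 bytes total 17 bytes, so the
   fourth one is deferred to the next cycle, and a 9+ byte insn can only
   be handled by the first (D0) decoder. */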
26342
26343 typedef struct ix86_first_cycle_multipass_data_ *
26344 ix86_first_cycle_multipass_data_t;
26345 typedef const struct ix86_first_cycle_multipass_data_ *
26346 const_ix86_first_cycle_multipass_data_t;
26347
26348 /* A variable to store target state across calls to max_issue within
26349 one cycle. */
26350 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
26351 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
26352
26353 /* Initialize DATA. */
26354 static void
26355 core2i7_first_cycle_multipass_init (void *_data)
26356 {
26357 ix86_first_cycle_multipass_data_t data
26358 = (ix86_first_cycle_multipass_data_t) _data;
26359
26360 data->ifetch_block_len = 0;
26361 data->ifetch_block_n_insns = 0;
26362 data->ready_try_change = NULL;
26363 data->ready_try_change_size = 0;
26364 }
26365
26366 /* Advancing the cycle; reset ifetch block counts. */
26367 static void
26368 core2i7_dfa_post_advance_cycle (void)
26369 {
26370 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
26371
26372 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
26373
26374 data->ifetch_block_len = 0;
26375 data->ifetch_block_n_insns = 0;
26376 }
26377
26378 static int min_insn_size (rtx);
26379
26380 /* Filter out insns from ready_try that the core will not be able to issue
26381 on current cycle due to decoder. */
26382 static void
26383 core2i7_first_cycle_multipass_filter_ready_try
26384 (const_ix86_first_cycle_multipass_data_t data,
26385 signed char *ready_try, int n_ready, bool first_cycle_insn_p)
26386 {
26387 while (n_ready--)
26388 {
26389 rtx insn;
26390 int insn_size;
26391
26392 if (ready_try[n_ready])
26393 continue;
26394
26395 insn = get_ready_element (n_ready);
26396 insn_size = min_insn_size (insn);
26397
26398 if (/* If this insn is too long for a secondary decoder ... */
26399 (!first_cycle_insn_p
26400 && insn_size > core2i7_secondary_decoder_max_insn_size)
26401 /* ... or it would not fit into the ifetch block ... */
26402 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
26403 /* ... or the decoder is full already ... */
26404 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
26405 /* ... mask the insn out. */
26406 {
26407 ready_try[n_ready] = 1;
26408
26409 if (data->ready_try_change)
26410 bitmap_set_bit (data->ready_try_change, n_ready);
26411 }
26412 }
26413 }
26414
26415 /* Prepare for a new round of multipass lookahead scheduling. */
26416 static void
26417 core2i7_first_cycle_multipass_begin (void *_data,
26418 signed char *ready_try, int n_ready,
26419 bool first_cycle_insn_p)
26420 {
26421 ix86_first_cycle_multipass_data_t data
26422 = (ix86_first_cycle_multipass_data_t) _data;
26423 const_ix86_first_cycle_multipass_data_t prev_data
26424 = ix86_first_cycle_multipass_data;
26425
26426 /* Restore the state from the end of the previous round. */
26427 data->ifetch_block_len = prev_data->ifetch_block_len;
26428 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
26429
26430 /* Filter instructions that cannot be issued on current cycle due to
26431 decoder restrictions. */
26432 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
26433 first_cycle_insn_p);
26434 }
26435
26436 /* INSN is being issued in current solution. Account for its impact on
26437 the decoder model. */
26438 static void
26439 core2i7_first_cycle_multipass_issue (void *_data,
26440 signed char *ready_try, int n_ready,
26441 rtx insn, const void *_prev_data)
26442 {
26443 ix86_first_cycle_multipass_data_t data
26444 = (ix86_first_cycle_multipass_data_t) _data;
26445 const_ix86_first_cycle_multipass_data_t prev_data
26446 = (const_ix86_first_cycle_multipass_data_t) _prev_data;
26447
26448 int insn_size = min_insn_size (insn);
26449
26450 data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
26451 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
26452 gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
26453 && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
26454
26455 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
26456 if (!data->ready_try_change)
26457 {
26458 data->ready_try_change = sbitmap_alloc (n_ready);
26459 data->ready_try_change_size = n_ready;
26460 }
26461 else if (data->ready_try_change_size < n_ready)
26462 {
26463 data->ready_try_change = sbitmap_resize (data->ready_try_change,
26464 n_ready, 0);
26465 data->ready_try_change_size = n_ready;
26466 }
26467 bitmap_clear (data->ready_try_change);
26468
26469 /* Filter out insns from ready_try that the core will not be able to issue
26470 on current cycle due to decoder. */
26471 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
26472 false);
26473 }
26474
26475 /* Revert the effect on ready_try. */
26476 static void
26477 core2i7_first_cycle_multipass_backtrack (const void *_data,
26478 signed char *ready_try,
26479 int n_ready ATTRIBUTE_UNUSED)
26480 {
26481 const_ix86_first_cycle_multipass_data_t data
26482 = (const_ix86_first_cycle_multipass_data_t) _data;
26483 unsigned int i = 0;
26484 sbitmap_iterator sbi;
26485
26486 gcc_assert (bitmap_last_set_bit (data->ready_try_change) < n_ready);
26487 EXECUTE_IF_SET_IN_BITMAP (data->ready_try_change, 0, i, sbi)
26488 {
26489 ready_try[i] = 0;
26490 }
26491 }
26492
26493 /* Save the result of multipass lookahead scheduling for the next round. */
26494 static void
26495 core2i7_first_cycle_multipass_end (const void *_data)
26496 {
26497 const_ix86_first_cycle_multipass_data_t data
26498 = (const_ix86_first_cycle_multipass_data_t) _data;
26499 ix86_first_cycle_multipass_data_t next_data
26500 = ix86_first_cycle_multipass_data;
26501
26502 if (data != NULL)
26503 {
26504 next_data->ifetch_block_len = data->ifetch_block_len;
26505 next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
26506 }
26507 }
26508
26509 /* Deallocate target data. */
26510 static void
26511 core2i7_first_cycle_multipass_fini (void *_data)
26512 {
26513 ix86_first_cycle_multipass_data_t data
26514 = (ix86_first_cycle_multipass_data_t) _data;
26515
26516 if (data->ready_try_change)
26517 {
26518 sbitmap_free (data->ready_try_change);
26519 data->ready_try_change = NULL;
26520 data->ready_try_change_size = 0;
26521 }
26522 }
26523
26524 /* Prepare for scheduling pass. */
26525 static void
26526 ix86_sched_init_global (FILE *, int, int)
26527 {
26528 /* Install scheduling hooks for current CPU. Some of these hooks are used
26529 in time-critical parts of the scheduler, so we only set them up when
26530 they are actually used. */
26531 switch (ix86_tune)
26532 {
26533 case PROCESSOR_CORE2:
26534 case PROCESSOR_NEHALEM:
26535 case PROCESSOR_SANDYBRIDGE:
26536 case PROCESSOR_HASWELL:
26537 /* Do not perform multipass scheduling for pre-reload schedule
26538 to save compile time. */
26539 if (reload_completed)
26540 {
26541 targetm.sched.dfa_post_advance_cycle
26542 = core2i7_dfa_post_advance_cycle;
26543 targetm.sched.first_cycle_multipass_init
26544 = core2i7_first_cycle_multipass_init;
26545 targetm.sched.first_cycle_multipass_begin
26546 = core2i7_first_cycle_multipass_begin;
26547 targetm.sched.first_cycle_multipass_issue
26548 = core2i7_first_cycle_multipass_issue;
26549 targetm.sched.first_cycle_multipass_backtrack
26550 = core2i7_first_cycle_multipass_backtrack;
26551 targetm.sched.first_cycle_multipass_end
26552 = core2i7_first_cycle_multipass_end;
26553 targetm.sched.first_cycle_multipass_fini
26554 = core2i7_first_cycle_multipass_fini;
26555
26556 /* Set decoder parameters. */
26557 core2i7_secondary_decoder_max_insn_size = 8;
26558 core2i7_ifetch_block_size = 16;
26559 core2i7_ifetch_block_max_insns = 6;
26560 break;
26561 }
26562 /* ... Fall through ... */
26563 default:
26564 targetm.sched.dfa_post_advance_cycle = NULL;
26565 targetm.sched.first_cycle_multipass_init = NULL;
26566 targetm.sched.first_cycle_multipass_begin = NULL;
26567 targetm.sched.first_cycle_multipass_issue = NULL;
26568 targetm.sched.first_cycle_multipass_backtrack = NULL;
26569 targetm.sched.first_cycle_multipass_end = NULL;
26570 targetm.sched.first_cycle_multipass_fini = NULL;
26571 break;
26572 }
26573 }
26574
26575 \f
26576 /* Compute the alignment given to a constant that is being placed in memory.
26577 EXP is the constant and ALIGN is the alignment that the object would
26578 ordinarily have.
26579 The value of this function is used instead of that alignment to align
26580 the object. */
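/* For example, a DFmode (double) constant placed in memory is aligned
   to 64 bits even if the default would be 32, and a long string
   constant is word-aligned when not optimizing for size. */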
26581
26582 int
26583 ix86_constant_alignment (tree exp, int align)
26584 {
26585 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
26586 || TREE_CODE (exp) == INTEGER_CST)
26587 {
26588 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
26589 return 64;
26590 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
26591 return 128;
26592 }
26593 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
26594 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
26595 return BITS_PER_WORD;
26596
26597 return align;
26598 }
26599
26600 /* Compute the alignment for a static variable.
26601 TYPE is the data type, and ALIGN is the alignment that
26602 the object would ordinarily have. The value of this function is used
26603 instead of that alignment to align the object. */
26604
26605 int
26606 ix86_data_alignment (tree type, int align, bool opt)
26607 {
26608 /* GCC 4.8 and earlier used to incorrectly assume this alignment even
26609 for symbols from other compilation units or symbols that don't need
26610 to bind locally. In order to preserve some ABI compatibility with
26611 those compilers, ensure we don't decrease alignment from what we
26612 used to assume. */
26613
26614 int max_align_compat
26615 = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
26616
26617 /* A data structure equal to or greater than the size of a cache line
26618 (64 bytes in the Pentium 4 and other recent Intel processors, including
26619 processors based on the Intel Core microarchitecture) should be aligned
26620 so that its base address is a multiple of the cache line size. */
26621
26622 int max_align
26623 = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);
26624
26625 if (max_align < BITS_PER_WORD)
26626 max_align = BITS_PER_WORD;
26627
26628 if (opt
26629 && AGGREGATE_TYPE_P (type)
26630 && TYPE_SIZE (type)
26631 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
26632 {
26633 if (wi::geu_p (TYPE_SIZE (type), max_align_compat)
26634 && align < max_align_compat)
26635 align = max_align_compat;
26636 if (wi::geu_p (TYPE_SIZE (type), max_align)
26637 && align < max_align)
26638 align = max_align;
26639 }
26640
26641 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
26642 to a 16-byte boundary. */
26643 if (TARGET_64BIT)
26644 {
26645 if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
26646 && TYPE_SIZE (type)
26647 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
26648 && wi::geu_p (TYPE_SIZE (type), 128)
26649 && align < 128)
26650 return 128;
26651 }
26652
26653 if (!opt)
26654 return align;
26655
26656 if (TREE_CODE (type) == ARRAY_TYPE)
26657 {
26658 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
26659 return 64;
26660 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
26661 return 128;
26662 }
26663 else if (TREE_CODE (type) == COMPLEX_TYPE)
26664 {
26665
26666 if (TYPE_MODE (type) == DCmode && align < 64)
26667 return 64;
26668 if ((TYPE_MODE (type) == XCmode
26669 || TYPE_MODE (type) == TCmode) && align < 128)
26670 return 128;
26671 }
26672 else if ((TREE_CODE (type) == RECORD_TYPE
26673 || TREE_CODE (type) == UNION_TYPE
26674 || TREE_CODE (type) == QUAL_UNION_TYPE)
26675 && TYPE_FIELDS (type))
26676 {
26677 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
26678 return 64;
26679 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
26680 return 128;
26681 }
26682 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
26683 || TREE_CODE (type) == INTEGER_TYPE)
26684 {
26685 if (TYPE_MODE (type) == DFmode && align < 64)
26686 return 64;
26687 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
26688 return 128;
26689 }
26690
26691 return align;
26692 }
26693
26694 /* Compute the alignment for a local variable or a stack slot. EXP is
26695 the data type or decl itself, MODE is the widest mode available and
26696 ALIGN is the alignment that the object would ordinarily have. The
26697 value of this macro is used instead of that alignment to align the
26698 object. */
26699
26700 unsigned int
26701 ix86_local_alignment (tree exp, enum machine_mode mode,
26702 unsigned int align)
26703 {
26704 tree type, decl;
26705
26706 if (exp && DECL_P (exp))
26707 {
26708 type = TREE_TYPE (exp);
26709 decl = exp;
26710 }
26711 else
26712 {
26713 type = exp;
26714 decl = NULL;
26715 }
26716
26717 /* Don't do dynamic stack realignment for long long objects with
26718 -mpreferred-stack-boundary=2. */
26719 if (!TARGET_64BIT
26720 && align == 64
26721 && ix86_preferred_stack_boundary < 64
26722 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
26723 && (!type || !TYPE_USER_ALIGN (type))
26724 && (!decl || !DECL_USER_ALIGN (decl)))
26725 align = 32;
26726
26727 /* If TYPE is NULL, we are allocating a stack slot for caller-save
26728 register in MODE. We will return the largest alignment of XF
26729 and DF. */
26730 if (!type)
26731 {
26732 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
26733 align = GET_MODE_ALIGNMENT (DFmode);
26734 return align;
26735 }
26736
26737 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
26738 to a 16-byte boundary. The exact wording is:
26739
26740 An array uses the same alignment as its elements, except that a local or
26741 global array variable of length at least 16 bytes or
26742 a C99 variable-length array variable always has alignment of at least 16 bytes.
26743
26744 This was added to allow the use of aligned SSE instructions on arrays. This
26745 rule is meant for static storage (where the compiler cannot do the analysis
26746 by itself). We follow it for automatic variables only when convenient.
26747 We fully control everything in the compiled function, and functions from
26748 other units cannot rely on the alignment.
26749
26750 Exclude the va_list type. It is the common case of a local array where
26751 we cannot benefit from the alignment.
26752
26753 TODO: Probably one should optimize for size only when the var is not escaping. */
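/* E.g. on x86-64, when optimizing for speed with SSE enabled, a local
   "char buf[32]" is given 128-bit alignment here so that aligned SSE
   loads/stores can be used on it, matching the ABI rule quoted above
   for arrays of 16 bytes or more. */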
26754 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
26755 && TARGET_SSE)
26756 {
26757 if (AGGREGATE_TYPE_P (type)
26758 && (va_list_type_node == NULL_TREE
26759 || (TYPE_MAIN_VARIANT (type)
26760 != TYPE_MAIN_VARIANT (va_list_type_node)))
26761 && TYPE_SIZE (type)
26762 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
26763 && wi::geu_p (TYPE_SIZE (type), 16)
26764 && align < 128)
26765 return 128;
26766 }
26767 if (TREE_CODE (type) == ARRAY_TYPE)
26768 {
26769 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
26770 return 64;
26771 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
26772 return 128;
26773 }
26774 else if (TREE_CODE (type) == COMPLEX_TYPE)
26775 {
26776 if (TYPE_MODE (type) == DCmode && align < 64)
26777 return 64;
26778 if ((TYPE_MODE (type) == XCmode
26779 || TYPE_MODE (type) == TCmode) && align < 128)
26780 return 128;
26781 }
26782 else if ((TREE_CODE (type) == RECORD_TYPE
26783 || TREE_CODE (type) == UNION_TYPE
26784 || TREE_CODE (type) == QUAL_UNION_TYPE)
26785 && TYPE_FIELDS (type))
26786 {
26787 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
26788 return 64;
26789 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
26790 return 128;
26791 }
26792 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
26793 || TREE_CODE (type) == INTEGER_TYPE)
26794 {
26795
26796 if (TYPE_MODE (type) == DFmode && align < 64)
26797 return 64;
26798 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
26799 return 128;
26800 }
26801 return align;
26802 }
26803
26804 /* Compute the minimum required alignment for dynamic stack realignment
26805 purposes for a local variable, parameter or a stack slot. EXP is
26806 the data type or decl itself, MODE is its mode and ALIGN is the
26807 alignment that the object would ordinarily have. */
26808
26809 unsigned int
26810 ix86_minimum_alignment (tree exp, enum machine_mode mode,
26811 unsigned int align)
26812 {
26813 tree type, decl;
26814
26815 if (exp && DECL_P (exp))
26816 {
26817 type = TREE_TYPE (exp);
26818 decl = exp;
26819 }
26820 else
26821 {
26822 type = exp;
26823 decl = NULL;
26824 }
26825
26826 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
26827 return align;
26828
26829 /* Don't do dynamic stack realignment for long long objects with
26830 -mpreferred-stack-boundary=2. */
26831 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
26832 && (!type || !TYPE_USER_ALIGN (type))
26833 && (!decl || !DECL_USER_ALIGN (decl)))
26834 return 32;
26835
26836 return align;
26837 }
26838 \f
26839 /* Find a location for the static chain incoming to a nested function.
26840 This is a register, unless all free registers are used by arguments. */
26841
26842 static rtx
26843 ix86_static_chain (const_tree fndecl, bool incoming_p)
26844 {
26845 unsigned regno;
26846
26847 if (!DECL_STATIC_CHAIN (fndecl))
26848 return NULL;
26849
26850 if (TARGET_64BIT)
26851 {
26852 /* We always use R10 in 64-bit mode. */
26853 regno = R10_REG;
26854 }
26855 else
26856 {
26857 tree fntype;
26858 unsigned int ccvt;
26859
26860 /* By default in 32-bit mode we use ECX to pass the static chain. */
26861 regno = CX_REG;
26862
26863 fntype = TREE_TYPE (fndecl);
26864 ccvt = ix86_get_callcvt (fntype);
26865 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
26866 {
26867 /* Fastcall functions use ecx/edx for arguments, which leaves
26868 us with EAX for the static chain.
26869 Thiscall functions use ecx for arguments, which also
26870 leaves us with EAX for the static chain. */
26871 regno = AX_REG;
26872 }
26873 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
26874 {
26875 /* Thiscall functions use ecx for arguments, which leaves
26876 us with EAX and EDX for the static chain.
26877 We use EAX for ABI compatibility. */
26878 regno = AX_REG;
26879 }
26880 else if (ix86_function_regparm (fntype, fndecl) == 3)
26881 {
26882 /* For regparm 3, we have no free call-clobbered registers in
26883 which to store the static chain. In order to implement this,
26884 we have the trampoline push the static chain to the stack.
26885 However, we can't push a value below the return address when
26886 we call the nested function directly, so we have to use an
26887 alternate entry point. For this we use ESI, and have the
26888 alternate entry point push ESI, so that things appear the
26889 same once we're executing the nested function. */
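/* Illustrative sketch of the resulting ia32 stack layout on entry to the
   nested function (inferred from the code below, stated here only as an
   editorial aid):

	arg_pointer + 0	 first stack argument (if any)
	arg_pointer - 4	 return address
	arg_pointer - 8	 static chain pushed by the trampoline, or the
			 ESI pushed on the direct-call path

   which is why the incoming static chain is loaded from a frame slot at
   arg_pointer - 8.  */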
26890 if (incoming_p)
26891 {
26892 if (fndecl == current_function_decl)
26893 ix86_static_chain_on_stack = true;
26894 return gen_frame_mem (SImode,
26895 plus_constant (Pmode,
26896 arg_pointer_rtx, -8));
26897 }
26898 regno = SI_REG;
26899 }
26900 }
26901
26902 return gen_rtx_REG (Pmode, regno);
26903 }
26904
26905 /* Emit RTL insns to initialize the variable parts of a trampoline.
26906 FNDECL is the decl of the target address; M_TRAMP is a MEM for
26907 the trampoline, and CHAIN_VALUE is an RTX for the static chain
26908 to be passed to the target function. */
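/* For orientation, an illustrative sketch (derived from the opcodes
   emitted below, not a separate specification) of the initialized
   trampoline.  64-bit, movabs form:

	49 bb <8-byte fnaddr>	movabs $fnaddr, %r11
	49 ba <8-byte chain>	movabs $chain,  %r10
	49 ff e3		jmp *%r11
	90			nop (padding)

   The shorter movl forms 41 bb / 41 ba with 4-byte immediates are used
   when ptr_mode == SImode or the address fits in 32 bits.  32-bit:

	b8/b9 <4-byte chain>	movl $chain, %eax/%ecx	(or 68: pushl $chain)
	e9 <4-byte disp>	jmp <fnaddr>  */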
26909
26910 static void
26911 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
26912 {
26913 rtx mem, fnaddr;
26914 int opcode;
26915 int offset = 0;
26916
26917 fnaddr = XEXP (DECL_RTL (fndecl), 0);
26918
26919 if (TARGET_64BIT)
26920 {
26921 int size;
26922
26923 /* Load the function address into r11. Try to load the address
26924 using the shorter movl instead of movabs. We may want to support
26925 movq for kernel mode, but the kernel does not use trampolines at
26926 the moment. FNADDR is a 32-bit address and may not be in
26927 DImode when ptr_mode == SImode; always use movl in that
26928 case. */
26929 if (ptr_mode == SImode
26930 || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
26931 {
26932 fnaddr = copy_addr_to_reg (fnaddr);
26933
26934 mem = adjust_address (m_tramp, HImode, offset);
26935 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
26936
26937 mem = adjust_address (m_tramp, SImode, offset + 2);
26938 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
26939 offset += 6;
26940 }
26941 else
26942 {
26943 mem = adjust_address (m_tramp, HImode, offset);
26944 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
26945
26946 mem = adjust_address (m_tramp, DImode, offset + 2);
26947 emit_move_insn (mem, fnaddr);
26948 offset += 10;
26949 }
26950
26951 /* Load the static chain into r10 using movabs. Use the shorter
26952 movl instead of movabs when ptr_mode == SImode. */
26953 if (ptr_mode == SImode)
26954 {
26955 opcode = 0xba41;
26956 size = 6;
26957 }
26958 else
26959 {
26960 opcode = 0xba49;
26961 size = 10;
26962 }
26963
26964 mem = adjust_address (m_tramp, HImode, offset);
26965 emit_move_insn (mem, gen_int_mode (opcode, HImode));
26966
26967 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
26968 emit_move_insn (mem, chain_value);
26969 offset += size;
26970
26971 /* Jump to r11; the last (unused) byte is a nop, only there to
26972 pad the write out to a single 32-bit store. */
26973 mem = adjust_address (m_tramp, SImode, offset);
26974 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
26975 offset += 4;
26976 }
26977 else
26978 {
26979 rtx disp, chain;
26980
26981 /* Depending on the static chain location, either load a register
26982 with a constant, or push the constant to the stack. All of the
26983 instructions are the same size. */
26984 chain = ix86_static_chain (fndecl, true);
26985 if (REG_P (chain))
26986 {
26987 switch (REGNO (chain))
26988 {
26989 case AX_REG:
26990 opcode = 0xb8; break;
26991 case CX_REG:
26992 opcode = 0xb9; break;
26993 default:
26994 gcc_unreachable ();
26995 }
26996 }
26997 else
26998 opcode = 0x68;
26999
27000 mem = adjust_address (m_tramp, QImode, offset);
27001 emit_move_insn (mem, gen_int_mode (opcode, QImode));
27002
27003 mem = adjust_address (m_tramp, SImode, offset + 1);
27004 emit_move_insn (mem, chain_value);
27005 offset += 5;
27006
27007 mem = adjust_address (m_tramp, QImode, offset);
27008 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
27009
27010 mem = adjust_address (m_tramp, SImode, offset + 1);
27011
27012 /* Compute offset from the end of the jmp to the target function.
27013 In the case in which the trampoline stores the static chain on
27014 the stack, we need to skip the first insn which pushes the
27015 (call-saved) register static chain; this push is 1 byte. */
27016 offset += 5;
27017 disp = expand_binop (SImode, sub_optab, fnaddr,
27018 plus_constant (Pmode, XEXP (m_tramp, 0),
27019 offset - (MEM_P (chain) ? 1 : 0)),
27020 NULL_RTX, 1, OPTAB_DIRECT);
27021 emit_move_insn (mem, disp);
27022 }
27023
27024 gcc_assert (offset <= TRAMPOLINE_SIZE);
27025
27026 #ifdef HAVE_ENABLE_EXECUTE_STACK
27027 #ifdef CHECK_EXECUTE_STACK_ENABLED
27028 if (CHECK_EXECUTE_STACK_ENABLED)
27029 #endif
27030 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
27031 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
27032 #endif
27033 }
27034 \f
27035 /* The following file contains several enumerations and data structures
27036 built from the definitions in i386-builtin-types.def. */
27037
27038 #include "i386-builtin-types.inc"
27039
27040 /* Table for the ix86 builtin non-function types. */
27041 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
27042
27043 /* Retrieve an element from the above table, building some of
27044 the types lazily. */
27045
27046 static tree
27047 ix86_get_builtin_type (enum ix86_builtin_type tcode)
27048 {
27049 unsigned int index;
27050 tree type, itype;
27051
27052 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
27053
27054 type = ix86_builtin_type_tab[(int) tcode];
27055 if (type != NULL)
27056 return type;
27057
27058 gcc_assert (tcode > IX86_BT_LAST_PRIM);
27059 if (tcode <= IX86_BT_LAST_VECT)
27060 {
27061 enum machine_mode mode;
27062
27063 index = tcode - IX86_BT_LAST_PRIM - 1;
27064 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
27065 mode = ix86_builtin_type_vect_mode[index];
27066
27067 type = build_vector_type_for_mode (itype, mode);
27068 }
27069 else
27070 {
27071 int quals;
27072
27073 index = tcode - IX86_BT_LAST_VECT - 1;
27074 if (tcode <= IX86_BT_LAST_PTR)
27075 quals = TYPE_UNQUALIFIED;
27076 else
27077 quals = TYPE_QUAL_CONST;
27078
27079 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
27080 if (quals != TYPE_UNQUALIFIED)
27081 itype = build_qualified_type (itype, quals);
27082
27083 type = build_pointer_type (itype);
27084 }
27085
27086 ix86_builtin_type_tab[(int) tcode] = type;
27087 return type;
27088 }
27089
27090 /* Table for the ix86 builtin function types. */
27091 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
27092
27093 /* Retrieve an element from the above table, building some of
27094 the types lazily. */
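/* Layout note (editorial, inferred from the loop below): for a
   non-alias type code, ix86_builtin_func_args stores the return type
   first and then the argument types in source order; the loop walks the
   arguments backwards so the resulting TREE_LIST comes out in the
   original order, terminated by void_list_node.  */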
27095
27096 static tree
27097 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
27098 {
27099 tree type;
27100
27101 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
27102
27103 type = ix86_builtin_func_type_tab[(int) tcode];
27104 if (type != NULL)
27105 return type;
27106
27107 if (tcode <= IX86_BT_LAST_FUNC)
27108 {
27109 unsigned start = ix86_builtin_func_start[(int) tcode];
27110 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
27111 tree rtype, atype, args = void_list_node;
27112 unsigned i;
27113
27114 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
27115 for (i = after - 1; i > start; --i)
27116 {
27117 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
27118 args = tree_cons (NULL, atype, args);
27119 }
27120
27121 type = build_function_type (rtype, args);
27122 }
27123 else
27124 {
27125 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
27126 enum ix86_builtin_func_type icode;
27127
27128 icode = ix86_builtin_func_alias_base[index];
27129 type = ix86_get_builtin_func_type (icode);
27130 }
27131
27132 ix86_builtin_func_type_tab[(int) tcode] = type;
27133 return type;
27134 }
27135
27136
27137 /* Codes for all the SSE/MMX builtins. */
27138 enum ix86_builtins
27139 {
27140 IX86_BUILTIN_ADDPS,
27141 IX86_BUILTIN_ADDSS,
27142 IX86_BUILTIN_DIVPS,
27143 IX86_BUILTIN_DIVSS,
27144 IX86_BUILTIN_MULPS,
27145 IX86_BUILTIN_MULSS,
27146 IX86_BUILTIN_SUBPS,
27147 IX86_BUILTIN_SUBSS,
27148
27149 IX86_BUILTIN_CMPEQPS,
27150 IX86_BUILTIN_CMPLTPS,
27151 IX86_BUILTIN_CMPLEPS,
27152 IX86_BUILTIN_CMPGTPS,
27153 IX86_BUILTIN_CMPGEPS,
27154 IX86_BUILTIN_CMPNEQPS,
27155 IX86_BUILTIN_CMPNLTPS,
27156 IX86_BUILTIN_CMPNLEPS,
27157 IX86_BUILTIN_CMPNGTPS,
27158 IX86_BUILTIN_CMPNGEPS,
27159 IX86_BUILTIN_CMPORDPS,
27160 IX86_BUILTIN_CMPUNORDPS,
27161 IX86_BUILTIN_CMPEQSS,
27162 IX86_BUILTIN_CMPLTSS,
27163 IX86_BUILTIN_CMPLESS,
27164 IX86_BUILTIN_CMPNEQSS,
27165 IX86_BUILTIN_CMPNLTSS,
27166 IX86_BUILTIN_CMPNLESS,
27167 IX86_BUILTIN_CMPORDSS,
27168 IX86_BUILTIN_CMPUNORDSS,
27169
27170 IX86_BUILTIN_COMIEQSS,
27171 IX86_BUILTIN_COMILTSS,
27172 IX86_BUILTIN_COMILESS,
27173 IX86_BUILTIN_COMIGTSS,
27174 IX86_BUILTIN_COMIGESS,
27175 IX86_BUILTIN_COMINEQSS,
27176 IX86_BUILTIN_UCOMIEQSS,
27177 IX86_BUILTIN_UCOMILTSS,
27178 IX86_BUILTIN_UCOMILESS,
27179 IX86_BUILTIN_UCOMIGTSS,
27180 IX86_BUILTIN_UCOMIGESS,
27181 IX86_BUILTIN_UCOMINEQSS,
27182
27183 IX86_BUILTIN_CVTPI2PS,
27184 IX86_BUILTIN_CVTPS2PI,
27185 IX86_BUILTIN_CVTSI2SS,
27186 IX86_BUILTIN_CVTSI642SS,
27187 IX86_BUILTIN_CVTSS2SI,
27188 IX86_BUILTIN_CVTSS2SI64,
27189 IX86_BUILTIN_CVTTPS2PI,
27190 IX86_BUILTIN_CVTTSS2SI,
27191 IX86_BUILTIN_CVTTSS2SI64,
27192
27193 IX86_BUILTIN_MAXPS,
27194 IX86_BUILTIN_MAXSS,
27195 IX86_BUILTIN_MINPS,
27196 IX86_BUILTIN_MINSS,
27197
27198 IX86_BUILTIN_LOADUPS,
27199 IX86_BUILTIN_STOREUPS,
27200 IX86_BUILTIN_MOVSS,
27201
27202 IX86_BUILTIN_MOVHLPS,
27203 IX86_BUILTIN_MOVLHPS,
27204 IX86_BUILTIN_LOADHPS,
27205 IX86_BUILTIN_LOADLPS,
27206 IX86_BUILTIN_STOREHPS,
27207 IX86_BUILTIN_STORELPS,
27208
27209 IX86_BUILTIN_MASKMOVQ,
27210 IX86_BUILTIN_MOVMSKPS,
27211 IX86_BUILTIN_PMOVMSKB,
27212
27213 IX86_BUILTIN_MOVNTPS,
27214 IX86_BUILTIN_MOVNTQ,
27215
27216 IX86_BUILTIN_LOADDQU,
27217 IX86_BUILTIN_STOREDQU,
27218
27219 IX86_BUILTIN_PACKSSWB,
27220 IX86_BUILTIN_PACKSSDW,
27221 IX86_BUILTIN_PACKUSWB,
27222
27223 IX86_BUILTIN_PADDB,
27224 IX86_BUILTIN_PADDW,
27225 IX86_BUILTIN_PADDD,
27226 IX86_BUILTIN_PADDQ,
27227 IX86_BUILTIN_PADDSB,
27228 IX86_BUILTIN_PADDSW,
27229 IX86_BUILTIN_PADDUSB,
27230 IX86_BUILTIN_PADDUSW,
27231 IX86_BUILTIN_PSUBB,
27232 IX86_BUILTIN_PSUBW,
27233 IX86_BUILTIN_PSUBD,
27234 IX86_BUILTIN_PSUBQ,
27235 IX86_BUILTIN_PSUBSB,
27236 IX86_BUILTIN_PSUBSW,
27237 IX86_BUILTIN_PSUBUSB,
27238 IX86_BUILTIN_PSUBUSW,
27239
27240 IX86_BUILTIN_PAND,
27241 IX86_BUILTIN_PANDN,
27242 IX86_BUILTIN_POR,
27243 IX86_BUILTIN_PXOR,
27244
27245 IX86_BUILTIN_PAVGB,
27246 IX86_BUILTIN_PAVGW,
27247
27248 IX86_BUILTIN_PCMPEQB,
27249 IX86_BUILTIN_PCMPEQW,
27250 IX86_BUILTIN_PCMPEQD,
27251 IX86_BUILTIN_PCMPGTB,
27252 IX86_BUILTIN_PCMPGTW,
27253 IX86_BUILTIN_PCMPGTD,
27254
27255 IX86_BUILTIN_PMADDWD,
27256
27257 IX86_BUILTIN_PMAXSW,
27258 IX86_BUILTIN_PMAXUB,
27259 IX86_BUILTIN_PMINSW,
27260 IX86_BUILTIN_PMINUB,
27261
27262 IX86_BUILTIN_PMULHUW,
27263 IX86_BUILTIN_PMULHW,
27264 IX86_BUILTIN_PMULLW,
27265
27266 IX86_BUILTIN_PSADBW,
27267 IX86_BUILTIN_PSHUFW,
27268
27269 IX86_BUILTIN_PSLLW,
27270 IX86_BUILTIN_PSLLD,
27271 IX86_BUILTIN_PSLLQ,
27272 IX86_BUILTIN_PSRAW,
27273 IX86_BUILTIN_PSRAD,
27274 IX86_BUILTIN_PSRLW,
27275 IX86_BUILTIN_PSRLD,
27276 IX86_BUILTIN_PSRLQ,
27277 IX86_BUILTIN_PSLLWI,
27278 IX86_BUILTIN_PSLLDI,
27279 IX86_BUILTIN_PSLLQI,
27280 IX86_BUILTIN_PSRAWI,
27281 IX86_BUILTIN_PSRADI,
27282 IX86_BUILTIN_PSRLWI,
27283 IX86_BUILTIN_PSRLDI,
27284 IX86_BUILTIN_PSRLQI,
27285
27286 IX86_BUILTIN_PUNPCKHBW,
27287 IX86_BUILTIN_PUNPCKHWD,
27288 IX86_BUILTIN_PUNPCKHDQ,
27289 IX86_BUILTIN_PUNPCKLBW,
27290 IX86_BUILTIN_PUNPCKLWD,
27291 IX86_BUILTIN_PUNPCKLDQ,
27292
27293 IX86_BUILTIN_SHUFPS,
27294
27295 IX86_BUILTIN_RCPPS,
27296 IX86_BUILTIN_RCPSS,
27297 IX86_BUILTIN_RSQRTPS,
27298 IX86_BUILTIN_RSQRTPS_NR,
27299 IX86_BUILTIN_RSQRTSS,
27300 IX86_BUILTIN_RSQRTF,
27301 IX86_BUILTIN_SQRTPS,
27302 IX86_BUILTIN_SQRTPS_NR,
27303 IX86_BUILTIN_SQRTSS,
27304
27305 IX86_BUILTIN_UNPCKHPS,
27306 IX86_BUILTIN_UNPCKLPS,
27307
27308 IX86_BUILTIN_ANDPS,
27309 IX86_BUILTIN_ANDNPS,
27310 IX86_BUILTIN_ORPS,
27311 IX86_BUILTIN_XORPS,
27312
27313 IX86_BUILTIN_EMMS,
27314 IX86_BUILTIN_LDMXCSR,
27315 IX86_BUILTIN_STMXCSR,
27316 IX86_BUILTIN_SFENCE,
27317
27318 IX86_BUILTIN_FXSAVE,
27319 IX86_BUILTIN_FXRSTOR,
27320 IX86_BUILTIN_FXSAVE64,
27321 IX86_BUILTIN_FXRSTOR64,
27322
27323 IX86_BUILTIN_XSAVE,
27324 IX86_BUILTIN_XRSTOR,
27325 IX86_BUILTIN_XSAVE64,
27326 IX86_BUILTIN_XRSTOR64,
27327
27328 IX86_BUILTIN_XSAVEOPT,
27329 IX86_BUILTIN_XSAVEOPT64,
27330
27331 IX86_BUILTIN_XSAVEC,
27332 IX86_BUILTIN_XSAVEC64,
27333
27334 IX86_BUILTIN_XSAVES,
27335 IX86_BUILTIN_XRSTORS,
27336 IX86_BUILTIN_XSAVES64,
27337 IX86_BUILTIN_XRSTORS64,
27338
27339 /* 3DNow! Original */
27340 IX86_BUILTIN_FEMMS,
27341 IX86_BUILTIN_PAVGUSB,
27342 IX86_BUILTIN_PF2ID,
27343 IX86_BUILTIN_PFACC,
27344 IX86_BUILTIN_PFADD,
27345 IX86_BUILTIN_PFCMPEQ,
27346 IX86_BUILTIN_PFCMPGE,
27347 IX86_BUILTIN_PFCMPGT,
27348 IX86_BUILTIN_PFMAX,
27349 IX86_BUILTIN_PFMIN,
27350 IX86_BUILTIN_PFMUL,
27351 IX86_BUILTIN_PFRCP,
27352 IX86_BUILTIN_PFRCPIT1,
27353 IX86_BUILTIN_PFRCPIT2,
27354 IX86_BUILTIN_PFRSQIT1,
27355 IX86_BUILTIN_PFRSQRT,
27356 IX86_BUILTIN_PFSUB,
27357 IX86_BUILTIN_PFSUBR,
27358 IX86_BUILTIN_PI2FD,
27359 IX86_BUILTIN_PMULHRW,
27360
27361 /* 3DNow! Athlon Extensions */
27362 IX86_BUILTIN_PF2IW,
27363 IX86_BUILTIN_PFNACC,
27364 IX86_BUILTIN_PFPNACC,
27365 IX86_BUILTIN_PI2FW,
27366 IX86_BUILTIN_PSWAPDSI,
27367 IX86_BUILTIN_PSWAPDSF,
27368
27369 /* SSE2 */
27370 IX86_BUILTIN_ADDPD,
27371 IX86_BUILTIN_ADDSD,
27372 IX86_BUILTIN_DIVPD,
27373 IX86_BUILTIN_DIVSD,
27374 IX86_BUILTIN_MULPD,
27375 IX86_BUILTIN_MULSD,
27376 IX86_BUILTIN_SUBPD,
27377 IX86_BUILTIN_SUBSD,
27378
27379 IX86_BUILTIN_CMPEQPD,
27380 IX86_BUILTIN_CMPLTPD,
27381 IX86_BUILTIN_CMPLEPD,
27382 IX86_BUILTIN_CMPGTPD,
27383 IX86_BUILTIN_CMPGEPD,
27384 IX86_BUILTIN_CMPNEQPD,
27385 IX86_BUILTIN_CMPNLTPD,
27386 IX86_BUILTIN_CMPNLEPD,
27387 IX86_BUILTIN_CMPNGTPD,
27388 IX86_BUILTIN_CMPNGEPD,
27389 IX86_BUILTIN_CMPORDPD,
27390 IX86_BUILTIN_CMPUNORDPD,
27391 IX86_BUILTIN_CMPEQSD,
27392 IX86_BUILTIN_CMPLTSD,
27393 IX86_BUILTIN_CMPLESD,
27394 IX86_BUILTIN_CMPNEQSD,
27395 IX86_BUILTIN_CMPNLTSD,
27396 IX86_BUILTIN_CMPNLESD,
27397 IX86_BUILTIN_CMPORDSD,
27398 IX86_BUILTIN_CMPUNORDSD,
27399
27400 IX86_BUILTIN_COMIEQSD,
27401 IX86_BUILTIN_COMILTSD,
27402 IX86_BUILTIN_COMILESD,
27403 IX86_BUILTIN_COMIGTSD,
27404 IX86_BUILTIN_COMIGESD,
27405 IX86_BUILTIN_COMINEQSD,
27406 IX86_BUILTIN_UCOMIEQSD,
27407 IX86_BUILTIN_UCOMILTSD,
27408 IX86_BUILTIN_UCOMILESD,
27409 IX86_BUILTIN_UCOMIGTSD,
27410 IX86_BUILTIN_UCOMIGESD,
27411 IX86_BUILTIN_UCOMINEQSD,
27412
27413 IX86_BUILTIN_MAXPD,
27414 IX86_BUILTIN_MAXSD,
27415 IX86_BUILTIN_MINPD,
27416 IX86_BUILTIN_MINSD,
27417
27418 IX86_BUILTIN_ANDPD,
27419 IX86_BUILTIN_ANDNPD,
27420 IX86_BUILTIN_ORPD,
27421 IX86_BUILTIN_XORPD,
27422
27423 IX86_BUILTIN_SQRTPD,
27424 IX86_BUILTIN_SQRTSD,
27425
27426 IX86_BUILTIN_UNPCKHPD,
27427 IX86_BUILTIN_UNPCKLPD,
27428
27429 IX86_BUILTIN_SHUFPD,
27430
27431 IX86_BUILTIN_LOADUPD,
27432 IX86_BUILTIN_STOREUPD,
27433 IX86_BUILTIN_MOVSD,
27434
27435 IX86_BUILTIN_LOADHPD,
27436 IX86_BUILTIN_LOADLPD,
27437
27438 IX86_BUILTIN_CVTDQ2PD,
27439 IX86_BUILTIN_CVTDQ2PS,
27440
27441 IX86_BUILTIN_CVTPD2DQ,
27442 IX86_BUILTIN_CVTPD2PI,
27443 IX86_BUILTIN_CVTPD2PS,
27444 IX86_BUILTIN_CVTTPD2DQ,
27445 IX86_BUILTIN_CVTTPD2PI,
27446
27447 IX86_BUILTIN_CVTPI2PD,
27448 IX86_BUILTIN_CVTSI2SD,
27449 IX86_BUILTIN_CVTSI642SD,
27450
27451 IX86_BUILTIN_CVTSD2SI,
27452 IX86_BUILTIN_CVTSD2SI64,
27453 IX86_BUILTIN_CVTSD2SS,
27454 IX86_BUILTIN_CVTSS2SD,
27455 IX86_BUILTIN_CVTTSD2SI,
27456 IX86_BUILTIN_CVTTSD2SI64,
27457
27458 IX86_BUILTIN_CVTPS2DQ,
27459 IX86_BUILTIN_CVTPS2PD,
27460 IX86_BUILTIN_CVTTPS2DQ,
27461
27462 IX86_BUILTIN_MOVNTI,
27463 IX86_BUILTIN_MOVNTI64,
27464 IX86_BUILTIN_MOVNTPD,
27465 IX86_BUILTIN_MOVNTDQ,
27466
27467 IX86_BUILTIN_MOVQ128,
27468
27469 /* SSE2 MMX */
27470 IX86_BUILTIN_MASKMOVDQU,
27471 IX86_BUILTIN_MOVMSKPD,
27472 IX86_BUILTIN_PMOVMSKB128,
27473
27474 IX86_BUILTIN_PACKSSWB128,
27475 IX86_BUILTIN_PACKSSDW128,
27476 IX86_BUILTIN_PACKUSWB128,
27477
27478 IX86_BUILTIN_PADDB128,
27479 IX86_BUILTIN_PADDW128,
27480 IX86_BUILTIN_PADDD128,
27481 IX86_BUILTIN_PADDQ128,
27482 IX86_BUILTIN_PADDSB128,
27483 IX86_BUILTIN_PADDSW128,
27484 IX86_BUILTIN_PADDUSB128,
27485 IX86_BUILTIN_PADDUSW128,
27486 IX86_BUILTIN_PSUBB128,
27487 IX86_BUILTIN_PSUBW128,
27488 IX86_BUILTIN_PSUBD128,
27489 IX86_BUILTIN_PSUBQ128,
27490 IX86_BUILTIN_PSUBSB128,
27491 IX86_BUILTIN_PSUBSW128,
27492 IX86_BUILTIN_PSUBUSB128,
27493 IX86_BUILTIN_PSUBUSW128,
27494
27495 IX86_BUILTIN_PAND128,
27496 IX86_BUILTIN_PANDN128,
27497 IX86_BUILTIN_POR128,
27498 IX86_BUILTIN_PXOR128,
27499
27500 IX86_BUILTIN_PAVGB128,
27501 IX86_BUILTIN_PAVGW128,
27502
27503 IX86_BUILTIN_PCMPEQB128,
27504 IX86_BUILTIN_PCMPEQW128,
27505 IX86_BUILTIN_PCMPEQD128,
27506 IX86_BUILTIN_PCMPGTB128,
27507 IX86_BUILTIN_PCMPGTW128,
27508 IX86_BUILTIN_PCMPGTD128,
27509
27510 IX86_BUILTIN_PMADDWD128,
27511
27512 IX86_BUILTIN_PMAXSW128,
27513 IX86_BUILTIN_PMAXUB128,
27514 IX86_BUILTIN_PMINSW128,
27515 IX86_BUILTIN_PMINUB128,
27516
27517 IX86_BUILTIN_PMULUDQ,
27518 IX86_BUILTIN_PMULUDQ128,
27519 IX86_BUILTIN_PMULHUW128,
27520 IX86_BUILTIN_PMULHW128,
27521 IX86_BUILTIN_PMULLW128,
27522
27523 IX86_BUILTIN_PSADBW128,
27524 IX86_BUILTIN_PSHUFHW,
27525 IX86_BUILTIN_PSHUFLW,
27526 IX86_BUILTIN_PSHUFD,
27527
27528 IX86_BUILTIN_PSLLDQI128,
27529 IX86_BUILTIN_PSLLWI128,
27530 IX86_BUILTIN_PSLLDI128,
27531 IX86_BUILTIN_PSLLQI128,
27532 IX86_BUILTIN_PSRAWI128,
27533 IX86_BUILTIN_PSRADI128,
27534 IX86_BUILTIN_PSRLDQI128,
27535 IX86_BUILTIN_PSRLWI128,
27536 IX86_BUILTIN_PSRLDI128,
27537 IX86_BUILTIN_PSRLQI128,
27538
27539 IX86_BUILTIN_PSLLDQ128,
27540 IX86_BUILTIN_PSLLW128,
27541 IX86_BUILTIN_PSLLD128,
27542 IX86_BUILTIN_PSLLQ128,
27543 IX86_BUILTIN_PSRAW128,
27544 IX86_BUILTIN_PSRAD128,
27545 IX86_BUILTIN_PSRLW128,
27546 IX86_BUILTIN_PSRLD128,
27547 IX86_BUILTIN_PSRLQ128,
27548
27549 IX86_BUILTIN_PUNPCKHBW128,
27550 IX86_BUILTIN_PUNPCKHWD128,
27551 IX86_BUILTIN_PUNPCKHDQ128,
27552 IX86_BUILTIN_PUNPCKHQDQ128,
27553 IX86_BUILTIN_PUNPCKLBW128,
27554 IX86_BUILTIN_PUNPCKLWD128,
27555 IX86_BUILTIN_PUNPCKLDQ128,
27556 IX86_BUILTIN_PUNPCKLQDQ128,
27557
27558 IX86_BUILTIN_CLFLUSH,
27559 IX86_BUILTIN_MFENCE,
27560 IX86_BUILTIN_LFENCE,
27561 IX86_BUILTIN_PAUSE,
27562
27563 IX86_BUILTIN_FNSTENV,
27564 IX86_BUILTIN_FLDENV,
27565 IX86_BUILTIN_FNSTSW,
27566 IX86_BUILTIN_FNCLEX,
27567
27568 IX86_BUILTIN_BSRSI,
27569 IX86_BUILTIN_BSRDI,
27570 IX86_BUILTIN_RDPMC,
27571 IX86_BUILTIN_RDTSC,
27572 IX86_BUILTIN_RDTSCP,
27573 IX86_BUILTIN_ROLQI,
27574 IX86_BUILTIN_ROLHI,
27575 IX86_BUILTIN_RORQI,
27576 IX86_BUILTIN_RORHI,
27577
27578 /* SSE3. */
27579 IX86_BUILTIN_ADDSUBPS,
27580 IX86_BUILTIN_HADDPS,
27581 IX86_BUILTIN_HSUBPS,
27582 IX86_BUILTIN_MOVSHDUP,
27583 IX86_BUILTIN_MOVSLDUP,
27584 IX86_BUILTIN_ADDSUBPD,
27585 IX86_BUILTIN_HADDPD,
27586 IX86_BUILTIN_HSUBPD,
27587 IX86_BUILTIN_LDDQU,
27588
27589 IX86_BUILTIN_MONITOR,
27590 IX86_BUILTIN_MWAIT,
27591
27592 /* SSSE3. */
27593 IX86_BUILTIN_PHADDW,
27594 IX86_BUILTIN_PHADDD,
27595 IX86_BUILTIN_PHADDSW,
27596 IX86_BUILTIN_PHSUBW,
27597 IX86_BUILTIN_PHSUBD,
27598 IX86_BUILTIN_PHSUBSW,
27599 IX86_BUILTIN_PMADDUBSW,
27600 IX86_BUILTIN_PMULHRSW,
27601 IX86_BUILTIN_PSHUFB,
27602 IX86_BUILTIN_PSIGNB,
27603 IX86_BUILTIN_PSIGNW,
27604 IX86_BUILTIN_PSIGND,
27605 IX86_BUILTIN_PALIGNR,
27606 IX86_BUILTIN_PABSB,
27607 IX86_BUILTIN_PABSW,
27608 IX86_BUILTIN_PABSD,
27609
27610 IX86_BUILTIN_PHADDW128,
27611 IX86_BUILTIN_PHADDD128,
27612 IX86_BUILTIN_PHADDSW128,
27613 IX86_BUILTIN_PHSUBW128,
27614 IX86_BUILTIN_PHSUBD128,
27615 IX86_BUILTIN_PHSUBSW128,
27616 IX86_BUILTIN_PMADDUBSW128,
27617 IX86_BUILTIN_PMULHRSW128,
27618 IX86_BUILTIN_PSHUFB128,
27619 IX86_BUILTIN_PSIGNB128,
27620 IX86_BUILTIN_PSIGNW128,
27621 IX86_BUILTIN_PSIGND128,
27622 IX86_BUILTIN_PALIGNR128,
27623 IX86_BUILTIN_PABSB128,
27624 IX86_BUILTIN_PABSW128,
27625 IX86_BUILTIN_PABSD128,
27626
27627 /* AMDFAM10 - SSE4A New Instructions. */
27628 IX86_BUILTIN_MOVNTSD,
27629 IX86_BUILTIN_MOVNTSS,
27630 IX86_BUILTIN_EXTRQI,
27631 IX86_BUILTIN_EXTRQ,
27632 IX86_BUILTIN_INSERTQI,
27633 IX86_BUILTIN_INSERTQ,
27634
27635 /* SSE4.1. */
27636 IX86_BUILTIN_BLENDPD,
27637 IX86_BUILTIN_BLENDPS,
27638 IX86_BUILTIN_BLENDVPD,
27639 IX86_BUILTIN_BLENDVPS,
27640 IX86_BUILTIN_PBLENDVB128,
27641 IX86_BUILTIN_PBLENDW128,
27642
27643 IX86_BUILTIN_DPPD,
27644 IX86_BUILTIN_DPPS,
27645
27646 IX86_BUILTIN_INSERTPS128,
27647
27648 IX86_BUILTIN_MOVNTDQA,
27649 IX86_BUILTIN_MPSADBW128,
27650 IX86_BUILTIN_PACKUSDW128,
27651 IX86_BUILTIN_PCMPEQQ,
27652 IX86_BUILTIN_PHMINPOSUW128,
27653
27654 IX86_BUILTIN_PMAXSB128,
27655 IX86_BUILTIN_PMAXSD128,
27656 IX86_BUILTIN_PMAXUD128,
27657 IX86_BUILTIN_PMAXUW128,
27658
27659 IX86_BUILTIN_PMINSB128,
27660 IX86_BUILTIN_PMINSD128,
27661 IX86_BUILTIN_PMINUD128,
27662 IX86_BUILTIN_PMINUW128,
27663
27664 IX86_BUILTIN_PMOVSXBW128,
27665 IX86_BUILTIN_PMOVSXBD128,
27666 IX86_BUILTIN_PMOVSXBQ128,
27667 IX86_BUILTIN_PMOVSXWD128,
27668 IX86_BUILTIN_PMOVSXWQ128,
27669 IX86_BUILTIN_PMOVSXDQ128,
27670
27671 IX86_BUILTIN_PMOVZXBW128,
27672 IX86_BUILTIN_PMOVZXBD128,
27673 IX86_BUILTIN_PMOVZXBQ128,
27674 IX86_BUILTIN_PMOVZXWD128,
27675 IX86_BUILTIN_PMOVZXWQ128,
27676 IX86_BUILTIN_PMOVZXDQ128,
27677
27678 IX86_BUILTIN_PMULDQ128,
27679 IX86_BUILTIN_PMULLD128,
27680
27681 IX86_BUILTIN_ROUNDSD,
27682 IX86_BUILTIN_ROUNDSS,
27683
27684 IX86_BUILTIN_ROUNDPD,
27685 IX86_BUILTIN_ROUNDPS,
27686
27687 IX86_BUILTIN_FLOORPD,
27688 IX86_BUILTIN_CEILPD,
27689 IX86_BUILTIN_TRUNCPD,
27690 IX86_BUILTIN_RINTPD,
27691 IX86_BUILTIN_ROUNDPD_AZ,
27692
27693 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX,
27694 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX,
27695 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX,
27696
27697 IX86_BUILTIN_FLOORPS,
27698 IX86_BUILTIN_CEILPS,
27699 IX86_BUILTIN_TRUNCPS,
27700 IX86_BUILTIN_RINTPS,
27701 IX86_BUILTIN_ROUNDPS_AZ,
27702
27703 IX86_BUILTIN_FLOORPS_SFIX,
27704 IX86_BUILTIN_CEILPS_SFIX,
27705 IX86_BUILTIN_ROUNDPS_AZ_SFIX,
27706
27707 IX86_BUILTIN_PTESTZ,
27708 IX86_BUILTIN_PTESTC,
27709 IX86_BUILTIN_PTESTNZC,
27710
27711 IX86_BUILTIN_VEC_INIT_V2SI,
27712 IX86_BUILTIN_VEC_INIT_V4HI,
27713 IX86_BUILTIN_VEC_INIT_V8QI,
27714 IX86_BUILTIN_VEC_EXT_V2DF,
27715 IX86_BUILTIN_VEC_EXT_V2DI,
27716 IX86_BUILTIN_VEC_EXT_V4SF,
27717 IX86_BUILTIN_VEC_EXT_V4SI,
27718 IX86_BUILTIN_VEC_EXT_V8HI,
27719 IX86_BUILTIN_VEC_EXT_V2SI,
27720 IX86_BUILTIN_VEC_EXT_V4HI,
27721 IX86_BUILTIN_VEC_EXT_V16QI,
27722 IX86_BUILTIN_VEC_SET_V2DI,
27723 IX86_BUILTIN_VEC_SET_V4SF,
27724 IX86_BUILTIN_VEC_SET_V4SI,
27725 IX86_BUILTIN_VEC_SET_V8HI,
27726 IX86_BUILTIN_VEC_SET_V4HI,
27727 IX86_BUILTIN_VEC_SET_V16QI,
27728
27729 IX86_BUILTIN_VEC_PACK_SFIX,
27730 IX86_BUILTIN_VEC_PACK_SFIX256,
27731
27732 /* SSE4.2. */
27733 IX86_BUILTIN_CRC32QI,
27734 IX86_BUILTIN_CRC32HI,
27735 IX86_BUILTIN_CRC32SI,
27736 IX86_BUILTIN_CRC32DI,
27737
27738 IX86_BUILTIN_PCMPESTRI128,
27739 IX86_BUILTIN_PCMPESTRM128,
27740 IX86_BUILTIN_PCMPESTRA128,
27741 IX86_BUILTIN_PCMPESTRC128,
27742 IX86_BUILTIN_PCMPESTRO128,
27743 IX86_BUILTIN_PCMPESTRS128,
27744 IX86_BUILTIN_PCMPESTRZ128,
27745 IX86_BUILTIN_PCMPISTRI128,
27746 IX86_BUILTIN_PCMPISTRM128,
27747 IX86_BUILTIN_PCMPISTRA128,
27748 IX86_BUILTIN_PCMPISTRC128,
27749 IX86_BUILTIN_PCMPISTRO128,
27750 IX86_BUILTIN_PCMPISTRS128,
27751 IX86_BUILTIN_PCMPISTRZ128,
27752
27753 IX86_BUILTIN_PCMPGTQ,
27754
27755 /* AES instructions */
27756 IX86_BUILTIN_AESENC128,
27757 IX86_BUILTIN_AESENCLAST128,
27758 IX86_BUILTIN_AESDEC128,
27759 IX86_BUILTIN_AESDECLAST128,
27760 IX86_BUILTIN_AESIMC128,
27761 IX86_BUILTIN_AESKEYGENASSIST128,
27762
27763 /* PCLMUL instruction */
27764 IX86_BUILTIN_PCLMULQDQ128,
27765
27766 /* AVX */
27767 IX86_BUILTIN_ADDPD256,
27768 IX86_BUILTIN_ADDPS256,
27769 IX86_BUILTIN_ADDSUBPD256,
27770 IX86_BUILTIN_ADDSUBPS256,
27771 IX86_BUILTIN_ANDPD256,
27772 IX86_BUILTIN_ANDPS256,
27773 IX86_BUILTIN_ANDNPD256,
27774 IX86_BUILTIN_ANDNPS256,
27775 IX86_BUILTIN_BLENDPD256,
27776 IX86_BUILTIN_BLENDPS256,
27777 IX86_BUILTIN_BLENDVPD256,
27778 IX86_BUILTIN_BLENDVPS256,
27779 IX86_BUILTIN_DIVPD256,
27780 IX86_BUILTIN_DIVPS256,
27781 IX86_BUILTIN_DPPS256,
27782 IX86_BUILTIN_HADDPD256,
27783 IX86_BUILTIN_HADDPS256,
27784 IX86_BUILTIN_HSUBPD256,
27785 IX86_BUILTIN_HSUBPS256,
27786 IX86_BUILTIN_MAXPD256,
27787 IX86_BUILTIN_MAXPS256,
27788 IX86_BUILTIN_MINPD256,
27789 IX86_BUILTIN_MINPS256,
27790 IX86_BUILTIN_MULPD256,
27791 IX86_BUILTIN_MULPS256,
27792 IX86_BUILTIN_ORPD256,
27793 IX86_BUILTIN_ORPS256,
27794 IX86_BUILTIN_SHUFPD256,
27795 IX86_BUILTIN_SHUFPS256,
27796 IX86_BUILTIN_SUBPD256,
27797 IX86_BUILTIN_SUBPS256,
27798 IX86_BUILTIN_XORPD256,
27799 IX86_BUILTIN_XORPS256,
27800 IX86_BUILTIN_CMPSD,
27801 IX86_BUILTIN_CMPSS,
27802 IX86_BUILTIN_CMPPD,
27803 IX86_BUILTIN_CMPPS,
27804 IX86_BUILTIN_CMPPD256,
27805 IX86_BUILTIN_CMPPS256,
27806 IX86_BUILTIN_CVTDQ2PD256,
27807 IX86_BUILTIN_CVTDQ2PS256,
27808 IX86_BUILTIN_CVTPD2PS256,
27809 IX86_BUILTIN_CVTPS2DQ256,
27810 IX86_BUILTIN_CVTPS2PD256,
27811 IX86_BUILTIN_CVTTPD2DQ256,
27812 IX86_BUILTIN_CVTPD2DQ256,
27813 IX86_BUILTIN_CVTTPS2DQ256,
27814 IX86_BUILTIN_EXTRACTF128PD256,
27815 IX86_BUILTIN_EXTRACTF128PS256,
27816 IX86_BUILTIN_EXTRACTF128SI256,
27817 IX86_BUILTIN_VZEROALL,
27818 IX86_BUILTIN_VZEROUPPER,
27819 IX86_BUILTIN_VPERMILVARPD,
27820 IX86_BUILTIN_VPERMILVARPS,
27821 IX86_BUILTIN_VPERMILVARPD256,
27822 IX86_BUILTIN_VPERMILVARPS256,
27823 IX86_BUILTIN_VPERMILPD,
27824 IX86_BUILTIN_VPERMILPS,
27825 IX86_BUILTIN_VPERMILPD256,
27826 IX86_BUILTIN_VPERMILPS256,
27827 IX86_BUILTIN_VPERMIL2PD,
27828 IX86_BUILTIN_VPERMIL2PS,
27829 IX86_BUILTIN_VPERMIL2PD256,
27830 IX86_BUILTIN_VPERMIL2PS256,
27831 IX86_BUILTIN_VPERM2F128PD256,
27832 IX86_BUILTIN_VPERM2F128PS256,
27833 IX86_BUILTIN_VPERM2F128SI256,
27834 IX86_BUILTIN_VBROADCASTSS,
27835 IX86_BUILTIN_VBROADCASTSD256,
27836 IX86_BUILTIN_VBROADCASTSS256,
27837 IX86_BUILTIN_VBROADCASTPD256,
27838 IX86_BUILTIN_VBROADCASTPS256,
27839 IX86_BUILTIN_VINSERTF128PD256,
27840 IX86_BUILTIN_VINSERTF128PS256,
27841 IX86_BUILTIN_VINSERTF128SI256,
27842 IX86_BUILTIN_LOADUPD256,
27843 IX86_BUILTIN_LOADUPS256,
27844 IX86_BUILTIN_STOREUPD256,
27845 IX86_BUILTIN_STOREUPS256,
27846 IX86_BUILTIN_LDDQU256,
27847 IX86_BUILTIN_MOVNTDQ256,
27848 IX86_BUILTIN_MOVNTPD256,
27849 IX86_BUILTIN_MOVNTPS256,
27850 IX86_BUILTIN_LOADDQU256,
27851 IX86_BUILTIN_STOREDQU256,
27852 IX86_BUILTIN_MASKLOADPD,
27853 IX86_BUILTIN_MASKLOADPS,
27854 IX86_BUILTIN_MASKSTOREPD,
27855 IX86_BUILTIN_MASKSTOREPS,
27856 IX86_BUILTIN_MASKLOADPD256,
27857 IX86_BUILTIN_MASKLOADPS256,
27858 IX86_BUILTIN_MASKSTOREPD256,
27859 IX86_BUILTIN_MASKSTOREPS256,
27860 IX86_BUILTIN_MOVSHDUP256,
27861 IX86_BUILTIN_MOVSLDUP256,
27862 IX86_BUILTIN_MOVDDUP256,
27863
27864 IX86_BUILTIN_SQRTPD256,
27865 IX86_BUILTIN_SQRTPS256,
27866 IX86_BUILTIN_SQRTPS_NR256,
27867 IX86_BUILTIN_RSQRTPS256,
27868 IX86_BUILTIN_RSQRTPS_NR256,
27869
27870 IX86_BUILTIN_RCPPS256,
27871
27872 IX86_BUILTIN_ROUNDPD256,
27873 IX86_BUILTIN_ROUNDPS256,
27874
27875 IX86_BUILTIN_FLOORPD256,
27876 IX86_BUILTIN_CEILPD256,
27877 IX86_BUILTIN_TRUNCPD256,
27878 IX86_BUILTIN_RINTPD256,
27879 IX86_BUILTIN_ROUNDPD_AZ256,
27880
27881 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256,
27882 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256,
27883 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256,
27884
27885 IX86_BUILTIN_FLOORPS256,
27886 IX86_BUILTIN_CEILPS256,
27887 IX86_BUILTIN_TRUNCPS256,
27888 IX86_BUILTIN_RINTPS256,
27889 IX86_BUILTIN_ROUNDPS_AZ256,
27890
27891 IX86_BUILTIN_FLOORPS_SFIX256,
27892 IX86_BUILTIN_CEILPS_SFIX256,
27893 IX86_BUILTIN_ROUNDPS_AZ_SFIX256,
27894
27895 IX86_BUILTIN_UNPCKHPD256,
27896 IX86_BUILTIN_UNPCKLPD256,
27897 IX86_BUILTIN_UNPCKHPS256,
27898 IX86_BUILTIN_UNPCKLPS256,
27899
27900 IX86_BUILTIN_SI256_SI,
27901 IX86_BUILTIN_PS256_PS,
27902 IX86_BUILTIN_PD256_PD,
27903 IX86_BUILTIN_SI_SI256,
27904 IX86_BUILTIN_PS_PS256,
27905 IX86_BUILTIN_PD_PD256,
27906
27907 IX86_BUILTIN_VTESTZPD,
27908 IX86_BUILTIN_VTESTCPD,
27909 IX86_BUILTIN_VTESTNZCPD,
27910 IX86_BUILTIN_VTESTZPS,
27911 IX86_BUILTIN_VTESTCPS,
27912 IX86_BUILTIN_VTESTNZCPS,
27913 IX86_BUILTIN_VTESTZPD256,
27914 IX86_BUILTIN_VTESTCPD256,
27915 IX86_BUILTIN_VTESTNZCPD256,
27916 IX86_BUILTIN_VTESTZPS256,
27917 IX86_BUILTIN_VTESTCPS256,
27918 IX86_BUILTIN_VTESTNZCPS256,
27919 IX86_BUILTIN_PTESTZ256,
27920 IX86_BUILTIN_PTESTC256,
27921 IX86_BUILTIN_PTESTNZC256,
27922
27923 IX86_BUILTIN_MOVMSKPD256,
27924 IX86_BUILTIN_MOVMSKPS256,
27925
27926 /* AVX2 */
27927 IX86_BUILTIN_MPSADBW256,
27928 IX86_BUILTIN_PABSB256,
27929 IX86_BUILTIN_PABSW256,
27930 IX86_BUILTIN_PABSD256,
27931 IX86_BUILTIN_PACKSSDW256,
27932 IX86_BUILTIN_PACKSSWB256,
27933 IX86_BUILTIN_PACKUSDW256,
27934 IX86_BUILTIN_PACKUSWB256,
27935 IX86_BUILTIN_PADDB256,
27936 IX86_BUILTIN_PADDW256,
27937 IX86_BUILTIN_PADDD256,
27938 IX86_BUILTIN_PADDQ256,
27939 IX86_BUILTIN_PADDSB256,
27940 IX86_BUILTIN_PADDSW256,
27941 IX86_BUILTIN_PADDUSB256,
27942 IX86_BUILTIN_PADDUSW256,
27943 IX86_BUILTIN_PALIGNR256,
27944 IX86_BUILTIN_AND256I,
27945 IX86_BUILTIN_ANDNOT256I,
27946 IX86_BUILTIN_PAVGB256,
27947 IX86_BUILTIN_PAVGW256,
27948 IX86_BUILTIN_PBLENDVB256,
27949 IX86_BUILTIN_PBLENDVW256,
27950 IX86_BUILTIN_PCMPEQB256,
27951 IX86_BUILTIN_PCMPEQW256,
27952 IX86_BUILTIN_PCMPEQD256,
27953 IX86_BUILTIN_PCMPEQQ256,
27954 IX86_BUILTIN_PCMPGTB256,
27955 IX86_BUILTIN_PCMPGTW256,
27956 IX86_BUILTIN_PCMPGTD256,
27957 IX86_BUILTIN_PCMPGTQ256,
27958 IX86_BUILTIN_PHADDW256,
27959 IX86_BUILTIN_PHADDD256,
27960 IX86_BUILTIN_PHADDSW256,
27961 IX86_BUILTIN_PHSUBW256,
27962 IX86_BUILTIN_PHSUBD256,
27963 IX86_BUILTIN_PHSUBSW256,
27964 IX86_BUILTIN_PMADDUBSW256,
27965 IX86_BUILTIN_PMADDWD256,
27966 IX86_BUILTIN_PMAXSB256,
27967 IX86_BUILTIN_PMAXSW256,
27968 IX86_BUILTIN_PMAXSD256,
27969 IX86_BUILTIN_PMAXUB256,
27970 IX86_BUILTIN_PMAXUW256,
27971 IX86_BUILTIN_PMAXUD256,
27972 IX86_BUILTIN_PMINSB256,
27973 IX86_BUILTIN_PMINSW256,
27974 IX86_BUILTIN_PMINSD256,
27975 IX86_BUILTIN_PMINUB256,
27976 IX86_BUILTIN_PMINUW256,
27977 IX86_BUILTIN_PMINUD256,
27978 IX86_BUILTIN_PMOVMSKB256,
27979 IX86_BUILTIN_PMOVSXBW256,
27980 IX86_BUILTIN_PMOVSXBD256,
27981 IX86_BUILTIN_PMOVSXBQ256,
27982 IX86_BUILTIN_PMOVSXWD256,
27983 IX86_BUILTIN_PMOVSXWQ256,
27984 IX86_BUILTIN_PMOVSXDQ256,
27985 IX86_BUILTIN_PMOVZXBW256,
27986 IX86_BUILTIN_PMOVZXBD256,
27987 IX86_BUILTIN_PMOVZXBQ256,
27988 IX86_BUILTIN_PMOVZXWD256,
27989 IX86_BUILTIN_PMOVZXWQ256,
27990 IX86_BUILTIN_PMOVZXDQ256,
27991 IX86_BUILTIN_PMULDQ256,
27992 IX86_BUILTIN_PMULHRSW256,
27993 IX86_BUILTIN_PMULHUW256,
27994 IX86_BUILTIN_PMULHW256,
27995 IX86_BUILTIN_PMULLW256,
27996 IX86_BUILTIN_PMULLD256,
27997 IX86_BUILTIN_PMULUDQ256,
27998 IX86_BUILTIN_POR256,
27999 IX86_BUILTIN_PSADBW256,
28000 IX86_BUILTIN_PSHUFB256,
28001 IX86_BUILTIN_PSHUFD256,
28002 IX86_BUILTIN_PSHUFHW256,
28003 IX86_BUILTIN_PSHUFLW256,
28004 IX86_BUILTIN_PSIGNB256,
28005 IX86_BUILTIN_PSIGNW256,
28006 IX86_BUILTIN_PSIGND256,
28007 IX86_BUILTIN_PSLLDQI256,
28008 IX86_BUILTIN_PSLLWI256,
28009 IX86_BUILTIN_PSLLW256,
28010 IX86_BUILTIN_PSLLDI256,
28011 IX86_BUILTIN_PSLLD256,
28012 IX86_BUILTIN_PSLLQI256,
28013 IX86_BUILTIN_PSLLQ256,
28014 IX86_BUILTIN_PSRAWI256,
28015 IX86_BUILTIN_PSRAW256,
28016 IX86_BUILTIN_PSRADI256,
28017 IX86_BUILTIN_PSRAD256,
28018 IX86_BUILTIN_PSRLDQI256,
28019 IX86_BUILTIN_PSRLWI256,
28020 IX86_BUILTIN_PSRLW256,
28021 IX86_BUILTIN_PSRLDI256,
28022 IX86_BUILTIN_PSRLD256,
28023 IX86_BUILTIN_PSRLQI256,
28024 IX86_BUILTIN_PSRLQ256,
28025 IX86_BUILTIN_PSUBB256,
28026 IX86_BUILTIN_PSUBW256,
28027 IX86_BUILTIN_PSUBD256,
28028 IX86_BUILTIN_PSUBQ256,
28029 IX86_BUILTIN_PSUBSB256,
28030 IX86_BUILTIN_PSUBSW256,
28031 IX86_BUILTIN_PSUBUSB256,
28032 IX86_BUILTIN_PSUBUSW256,
28033 IX86_BUILTIN_PUNPCKHBW256,
28034 IX86_BUILTIN_PUNPCKHWD256,
28035 IX86_BUILTIN_PUNPCKHDQ256,
28036 IX86_BUILTIN_PUNPCKHQDQ256,
28037 IX86_BUILTIN_PUNPCKLBW256,
28038 IX86_BUILTIN_PUNPCKLWD256,
28039 IX86_BUILTIN_PUNPCKLDQ256,
28040 IX86_BUILTIN_PUNPCKLQDQ256,
28041 IX86_BUILTIN_PXOR256,
28042 IX86_BUILTIN_MOVNTDQA256,
28043 IX86_BUILTIN_VBROADCASTSS_PS,
28044 IX86_BUILTIN_VBROADCASTSS_PS256,
28045 IX86_BUILTIN_VBROADCASTSD_PD256,
28046 IX86_BUILTIN_VBROADCASTSI256,
28047 IX86_BUILTIN_PBLENDD256,
28048 IX86_BUILTIN_PBLENDD128,
28049 IX86_BUILTIN_PBROADCASTB256,
28050 IX86_BUILTIN_PBROADCASTW256,
28051 IX86_BUILTIN_PBROADCASTD256,
28052 IX86_BUILTIN_PBROADCASTQ256,
28053 IX86_BUILTIN_PBROADCASTB128,
28054 IX86_BUILTIN_PBROADCASTW128,
28055 IX86_BUILTIN_PBROADCASTD128,
28056 IX86_BUILTIN_PBROADCASTQ128,
28057 IX86_BUILTIN_VPERMVARSI256,
28058 IX86_BUILTIN_VPERMDF256,
28059 IX86_BUILTIN_VPERMVARSF256,
28060 IX86_BUILTIN_VPERMDI256,
28061 IX86_BUILTIN_VPERMTI256,
28062 IX86_BUILTIN_VEXTRACT128I256,
28063 IX86_BUILTIN_VINSERT128I256,
28064 IX86_BUILTIN_MASKLOADD,
28065 IX86_BUILTIN_MASKLOADQ,
28066 IX86_BUILTIN_MASKLOADD256,
28067 IX86_BUILTIN_MASKLOADQ256,
28068 IX86_BUILTIN_MASKSTORED,
28069 IX86_BUILTIN_MASKSTOREQ,
28070 IX86_BUILTIN_MASKSTORED256,
28071 IX86_BUILTIN_MASKSTOREQ256,
28072 IX86_BUILTIN_PSLLVV4DI,
28073 IX86_BUILTIN_PSLLVV2DI,
28074 IX86_BUILTIN_PSLLVV8SI,
28075 IX86_BUILTIN_PSLLVV4SI,
28076 IX86_BUILTIN_PSRAVV8SI,
28077 IX86_BUILTIN_PSRAVV4SI,
28078 IX86_BUILTIN_PSRLVV4DI,
28079 IX86_BUILTIN_PSRLVV2DI,
28080 IX86_BUILTIN_PSRLVV8SI,
28081 IX86_BUILTIN_PSRLVV4SI,
28082
28083 IX86_BUILTIN_GATHERSIV2DF,
28084 IX86_BUILTIN_GATHERSIV4DF,
28085 IX86_BUILTIN_GATHERDIV2DF,
28086 IX86_BUILTIN_GATHERDIV4DF,
28087 IX86_BUILTIN_GATHERSIV4SF,
28088 IX86_BUILTIN_GATHERSIV8SF,
28089 IX86_BUILTIN_GATHERDIV4SF,
28090 IX86_BUILTIN_GATHERDIV8SF,
28091 IX86_BUILTIN_GATHERSIV2DI,
28092 IX86_BUILTIN_GATHERSIV4DI,
28093 IX86_BUILTIN_GATHERDIV2DI,
28094 IX86_BUILTIN_GATHERDIV4DI,
28095 IX86_BUILTIN_GATHERSIV4SI,
28096 IX86_BUILTIN_GATHERSIV8SI,
28097 IX86_BUILTIN_GATHERDIV4SI,
28098 IX86_BUILTIN_GATHERDIV8SI,
28099
28100 /* AVX512F */
28101 IX86_BUILTIN_ADDPD512,
28102 IX86_BUILTIN_ADDPS512,
28103 IX86_BUILTIN_ADDSD_ROUND,
28104 IX86_BUILTIN_ADDSS_ROUND,
28105 IX86_BUILTIN_ALIGND512,
28106 IX86_BUILTIN_ALIGNQ512,
28107 IX86_BUILTIN_BLENDMD512,
28108 IX86_BUILTIN_BLENDMPD512,
28109 IX86_BUILTIN_BLENDMPS512,
28110 IX86_BUILTIN_BLENDMQ512,
28111 IX86_BUILTIN_BROADCASTF32X4_512,
28112 IX86_BUILTIN_BROADCASTF64X4_512,
28113 IX86_BUILTIN_BROADCASTI32X4_512,
28114 IX86_BUILTIN_BROADCASTI64X4_512,
28115 IX86_BUILTIN_BROADCASTSD512,
28116 IX86_BUILTIN_BROADCASTSS512,
28117 IX86_BUILTIN_CMPD512,
28118 IX86_BUILTIN_CMPPD512,
28119 IX86_BUILTIN_CMPPS512,
28120 IX86_BUILTIN_CMPQ512,
28121 IX86_BUILTIN_CMPSD_MASK,
28122 IX86_BUILTIN_CMPSS_MASK,
28123 IX86_BUILTIN_COMIDF,
28124 IX86_BUILTIN_COMISF,
28125 IX86_BUILTIN_COMPRESSPD512,
28126 IX86_BUILTIN_COMPRESSPDSTORE512,
28127 IX86_BUILTIN_COMPRESSPS512,
28128 IX86_BUILTIN_COMPRESSPSSTORE512,
28129 IX86_BUILTIN_CVTDQ2PD512,
28130 IX86_BUILTIN_CVTDQ2PS512,
28131 IX86_BUILTIN_CVTPD2DQ512,
28132 IX86_BUILTIN_CVTPD2PS512,
28133 IX86_BUILTIN_CVTPD2UDQ512,
28134 IX86_BUILTIN_CVTPH2PS512,
28135 IX86_BUILTIN_CVTPS2DQ512,
28136 IX86_BUILTIN_CVTPS2PD512,
28137 IX86_BUILTIN_CVTPS2PH512,
28138 IX86_BUILTIN_CVTPS2UDQ512,
28139 IX86_BUILTIN_CVTSD2SS_ROUND,
28140 IX86_BUILTIN_CVTSI2SD64,
28141 IX86_BUILTIN_CVTSI2SS32,
28142 IX86_BUILTIN_CVTSI2SS64,
28143 IX86_BUILTIN_CVTSS2SD_ROUND,
28144 IX86_BUILTIN_CVTTPD2DQ512,
28145 IX86_BUILTIN_CVTTPD2UDQ512,
28146 IX86_BUILTIN_CVTTPS2DQ512,
28147 IX86_BUILTIN_CVTTPS2UDQ512,
28148 IX86_BUILTIN_CVTUDQ2PD512,
28149 IX86_BUILTIN_CVTUDQ2PS512,
28150 IX86_BUILTIN_CVTUSI2SD32,
28151 IX86_BUILTIN_CVTUSI2SD64,
28152 IX86_BUILTIN_CVTUSI2SS32,
28153 IX86_BUILTIN_CVTUSI2SS64,
28154 IX86_BUILTIN_DIVPD512,
28155 IX86_BUILTIN_DIVPS512,
28156 IX86_BUILTIN_DIVSD_ROUND,
28157 IX86_BUILTIN_DIVSS_ROUND,
28158 IX86_BUILTIN_EXPANDPD512,
28159 IX86_BUILTIN_EXPANDPD512Z,
28160 IX86_BUILTIN_EXPANDPDLOAD512,
28161 IX86_BUILTIN_EXPANDPDLOAD512Z,
28162 IX86_BUILTIN_EXPANDPS512,
28163 IX86_BUILTIN_EXPANDPS512Z,
28164 IX86_BUILTIN_EXPANDPSLOAD512,
28165 IX86_BUILTIN_EXPANDPSLOAD512Z,
28166 IX86_BUILTIN_EXTRACTF32X4,
28167 IX86_BUILTIN_EXTRACTF64X4,
28168 IX86_BUILTIN_EXTRACTI32X4,
28169 IX86_BUILTIN_EXTRACTI64X4,
28170 IX86_BUILTIN_FIXUPIMMPD512_MASK,
28171 IX86_BUILTIN_FIXUPIMMPD512_MASKZ,
28172 IX86_BUILTIN_FIXUPIMMPS512_MASK,
28173 IX86_BUILTIN_FIXUPIMMPS512_MASKZ,
28174 IX86_BUILTIN_FIXUPIMMSD128_MASK,
28175 IX86_BUILTIN_FIXUPIMMSD128_MASKZ,
28176 IX86_BUILTIN_FIXUPIMMSS128_MASK,
28177 IX86_BUILTIN_FIXUPIMMSS128_MASKZ,
28178 IX86_BUILTIN_GETEXPPD512,
28179 IX86_BUILTIN_GETEXPPS512,
28180 IX86_BUILTIN_GETEXPSD128,
28181 IX86_BUILTIN_GETEXPSS128,
28182 IX86_BUILTIN_GETMANTPD512,
28183 IX86_BUILTIN_GETMANTPS512,
28184 IX86_BUILTIN_GETMANTSD128,
28185 IX86_BUILTIN_GETMANTSS128,
28186 IX86_BUILTIN_INSERTF32X4,
28187 IX86_BUILTIN_INSERTF64X4,
28188 IX86_BUILTIN_INSERTI32X4,
28189 IX86_BUILTIN_INSERTI64X4,
28190 IX86_BUILTIN_LOADAPD512,
28191 IX86_BUILTIN_LOADAPS512,
28192 IX86_BUILTIN_LOADDQUDI512,
28193 IX86_BUILTIN_LOADDQUSI512,
28194 IX86_BUILTIN_LOADUPD512,
28195 IX86_BUILTIN_LOADUPS512,
28196 IX86_BUILTIN_MAXPD512,
28197 IX86_BUILTIN_MAXPS512,
28198 IX86_BUILTIN_MAXSD_ROUND,
28199 IX86_BUILTIN_MAXSS_ROUND,
28200 IX86_BUILTIN_MINPD512,
28201 IX86_BUILTIN_MINPS512,
28202 IX86_BUILTIN_MINSD_ROUND,
28203 IX86_BUILTIN_MINSS_ROUND,
28204 IX86_BUILTIN_MOVAPD512,
28205 IX86_BUILTIN_MOVAPS512,
28206 IX86_BUILTIN_MOVDDUP512,
28207 IX86_BUILTIN_MOVDQA32LOAD512,
28208 IX86_BUILTIN_MOVDQA32STORE512,
28209 IX86_BUILTIN_MOVDQA32_512,
28210 IX86_BUILTIN_MOVDQA64LOAD512,
28211 IX86_BUILTIN_MOVDQA64STORE512,
28212 IX86_BUILTIN_MOVDQA64_512,
28213 IX86_BUILTIN_MOVNTDQ512,
28214 IX86_BUILTIN_MOVNTDQA512,
28215 IX86_BUILTIN_MOVNTPD512,
28216 IX86_BUILTIN_MOVNTPS512,
28217 IX86_BUILTIN_MOVSHDUP512,
28218 IX86_BUILTIN_MOVSLDUP512,
28219 IX86_BUILTIN_MULPD512,
28220 IX86_BUILTIN_MULPS512,
28221 IX86_BUILTIN_MULSD_ROUND,
28222 IX86_BUILTIN_MULSS_ROUND,
28223 IX86_BUILTIN_PABSD512,
28224 IX86_BUILTIN_PABSQ512,
28225 IX86_BUILTIN_PADDD512,
28226 IX86_BUILTIN_PADDQ512,
28227 IX86_BUILTIN_PANDD512,
28228 IX86_BUILTIN_PANDND512,
28229 IX86_BUILTIN_PANDNQ512,
28230 IX86_BUILTIN_PANDQ512,
28231 IX86_BUILTIN_PBROADCASTD512,
28232 IX86_BUILTIN_PBROADCASTD512_GPR,
28233 IX86_BUILTIN_PBROADCASTMB512,
28234 IX86_BUILTIN_PBROADCASTMW512,
28235 IX86_BUILTIN_PBROADCASTQ512,
28236 IX86_BUILTIN_PBROADCASTQ512_GPR,
28237 IX86_BUILTIN_PBROADCASTQ512_MEM,
28238 IX86_BUILTIN_PCMPEQD512_MASK,
28239 IX86_BUILTIN_PCMPEQQ512_MASK,
28240 IX86_BUILTIN_PCMPGTD512_MASK,
28241 IX86_BUILTIN_PCMPGTQ512_MASK,
28242 IX86_BUILTIN_PCOMPRESSD512,
28243 IX86_BUILTIN_PCOMPRESSDSTORE512,
28244 IX86_BUILTIN_PCOMPRESSQ512,
28245 IX86_BUILTIN_PCOMPRESSQSTORE512,
28246 IX86_BUILTIN_PEXPANDD512,
28247 IX86_BUILTIN_PEXPANDD512Z,
28248 IX86_BUILTIN_PEXPANDDLOAD512,
28249 IX86_BUILTIN_PEXPANDDLOAD512Z,
28250 IX86_BUILTIN_PEXPANDQ512,
28251 IX86_BUILTIN_PEXPANDQ512Z,
28252 IX86_BUILTIN_PEXPANDQLOAD512,
28253 IX86_BUILTIN_PEXPANDQLOAD512Z,
28254 IX86_BUILTIN_PMAXSD512,
28255 IX86_BUILTIN_PMAXSQ512,
28256 IX86_BUILTIN_PMAXUD512,
28257 IX86_BUILTIN_PMAXUQ512,
28258 IX86_BUILTIN_PMINSD512,
28259 IX86_BUILTIN_PMINSQ512,
28260 IX86_BUILTIN_PMINUD512,
28261 IX86_BUILTIN_PMINUQ512,
28262 IX86_BUILTIN_PMOVDB512,
28263 IX86_BUILTIN_PMOVDB512_MEM,
28264 IX86_BUILTIN_PMOVDW512,
28265 IX86_BUILTIN_PMOVDW512_MEM,
28266 IX86_BUILTIN_PMOVQB512,
28267 IX86_BUILTIN_PMOVQB512_MEM,
28268 IX86_BUILTIN_PMOVQD512,
28269 IX86_BUILTIN_PMOVQD512_MEM,
28270 IX86_BUILTIN_PMOVQW512,
28271 IX86_BUILTIN_PMOVQW512_MEM,
28272 IX86_BUILTIN_PMOVSDB512,
28273 IX86_BUILTIN_PMOVSDB512_MEM,
28274 IX86_BUILTIN_PMOVSDW512,
28275 IX86_BUILTIN_PMOVSDW512_MEM,
28276 IX86_BUILTIN_PMOVSQB512,
28277 IX86_BUILTIN_PMOVSQB512_MEM,
28278 IX86_BUILTIN_PMOVSQD512,
28279 IX86_BUILTIN_PMOVSQD512_MEM,
28280 IX86_BUILTIN_PMOVSQW512,
28281 IX86_BUILTIN_PMOVSQW512_MEM,
28282 IX86_BUILTIN_PMOVSXBD512,
28283 IX86_BUILTIN_PMOVSXBQ512,
28284 IX86_BUILTIN_PMOVSXDQ512,
28285 IX86_BUILTIN_PMOVSXWD512,
28286 IX86_BUILTIN_PMOVSXWQ512,
28287 IX86_BUILTIN_PMOVUSDB512,
28288 IX86_BUILTIN_PMOVUSDB512_MEM,
28289 IX86_BUILTIN_PMOVUSDW512,
28290 IX86_BUILTIN_PMOVUSDW512_MEM,
28291 IX86_BUILTIN_PMOVUSQB512,
28292 IX86_BUILTIN_PMOVUSQB512_MEM,
28293 IX86_BUILTIN_PMOVUSQD512,
28294 IX86_BUILTIN_PMOVUSQD512_MEM,
28295 IX86_BUILTIN_PMOVUSQW512,
28296 IX86_BUILTIN_PMOVUSQW512_MEM,
28297 IX86_BUILTIN_PMOVZXBD512,
28298 IX86_BUILTIN_PMOVZXBQ512,
28299 IX86_BUILTIN_PMOVZXDQ512,
28300 IX86_BUILTIN_PMOVZXWD512,
28301 IX86_BUILTIN_PMOVZXWQ512,
28302 IX86_BUILTIN_PMULDQ512,
28303 IX86_BUILTIN_PMULLD512,
28304 IX86_BUILTIN_PMULUDQ512,
28305 IX86_BUILTIN_PORD512,
28306 IX86_BUILTIN_PORQ512,
28307 IX86_BUILTIN_PROLD512,
28308 IX86_BUILTIN_PROLQ512,
28309 IX86_BUILTIN_PROLVD512,
28310 IX86_BUILTIN_PROLVQ512,
28311 IX86_BUILTIN_PRORD512,
28312 IX86_BUILTIN_PRORQ512,
28313 IX86_BUILTIN_PRORVD512,
28314 IX86_BUILTIN_PRORVQ512,
28315 IX86_BUILTIN_PSHUFD512,
28316 IX86_BUILTIN_PSLLD512,
28317 IX86_BUILTIN_PSLLDI512,
28318 IX86_BUILTIN_PSLLQ512,
28319 IX86_BUILTIN_PSLLQI512,
28320 IX86_BUILTIN_PSLLVV16SI,
28321 IX86_BUILTIN_PSLLVV8DI,
28322 IX86_BUILTIN_PSRAD512,
28323 IX86_BUILTIN_PSRADI512,
28324 IX86_BUILTIN_PSRAQ512,
28325 IX86_BUILTIN_PSRAQI512,
28326 IX86_BUILTIN_PSRAVV16SI,
28327 IX86_BUILTIN_PSRAVV8DI,
28328 IX86_BUILTIN_PSRLD512,
28329 IX86_BUILTIN_PSRLDI512,
28330 IX86_BUILTIN_PSRLQ512,
28331 IX86_BUILTIN_PSRLQI512,
28332 IX86_BUILTIN_PSRLVV16SI,
28333 IX86_BUILTIN_PSRLVV8DI,
28334 IX86_BUILTIN_PSUBD512,
28335 IX86_BUILTIN_PSUBQ512,
28336 IX86_BUILTIN_PTESTMD512,
28337 IX86_BUILTIN_PTESTMQ512,
28338 IX86_BUILTIN_PTESTNMD512,
28339 IX86_BUILTIN_PTESTNMQ512,
28340 IX86_BUILTIN_PUNPCKHDQ512,
28341 IX86_BUILTIN_PUNPCKHQDQ512,
28342 IX86_BUILTIN_PUNPCKLDQ512,
28343 IX86_BUILTIN_PUNPCKLQDQ512,
28344 IX86_BUILTIN_PXORD512,
28345 IX86_BUILTIN_PXORQ512,
28346 IX86_BUILTIN_RCP14PD512,
28347 IX86_BUILTIN_RCP14PS512,
28348 IX86_BUILTIN_RCP14SD,
28349 IX86_BUILTIN_RCP14SS,
28350 IX86_BUILTIN_RNDSCALEPD,
28351 IX86_BUILTIN_RNDSCALEPS,
28352 IX86_BUILTIN_RNDSCALESD,
28353 IX86_BUILTIN_RNDSCALESS,
28354 IX86_BUILTIN_RSQRT14PD512,
28355 IX86_BUILTIN_RSQRT14PS512,
28356 IX86_BUILTIN_RSQRT14SD,
28357 IX86_BUILTIN_RSQRT14SS,
28358 IX86_BUILTIN_SCALEFPD512,
28359 IX86_BUILTIN_SCALEFPS512,
28360 IX86_BUILTIN_SCALEFSD,
28361 IX86_BUILTIN_SCALEFSS,
28362 IX86_BUILTIN_SHUFPD512,
28363 IX86_BUILTIN_SHUFPS512,
28364 IX86_BUILTIN_SHUF_F32x4,
28365 IX86_BUILTIN_SHUF_F64x2,
28366 IX86_BUILTIN_SHUF_I32x4,
28367 IX86_BUILTIN_SHUF_I64x2,
28368 IX86_BUILTIN_SQRTPD512,
28369 IX86_BUILTIN_SQRTPD512_MASK,
28370 IX86_BUILTIN_SQRTPS512_MASK,
28371 IX86_BUILTIN_SQRTPS_NR512,
28372 IX86_BUILTIN_SQRTSD_ROUND,
28373 IX86_BUILTIN_SQRTSS_ROUND,
28374 IX86_BUILTIN_STOREAPD512,
28375 IX86_BUILTIN_STOREAPS512,
28376 IX86_BUILTIN_STOREDQUDI512,
28377 IX86_BUILTIN_STOREDQUSI512,
28378 IX86_BUILTIN_STOREUPD512,
28379 IX86_BUILTIN_STOREUPS512,
28380 IX86_BUILTIN_SUBPD512,
28381 IX86_BUILTIN_SUBPS512,
28382 IX86_BUILTIN_SUBSD_ROUND,
28383 IX86_BUILTIN_SUBSS_ROUND,
28384 IX86_BUILTIN_UCMPD512,
28385 IX86_BUILTIN_UCMPQ512,
28386 IX86_BUILTIN_UNPCKHPD512,
28387 IX86_BUILTIN_UNPCKHPS512,
28388 IX86_BUILTIN_UNPCKLPD512,
28389 IX86_BUILTIN_UNPCKLPS512,
28390 IX86_BUILTIN_VCVTSD2SI32,
28391 IX86_BUILTIN_VCVTSD2SI64,
28392 IX86_BUILTIN_VCVTSD2USI32,
28393 IX86_BUILTIN_VCVTSD2USI64,
28394 IX86_BUILTIN_VCVTSS2SI32,
28395 IX86_BUILTIN_VCVTSS2SI64,
28396 IX86_BUILTIN_VCVTSS2USI32,
28397 IX86_BUILTIN_VCVTSS2USI64,
28398 IX86_BUILTIN_VCVTTSD2SI32,
28399 IX86_BUILTIN_VCVTTSD2SI64,
28400 IX86_BUILTIN_VCVTTSD2USI32,
28401 IX86_BUILTIN_VCVTTSD2USI64,
28402 IX86_BUILTIN_VCVTTSS2SI32,
28403 IX86_BUILTIN_VCVTTSS2SI64,
28404 IX86_BUILTIN_VCVTTSS2USI32,
28405 IX86_BUILTIN_VCVTTSS2USI64,
28406 IX86_BUILTIN_VFMADDPD512_MASK,
28407 IX86_BUILTIN_VFMADDPD512_MASK3,
28408 IX86_BUILTIN_VFMADDPD512_MASKZ,
28409 IX86_BUILTIN_VFMADDPS512_MASK,
28410 IX86_BUILTIN_VFMADDPS512_MASK3,
28411 IX86_BUILTIN_VFMADDPS512_MASKZ,
28412 IX86_BUILTIN_VFMADDSD3_ROUND,
28413 IX86_BUILTIN_VFMADDSS3_ROUND,
28414 IX86_BUILTIN_VFMADDSUBPD512_MASK,
28415 IX86_BUILTIN_VFMADDSUBPD512_MASK3,
28416 IX86_BUILTIN_VFMADDSUBPD512_MASKZ,
28417 IX86_BUILTIN_VFMADDSUBPS512_MASK,
28418 IX86_BUILTIN_VFMADDSUBPS512_MASK3,
28419 IX86_BUILTIN_VFMADDSUBPS512_MASKZ,
28420 IX86_BUILTIN_VFMSUBADDPD512_MASK3,
28421 IX86_BUILTIN_VFMSUBADDPS512_MASK3,
28422 IX86_BUILTIN_VFMSUBPD512_MASK3,
28423 IX86_BUILTIN_VFMSUBPS512_MASK3,
28424 IX86_BUILTIN_VFMSUBSD3_MASK3,
28425 IX86_BUILTIN_VFMSUBSS3_MASK3,
28426 IX86_BUILTIN_VFNMADDPD512_MASK,
28427 IX86_BUILTIN_VFNMADDPS512_MASK,
28428 IX86_BUILTIN_VFNMSUBPD512_MASK,
28429 IX86_BUILTIN_VFNMSUBPD512_MASK3,
28430 IX86_BUILTIN_VFNMSUBPS512_MASK,
28431 IX86_BUILTIN_VFNMSUBPS512_MASK3,
28432 IX86_BUILTIN_VPCLZCNTD512,
28433 IX86_BUILTIN_VPCLZCNTQ512,
28434 IX86_BUILTIN_VPCONFLICTD512,
28435 IX86_BUILTIN_VPCONFLICTQ512,
28436 IX86_BUILTIN_VPERMDF512,
28437 IX86_BUILTIN_VPERMDI512,
28438 IX86_BUILTIN_VPERMI2VARD512,
28439 IX86_BUILTIN_VPERMI2VARPD512,
28440 IX86_BUILTIN_VPERMI2VARPS512,
28441 IX86_BUILTIN_VPERMI2VARQ512,
28442 IX86_BUILTIN_VPERMILPD512,
28443 IX86_BUILTIN_VPERMILPS512,
28444 IX86_BUILTIN_VPERMILVARPD512,
28445 IX86_BUILTIN_VPERMILVARPS512,
28446 IX86_BUILTIN_VPERMT2VARD512,
28447 IX86_BUILTIN_VPERMT2VARD512_MASKZ,
28448 IX86_BUILTIN_VPERMT2VARPD512,
28449 IX86_BUILTIN_VPERMT2VARPD512_MASKZ,
28450 IX86_BUILTIN_VPERMT2VARPS512,
28451 IX86_BUILTIN_VPERMT2VARPS512_MASKZ,
28452 IX86_BUILTIN_VPERMT2VARQ512,
28453 IX86_BUILTIN_VPERMT2VARQ512_MASKZ,
28454 IX86_BUILTIN_VPERMVARDF512,
28455 IX86_BUILTIN_VPERMVARDI512,
28456 IX86_BUILTIN_VPERMVARSF512,
28457 IX86_BUILTIN_VPERMVARSI512,
28458 IX86_BUILTIN_VTERNLOGD512_MASK,
28459 IX86_BUILTIN_VTERNLOGD512_MASKZ,
28460 IX86_BUILTIN_VTERNLOGQ512_MASK,
28461 IX86_BUILTIN_VTERNLOGQ512_MASKZ,
28462
28463 /* Mask arithmetic operations */
28464 IX86_BUILTIN_KAND16,
28465 IX86_BUILTIN_KANDN16,
28466 IX86_BUILTIN_KNOT16,
28467 IX86_BUILTIN_KOR16,
28468 IX86_BUILTIN_KORTESTC16,
28469 IX86_BUILTIN_KORTESTZ16,
28470 IX86_BUILTIN_KUNPCKBW,
28471 IX86_BUILTIN_KXNOR16,
28472 IX86_BUILTIN_KXOR16,
28473 IX86_BUILTIN_KMOV16,
28474
28475 /* Alternate 4- and 8-element gather/scatter for the vectorizer
28476 where all operands are 32-byte or 64-byte wide respectively. */
28477 IX86_BUILTIN_GATHERALTSIV4DF,
28478 IX86_BUILTIN_GATHERALTDIV8SF,
28479 IX86_BUILTIN_GATHERALTSIV4DI,
28480 IX86_BUILTIN_GATHERALTDIV8SI,
28481 IX86_BUILTIN_GATHER3ALTDIV16SF,
28482 IX86_BUILTIN_GATHER3ALTDIV16SI,
28483 IX86_BUILTIN_GATHER3ALTSIV8DF,
28484 IX86_BUILTIN_GATHER3ALTSIV8DI,
28485 IX86_BUILTIN_GATHER3DIV16SF,
28486 IX86_BUILTIN_GATHER3DIV16SI,
28487 IX86_BUILTIN_GATHER3DIV8DF,
28488 IX86_BUILTIN_GATHER3DIV8DI,
28489 IX86_BUILTIN_GATHER3SIV16SF,
28490 IX86_BUILTIN_GATHER3SIV16SI,
28491 IX86_BUILTIN_GATHER3SIV8DF,
28492 IX86_BUILTIN_GATHER3SIV8DI,
28493 IX86_BUILTIN_SCATTERDIV16SF,
28494 IX86_BUILTIN_SCATTERDIV16SI,
28495 IX86_BUILTIN_SCATTERDIV8DF,
28496 IX86_BUILTIN_SCATTERDIV8DI,
28497 IX86_BUILTIN_SCATTERSIV16SF,
28498 IX86_BUILTIN_SCATTERSIV16SI,
28499 IX86_BUILTIN_SCATTERSIV8DF,
28500 IX86_BUILTIN_SCATTERSIV8DI,
28501
28502 /* AVX512PF */
28503 IX86_BUILTIN_GATHERPFQPD,
28504 IX86_BUILTIN_GATHERPFDPS,
28505 IX86_BUILTIN_GATHERPFDPD,
28506 IX86_BUILTIN_GATHERPFQPS,
28507 IX86_BUILTIN_SCATTERPFDPD,
28508 IX86_BUILTIN_SCATTERPFDPS,
28509 IX86_BUILTIN_SCATTERPFQPD,
28510 IX86_BUILTIN_SCATTERPFQPS,
28511
28512 /* AVX-512ER */
28513 IX86_BUILTIN_EXP2PD_MASK,
28514 IX86_BUILTIN_EXP2PS_MASK,
28515 IX86_BUILTIN_EXP2PS,
28516 IX86_BUILTIN_RCP28PD,
28517 IX86_BUILTIN_RCP28PS,
28518 IX86_BUILTIN_RCP28SD,
28519 IX86_BUILTIN_RCP28SS,
28520 IX86_BUILTIN_RSQRT28PD,
28521 IX86_BUILTIN_RSQRT28PS,
28522 IX86_BUILTIN_RSQRT28SD,
28523 IX86_BUILTIN_RSQRT28SS,
28524
28525 /* SHA builtins. */
28526 IX86_BUILTIN_SHA1MSG1,
28527 IX86_BUILTIN_SHA1MSG2,
28528 IX86_BUILTIN_SHA1NEXTE,
28529 IX86_BUILTIN_SHA1RNDS4,
28530 IX86_BUILTIN_SHA256MSG1,
28531 IX86_BUILTIN_SHA256MSG2,
28532 IX86_BUILTIN_SHA256RNDS2,
28533
28534 /* CLFLUSHOPT instructions. */
28535 IX86_BUILTIN_CLFLUSHOPT,
28536
28537 /* TFmode support builtins. */
28538 IX86_BUILTIN_INFQ,
28539 IX86_BUILTIN_HUGE_VALQ,
28540 IX86_BUILTIN_FABSQ,
28541 IX86_BUILTIN_COPYSIGNQ,
28542
28543 /* Vectorizer support builtins. */
28544 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512,
28545 IX86_BUILTIN_CPYSGNPS,
28546 IX86_BUILTIN_CPYSGNPD,
28547 IX86_BUILTIN_CPYSGNPS256,
28548 IX86_BUILTIN_CPYSGNPS512,
28549 IX86_BUILTIN_CPYSGNPD256,
28550 IX86_BUILTIN_CPYSGNPD512,
28551 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512,
28552 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512,
28553
28554
28555 /* FMA4 instructions. */
28556 IX86_BUILTIN_VFMADDSS,
28557 IX86_BUILTIN_VFMADDSD,
28558 IX86_BUILTIN_VFMADDPS,
28559 IX86_BUILTIN_VFMADDPD,
28560 IX86_BUILTIN_VFMADDPS256,
28561 IX86_BUILTIN_VFMADDPD256,
28562 IX86_BUILTIN_VFMADDSUBPS,
28563 IX86_BUILTIN_VFMADDSUBPD,
28564 IX86_BUILTIN_VFMADDSUBPS256,
28565 IX86_BUILTIN_VFMADDSUBPD256,
28566
28567 /* FMA3 instructions. */
28568 IX86_BUILTIN_VFMADDSS3,
28569 IX86_BUILTIN_VFMADDSD3,
28570
28571 /* XOP instructions. */
28572 IX86_BUILTIN_VPCMOV,
28573 IX86_BUILTIN_VPCMOV_V2DI,
28574 IX86_BUILTIN_VPCMOV_V4SI,
28575 IX86_BUILTIN_VPCMOV_V8HI,
28576 IX86_BUILTIN_VPCMOV_V16QI,
28577 IX86_BUILTIN_VPCMOV_V4SF,
28578 IX86_BUILTIN_VPCMOV_V2DF,
28579 IX86_BUILTIN_VPCMOV256,
28580 IX86_BUILTIN_VPCMOV_V4DI256,
28581 IX86_BUILTIN_VPCMOV_V8SI256,
28582 IX86_BUILTIN_VPCMOV_V16HI256,
28583 IX86_BUILTIN_VPCMOV_V32QI256,
28584 IX86_BUILTIN_VPCMOV_V8SF256,
28585 IX86_BUILTIN_VPCMOV_V4DF256,
28586
28587 IX86_BUILTIN_VPPERM,
28588
28589 IX86_BUILTIN_VPMACSSWW,
28590 IX86_BUILTIN_VPMACSWW,
28591 IX86_BUILTIN_VPMACSSWD,
28592 IX86_BUILTIN_VPMACSWD,
28593 IX86_BUILTIN_VPMACSSDD,
28594 IX86_BUILTIN_VPMACSDD,
28595 IX86_BUILTIN_VPMACSSDQL,
28596 IX86_BUILTIN_VPMACSSDQH,
28597 IX86_BUILTIN_VPMACSDQL,
28598 IX86_BUILTIN_VPMACSDQH,
28599 IX86_BUILTIN_VPMADCSSWD,
28600 IX86_BUILTIN_VPMADCSWD,
28601
28602 IX86_BUILTIN_VPHADDBW,
28603 IX86_BUILTIN_VPHADDBD,
28604 IX86_BUILTIN_VPHADDBQ,
28605 IX86_BUILTIN_VPHADDWD,
28606 IX86_BUILTIN_VPHADDWQ,
28607 IX86_BUILTIN_VPHADDDQ,
28608 IX86_BUILTIN_VPHADDUBW,
28609 IX86_BUILTIN_VPHADDUBD,
28610 IX86_BUILTIN_VPHADDUBQ,
28611 IX86_BUILTIN_VPHADDUWD,
28612 IX86_BUILTIN_VPHADDUWQ,
28613 IX86_BUILTIN_VPHADDUDQ,
28614 IX86_BUILTIN_VPHSUBBW,
28615 IX86_BUILTIN_VPHSUBWD,
28616 IX86_BUILTIN_VPHSUBDQ,
28617
28618 IX86_BUILTIN_VPROTB,
28619 IX86_BUILTIN_VPROTW,
28620 IX86_BUILTIN_VPROTD,
28621 IX86_BUILTIN_VPROTQ,
28622 IX86_BUILTIN_VPROTB_IMM,
28623 IX86_BUILTIN_VPROTW_IMM,
28624 IX86_BUILTIN_VPROTD_IMM,
28625 IX86_BUILTIN_VPROTQ_IMM,
28626
28627 IX86_BUILTIN_VPSHLB,
28628 IX86_BUILTIN_VPSHLW,
28629 IX86_BUILTIN_VPSHLD,
28630 IX86_BUILTIN_VPSHLQ,
28631 IX86_BUILTIN_VPSHAB,
28632 IX86_BUILTIN_VPSHAW,
28633 IX86_BUILTIN_VPSHAD,
28634 IX86_BUILTIN_VPSHAQ,
28635
28636 IX86_BUILTIN_VFRCZSS,
28637 IX86_BUILTIN_VFRCZSD,
28638 IX86_BUILTIN_VFRCZPS,
28639 IX86_BUILTIN_VFRCZPD,
28640 IX86_BUILTIN_VFRCZPS256,
28641 IX86_BUILTIN_VFRCZPD256,
28642
28643 IX86_BUILTIN_VPCOMEQUB,
28644 IX86_BUILTIN_VPCOMNEUB,
28645 IX86_BUILTIN_VPCOMLTUB,
28646 IX86_BUILTIN_VPCOMLEUB,
28647 IX86_BUILTIN_VPCOMGTUB,
28648 IX86_BUILTIN_VPCOMGEUB,
28649 IX86_BUILTIN_VPCOMFALSEUB,
28650 IX86_BUILTIN_VPCOMTRUEUB,
28651
28652 IX86_BUILTIN_VPCOMEQUW,
28653 IX86_BUILTIN_VPCOMNEUW,
28654 IX86_BUILTIN_VPCOMLTUW,
28655 IX86_BUILTIN_VPCOMLEUW,
28656 IX86_BUILTIN_VPCOMGTUW,
28657 IX86_BUILTIN_VPCOMGEUW,
28658 IX86_BUILTIN_VPCOMFALSEUW,
28659 IX86_BUILTIN_VPCOMTRUEUW,
28660
28661 IX86_BUILTIN_VPCOMEQUD,
28662 IX86_BUILTIN_VPCOMNEUD,
28663 IX86_BUILTIN_VPCOMLTUD,
28664 IX86_BUILTIN_VPCOMLEUD,
28665 IX86_BUILTIN_VPCOMGTUD,
28666 IX86_BUILTIN_VPCOMGEUD,
28667 IX86_BUILTIN_VPCOMFALSEUD,
28668 IX86_BUILTIN_VPCOMTRUEUD,
28669
28670 IX86_BUILTIN_VPCOMEQUQ,
28671 IX86_BUILTIN_VPCOMNEUQ,
28672 IX86_BUILTIN_VPCOMLTUQ,
28673 IX86_BUILTIN_VPCOMLEUQ,
28674 IX86_BUILTIN_VPCOMGTUQ,
28675 IX86_BUILTIN_VPCOMGEUQ,
28676 IX86_BUILTIN_VPCOMFALSEUQ,
28677 IX86_BUILTIN_VPCOMTRUEUQ,
28678
28679 IX86_BUILTIN_VPCOMEQB,
28680 IX86_BUILTIN_VPCOMNEB,
28681 IX86_BUILTIN_VPCOMLTB,
28682 IX86_BUILTIN_VPCOMLEB,
28683 IX86_BUILTIN_VPCOMGTB,
28684 IX86_BUILTIN_VPCOMGEB,
28685 IX86_BUILTIN_VPCOMFALSEB,
28686 IX86_BUILTIN_VPCOMTRUEB,
28687
28688 IX86_BUILTIN_VPCOMEQW,
28689 IX86_BUILTIN_VPCOMNEW,
28690 IX86_BUILTIN_VPCOMLTW,
28691 IX86_BUILTIN_VPCOMLEW,
28692 IX86_BUILTIN_VPCOMGTW,
28693 IX86_BUILTIN_VPCOMGEW,
28694 IX86_BUILTIN_VPCOMFALSEW,
28695 IX86_BUILTIN_VPCOMTRUEW,
28696
28697 IX86_BUILTIN_VPCOMEQD,
28698 IX86_BUILTIN_VPCOMNED,
28699 IX86_BUILTIN_VPCOMLTD,
28700 IX86_BUILTIN_VPCOMLED,
28701 IX86_BUILTIN_VPCOMGTD,
28702 IX86_BUILTIN_VPCOMGED,
28703 IX86_BUILTIN_VPCOMFALSED,
28704 IX86_BUILTIN_VPCOMTRUED,
28705
28706 IX86_BUILTIN_VPCOMEQQ,
28707 IX86_BUILTIN_VPCOMNEQ,
28708 IX86_BUILTIN_VPCOMLTQ,
28709 IX86_BUILTIN_VPCOMLEQ,
28710 IX86_BUILTIN_VPCOMGTQ,
28711 IX86_BUILTIN_VPCOMGEQ,
28712 IX86_BUILTIN_VPCOMFALSEQ,
28713 IX86_BUILTIN_VPCOMTRUEQ,
28714
28715 /* LWP instructions. */
28716 IX86_BUILTIN_LLWPCB,
28717 IX86_BUILTIN_SLWPCB,
28718 IX86_BUILTIN_LWPVAL32,
28719 IX86_BUILTIN_LWPVAL64,
28720 IX86_BUILTIN_LWPINS32,
28721 IX86_BUILTIN_LWPINS64,
28722
28723 IX86_BUILTIN_CLZS,
28724
28725 /* RTM */
28726 IX86_BUILTIN_XBEGIN,
28727 IX86_BUILTIN_XEND,
28728 IX86_BUILTIN_XABORT,
28729 IX86_BUILTIN_XTEST,
28730
28731 /* BMI instructions. */
28732 IX86_BUILTIN_BEXTR32,
28733 IX86_BUILTIN_BEXTR64,
28734 IX86_BUILTIN_CTZS,
28735
28736 /* TBM instructions. */
28737 IX86_BUILTIN_BEXTRI32,
28738 IX86_BUILTIN_BEXTRI64,
28739
28740 /* BMI2 instructions. */
28741 IX86_BUILTIN_BZHI32,
28742 IX86_BUILTIN_BZHI64,
28743 IX86_BUILTIN_PDEP32,
28744 IX86_BUILTIN_PDEP64,
28745 IX86_BUILTIN_PEXT32,
28746 IX86_BUILTIN_PEXT64,
28747
28748 /* ADX instructions. */
28749 IX86_BUILTIN_ADDCARRYX32,
28750 IX86_BUILTIN_ADDCARRYX64,
28751
28752 /* FSGSBASE instructions. */
28753 IX86_BUILTIN_RDFSBASE32,
28754 IX86_BUILTIN_RDFSBASE64,
28755 IX86_BUILTIN_RDGSBASE32,
28756 IX86_BUILTIN_RDGSBASE64,
28757 IX86_BUILTIN_WRFSBASE32,
28758 IX86_BUILTIN_WRFSBASE64,
28759 IX86_BUILTIN_WRGSBASE32,
28760 IX86_BUILTIN_WRGSBASE64,
28761
28762 /* RDRND instructions. */
28763 IX86_BUILTIN_RDRAND16_STEP,
28764 IX86_BUILTIN_RDRAND32_STEP,
28765 IX86_BUILTIN_RDRAND64_STEP,
28766
28767 /* RDSEED instructions. */
28768 IX86_BUILTIN_RDSEED16_STEP,
28769 IX86_BUILTIN_RDSEED32_STEP,
28770 IX86_BUILTIN_RDSEED64_STEP,
28771
28772 /* F16C instructions. */
28773 IX86_BUILTIN_CVTPH2PS,
28774 IX86_BUILTIN_CVTPH2PS256,
28775 IX86_BUILTIN_CVTPS2PH,
28776 IX86_BUILTIN_CVTPS2PH256,
28777
28778 /* CFString built-in for darwin */
28779 IX86_BUILTIN_CFSTRING,
28780
28781 /* Builtins to get CPU type and supported features. */
28782 IX86_BUILTIN_CPU_INIT,
28783 IX86_BUILTIN_CPU_IS,
28784 IX86_BUILTIN_CPU_SUPPORTS,
28785
28786 /* Read/write FLAGS register built-ins. */
28787 IX86_BUILTIN_READ_FLAGS,
28788 IX86_BUILTIN_WRITE_FLAGS,
28789
28790 IX86_BUILTIN_MAX
28791 };
28792
28793 /* Table for the ix86 builtin decls. */
28794 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
28795
28796 /* Table of all of the builtin functions that are possible with different ISAs
28797 but are waiting to be built until a function is declared to use that
28798 ISA. */
28799 struct builtin_isa {
28800 const char *name; /* function name */
28801 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
28802 HOST_WIDE_INT isa; /* isa_flags this builtin is defined for */
28803 bool const_p; /* true if the declaration is constant */
28804 bool set_and_not_built_p; /* true if the builtin is deferred and not yet built */
28805 };
28806
28807 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
28808
28809
28810 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the
28811 MASK of isa_flags the builtin requires in the ix86_builtins_isa array.
28812 Store the function decl in the ix86_builtins array. Return the function
28813 decl, or NULL_TREE if the builtin was not added.
28814
28815 If the front end has a special hook for builtin functions, delay adding
28816 builtin functions that aren't in the current ISA until the ISA is changed
28817 with function specific optimization. Doing so can save about 300K for the
28818 default compiler. When the builtin is expanded, check at that time whether
28819 it is valid.
28820
28821 If the front end doesn't have a special hook, record all builtins, even
28822 those whose instruction set isn't in the current ISA, in case the user uses
28823 function specific options for a different ISA, so that we don't get scope
28824 errors if a builtin is added in the middle of a function scope. */
28825
28826 static inline tree
28827 def_builtin (HOST_WIDE_INT mask, const char *name,
28828 enum ix86_builtin_func_type tcode,
28829 enum ix86_builtins code)
28830 {
28831 tree decl = NULL_TREE;
28832
28833 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
28834 {
28835 ix86_builtins_isa[(int) code].isa = mask;
28836
28837 mask &= ~OPTION_MASK_ISA_64BIT;
28838 if (mask == 0
28839 || (mask & ix86_isa_flags) != 0
28840 || (lang_hooks.builtin_function
28841 == lang_hooks.builtin_function_ext_scope))
28843 {
28844 tree type = ix86_get_builtin_func_type (tcode);
28845 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
28846 NULL, NULL_TREE);
28847 ix86_builtins[(int) code] = decl;
28848 ix86_builtins_isa[(int) code].set_and_not_built_p = false;
28849 }
28850 else
28851 {
28852 ix86_builtins[(int) code] = NULL_TREE;
28853 ix86_builtins_isa[(int) code].tcode = tcode;
28854 ix86_builtins_isa[(int) code].name = name;
28855 ix86_builtins_isa[(int) code].const_p = false;
28856 ix86_builtins_isa[(int) code].set_and_not_built_p = true;
28857 }
28858 }
28859
28860 return decl;
28861 }
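
/* Illustrative sketch only (the builtin name and enumerator below are
   hypothetical, not real entries in this file): a builtin is typically
   registered through def_builtin or def_builtin_const, e.g.

     def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_example",
                        INT_FTYPE_INT, IX86_BUILTIN_EXAMPLE);

   If SSE2 is not part of the current ISA and the front end has its own
   builtin_function hook, the decl is not built here; it is only recorded
   in ix86_builtins_isa and materialized later by ix86_add_new_builtins.  */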
28862
28863 /* Like def_builtin, but also marks the function decl "const". */
28864
28865 static inline tree
28866 def_builtin_const (HOST_WIDE_INT mask, const char *name,
28867 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
28868 {
28869 tree decl = def_builtin (mask, name, tcode, code);
28870 if (decl)
28871 TREE_READONLY (decl) = 1;
28872 else
28873 ix86_builtins_isa[(int) code].const_p = true;
28874
28875 return decl;
28876 }
28877
28878 /* Add any new builtin functions for a given ISA that may not have been
28879 declared. This saves a bit of space compared to adding all of the
28880 declarations to the tree up front, including the ones we never use. */
28881
28882 static void
28883 ix86_add_new_builtins (HOST_WIDE_INT isa)
28884 {
28885 int i;
28886
28887 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
28888 {
28889 if ((ix86_builtins_isa[i].isa & isa) != 0
28890 && ix86_builtins_isa[i].set_and_not_built_p)
28891 {
28892 tree decl, type;
28893
28894 /* Don't define the builtin again. */
28895 ix86_builtins_isa[i].set_and_not_built_p = false;
28896
28897 type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
28898 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
28899 type, i, BUILT_IN_MD, NULL,
28900 NULL_TREE);
28901
28902 ix86_builtins[i] = decl;
28903 if (ix86_builtins_isa[i].const_p)
28904 TREE_READONLY (decl) = 1;
28905 }
28906 }
28907 }
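
/* Hedged usage sketch (the function name f and the avx2 target are only
   an example, not taken from this file): with function specific options
   such as

     __attribute__ ((target ("avx2"))) void f (void) { ... }

   the AVX2 ISA flag can be enabled for just that function even though the
   translation unit was not compiled with -mavx2; the builtins that
   def_builtin deferred above are then created here when the new isa flags
   are passed in.  */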
28908
28909 /* Bits for builtin_description.flag. */
28910
28911 /* Set when we don't support the comparison natively, and should
28912 swap the comparison operands in order to support it. */
28913 #define BUILTIN_DESC_SWAP_OPERANDS 1
28914
28915 struct builtin_description
28916 {
28917 const HOST_WIDE_INT mask;
28918 const enum insn_code icode;
28919 const char *const name;
28920 const enum ix86_builtins code;
28921 const enum rtx_code comparison;
28922 const int flag;
28923 };
28924
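/* Illustrative reading of an entry (based on the first record below): when
   SSE is enabled, "__builtin_ia32_comieq" is expanded through
   CODE_FOR_sse_comi with comparison code UNEQ; the final flag field holds,
   depending on the table, either extra bits such as
   BUILTIN_DESC_SWAP_OPERANDS, a flags-register mode, or a function type
   code cast to int.  */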
28925 static const struct builtin_description bdesc_comi[] =
28926 {
28927 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
28928 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
28929 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
28930 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
28931 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
28932 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
28933 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
28934 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
28935 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
28936 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
28937 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
28938 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
28939 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
28940 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
28941 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
28942 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
28943 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
28944 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
28945 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
28946 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
28947 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
28948 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
28949 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
28950 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
28951 };
28952
28953 static const struct builtin_description bdesc_pcmpestr[] =
28954 {
28955 /* SSE4.2 */
28956 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
28957 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
28958 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
28959 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
28960 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
28961 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
28962 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
28963 };
28964
28965 static const struct builtin_description bdesc_pcmpistr[] =
28966 {
28967 /* SSE4.2 */
28968 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
28969 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
28970 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
28971 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
28972 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
28973 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
28974 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
28975 };
28976
28977 /* Special builtins with variable number of arguments. */
28978 static const struct builtin_description bdesc_special_args[] =
28979 {
28980 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
28981 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
28982 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },
28983
28984 /* 80387 (used internally for atomic compound assignment). */
28985 { 0, CODE_FOR_fnstenv, "__builtin_ia32_fnstenv", IX86_BUILTIN_FNSTENV, UNKNOWN, (int) VOID_FTYPE_PVOID },
28986 { 0, CODE_FOR_fldenv, "__builtin_ia32_fldenv", IX86_BUILTIN_FLDENV, UNKNOWN, (int) VOID_FTYPE_PCVOID },
28987 { 0, CODE_FOR_fnstsw, "__builtin_ia32_fnstsw", IX86_BUILTIN_FNSTSW, UNKNOWN, (int) USHORT_FTYPE_VOID },
28988 { 0, CODE_FOR_fnclex, "__builtin_ia32_fnclex", IX86_BUILTIN_FNCLEX, UNKNOWN, (int) VOID_FTYPE_VOID },
28989
28990 /* MMX */
28991 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
28992
28993 /* 3DNow! */
28994 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
28995
28996 /* FXSR, XSAVE, XSAVEOPT, XSAVEC and XSAVES. */
28997 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxsave", IX86_BUILTIN_FXSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID },
28998 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxrstor", IX86_BUILTIN_FXRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID },
28999 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xsave", IX86_BUILTIN_XSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29000 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xrstor", IX86_BUILTIN_XRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29001 { OPTION_MASK_ISA_XSAVEOPT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt", IX86_BUILTIN_XSAVEOPT, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29002 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xsaves", IX86_BUILTIN_XSAVES, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29003 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xrstors", IX86_BUILTIN_XRSTORS, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29004 { OPTION_MASK_ISA_XSAVEC, CODE_FOR_nothing, "__builtin_ia32_xsavec", IX86_BUILTIN_XSAVEC, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29005
29006 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxsave64", IX86_BUILTIN_FXSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID },
29007 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxrstor64", IX86_BUILTIN_FXRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID },
29008 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsave64", IX86_BUILTIN_XSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29009 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstor64", IX86_BUILTIN_XRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29010 { OPTION_MASK_ISA_XSAVEOPT | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt64", IX86_BUILTIN_XSAVEOPT64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29011 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaves64", IX86_BUILTIN_XSAVES64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29012 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstors64", IX86_BUILTIN_XRSTORS64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29013 { OPTION_MASK_ISA_XSAVEC | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsavec64", IX86_BUILTIN_XSAVEC64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29014
29015 /* SSE */
29016 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storeups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
29017 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
29018 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
29019
29020 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
29021 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
29022 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
29023 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
29024
29025 /* SSE or 3DNow!A */
29026 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
29027 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntq, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
29028
29029 /* SSE2 */
29030 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
29031 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
29032 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storeupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
29033 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storedquv16qi, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
29034 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
29035 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
29036 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
29037 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG },
29038 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
29039 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loaddquv16qi, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
29040
29041 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
29042 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
29043
29044 /* SSE3 */
29045 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
29046
29047 /* SSE4.1 */
29048 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
29049
29050 /* SSE4A */
29051 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
29052 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
29053
29054 /* AVX */
29055 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
29056 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
29057
29058 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
29059 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
29060 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
29061 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
29062 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
29063
29064 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
29065 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
29066 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
29067 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
29068 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loaddquv32qi, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
29069 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storedquv32qi, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
29070 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
29071
29072 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
29073 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
29074 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
29075
29076 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
29077 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
29078 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
29079 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
29080 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
29081 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
29082 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
29083 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },
29084
29085 /* AVX2 */
29086 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_movntdqa, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256, UNKNOWN, (int) V4DI_FTYPE_PV4DI },
29087 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI },
29088 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI },
29089 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd256, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI },
29090 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq256, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI },
29091 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_V4SI },
29092 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_V2DI },
29093 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI },
29094 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI },
29095
29096 /* AVX512F */
29097 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16sf_mask, "__builtin_ia32_compressstoresf512_mask", IX86_BUILTIN_COMPRESSPSSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
29098 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16si_mask, "__builtin_ia32_compressstoresi512_mask", IX86_BUILTIN_PCOMPRESSDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
29099 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8df_mask, "__builtin_ia32_compressstoredf512_mask", IX86_BUILTIN_COMPRESSPDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
29100 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8di_mask, "__builtin_ia32_compressstoredi512_mask", IX86_BUILTIN_PCOMPRESSQSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
29101 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandloadsf512_mask", IX86_BUILTIN_EXPANDPSLOAD512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29102 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandloadsf512_maskz", IX86_BUILTIN_EXPANDPSLOAD512Z, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29103 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandloadsi512_mask", IX86_BUILTIN_PEXPANDDLOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29104 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandloadsi512_maskz", IX86_BUILTIN_PEXPANDDLOAD512Z, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29105 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expandloaddf512_mask", IX86_BUILTIN_EXPANDPDLOAD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29106 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expandloaddf512_maskz", IX86_BUILTIN_EXPANDPDLOAD512Z, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29107 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expandloaddi512_mask", IX86_BUILTIN_PEXPANDQLOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29108 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expandloaddi512_maskz", IX86_BUILTIN_PEXPANDQLOAD512Z, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29109 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv16si_mask, "__builtin_ia32_loaddqusi512_mask", IX86_BUILTIN_LOADDQUSI512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29110 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv8di_mask, "__builtin_ia32_loaddqudi512_mask", IX86_BUILTIN_LOADDQUDI512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29111 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadupd512_mask, "__builtin_ia32_loadupd512_mask", IX86_BUILTIN_LOADUPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29112 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadups512_mask, "__builtin_ia32_loadups512_mask", IX86_BUILTIN_LOADUPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29113 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_loadaps512_mask", IX86_BUILTIN_LOADAPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29114 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32load512_mask", IX86_BUILTIN_MOVDQA32LOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29115 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_loadapd512_mask", IX86_BUILTIN_LOADAPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29116 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64load512_mask", IX86_BUILTIN_MOVDQA64LOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29117 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv16sf, "__builtin_ia32_movntps512", IX86_BUILTIN_MOVNTPS512, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V16SF },
29118 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8df, "__builtin_ia32_movntpd512", IX86_BUILTIN_MOVNTPD512, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V8DF },
29119 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8di, "__builtin_ia32_movntdq512", IX86_BUILTIN_MOVNTDQ512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI },
29120 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntdqa, "__builtin_ia32_movntdqa512", IX86_BUILTIN_MOVNTDQA512, UNKNOWN, (int) V8DI_FTYPE_PV8DI },
29121 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv16si_mask, "__builtin_ia32_storedqusi512_mask", IX86_BUILTIN_STOREDQUSI512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
29122 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv8di_mask, "__builtin_ia32_storedqudi512_mask", IX86_BUILTIN_STOREDQUDI512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
29123 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeupd512_mask, "__builtin_ia32_storeupd512_mask", IX86_BUILTIN_STOREUPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
29124 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask_store, "__builtin_ia32_pmovusqd512mem_mask", IX86_BUILTIN_PMOVUSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
29125 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask_store, "__builtin_ia32_pmovsqd512mem_mask", IX86_BUILTIN_PMOVSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
29126 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask_store, "__builtin_ia32_pmovqd512mem_mask", IX86_BUILTIN_PMOVQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
29127 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovusqw512mem_mask", IX86_BUILTIN_PMOVUSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
29128 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovsqw512mem_mask", IX86_BUILTIN_PMOVSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
29129 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovqw512mem_mask", IX86_BUILTIN_PMOVQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
29130 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovusdw512mem_mask", IX86_BUILTIN_PMOVUSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
29131 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovsdw512mem_mask", IX86_BUILTIN_PMOVSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
29132 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovdw512mem_mask", IX86_BUILTIN_PMOVDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
29133 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovqb512mem_mask", IX86_BUILTIN_PMOVQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
29134 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovusqb512mem_mask", IX86_BUILTIN_PMOVUSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
29135 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovsqb512mem_mask", IX86_BUILTIN_PMOVSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
29136 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovusdb512mem_mask", IX86_BUILTIN_PMOVUSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
29137 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovsdb512mem_mask", IX86_BUILTIN_PMOVSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
29138 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovdb512mem_mask", IX86_BUILTIN_PMOVDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
29139 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeups512_mask, "__builtin_ia32_storeups512_mask", IX86_BUILTIN_STOREUPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
29140 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16sf_mask, "__builtin_ia32_storeaps512_mask", IX86_BUILTIN_STOREAPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
29141 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16si_mask, "__builtin_ia32_movdqa32store512_mask", IX86_BUILTIN_MOVDQA32STORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
29142 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8df_mask, "__builtin_ia32_storeapd512_mask", IX86_BUILTIN_STOREAPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
29143 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8di_mask, "__builtin_ia32_movdqa64store512_mask", IX86_BUILTIN_MOVDQA64STORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
29144
29145 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
29146 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
29147 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
29148 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
29149 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
29150 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
29151
29152 /* FSGSBASE */
29153 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
29154 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
29155 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
29156 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
29157 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
29158 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
29159 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
29160 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
29161
29162 /* RTM */
29163 { OPTION_MASK_ISA_RTM, CODE_FOR_xbegin, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
29164 { OPTION_MASK_ISA_RTM, CODE_FOR_xend, "__builtin_ia32_xend", IX86_BUILTIN_XEND, UNKNOWN, (int) VOID_FTYPE_VOID },
29165 { OPTION_MASK_ISA_RTM, CODE_FOR_xtest, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST, UNKNOWN, (int) INT_FTYPE_VOID },
29166 };
29167
29168 /* Builtins with variable number of arguments. */
29169 static const struct builtin_description bdesc_args[] =
29170 {
29171 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
29172 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
29173 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
29174 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
29175 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
29176 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
29177 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
29178
29179 /* MMX */
29180 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29181 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29182 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29183 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29184 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29185 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29186
29187 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29188 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29189 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29190 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29191 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29192 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29193 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29194 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29195
29196 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29197 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29198
29199 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29200 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29201 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29202 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29203
29204 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29205 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29206 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29207 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29208 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29209 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29210
29211 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29212 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29213 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29214 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29215 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI},
29216 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI},
29217
29218 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
29219 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
29220 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
29221
29222 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
29223
29224 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
29225 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
29226 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
29227 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
29228 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
29229 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
29230
29231 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
29232 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
29233 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
29234 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
29235 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
29236 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
29237
29238 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
29239 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
29240 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
29241 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
29242
29243 /* 3DNow! */
29244 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
29245 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
29246 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
29247 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
29248
29249 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29250 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29251 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29252 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
29253 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
29254 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
29255 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29256 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29257 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29258 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29259 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29260 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29261 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29262 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29263 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29264
29265 /* 3DNow!A */
29266 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
29267 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
29268 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
29269 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
29270 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29271 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29272
29273 /* SSE */
29274 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
29275 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29276 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29277 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29278 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29279 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29280 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
29281 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
29282 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
29283 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
29284 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
29285 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
29286
29287 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29288
29289 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29290 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29291 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29292 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29293 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29294 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29295 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29296 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29297
29298 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
29299 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
29300 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
29301 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29302 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29303 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29304 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
29305 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
29306 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
29307 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29308 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP},
29309 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29310 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
29311 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
29312 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
29313 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29314 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
29315 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
29316 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
29317 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29318
29319 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29320 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29321 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29322 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29323
29324 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29325 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29326 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29327 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29328
29329 { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29330
29331 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29332 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29333 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29334 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29335 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29336
29337 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
29338 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
29339 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },
29340
29341 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
29342
29343 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
29344 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
29345 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
29346
29347 { OPTION_MASK_ISA_SSE, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
29348 { OPTION_MASK_ISA_SSE, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
29349
29350 /* SSE MMX or 3DNow!A */
29351 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29352 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29353 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29354
29355 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29356 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29357 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29358 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29359
29360 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
29361 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
29362
29363 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
29364
29365 /* SSE2 */
29366 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29367
29368 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
29369 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
29370 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
29371 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
29372 { OPTION_MASK_ISA_SSE2, CODE_FOR_floatv4siv4sf2, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
29373
29374 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
29375 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
29376 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
29377 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
29378 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
29379
29380 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
29381
29382 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
29383 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
29384 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
29385 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
29386
29387 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_fix_notruncv4sfv4si, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
29388 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
29389 { OPTION_MASK_ISA_SSE2, CODE_FOR_fix_truncv4sfv4si2, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
29390
29391 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29392 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29393 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29394 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29395 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29396 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29397 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29398 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29399
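/* SSE2 compares: the compare builtins below all expand through the same
   maskcmp (or vmmaskcmp) pattern; the rtx_code column supplies the
   predicate, and the *_SWAP prototypes swap the two operands at expansion
   time so cmpgt/cmpge can be expressed with the LT/LE codes.  */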
29400 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
29401 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
29402 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
29403 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29404 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29405 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29406 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
29407 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
29408 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
29409 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29410 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29411 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29412 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
29413 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
29414 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
29415 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29416 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
29417 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
29418 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
29419 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29420
29421 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29422 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29423 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29424 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29425
29426 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29427 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29428 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29429 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29430
29431 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29432
29433 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29434 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29435 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29436
29437 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
29438
29439 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29440 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29441 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29442 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29443 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29444 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29445 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29446 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29447
29448 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29449 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29450 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29451 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29452 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29453 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29454 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29455 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29456
29457 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29458 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29459
29460 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29461 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29462 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29463 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29464
29465 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29466 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29467
29468 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29469 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29470 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29471 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29472 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29473 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29474
29475 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29476 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29477 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29478 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29479
29480 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29481 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29482 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29483 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29484 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29485 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29486 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29487 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29488
29489 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
29490 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
29491 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
29492
29493 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29494 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
29495
29496 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
29497 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_widen_umult_even_v4si, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
29498
29499 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
29500
29501 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
29502 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
29503 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
29504 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
29505
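/* Vector shifts: the *_INT_CONVERT prototypes are the whole-register byte
   shifts (pslldq/psrldq) whose patterns work in V1TImode, the *_SI_COUNT
   prototypes take an immediate or scalar shift count, and the *_V*_COUNT
   prototypes take the count in the low quadword of an XMM register.  */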
29506 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
29507 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
29508 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
29509 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
29510 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
29511 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
29512 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
29513
29514 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
29515 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
29516 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
29517 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
29518 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
29519 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
29520 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
29521
29522 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
29523 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
29524 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
29525 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
29526
29527 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
29528 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
29529 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
29530
29531 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
29532
29533 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
29534
29535 /* SSE2 MMX */
29536 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
29537 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
29538
29539 /* SSE3 */
29540 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29541 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29542
29543 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29544 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29545 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29546 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29547 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29548 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29549
29550 /* SSSE3 */
29551 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
29552 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
29553 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
29554 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
29555 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
29556 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
29557
29558 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29559 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29560 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29561 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29562 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29563 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29564 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29565 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29566 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29567 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29568 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29569 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29570 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
29571 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
29572 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29573 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29574 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29575 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29576 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29577 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29578 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29579 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29580 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29581 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29582
29583 /* SSSE3. */
29584 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
29585 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
29586
29587 /* SSE4.1 */
29588 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29589 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29590 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
29591 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
29592 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29593 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29594 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29595 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
29596 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
29597 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
29598
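/* pmovsx/pmovzx sign- or zero-extend the low elements of the 128-bit
   source operand, which is why the prototypes keep the full-width input
   vector type.  */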
29599 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
29600 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
29601 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
29602 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
29603 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
29604 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
29605 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
29606 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
29607 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
29608 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
29609 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
29610 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
29611 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
29612
29613 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
29614 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29615 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29616 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29617 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29618 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29619 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29620 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29621 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29622 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29623 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
29624 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29625
29626 /* SSE4.1 */
29627 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
29628 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
29629 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29630 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29631
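/* floorpd/ceilpd/truncpd/rintpd reuse the sse4_1_roundpd pattern; the
   ROUND_* constant carried in the rtx_code column becomes the rounding-mode
   immediate when the builtin is expanded.  */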
29632 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
29633 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
29634 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
29635 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },
29636
29637 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
29638 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
29639
29640 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF },
29641 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
29642
29643 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
29644 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
29645 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
29646 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },
29647
29648 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V4SF_ROUND },
29649 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V4SF_ROUND },
29650
29651 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29652 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2_sfix, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX, UNKNOWN, (int) V4SI_FTYPE_V4SF },
29653
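/* ptest builtins: the rtx_code column selects which flag result is
   materialized -- EQ for ptestz (ZF), LTU for ptestc (CF) and GTU for
   ptestnzc (neither flag set).  The vtestpd/vtestps entries in the AVX
   section below use the same scheme.  */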
29654 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
29655 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
29656 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
29657
29658 /* SSE4.2 */
29659 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29660 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
29661 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
29662 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29663 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
29664
29665 /* SSE4A */
29666 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
29667 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
29668 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
29669 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29670
29671 /* AES */
29672 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
29673 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
29674
29675 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29676 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29677 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29678 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29679
29680 /* PCLMUL */
29681 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
29682
29683 /* AVX */
29684 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29685 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29686 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29687 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29688 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29689 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29690 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29691 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29692 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29693 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29694 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29695 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29696 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29697 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29698 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29699 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29700 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29701 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29702 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29703 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29704 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29705 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29706 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29707 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29708 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29709 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29710
29711 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
29712 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
29713 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
29714 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
29715
29716 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29717 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29718 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
29719 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
29720 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29721 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29722 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29723 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29724 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29725 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29726 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29727 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29728 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29729 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
29730 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
29731 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
29732 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv4siv4df2, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
29733 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv8siv8sf2, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
29734 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
29735 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_fix_notruncv8sfv8si, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
29736 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
29737 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv4dfv4si2, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
29738 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
29739 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv8sfv8si2, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
29740 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29741 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29742 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
29743 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
29744 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
29745 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
29746 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
29747 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
29748 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
29749 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
29750
29751 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29752 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29753 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
29754
29755 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
29756 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29757 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29758 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29759 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29760
29761 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29762
29763 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
29764 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
29765
29766 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
29767 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
29768 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
29769 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },
29770
29771 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
29772 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
29773
29774 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
29775 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
29776
29777 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
29778 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
29779 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
29780 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },
29781
29782 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V8SF_ROUND },
29783 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V8SF_ROUND },
29784
29785 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29786 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2_sfix, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
29787
29788 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29789 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29790 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29791 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29792
29793 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
29794 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
29795 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
29796 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
29797 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
29798 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
29799
29800 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
29801 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
29802 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
29803 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
29804 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
29805 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
29806 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
29807 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
29808 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
29809 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
29810 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
29811 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
29812 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
29813 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
29814 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
29815
29816 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
29817 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
29818
29819 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29820 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29821
29822 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_pack_sfix_v4df, "__builtin_ia32_vec_pack_sfix256 ", IX86_BUILTIN_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
29823
29824 /* AVX2 */
29825 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT },
29826 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI },
29827 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv16hi2, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI },
29828 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv8si2, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI },
29829 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packssdw, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
29830 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packsswb, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
29831 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packusdw, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
29832 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packuswb, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
29833 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv32qi3, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29834 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv16hi3, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29835 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv8si3, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29836 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv4di3, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29837 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv32qi3, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29838 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv16hi3, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29839 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv32qi3, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29840 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv16hi3, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29841 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_palignrv2ti, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT },
29842 { OPTION_MASK_ISA_AVX2, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29843 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29844 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29845 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29846 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI },
29847 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT },
29848 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv32qi3, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29849 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv16hi3, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29850 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv8si3, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29851 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv4di3, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29852 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv32qi3, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29853 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv16hi3, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29854 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv8si3, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29855 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv4di3, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29856 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddwv16hi3, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29857 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phadddv8si3, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29858 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddswv16hi3, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29859 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubwv16hi3, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29860 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubdv8si3, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29861 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubswv16hi3, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29862 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddubsw256, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
29863 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddwd, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI },
29864 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv32qi3, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29865 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv16hi3, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29866 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv8si3, "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29867 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv32qi3, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29868 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv16hi3, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29869 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv8si3, "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29870 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv32qi3, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29871 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv16hi3, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29872 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv8si3, "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29873 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv32qi3, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29874 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv16hi3, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29875 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv8si3, "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29876 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmovmskb, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256, UNKNOWN, (int) INT_FTYPE_V32QI },
29877 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv16qiv16hi2, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
29878 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8qiv8si2, "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
29879 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4qiv4di2, "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
29880 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8hiv8si2, "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
29881 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4hiv4di2, "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
29882 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4siv4di2, "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
29883 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv16qiv16hi2, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
29884 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8qiv8si2, "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
29885 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4qiv4di2, "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
29886 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8hiv8si2, "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
29887 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2, "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
29888 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2, "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
29889 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_smult_even_v8si, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
29890 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmulhrswv16hi3, "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29891 { OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256", IX86_BUILTIN_PMULHUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29892 { OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256", IX86_BUILTIN_PMULHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29893 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256", IX86_BUILTIN_PMULLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29894 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256", IX86_BUILTIN_PMULLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29895 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_umult_even_v8si, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
29896 { OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29897 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
29898 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29899 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufdv3, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT },
29900 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufhwv3, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
29901 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshuflwv3, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
29902 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv32qi3, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29903 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv16hi3, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29904 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv8si3, "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29905 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlv2ti3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
29906 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
29907 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
29908 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
29909 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
29910 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
29911 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
29912 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
29913 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
29914 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
29915 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
29916 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrv2ti3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
29917 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
29918 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
29919 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
29920 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
29921 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
29922 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
29923 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv32qi3, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29924 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv16hi3, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29925 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv8si3, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29926 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv4di3, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29927 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv32qi3, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29928 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv16hi3, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29929 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv32qi3, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29930 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv16hi3, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29931 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv32qi, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29932 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv16hi, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29933 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv8si, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29934 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv4di, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29935 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv32qi, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29936 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv16hi, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29937 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv8si, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29938 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv4di, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29939 { OPTION_MASK_ISA_AVX2, CODE_FOR_xorv4di3, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29940 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4sf, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29941 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv8sf, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256, UNKNOWN, (int) V8SF_FTYPE_V4SF },
29942 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4df, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256, UNKNOWN, (int) V4DF_FTYPE_V2DF },
29943 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vbroadcasti128_v4di, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
29944 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv4si, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
29945 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv8si, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
29946 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv32qi, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256, UNKNOWN, (int) V32QI_FTYPE_V16QI },
29947 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16hi, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256, UNKNOWN, (int) V16HI_FTYPE_V8HI },
29948 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8si, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256, UNKNOWN, (int) V8SI_FTYPE_V4SI },
29949 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4di, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
29950 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16qi, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
29951 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8hi, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
29952 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4si, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
29953 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
29954 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29955 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
29956 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
29957 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
29958 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
29959 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_extracti128, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
29960 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_inserti128, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
29961 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29962 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29963 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29964 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4si, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29965 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv8si, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29966 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv4si, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29967 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4di, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29968 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv2di, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29969 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv8si, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29970 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4si, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29971
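/* LZCNT */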
29972 { OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
29973
29974 /* BMI */
29975 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29976 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
29977 { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
29978
29979 /* TBM */
29980 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29981 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
29982
29983 /* F16C */
29984 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
29985 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
29986 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
29987 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
29988
29989 /* BMI2 */
29990 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29991 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
29992 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29993 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
29994 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29995 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
29996
29997 /* AVX512F */
29998 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv16si_mask, "__builtin_ia32_alignd512_mask", IX86_BUILTIN_ALIGND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
29999 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv8di_mask, "__builtin_ia32_alignq512_mask", IX86_BUILTIN_ALIGNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
30000 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16si, "__builtin_ia32_blendmd_512_mask", IX86_BUILTIN_BLENDMD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30001 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8df, "__builtin_ia32_blendmpd_512_mask", IX86_BUILTIN_BLENDMPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30002 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16sf, "__builtin_ia32_blendmps_512_mask", IX86_BUILTIN_BLENDMPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30003 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8di, "__builtin_ia32_blendmq_512_mask", IX86_BUILTIN_BLENDMQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30004 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x4_512", IX86_BUILTIN_BROADCASTF32X4_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
30005 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8df_mask, "__builtin_ia32_broadcastf64x4_512", IX86_BUILTIN_BROADCASTF64X4_512, UNKNOWN, (int) V8DF_FTYPE_V4DF_V8DF_QI },
30006 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16si_mask, "__builtin_ia32_broadcasti32x4_512", IX86_BUILTIN_BROADCASTI32X4_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
30007 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8di_mask, "__builtin_ia32_broadcasti64x4_512", IX86_BUILTIN_BROADCASTI64X4_512, UNKNOWN, (int) V8DI_FTYPE_V4DI_V8DI_QI },
30008 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8df_mask, "__builtin_ia32_broadcastsd512", IX86_BUILTIN_BROADCASTSD512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_QI },
30009 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16sf_mask, "__builtin_ia32_broadcastss512", IX86_BUILTIN_BROADCASTSS512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
30010 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16si3_mask, "__builtin_ia32_cmpd512_mask", IX86_BUILTIN_CMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
30011 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8di3_mask, "__builtin_ia32_cmpq512_mask", IX86_BUILTIN_CMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
30012 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8df_mask, "__builtin_ia32_compressdf512_mask", IX86_BUILTIN_COMPRESSPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30013 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16sf_mask, "__builtin_ia32_compresssf512_mask", IX86_BUILTIN_COMPRESSPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30014 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv8siv8df2_mask, "__builtin_ia32_cvtdq2pd512_mask", IX86_BUILTIN_CVTDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
30015 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtps2ph512_mask, "__builtin_ia32_vcvtps2ph512_mask", IX86_BUILTIN_CVTPS2PH512, UNKNOWN, (int) V16HI_FTYPE_V16SF_INT_V16HI_HI },
30016 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv8siv8df_mask, "__builtin_ia32_cvtudq2pd512_mask", IX86_BUILTIN_CVTUDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
30017 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2sd32, "__builtin_ia32_cvtusi2sd32", IX86_BUILTIN_CVTUSI2SD32, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT },
30018 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expanddf512_mask", IX86_BUILTIN_EXPANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30019 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expanddf512_maskz", IX86_BUILTIN_EXPANDPD512Z, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30020 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandsf512_mask", IX86_BUILTIN_EXPANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30021 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandsf512_maskz", IX86_BUILTIN_EXPANDPS512Z, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30022 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf32x4_mask, "__builtin_ia32_extractf32x4_mask", IX86_BUILTIN_EXTRACTF32X4, UNKNOWN, (int) V4SF_FTYPE_V16SF_INT_V4SF_QI },
30023 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf64x4_mask, "__builtin_ia32_extractf64x4_mask", IX86_BUILTIN_EXTRACTF64X4, UNKNOWN, (int) V4DF_FTYPE_V8DF_INT_V4DF_QI },
30024 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti32x4_mask, "__builtin_ia32_extracti32x4_mask", IX86_BUILTIN_EXTRACTI32X4, UNKNOWN, (int) V4SI_FTYPE_V16SI_INT_V4SI_QI },
30025 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti64x4_mask, "__builtin_ia32_extracti64x4_mask", IX86_BUILTIN_EXTRACTI64X4, UNKNOWN, (int) V4DI_FTYPE_V8DI_INT_V4DI_QI },
30026 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf32x4_mask, "__builtin_ia32_insertf32x4_mask", IX86_BUILTIN_INSERTF32X4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI },
30027 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf64x4_mask, "__builtin_ia32_insertf64x4_mask", IX86_BUILTIN_INSERTF64X4, UNKNOWN, (int) V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI },
30028 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti32x4_mask, "__builtin_ia32_inserti32x4_mask", IX86_BUILTIN_INSERTI32X4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI },
30029 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti64x4_mask, "__builtin_ia32_inserti64x4_mask", IX86_BUILTIN_INSERTI64X4, UNKNOWN, (int) V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI },
30030 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_movapd512_mask", IX86_BUILTIN_MOVAPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30031 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_movaps512_mask", IX86_BUILTIN_MOVAPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30032 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movddup512_mask, "__builtin_ia32_movddup512_mask", IX86_BUILTIN_MOVDDUP512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30033 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32_512_mask", IX86_BUILTIN_MOVDQA32_512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30034 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64_512_mask", IX86_BUILTIN_MOVDQA64_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30035 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movshdup512_mask, "__builtin_ia32_movshdup512_mask", IX86_BUILTIN_MOVSHDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30036 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movsldup512_mask, "__builtin_ia32_movsldup512_mask", IX86_BUILTIN_MOVSLDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30037 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv16si2_mask, "__builtin_ia32_pabsd512_mask", IX86_BUILTIN_PABSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30038 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv8di2_mask, "__builtin_ia32_pabsq512_mask", IX86_BUILTIN_PABSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30039 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16si3_mask, "__builtin_ia32_paddd512_mask", IX86_BUILTIN_PADDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30040 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8di3_mask, "__builtin_ia32_paddq512_mask", IX86_BUILTIN_PADDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30041 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv16si3_mask, "__builtin_ia32_pandd512_mask", IX86_BUILTIN_PANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30042 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv16si3_mask, "__builtin_ia32_pandnd512_mask", IX86_BUILTIN_PANDND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30043 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv8di3_mask, "__builtin_ia32_pandnq512_mask", IX86_BUILTIN_PANDNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30044 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv8di3_mask, "__builtin_ia32_pandq512_mask", IX86_BUILTIN_PANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30045 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16si_mask, "__builtin_ia32_pbroadcastd512", IX86_BUILTIN_PBROADCASTD512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
30046 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv16si_mask, "__builtin_ia32_pbroadcastd512_gpr_mask", IX86_BUILTIN_PBROADCASTD512_GPR, UNKNOWN, (int) V16SI_FTYPE_SI_V16SI_HI },
30047 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv8di, "__builtin_ia32_broadcastmb512", IX86_BUILTIN_PBROADCASTMB512, UNKNOWN, (int) V8DI_FTYPE_QI },
30048 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv16si, "__builtin_ia32_broadcastmw512", IX86_BUILTIN_PBROADCASTMW512, UNKNOWN, (int) V16SI_FTYPE_HI },
30049 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8di_mask, "__builtin_ia32_pbroadcastq512", IX86_BUILTIN_PBROADCASTQ512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI },
30050 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vec_dup_gprv8di_mask, "__builtin_ia32_pbroadcastq512_gpr_mask", IX86_BUILTIN_PBROADCASTQ512_GPR, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
30051 { OPTION_MASK_ISA_AVX512F & ~OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vec_dup_memv8di_mask, "__builtin_ia32_pbroadcastq512_mem_mask", IX86_BUILTIN_PBROADCASTQ512_MEM, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
30052 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv16si3_mask, "__builtin_ia32_pcmpeqd512_mask", IX86_BUILTIN_PCMPEQD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
30053 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv8di3_mask, "__builtin_ia32_pcmpeqq512_mask", IX86_BUILTIN_PCMPEQQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
30054 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv16si3_mask, "__builtin_ia32_pcmpgtd512_mask", IX86_BUILTIN_PCMPGTD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
30055 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv8di3_mask, "__builtin_ia32_pcmpgtq512_mask", IX86_BUILTIN_PCMPGTQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
30056 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16si_mask, "__builtin_ia32_compresssi512_mask", IX86_BUILTIN_PCOMPRESSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30057 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8di_mask, "__builtin_ia32_compressdi512_mask", IX86_BUILTIN_PCOMPRESSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30058 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandsi512_mask", IX86_BUILTIN_PEXPANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30059 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandsi512_maskz", IX86_BUILTIN_PEXPANDD512Z, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30060 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expanddi512_mask", IX86_BUILTIN_PEXPANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30061 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expanddi512_maskz", IX86_BUILTIN_PEXPANDQ512Z, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30062 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16si3_mask, "__builtin_ia32_pmaxsd512_mask", IX86_BUILTIN_PMAXSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30063 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8di3_mask, "__builtin_ia32_pmaxsq512_mask", IX86_BUILTIN_PMAXSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30064 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv16si3_mask, "__builtin_ia32_pmaxud512_mask", IX86_BUILTIN_PMAXUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30065 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv8di3_mask, "__builtin_ia32_pmaxuq512_mask", IX86_BUILTIN_PMAXUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30066 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16si3_mask, "__builtin_ia32_pminsd512_mask", IX86_BUILTIN_PMINSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30067 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8di3_mask, "__builtin_ia32_pminsq512_mask", IX86_BUILTIN_PMINSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30068 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv16si3_mask, "__builtin_ia32_pminud512_mask", IX86_BUILTIN_PMINUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30069 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv8di3_mask, "__builtin_ia32_pminuq512_mask", IX86_BUILTIN_PMINUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30070 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask, "__builtin_ia32_pmovdb512_mask", IX86_BUILTIN_PMOVDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
30071 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask, "__builtin_ia32_pmovdw512_mask", IX86_BUILTIN_PMOVDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
30072 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask, "__builtin_ia32_pmovqb512_mask", IX86_BUILTIN_PMOVQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
30073 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask, "__builtin_ia32_pmovqd512_mask", IX86_BUILTIN_PMOVQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
30074 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask, "__builtin_ia32_pmovqw512_mask", IX86_BUILTIN_PMOVQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
30075 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask, "__builtin_ia32_pmovsdb512_mask", IX86_BUILTIN_PMOVSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
30076 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask, "__builtin_ia32_pmovsdw512_mask", IX86_BUILTIN_PMOVSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
30077 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask, "__builtin_ia32_pmovsqb512_mask", IX86_BUILTIN_PMOVSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
30078 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask, "__builtin_ia32_pmovsqd512_mask", IX86_BUILTIN_PMOVSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
30079 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask, "__builtin_ia32_pmovsqw512_mask", IX86_BUILTIN_PMOVSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
30080 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16qiv16si2_mask, "__builtin_ia32_pmovsxbd512_mask", IX86_BUILTIN_PMOVSXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
30081 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8qiv8di2_mask, "__builtin_ia32_pmovsxbq512_mask", IX86_BUILTIN_PMOVSXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
30082 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8siv8di2_mask, "__builtin_ia32_pmovsxdq512_mask", IX86_BUILTIN_PMOVSXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
30083 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16hiv16si2_mask, "__builtin_ia32_pmovsxwd512_mask", IX86_BUILTIN_PMOVSXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
30084 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8hiv8di2_mask, "__builtin_ia32_pmovsxwq512_mask", IX86_BUILTIN_PMOVSXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
30085 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask, "__builtin_ia32_pmovusdb512_mask", IX86_BUILTIN_PMOVUSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
30086 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask, "__builtin_ia32_pmovusdw512_mask", IX86_BUILTIN_PMOVUSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
30087 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask, "__builtin_ia32_pmovusqb512_mask", IX86_BUILTIN_PMOVUSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
30088 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask, "__builtin_ia32_pmovusqd512_mask", IX86_BUILTIN_PMOVUSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
30089 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask, "__builtin_ia32_pmovusqw512_mask", IX86_BUILTIN_PMOVUSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
30090 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16qiv16si2_mask, "__builtin_ia32_pmovzxbd512_mask", IX86_BUILTIN_PMOVZXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
30091 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8qiv8di2_mask, "__builtin_ia32_pmovzxbq512_mask", IX86_BUILTIN_PMOVZXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
30092 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8siv8di2_mask, "__builtin_ia32_pmovzxdq512_mask", IX86_BUILTIN_PMOVZXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
30093 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16hiv16si2_mask, "__builtin_ia32_pmovzxwd512_mask", IX86_BUILTIN_PMOVZXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
30094 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8hiv8di2_mask, "__builtin_ia32_pmovzxwq512_mask", IX86_BUILTIN_PMOVZXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
30095 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_smult_even_v16si_mask, "__builtin_ia32_pmuldq512_mask", IX86_BUILTIN_PMULDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
30096 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16si3_mask, "__builtin_ia32_pmulld512_mask", IX86_BUILTIN_PMULLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30097 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_umult_even_v16si_mask, "__builtin_ia32_pmuludq512_mask", IX86_BUILTIN_PMULUDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
30098 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv16si3_mask, "__builtin_ia32_pord512_mask", IX86_BUILTIN_PORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30099 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv8di3_mask, "__builtin_ia32_porq512_mask", IX86_BUILTIN_PORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30100 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv16si_mask, "__builtin_ia32_prold512_mask", IX86_BUILTIN_PROLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30101 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv8di_mask, "__builtin_ia32_prolq512_mask", IX86_BUILTIN_PROLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30102 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv16si_mask, "__builtin_ia32_prolvd512_mask", IX86_BUILTIN_PROLVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30103 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv8di_mask, "__builtin_ia32_prolvq512_mask", IX86_BUILTIN_PROLVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30104 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv16si_mask, "__builtin_ia32_prord512_mask", IX86_BUILTIN_PRORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30105 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv8di_mask, "__builtin_ia32_prorq512_mask", IX86_BUILTIN_PRORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30106 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv16si_mask, "__builtin_ia32_prorvd512_mask", IX86_BUILTIN_PRORVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30107 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv8di_mask, "__builtin_ia32_prorvq512_mask", IX86_BUILTIN_PRORVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30108 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pshufdv3_mask, "__builtin_ia32_pshufd512_mask", IX86_BUILTIN_PSHUFD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30109 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslld512_mask", IX86_BUILTIN_PSLLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
30110 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslldi512_mask", IX86_BUILTIN_PSLLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30111 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllq512_mask", IX86_BUILTIN_PSLLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
30112 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllqi512_mask", IX86_BUILTIN_PSLLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30113 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv16si_mask, "__builtin_ia32_psllv16si_mask", IX86_BUILTIN_PSLLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30114 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv8di_mask, "__builtin_ia32_psllv8di_mask", IX86_BUILTIN_PSLLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30115 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psrad512_mask", IX86_BUILTIN_PSRAD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
30116 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psradi512_mask", IX86_BUILTIN_PSRADI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30117 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraq512_mask", IX86_BUILTIN_PSRAQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
30118 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraqi512_mask", IX86_BUILTIN_PSRAQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30119 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv16si_mask, "__builtin_ia32_psrav16si_mask", IX86_BUILTIN_PSRAVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30120 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv8di_mask, "__builtin_ia32_psrav8di_mask", IX86_BUILTIN_PSRAVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30121 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrld512_mask", IX86_BUILTIN_PSRLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
30122 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrldi512_mask", IX86_BUILTIN_PSRLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30123 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlq512_mask", IX86_BUILTIN_PSRLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
30124 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlqi512_mask", IX86_BUILTIN_PSRLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30125 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv16si_mask, "__builtin_ia32_psrlv16si_mask", IX86_BUILTIN_PSRLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30126 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv8di_mask, "__builtin_ia32_psrlv8di_mask", IX86_BUILTIN_PSRLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30127 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16si3_mask, "__builtin_ia32_psubd512_mask", IX86_BUILTIN_PSUBD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30128 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8di3_mask, "__builtin_ia32_psubq512_mask", IX86_BUILTIN_PSUBQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30129 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv16si3_mask, "__builtin_ia32_ptestmd512", IX86_BUILTIN_PTESTMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
30130 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv8di3_mask, "__builtin_ia32_ptestmq512", IX86_BUILTIN_PTESTMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
30131 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv16si3_mask, "__builtin_ia32_ptestnmd512", IX86_BUILTIN_PTESTNMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
30132 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv8di3_mask, "__builtin_ia32_ptestnmq512", IX86_BUILTIN_PTESTNMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
30133 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv16si_mask, "__builtin_ia32_punpckhdq512_mask", IX86_BUILTIN_PUNPCKHDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30134 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv8di_mask, "__builtin_ia32_punpckhqdq512_mask", IX86_BUILTIN_PUNPCKHQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30135 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv16si_mask, "__builtin_ia32_punpckldq512_mask", IX86_BUILTIN_PUNPCKLDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30136 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv8di_mask, "__builtin_ia32_punpcklqdq512_mask", IX86_BUILTIN_PUNPCKLQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30137 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv16si3_mask, "__builtin_ia32_pxord512_mask", IX86_BUILTIN_PXORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30138 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv8di3_mask, "__builtin_ia32_pxorq512_mask", IX86_BUILTIN_PXORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30139 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v8df_mask, "__builtin_ia32_rcp14pd512_mask", IX86_BUILTIN_RCP14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30140 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v16sf_mask, "__builtin_ia32_rcp14ps512_mask", IX86_BUILTIN_RCP14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30141 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v2df, "__builtin_ia32_rcp14sd", IX86_BUILTIN_RCP14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
30142 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v4sf, "__builtin_ia32_rcp14ss", IX86_BUILTIN_RCP14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30143 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v8df_mask, "__builtin_ia32_rsqrt14pd512_mask", IX86_BUILTIN_RSQRT14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30144 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v16sf_mask, "__builtin_ia32_rsqrt14ps512_mask", IX86_BUILTIN_RSQRT14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30145 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v2df, "__builtin_ia32_rsqrt14sd", IX86_BUILTIN_RSQRT14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
30146 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v4sf, "__builtin_ia32_rsqrt14ss", IX86_BUILTIN_RSQRT14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30147 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufpd512_mask, "__builtin_ia32_shufpd512_mask", IX86_BUILTIN_SHUFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
30148 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufps512_mask, "__builtin_ia32_shufps512_mask", IX86_BUILTIN_SHUFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
30149 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_mask", IX86_BUILTIN_SHUF_F32x4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
30150 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_mask", IX86_BUILTIN_SHUF_F64x2, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
30151 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_mask", IX86_BUILTIN_SHUF_I32x4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
30152 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_mask", IX86_BUILTIN_SHUF_I64x2, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
30153 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv16si3_mask, "__builtin_ia32_ucmpd512_mask", IX86_BUILTIN_UCMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
30154 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv8di3_mask, "__builtin_ia32_ucmpq512_mask", IX86_BUILTIN_UCMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
30155 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhpd512_mask, "__builtin_ia32_unpckhpd512_mask", IX86_BUILTIN_UNPCKHPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
30156 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhps512_mask, "__builtin_ia32_unpckhps512_mask", IX86_BUILTIN_UNPCKHPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
30157 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklpd512_mask, "__builtin_ia32_unpcklpd512_mask", IX86_BUILTIN_UNPCKLPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
30158 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklps512_mask, "__builtin_ia32_unpcklps512_mask", IX86_BUILTIN_UNPCKLPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
30159 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv16si2_mask, "__builtin_ia32_vplzcntd_512_mask", IX86_BUILTIN_VPCLZCNTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30160 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv8di2_mask, "__builtin_ia32_vplzcntq_512_mask", IX86_BUILTIN_VPCLZCNTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30161 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv16si_mask, "__builtin_ia32_vpconflictsi_512_mask", IX86_BUILTIN_VPCONFLICTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30162 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv8di_mask, "__builtin_ia32_vpconflictdi_512_mask", IX86_BUILTIN_VPCONFLICTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30163 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8df_mask, "__builtin_ia32_permdf512_mask", IX86_BUILTIN_VPERMDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
30164 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8di_mask, "__builtin_ia32_permdi512_mask", IX86_BUILTIN_VPERMDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30165 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16si3_mask, "__builtin_ia32_vpermi2vard512_mask", IX86_BUILTIN_VPERMI2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30166 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8df3_mask, "__builtin_ia32_vpermi2varpd512_mask", IX86_BUILTIN_VPERMI2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
30167 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16sf3_mask, "__builtin_ia32_vpermi2varps512_mask", IX86_BUILTIN_VPERMI2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
30168 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8di3_mask, "__builtin_ia32_vpermi2varq512_mask", IX86_BUILTIN_VPERMI2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30169 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv8df_mask, "__builtin_ia32_vpermilpd512_mask", IX86_BUILTIN_VPERMILPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
30170 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv16sf_mask, "__builtin_ia32_vpermilps512_mask", IX86_BUILTIN_VPERMILPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI },
30171 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv8df3_mask, "__builtin_ia32_vpermilvarpd512_mask", IX86_BUILTIN_VPERMILVARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
30172 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv16sf3_mask, "__builtin_ia32_vpermilvarps512_mask", IX86_BUILTIN_VPERMILVARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
30173 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_mask, "__builtin_ia32_vpermt2vard512_mask", IX86_BUILTIN_VPERMT2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30174 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_maskz, "__builtin_ia32_vpermt2vard512_maskz", IX86_BUILTIN_VPERMT2VARD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30175 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_mask, "__builtin_ia32_vpermt2varpd512_mask", IX86_BUILTIN_VPERMT2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
30176 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_maskz, "__builtin_ia32_vpermt2varpd512_maskz", IX86_BUILTIN_VPERMT2VARPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
30177 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_mask, "__builtin_ia32_vpermt2varps512_mask", IX86_BUILTIN_VPERMT2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
30178 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_maskz, "__builtin_ia32_vpermt2varps512_maskz", IX86_BUILTIN_VPERMT2VARPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
30179 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_mask, "__builtin_ia32_vpermt2varq512_mask", IX86_BUILTIN_VPERMT2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30180 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_maskz, "__builtin_ia32_vpermt2varq512_maskz", IX86_BUILTIN_VPERMT2VARQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30181 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8df_mask, "__builtin_ia32_permvardf512_mask", IX86_BUILTIN_VPERMVARDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
30182 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8di_mask, "__builtin_ia32_permvardi512_mask", IX86_BUILTIN_VPERMVARDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30183 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16sf_mask, "__builtin_ia32_permvarsf512_mask", IX86_BUILTIN_VPERMVARSF512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
30184 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16si_mask, "__builtin_ia32_permvarsi512_mask", IX86_BUILTIN_VPERMVARSI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30185 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_mask, "__builtin_ia32_pternlogd512_mask", IX86_BUILTIN_VTERNLOGD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
30186 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_maskz, "__builtin_ia32_pternlogd512_maskz", IX86_BUILTIN_VTERNLOGD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
30187 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_mask, "__builtin_ia32_pternlogq512_mask", IX86_BUILTIN_VTERNLOGQ512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
30188 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_maskz, "__builtin_ia32_pternlogq512_maskz", IX86_BUILTIN_VTERNLOGQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
30189
30190 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv16sf3, "__builtin_ia32_copysignps512", IX86_BUILTIN_CPYSGNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF },
30191 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv8df3, "__builtin_ia32_copysignpd512", IX86_BUILTIN_CPYSGNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF },
30192 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2, "__builtin_ia32_sqrtpd512", IX86_BUILTIN_SQRTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF },
30193 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sqrtv16sf2, "__builtin_ia32_sqrtps512", IX86_BUILTIN_SQRTPS_NR512, UNKNOWN, (int) V16SF_FTYPE_V16SF },
30194 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf, "__builtin_ia32_exp2ps", IX86_BUILTIN_EXP2PS, UNKNOWN, (int) V16SF_FTYPE_V16SF },
30195 { OPTION_MASK_ISA_AVX512F, CODE_FOR_roundv8df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix512", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF },
30196 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_floorpd_vec_pack_sfix512", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
30197 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_ceilpd_vec_pack_sfix512", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
30198
30199 /* Mask arithmetic operations */
30200 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andhi3, "__builtin_ia32_kandhi", IX86_BUILTIN_KAND16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30201 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kandnhi, "__builtin_ia32_kandnhi", IX86_BUILTIN_KANDN16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30202 { OPTION_MASK_ISA_AVX512F, CODE_FOR_one_cmplhi2, "__builtin_ia32_knothi", IX86_BUILTIN_KNOT16, UNKNOWN, (int) HI_FTYPE_HI },
30203 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorhi3, "__builtin_ia32_korhi", IX86_BUILTIN_KOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30204 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestchi, "__builtin_ia32_kortestchi", IX86_BUILTIN_KORTESTC16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30205 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestzhi, "__builtin_ia32_kortestzhi", IX86_BUILTIN_KORTESTZ16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30206 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kunpckhi, "__builtin_ia32_kunpckhi", IX86_BUILTIN_KUNPCKBW, UNKNOWN, (int) HI_FTYPE_HI_HI },
30207 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kxnorhi, "__builtin_ia32_kxnorhi", IX86_BUILTIN_KXNOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30208 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorhi3, "__builtin_ia32_kxorhi", IX86_BUILTIN_KXOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30209 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kmovw, "__builtin_ia32_kmov16", IX86_BUILTIN_KMOV16, UNKNOWN, (int) HI_FTYPE_HI },
30210
30211 /* SHA */
30212 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg1, 0, IX86_BUILTIN_SHA1MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30213 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg2, 0, IX86_BUILTIN_SHA1MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30214 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1nexte, 0, IX86_BUILTIN_SHA1NEXTE, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30215 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1rnds4, 0, IX86_BUILTIN_SHA1RNDS4, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
30216 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg1, 0, IX86_BUILTIN_SHA256MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30217 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg2, 0, IX86_BUILTIN_SHA256MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30218 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256rnds2, 0, IX86_BUILTIN_SHA256RNDS2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
30219 };
30220
30221 /* Builtins with rounding support. */
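 /* Each entry below supplies the ISA option mask, the insn code, the
    builtin's name, its IX86_BUILTIN_* code, an optional rtx code
    (comparison or rounding selector), and the function prototype
    encoded as an ix86_builtin_func_type.  */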
30222 static const struct builtin_description bdesc_round_args[] =
30223 {
30224 /* AVX512F */
30225 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8df3_mask_round, "__builtin_ia32_addpd512_mask", IX86_BUILTIN_ADDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30226 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16sf3_mask_round, "__builtin_ia32_addps512_mask", IX86_BUILTIN_ADDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30227 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmaddv2df3_round, "__builtin_ia32_addsd_round", IX86_BUILTIN_ADDSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30228 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmaddv4sf3_round, "__builtin_ia32_addss_round", IX86_BUILTIN_ADDSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30229 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8df3_mask_round, "__builtin_ia32_cmppd512_mask", IX86_BUILTIN_CMPPD512, UNKNOWN, (int) QI_FTYPE_V8DF_V8DF_INT_QI_INT },
30230 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16sf3_mask_round, "__builtin_ia32_cmpps512_mask", IX86_BUILTIN_CMPPS512, UNKNOWN, (int) HI_FTYPE_V16SF_V16SF_INT_HI_INT },
30231 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv2df3_mask_round, "__builtin_ia32_cmpsd_mask", IX86_BUILTIN_CMPSD_MASK, UNKNOWN, (int) QI_FTYPE_V2DF_V2DF_INT_QI_INT },
30232 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv4sf3_mask_round, "__builtin_ia32_cmpss_mask", IX86_BUILTIN_CMPSS_MASK, UNKNOWN, (int) QI_FTYPE_V4SF_V4SF_INT_QI_INT },
30233 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_comi_round, "__builtin_ia32_vcomisd", IX86_BUILTIN_COMIDF, UNKNOWN, (int) INT_FTYPE_V2DF_V2DF_INT_INT },
30234 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_comi_round, "__builtin_ia32_vcomiss", IX86_BUILTIN_COMISF, UNKNOWN, (int) INT_FTYPE_V4SF_V4SF_INT_INT },
30235 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv16siv16sf2_mask_round, "__builtin_ia32_cvtdq2ps512_mask", IX86_BUILTIN_CVTDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
30236 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2dq512_mask_round, "__builtin_ia32_cvtpd2dq512_mask", IX86_BUILTIN_CVTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30237 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2ps512_mask_round, "__builtin_ia32_cvtpd2ps512_mask", IX86_BUILTIN_CVTPD2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DF_V8SF_QI_INT },
30238 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv8dfv8si_mask_round, "__builtin_ia32_cvtpd2udq512_mask", IX86_BUILTIN_CVTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30239 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtph2ps512_mask_round, "__builtin_ia32_vcvtph2ps512_mask", IX86_BUILTIN_CVTPH2PS512, UNKNOWN, (int) V16SF_FTYPE_V16HI_V16SF_HI_INT },
30240 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2dq512_mask", IX86_BUILTIN_CVTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30241 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtps2pd512_mask_round, "__builtin_ia32_cvtps2pd512_mask", IX86_BUILTIN_CVTPS2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SF_V8DF_QI_INT },
30242 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2udq512_mask", IX86_BUILTIN_CVTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30243 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2ss_round, "__builtin_ia32_cvtsd2ss_round", IX86_BUILTIN_CVTSD2SS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF_INT },
30244 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq_round, "__builtin_ia32_cvtsi2sd64", IX86_BUILTIN_CVTSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT64_INT },
30245 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtsi2ss_round, "__builtin_ia32_cvtsi2ss32", IX86_BUILTIN_CVTSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_INT },
30246 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq_round, "__builtin_ia32_cvtsi2ss64", IX86_BUILTIN_CVTSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT64_INT },
30247 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtss2sd_round, "__builtin_ia32_cvtss2sd_round", IX86_BUILTIN_CVTSS2SD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF_INT },
30248 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2dq512_mask", IX86_BUILTIN_CVTTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30249 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2udq512_mask", IX86_BUILTIN_CVTTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30250 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2dq512_mask", IX86_BUILTIN_CVTTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30251 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2udq512_mask", IX86_BUILTIN_CVTTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30252 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv16siv16sf2_mask_round, "__builtin_ia32_cvtudq2ps512_mask", IX86_BUILTIN_CVTUDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
30253 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2sd64_round, "__builtin_ia32_cvtusi2sd64", IX86_BUILTIN_CVTUSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT64_INT },
30254 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2ss32_round, "__builtin_ia32_cvtusi2ss32", IX86_BUILTIN_CVTUSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT_INT },
30255 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2ss64_round, "__builtin_ia32_cvtusi2ss64", IX86_BUILTIN_CVTUSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT64_INT },
30256 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv8df3_mask_round, "__builtin_ia32_divpd512_mask", IX86_BUILTIN_DIVPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30257 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv16sf3_mask_round, "__builtin_ia32_divps512_mask", IX86_BUILTIN_DIVPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30258 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmdivv2df3_round, "__builtin_ia32_divsd_round", IX86_BUILTIN_DIVSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30259 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmdivv4sf3_round, "__builtin_ia32_divss_round", IX86_BUILTIN_DIVSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30260 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_mask_round, "__builtin_ia32_fixupimmpd512_mask", IX86_BUILTIN_FIXUPIMMPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
30261 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_maskz_round, "__builtin_ia32_fixupimmpd512_maskz", IX86_BUILTIN_FIXUPIMMPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
30262 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_mask_round, "__builtin_ia32_fixupimmps512_mask", IX86_BUILTIN_FIXUPIMMPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
30263 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_maskz_round, "__builtin_ia32_fixupimmps512_maskz", IX86_BUILTIN_FIXUPIMMPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
30264 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_mask_round, "__builtin_ia32_fixupimmsd_mask", IX86_BUILTIN_FIXUPIMMSD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
30265 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_maskz_round, "__builtin_ia32_fixupimmsd_maskz", IX86_BUILTIN_FIXUPIMMSD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
30266 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_mask_round, "__builtin_ia32_fixupimmss_mask", IX86_BUILTIN_FIXUPIMMSS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
30267 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_maskz_round, "__builtin_ia32_fixupimmss_maskz", IX86_BUILTIN_FIXUPIMMSS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
30268 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv8df_mask_round, "__builtin_ia32_getexppd512_mask", IX86_BUILTIN_GETEXPPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30269 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv16sf_mask_round, "__builtin_ia32_getexpps512_mask", IX86_BUILTIN_GETEXPPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30270 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv2df_round, "__builtin_ia32_getexpsd128_round", IX86_BUILTIN_GETEXPSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30271 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv4sf_round, "__builtin_ia32_getexpss128_round", IX86_BUILTIN_GETEXPSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30272 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv8df_mask_round, "__builtin_ia32_getmantpd512_mask", IX86_BUILTIN_GETMANTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
30273 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv16sf_mask_round, "__builtin_ia32_getmantps512_mask", IX86_BUILTIN_GETMANTPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
30274 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv2df_round, "__builtin_ia32_getmantsd_round", IX86_BUILTIN_GETMANTSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
30275 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv4sf_round, "__builtin_ia32_getmantss_round", IX86_BUILTIN_GETMANTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
30276 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8df3_mask_round, "__builtin_ia32_maxpd512_mask", IX86_BUILTIN_MAXPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30277 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16sf3_mask_round, "__builtin_ia32_maxps512_mask", IX86_BUILTIN_MAXPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30278 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsmaxv2df3_round, "__builtin_ia32_maxsd_round", IX86_BUILTIN_MAXSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30279 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsmaxv4sf3_round, "__builtin_ia32_maxss_round", IX86_BUILTIN_MAXSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30280 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8df3_mask_round, "__builtin_ia32_minpd512_mask", IX86_BUILTIN_MINPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30281 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16sf3_mask_round, "__builtin_ia32_minps512_mask", IX86_BUILTIN_MINPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30282 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsminv2df3_round, "__builtin_ia32_minsd_round", IX86_BUILTIN_MINSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30283 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsminv4sf3_round, "__builtin_ia32_minss_round", IX86_BUILTIN_MINSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30284 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv8df3_mask_round, "__builtin_ia32_mulpd512_mask", IX86_BUILTIN_MULPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30285 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16sf3_mask_round, "__builtin_ia32_mulps512_mask", IX86_BUILTIN_MULPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30286 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmmulv2df3_round, "__builtin_ia32_mulsd_round", IX86_BUILTIN_MULSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30287 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmmulv4sf3_round, "__builtin_ia32_mulss_round", IX86_BUILTIN_MULSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30288 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev8df_mask_round, "__builtin_ia32_rndscalepd_mask", IX86_BUILTIN_RNDSCALEPD, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
30289 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev16sf_mask_round, "__builtin_ia32_rndscaleps_mask", IX86_BUILTIN_RNDSCALEPS, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
30290 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev2df_round, "__builtin_ia32_rndscalesd_round", IX86_BUILTIN_RNDSCALESD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
30291 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev4sf_round, "__builtin_ia32_rndscaless_round", IX86_BUILTIN_RNDSCALESS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
30292 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv8df_mask_round, "__builtin_ia32_scalefpd512_mask", IX86_BUILTIN_SCALEFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30293 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv16sf_mask_round, "__builtin_ia32_scalefps512_mask", IX86_BUILTIN_SCALEFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30294 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv2df_round, "__builtin_ia32_scalefsd_round", IX86_BUILTIN_SCALEFSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30295 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv4sf_round, "__builtin_ia32_scalefss_round", IX86_BUILTIN_SCALEFSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30296 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2_mask_round, "__builtin_ia32_sqrtpd512_mask", IX86_BUILTIN_SQRTPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30297 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv16sf2_mask_round, "__builtin_ia32_sqrtps512_mask", IX86_BUILTIN_SQRTPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30298 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsqrtv2df2_round, "__builtin_ia32_sqrtsd_round", IX86_BUILTIN_SQRTSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30299 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsqrtv4sf2_round, "__builtin_ia32_sqrtss_round", IX86_BUILTIN_SQRTSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30300 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8df3_mask_round, "__builtin_ia32_subpd512_mask", IX86_BUILTIN_SUBPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30301 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16sf3_mask_round, "__builtin_ia32_subps512_mask", IX86_BUILTIN_SUBPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30302 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsubv2df3_round, "__builtin_ia32_subsd_round", IX86_BUILTIN_SUBSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30303 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsubv4sf3_round, "__builtin_ia32_subss_round", IX86_BUILTIN_SUBSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30304 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2si_round, "__builtin_ia32_vcvtsd2si32", IX86_BUILTIN_VCVTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
30305 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq_round, "__builtin_ia32_vcvtsd2si64", IX86_BUILTIN_VCVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
30306 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtsd2usi_round, "__builtin_ia32_vcvtsd2usi32", IX86_BUILTIN_VCVTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
30307 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtsd2usiq_round, "__builtin_ia32_vcvtsd2usi64", IX86_BUILTIN_VCVTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
30308 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtss2si_round, "__builtin_ia32_vcvtss2si32", IX86_BUILTIN_VCVTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
30309 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq_round, "__builtin_ia32_vcvtss2si64", IX86_BUILTIN_VCVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
30310 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtss2usi_round, "__builtin_ia32_vcvtss2usi32", IX86_BUILTIN_VCVTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
30311 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtss2usiq_round, "__builtin_ia32_vcvtss2usi64", IX86_BUILTIN_VCVTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
30312 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvttsd2si_round, "__builtin_ia32_vcvttsd2si32", IX86_BUILTIN_VCVTTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
30313 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq_round, "__builtin_ia32_vcvttsd2si64", IX86_BUILTIN_VCVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
30314 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttsd2usi_round, "__builtin_ia32_vcvttsd2usi32", IX86_BUILTIN_VCVTTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
30315 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttsd2usiq_round, "__builtin_ia32_vcvttsd2usi64", IX86_BUILTIN_VCVTTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
30316 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvttss2si_round, "__builtin_ia32_vcvttss2si32", IX86_BUILTIN_VCVTTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
30317 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq_round, "__builtin_ia32_vcvttss2si64", IX86_BUILTIN_VCVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
30318 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttss2usi_round, "__builtin_ia32_vcvttss2usi32", IX86_BUILTIN_VCVTTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
30319 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttss2usiq_round, "__builtin_ia32_vcvttss2usi64", IX86_BUILTIN_VCVTTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
30320 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask_round, "__builtin_ia32_vfmaddpd512_mask", IX86_BUILTIN_VFMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30321 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask3_round, "__builtin_ia32_vfmaddpd512_mask3", IX86_BUILTIN_VFMADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30322 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_maskz_round, "__builtin_ia32_vfmaddpd512_maskz", IX86_BUILTIN_VFMADDPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30323 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask_round, "__builtin_ia32_vfmaddps512_mask", IX86_BUILTIN_VFMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30324 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask3_round, "__builtin_ia32_vfmaddps512_mask3", IX86_BUILTIN_VFMADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30325 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_maskz_round, "__builtin_ia32_vfmaddps512_maskz", IX86_BUILTIN_VFMADDPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30326 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v2df_round, "__builtin_ia32_vfmaddsd3_round", IX86_BUILTIN_VFMADDSD3_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_INT },
30327 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v4sf_round, "__builtin_ia32_vfmaddss3_round", IX86_BUILTIN_VFMADDSS3_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_INT },
30328 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask_round, "__builtin_ia32_vfmaddsubpd512_mask", IX86_BUILTIN_VFMADDSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30329 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask3_round, "__builtin_ia32_vfmaddsubpd512_mask3", IX86_BUILTIN_VFMADDSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30330 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_maskz_round, "__builtin_ia32_vfmaddsubpd512_maskz", IX86_BUILTIN_VFMADDSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30331 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask_round, "__builtin_ia32_vfmaddsubps512_mask", IX86_BUILTIN_VFMADDSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30332 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask3_round, "__builtin_ia32_vfmaddsubps512_mask3", IX86_BUILTIN_VFMADDSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30333 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_maskz_round, "__builtin_ia32_vfmaddsubps512_maskz", IX86_BUILTIN_VFMADDSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30334 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v8df_mask3_round, "__builtin_ia32_vfmsubaddpd512_mask3", IX86_BUILTIN_VFMSUBADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30335 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v16sf_mask3_round, "__builtin_ia32_vfmsubaddps512_mask3", IX86_BUILTIN_VFMSUBADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30336 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v8df_mask3_round, "__builtin_ia32_vfmsubpd512_mask3", IX86_BUILTIN_VFMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30337 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v16sf_mask3_round, "__builtin_ia32_vfmsubps512_mask3", IX86_BUILTIN_VFMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30338 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v8df_mask_round, "__builtin_ia32_vfnmaddpd512_mask", IX86_BUILTIN_VFNMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30339 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v16sf_mask_round, "__builtin_ia32_vfnmaddps512_mask", IX86_BUILTIN_VFNMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30340 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask_round, "__builtin_ia32_vfnmsubpd512_mask", IX86_BUILTIN_VFNMSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30341 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask3_round, "__builtin_ia32_vfnmsubpd512_mask3", IX86_BUILTIN_VFNMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30342 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask_round, "__builtin_ia32_vfnmsubps512_mask", IX86_BUILTIN_VFNMSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30343 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask3_round, "__builtin_ia32_vfnmsubps512_mask3", IX86_BUILTIN_VFNMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30344
30345 /* AVX512ER */
30346 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v8df_mask_round, "__builtin_ia32_exp2pd_mask", IX86_BUILTIN_EXP2PD_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30347 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf_mask_round, "__builtin_ia32_exp2ps_mask", IX86_BUILTIN_EXP2PS_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30348 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v8df_mask_round, "__builtin_ia32_rcp28pd_mask", IX86_BUILTIN_RCP28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30349 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v16sf_mask_round, "__builtin_ia32_rcp28ps_mask", IX86_BUILTIN_RCP28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30350 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v2df_round, "__builtin_ia32_rcp28sd_round", IX86_BUILTIN_RCP28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30351 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v4sf_round, "__builtin_ia32_rcp28ss_round", IX86_BUILTIN_RCP28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30352 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v8df_mask_round, "__builtin_ia32_rsqrt28pd_mask", IX86_BUILTIN_RSQRT28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30353 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v16sf_mask_round, "__builtin_ia32_rsqrt28ps_mask", IX86_BUILTIN_RSQRT28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30354 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v2df_round, "__builtin_ia32_rsqrt28sd_round", IX86_BUILTIN_RSQRT28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30355 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v4sf_round, "__builtin_ia32_rsqrt28ss_round", IX86_BUILTIN_RSQRT28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30356 };
30357
30358 /* FMA4 and XOP. */
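 /* The MULTI_ARG_* macros below are local shorthands for the
    ix86_builtin_func_type prototypes used by the FMA4/XOP table that
    follows; the suffix encodes the vector element type and the number
    of operands.  */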
30359 #define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
30360 #define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
30361 #define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
30362 #define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
30363 #define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
30364 #define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
30365 #define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
30366 #define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
30367 #define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
30368 #define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
30369 #define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
30370 #define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
30371 #define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
30372 #define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
30373 #define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
30374 #define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
30375 #define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
30376 #define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
30377 #define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
30378 #define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
30379 #define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
30380 #define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
30381 #define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
30382 #define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
30383 #define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
30384 #define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
30385 #define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
30386 #define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
30387 #define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
30388 #define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
30389 #define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
30390 #define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
30391 #define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
30392 #define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
30393 #define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
30394 #define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
30395 #define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
30396 #define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
30397 #define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
30398 #define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
30399 #define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
30400 #define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
30401 #define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
30402 #define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
30403 #define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
30404 #define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
30405 #define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
30406 #define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
30407 #define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
30408 #define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
30409 #define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
30410 #define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
30411
30412 static const struct builtin_description bdesc_multi_arg[] =
30413 {
30414 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
30415 "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
30416 UNKNOWN, (int)MULTI_ARG_3_SF },
30417 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
30418 "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
30419 UNKNOWN, (int)MULTI_ARG_3_DF },
30420
30421 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf,
30422 "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3,
30423 UNKNOWN, (int)MULTI_ARG_3_SF },
30424 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df,
30425 "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3,
30426 UNKNOWN, (int)MULTI_ARG_3_DF },
30427
30428 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
30429 "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
30430 UNKNOWN, (int)MULTI_ARG_3_SF },
30431 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
30432 "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
30433 UNKNOWN, (int)MULTI_ARG_3_DF },
30434 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
30435 "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
30436 UNKNOWN, (int)MULTI_ARG_3_SF2 },
30437 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
30438 "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
30439 UNKNOWN, (int)MULTI_ARG_3_DF2 },
30440
30441 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
30442 "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
30443 UNKNOWN, (int)MULTI_ARG_3_SF },
30444 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
30445 "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
30446 UNKNOWN, (int)MULTI_ARG_3_DF },
30447 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
30448 "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
30449 UNKNOWN, (int)MULTI_ARG_3_SF2 },
30450 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
30451 "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
30452 UNKNOWN, (int)MULTI_ARG_3_DF2 },
30453
30454 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
30455 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
30456 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
30457 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
30458 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi", IX86_BUILTIN_VPCMOV_V16QI, UNKNOWN, (int)MULTI_ARG_3_QI },
30459 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
30460 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
30461
30462 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
30463 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
30464 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
30465 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
30466 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
30467 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
30468 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
30469
30470 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
30471
30472 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
30473 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
30474 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30475 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30476 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
30477 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
30478 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30479 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30480 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30481 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30482 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30483 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30484
30485 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
30486 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
30487 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
30488 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
30489 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
30490 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
30491 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
30492 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
30493 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
30494 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
30495 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
30496 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
30497 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
30498 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
30499 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
30500 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
30501
30502 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_1_SF },
30503 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_1_DF },
30504 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
30505 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
30506 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
30507 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
30508
30509 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
30510 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
30511 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
30512 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
30513 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
30514 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
30515 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
30516 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
30517 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
30518 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
30519 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
30520 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
30521 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
30522 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
30523 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
30524
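 /* For each element width there are both signed (vpcom*) and unsigned
    (vpcom*u*) comparisons; the "neq" spellings are aliases that map to
    the same IX86_BUILTIN_VPCOMNE* code as the "ne" forms.  */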
30525 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
30526 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
30527 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
30528 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
30529 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
30530 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
30531 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
30532
30533 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
30534 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
30535 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
30536 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
30537 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
30538 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
30539 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
30540
30541 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
30542 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
30543 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
30544 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
30545 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
30546 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
30547 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
30548
30549 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
30550 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
30551 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
30552 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
30553 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
30554 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
30555 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
30556
30557 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
30558 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
30559 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
30560 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
30561 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
30562 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
30563 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
30564
30565 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
30566 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
30567 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
30568 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
30569 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
30570 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
30571 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
30572
30573 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
30574 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
30575 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
30576 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
30577 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
30578 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
30579 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
30580
30581 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
30582 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
30583 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
30584 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
30585 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
30586 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
30587 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
30588
30589 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
30590 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
30591 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
30592 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
30593 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub", IX86_BUILTIN_VPCOMFALSEUB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
30594 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw", IX86_BUILTIN_VPCOMFALSEUW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
30595 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud", IX86_BUILTIN_VPCOMFALSEUD, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
30596 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq", IX86_BUILTIN_VPCOMFALSEUQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
30597
30598 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
30599 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
30600 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
30601 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
30602 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
30603 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
30604 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
30605 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
30606
30607 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
30608 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
30609 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
30610 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
30611
30612 };
30613 \f
30614 /* TM vector builtins. */
30615
30616 /* Reuse the existing x86-specific `struct builtin_description' because
30617    we're lazy.  Add casts to make them fit.  */
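 /* Each entry maps an _ITM_* vector load, store or log entry point to
    the corresponding BUILT_IN_TM_* code, keyed on the ISA that provides
    the vector width (MMX for 64-bit, SSE for 128-bit, AVX for 256-bit).  */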
30618 static const struct builtin_description bdesc_tm[] =
30619 {
30620 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
30621 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
30622 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
30623 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30624 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30625 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30626 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30627
30628 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
30629 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
30630 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
30631 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30632 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30633 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30634 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30635
30636 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
30637 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
30638 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
30639 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30640 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30641 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30642 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30643
30644 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
30645 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
30646 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
30647 };
30648
30649 /* TM callbacks. */
30650
30651 /* Return the builtin decl needed to load a vector of TYPE. */
30652
30653 static tree
30654 ix86_builtin_tm_load (tree type)
30655 {
30656 if (TREE_CODE (type) == VECTOR_TYPE)
30657 {
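       /* TYPE_SIZE is the vector width in bits; pick the TM load
          builtin of the matching width.  */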
30658 switch (tree_to_uhwi (TYPE_SIZE (type)))
30659 {
30660 case 64:
30661 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64);
30662 case 128:
30663 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128);
30664 case 256:
30665 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256);
30666 }
30667 }
30668 return NULL_TREE;
30669 }
30670
30671 /* Return the builtin decl needed to store a vector of TYPE. */
30672
30673 static tree
30674 ix86_builtin_tm_store (tree type)
30675 {
30676 if (TREE_CODE (type) == VECTOR_TYPE)
30677 {
30678 switch (tree_to_uhwi (TYPE_SIZE (type)))
30679 {
30680 case 64:
30681 return builtin_decl_explicit (BUILT_IN_TM_STORE_M64);
30682 case 128:
30683 return builtin_decl_explicit (BUILT_IN_TM_STORE_M128);
30684 case 256:
30685 return builtin_decl_explicit (BUILT_IN_TM_STORE_M256);
30686 }
30687 }
30688 return NULL_TREE;
30689 }
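/* For example, under these two hooks a V2SF vector (TYPE_SIZE == 64)
   resolves to the M64 load/store builtins, a V4SF vector (128) to the
   M128 ones and a V8SF vector (256) to the M256 ones; any other type
   returns NULL_TREE so the caller falls back to the generic TM
   handling.  */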
30690 \f
30691 /* Initialize the transactional memory vector load/store builtins. */
30692
30693 static void
30694 ix86_init_tm_builtins (void)
30695 {
30696 enum ix86_builtin_func_type ftype;
30697 const struct builtin_description *d;
30698 size_t i;
30699 tree decl;
30700 tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
30701 tree attrs_log, attrs_type_log;
30702
30703 if (!flag_tm)
30704 return;
30705
30706 /* If there are no builtins defined, we must be compiling in a
30707 language without trans-mem support. */
30708 if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
30709 return;
30710
30711 /* Use whatever attributes a normal TM load has. */
30712 decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
30713 attrs_load = DECL_ATTRIBUTES (decl);
30714 attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
30715 /* Use whatever attributes a normal TM store has. */
30716 decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
30717 attrs_store = DECL_ATTRIBUTES (decl);
30718 attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
30719 /* Use whatever attributes a normal TM log has. */
30720 decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
30721 attrs_log = DECL_ATTRIBUTES (decl);
30722 attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));
30723
30724 for (i = 0, d = bdesc_tm;
30725 i < ARRAY_SIZE (bdesc_tm);
30726 i++, d++)
30727 {
30728 if ((d->mask & ix86_isa_flags) != 0
30729 || (lang_hooks.builtin_function
30730 == lang_hooks.builtin_function_ext_scope))
30731 {
30732 tree type, attrs, attrs_type;
30733 enum built_in_function code = (enum built_in_function) d->code;
30734
30735 ftype = (enum ix86_builtin_func_type) d->flag;
30736 type = ix86_get_builtin_func_type (ftype);
30737
30738 if (BUILTIN_TM_LOAD_P (code))
30739 {
30740 attrs = attrs_load;
30741 attrs_type = attrs_type_load;
30742 }
30743 else if (BUILTIN_TM_STORE_P (code))
30744 {
30745 attrs = attrs_store;
30746 attrs_type = attrs_type_store;
30747 }
30748 else
30749 {
30750 attrs = attrs_log;
30751 attrs_type = attrs_type_log;
30752 }
30753 decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
30754 /* The builtin without the prefix for
30755 calling it directly. */
30756 d->name + strlen ("__builtin_"),
30757 attrs);
30758 /* add_builtin_function() will set the DECL_ATTRIBUTES, now
30759 set the TYPE_ATTRIBUTES. */
30760 decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);
30761
30762 set_builtin_decl (code, decl, false);
30763 }
30764 }
30765 }
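/* As a sketch of the effect: the bdesc_tm entry for "__builtin__ITM_RM128"
   above becomes a builtin that can also be called directly as
   "_ITM_RM128" (d->name + strlen ("__builtin_")), carries the attributes
   of a normal TM load, and is recorded for BUILT_IN_TM_LOAD_M128 via
   set_builtin_decl.  */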
30766
30767 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
30768 in the current target ISA, to allow the user to compile particular modules
30769 with target-specific options that differ from the command-line
30770 options. */
30771 static void
30772 ix86_init_mmx_sse_builtins (void)
30773 {
30774 const struct builtin_description * d;
30775 enum ix86_builtin_func_type ftype;
30776 size_t i;
30777
30778 /* Add all special builtins with variable number of operands. */
30779 for (i = 0, d = bdesc_special_args;
30780 i < ARRAY_SIZE (bdesc_special_args);
30781 i++, d++)
30782 {
30783 if (d->name == 0)
30784 continue;
30785
30786 ftype = (enum ix86_builtin_func_type) d->flag;
30787 def_builtin (d->mask, d->name, ftype, d->code);
30788 }
30789
30790 /* Add all builtins with variable number of operands. */
30791 for (i = 0, d = bdesc_args;
30792 i < ARRAY_SIZE (bdesc_args);
30793 i++, d++)
30794 {
30795 if (d->name == 0)
30796 continue;
30797
30798 ftype = (enum ix86_builtin_func_type) d->flag;
30799 def_builtin_const (d->mask, d->name, ftype, d->code);
30800 }
30801
30802 /* Add all builtins with rounding. */
30803 for (i = 0, d = bdesc_round_args;
30804 i < ARRAY_SIZE (bdesc_round_args);
30805 i++, d++)
30806 {
30807 if (d->name == 0)
30808 continue;
30809
30810 ftype = (enum ix86_builtin_func_type) d->flag;
30811 def_builtin_const (d->mask, d->name, ftype, d->code);
30812 }
30813
30814 /* pcmpestr[im] insns. */
30815 for (i = 0, d = bdesc_pcmpestr;
30816 i < ARRAY_SIZE (bdesc_pcmpestr);
30817 i++, d++)
30818 {
30819 if (d->code == IX86_BUILTIN_PCMPESTRM128)
30820 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
30821 else
30822 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
30823 def_builtin_const (d->mask, d->name, ftype, d->code);
30824 }
30825
30826 /* pcmpistr[im] insns. */
30827 for (i = 0, d = bdesc_pcmpistr;
30828 i < ARRAY_SIZE (bdesc_pcmpistr);
30829 i++, d++)
30830 {
30831 if (d->code == IX86_BUILTIN_PCMPISTRM128)
30832 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
30833 else
30834 ftype = INT_FTYPE_V16QI_V16QI_INT;
30835 def_builtin_const (d->mask, d->name, ftype, d->code);
30836 }
30837
30838 /* comi/ucomi insns. */
30839 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
30840 {
30841 if (d->mask == OPTION_MASK_ISA_SSE2)
30842 ftype = INT_FTYPE_V2DF_V2DF;
30843 else
30844 ftype = INT_FTYPE_V4SF_V4SF;
30845 def_builtin_const (d->mask, d->name, ftype, d->code);
30846 }
30847
30848 /* SSE */
30849 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
30850 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
30851 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
30852 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
30853
30854 /* SSE or 3DNow!A */
30855 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
30856 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
30857 IX86_BUILTIN_MASKMOVQ);
30858
30859 /* SSE2 */
30860 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
30861 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
30862
30863 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
30864 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
30865 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
30866 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
30867
30868 /* SSE3. */
30869 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
30870 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
30871 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
30872 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
30873
30874 /* AES */
30875 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
30876 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
30877 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
30878 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
30879 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
30880 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
30881 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
30882 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
30883 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
30884 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
30885 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
30886 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
30887
30888 /* PCLMUL */
30889 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
30890 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
30891
30892 /* RDRND */
30893 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
30894 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
30895 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
30896 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
30897 def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
30898 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
30899 IX86_BUILTIN_RDRAND64_STEP);
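/* A minimal usage sketch for the step builtins above (the supported
   interface is the _rdrand*_step wrappers in immintrin.h, which follow
   the same shape):

     unsigned int val;
     int ok = __builtin_ia32_rdrand32_step (&val);

   A nonzero OK means VAL now holds a hardware random value; zero means
   the instruction had no random data available and the call should be
   retried.  */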
30900
30901 /* AVX2 */
30902 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
30903 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
30904 IX86_BUILTIN_GATHERSIV2DF);
30905
30906 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
30907 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
30908 IX86_BUILTIN_GATHERSIV4DF);
30909
30910 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
30911 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
30912 IX86_BUILTIN_GATHERDIV2DF);
30913
30914 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
30915 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
30916 IX86_BUILTIN_GATHERDIV4DF);
30917
30918 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
30919 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
30920 IX86_BUILTIN_GATHERSIV4SF);
30921
30922 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
30923 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
30924 IX86_BUILTIN_GATHERSIV8SF);
30925
30926 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
30927 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
30928 IX86_BUILTIN_GATHERDIV4SF);
30929
30930 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
30931 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
30932 IX86_BUILTIN_GATHERDIV8SF);
30933
30934 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
30935 V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
30936 IX86_BUILTIN_GATHERSIV2DI);
30937
30938 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
30939 V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
30940 IX86_BUILTIN_GATHERSIV4DI);
30941
30942 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
30943 V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
30944 IX86_BUILTIN_GATHERDIV2DI);
30945
30946 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
30947 V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
30948 IX86_BUILTIN_GATHERDIV4DI);
30949
30950 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
30951 V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
30952 IX86_BUILTIN_GATHERSIV4SI);
30953
30954 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
30955 V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
30956 IX86_BUILTIN_GATHERSIV8SI);
30957
30958 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
30959 V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
30960 IX86_BUILTIN_GATHERDIV4SI);
30961
30962 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
30963 V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
30964 IX86_BUILTIN_GATHERDIV8SI);
30965
30966 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df",
30967 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
30968 IX86_BUILTIN_GATHERALTSIV4DF);
30969
30970 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256",
30971 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
30972 IX86_BUILTIN_GATHERALTDIV8SF);
30973
30974 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di",
30975 V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
30976 IX86_BUILTIN_GATHERALTSIV4DI);
30977
30978 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256",
30979 V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
30980 IX86_BUILTIN_GATHERALTDIV8SI);
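/* These correspond to the AVX2 gather intrinsics in avx2intrin.h; for
   example, judging from V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
   __builtin_ia32_gathersiv4sf takes (src, base, index, mask, scale) and
   underlies _mm_mask_i32gather_ps.  */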
30981
30982 /* AVX512F */
30983 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16sf",
30984 V16SF_FTYPE_V16SF_PCFLOAT_V16SI_HI_INT,
30985 IX86_BUILTIN_GATHER3SIV16SF);
30986
30987 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8df",
30988 V8DF_FTYPE_V8DF_PCDOUBLE_V8SI_QI_INT,
30989 IX86_BUILTIN_GATHER3SIV8DF);
30990
30991 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16sf",
30992 V8SF_FTYPE_V8SF_PCFLOAT_V8DI_QI_INT,
30993 IX86_BUILTIN_GATHER3DIV16SF);
30994
30995 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8df",
30996 V8DF_FTYPE_V8DF_PCDOUBLE_V8DI_QI_INT,
30997 IX86_BUILTIN_GATHER3DIV8DF);
30998
30999 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16si",
31000 V16SI_FTYPE_V16SI_PCINT_V16SI_HI_INT,
31001 IX86_BUILTIN_GATHER3SIV16SI);
31002
31003 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8di",
31004 V8DI_FTYPE_V8DI_PCINT64_V8SI_QI_INT,
31005 IX86_BUILTIN_GATHER3SIV8DI);
31006
31007 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16si",
31008 V8SI_FTYPE_V8SI_PCINT_V8DI_QI_INT,
31009 IX86_BUILTIN_GATHER3DIV16SI);
31010
31011 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8di",
31012 V8DI_FTYPE_V8DI_PCINT64_V8DI_QI_INT,
31013 IX86_BUILTIN_GATHER3DIV8DI);
31014
31015 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8df",
31016 V8DF_FTYPE_V8DF_PCDOUBLE_V16SI_QI_INT,
31017 IX86_BUILTIN_GATHER3ALTSIV8DF);
31018
31019 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8sf",
31020 V16SF_FTYPE_V16SF_PCFLOAT_V8DI_HI_INT,
31021 IX86_BUILTIN_GATHER3ALTDIV16SF);
31022
31023 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8di",
31024 V8DI_FTYPE_V8DI_PCINT64_V16SI_QI_INT,
31025 IX86_BUILTIN_GATHER3ALTSIV8DI);
31026
31027 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8si",
31028 V16SI_FTYPE_V16SI_PCINT_V8DI_HI_INT,
31029 IX86_BUILTIN_GATHER3ALTDIV16SI);
31030
31031 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16sf",
31032 VOID_FTYPE_PFLOAT_HI_V16SI_V16SF_INT,
31033 IX86_BUILTIN_SCATTERSIV16SF);
31034
31035 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8df",
31036 VOID_FTYPE_PDOUBLE_QI_V8SI_V8DF_INT,
31037 IX86_BUILTIN_SCATTERSIV8DF);
31038
31039 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16sf",
31040 VOID_FTYPE_PFLOAT_QI_V8DI_V8SF_INT,
31041 IX86_BUILTIN_SCATTERDIV16SF);
31042
31043 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8df",
31044 VOID_FTYPE_PDOUBLE_QI_V8DI_V8DF_INT,
31045 IX86_BUILTIN_SCATTERDIV8DF);
31046
31047 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16si",
31048 VOID_FTYPE_PINT_HI_V16SI_V16SI_INT,
31049 IX86_BUILTIN_SCATTERSIV16SI);
31050
31051 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8di",
31052 VOID_FTYPE_PLONGLONG_QI_V8SI_V8DI_INT,
31053 IX86_BUILTIN_SCATTERSIV8DI);
31054
31055 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16si",
31056 VOID_FTYPE_PINT_QI_V8DI_V8SI_INT,
31057 IX86_BUILTIN_SCATTERDIV16SI);
31058
31059 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8di",
31060 VOID_FTYPE_PLONGLONG_QI_V8DI_V8DI_INT,
31061 IX86_BUILTIN_SCATTERDIV8DI);
31062
31063 /* AVX512PF */
31064 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdpd",
31065 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
31066 IX86_BUILTIN_GATHERPFDPD);
31067 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdps",
31068 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
31069 IX86_BUILTIN_GATHERPFDPS);
31070 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqpd",
31071 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
31072 IX86_BUILTIN_GATHERPFQPD);
31073 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqps",
31074 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
31075 IX86_BUILTIN_GATHERPFQPS);
31076 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdpd",
31077 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
31078 IX86_BUILTIN_SCATTERPFDPD);
31079 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdps",
31080 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
31081 IX86_BUILTIN_SCATTERPFDPS);
31082 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqpd",
31083 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
31084 IX86_BUILTIN_SCATTERPFQPD);
31085 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqps",
31086 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
31087 IX86_BUILTIN_SCATTERPFQPS);
31088
31089 /* SHA */
31090 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg1",
31091 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG1);
31092 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg2",
31093 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG2);
31094 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1nexte",
31095 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1NEXTE);
31096 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1rnds4",
31097 V4SI_FTYPE_V4SI_V4SI_INT, IX86_BUILTIN_SHA1RNDS4);
31098 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg1",
31099 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG1);
31100 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg2",
31101 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG2);
31102 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256rnds2",
31103 V4SI_FTYPE_V4SI_V4SI_V4SI, IX86_BUILTIN_SHA256RNDS2);
31104
31105 /* RTM. */
31106 def_builtin (OPTION_MASK_ISA_RTM, "__builtin_ia32_xabort",
31107 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT);
31108
31109 /* MMX access to the vec_init patterns. */
31110 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
31111 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
31112
31113 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
31114 V4HI_FTYPE_HI_HI_HI_HI,
31115 IX86_BUILTIN_VEC_INIT_V4HI);
31116
31117 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
31118 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
31119 IX86_BUILTIN_VEC_INIT_V8QI);
31120
31121 /* Access to the vec_extract patterns. */
31122 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
31123 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
31124 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
31125 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
31126 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
31127 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
31128 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
31129 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
31130 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
31131 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
31132
31133 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
31134 "__builtin_ia32_vec_ext_v4hi",
31135 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
31136
31137 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
31138 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
31139
31140 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
31141 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
31142
31143 /* Access to the vec_set patterns. */
31144 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
31145 "__builtin_ia32_vec_set_v2di",
31146 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
31147
31148 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
31149 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
31150
31151 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
31152 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
31153
31154 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
31155 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
31156
31157 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
31158 "__builtin_ia32_vec_set_v4hi",
31159 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
31160
31161 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
31162 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
31163
31164 /* RDSEED */
31165 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_hi_step",
31166 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDSEED16_STEP);
31167 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_si_step",
31168 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDSEED32_STEP);
31169 def_builtin (OPTION_MASK_ISA_RDSEED | OPTION_MASK_ISA_64BIT,
31170 "__builtin_ia32_rdseed_di_step",
31171 INT_FTYPE_PULONGLONG, IX86_BUILTIN_RDSEED64_STEP);
31172
31173 /* ADCX */
31174 def_builtin (0, "__builtin_ia32_addcarryx_u32",
31175 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_ADDCARRYX32);
31176 def_builtin (OPTION_MASK_ISA_64BIT,
31177 "__builtin_ia32_addcarryx_u64",
31178 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
31179 IX86_BUILTIN_ADDCARRYX64);
31180
31181 /* Read/write FLAGS. */
31182 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u32",
31183 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
31184 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u64",
31185 UINT64_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
31186 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u32",
31187 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_WRITE_FLAGS);
31188 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u64",
31189 VOID_FTYPE_UINT64, IX86_BUILTIN_WRITE_FLAGS);
31190
31191 /* CLFLUSHOPT. */
31192 def_builtin (OPTION_MASK_ISA_CLFLUSHOPT, "__builtin_ia32_clflushopt",
31193 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSHOPT);
31194
31195 /* Add FMA4 multi-arg instructions. */
31196 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
31197 {
31198 if (d->name == 0)
31199 continue;
31200
31201 ftype = (enum ix86_builtin_func_type) d->flag;
31202 def_builtin_const (d->mask, d->name, ftype, d->code);
31203 }
31204 }
31205
31206 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
31207 to return a pointer to VERSION_DECL if the outcome of the expression
31208 formed by PREDICATE_CHAIN is true. This function will be called during
31209 version dispatch to decide which function version to execute. It returns
31210 the basic block at the end, to which more conditions can be added. */
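/* Schematically, for a one-element PREDICATE_CHAIN the code added to
   NEW_BB looks like:

     cond_var = predicate_decl (predicate_arg);
     if (cond_var > 0)
       return (void *) &version_decl;

   with control continuing to the next condition block on the false edge;
   longer chains are combined with MIN_EXPR before the comparison.  */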
31211
31212 static basic_block
31213 add_condition_to_bb (tree function_decl, tree version_decl,
31214 tree predicate_chain, basic_block new_bb)
31215 {
31216 gimple return_stmt;
31217 tree convert_expr, result_var;
31218 gimple convert_stmt;
31219 gimple call_cond_stmt;
31220 gimple if_else_stmt;
31221
31222 basic_block bb1, bb2, bb3;
31223 edge e12, e23;
31224
31225 tree cond_var, and_expr_var = NULL_TREE;
31226 gimple_seq gseq;
31227
31228 tree predicate_decl, predicate_arg;
31229
31230 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
31231
31232 gcc_assert (new_bb != NULL);
31233 gseq = bb_seq (new_bb);
31234
31235
31236 convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
31237 build_fold_addr_expr (version_decl));
31238 result_var = create_tmp_var (ptr_type_node, NULL);
31239 convert_stmt = gimple_build_assign (result_var, convert_expr);
31240 return_stmt = gimple_build_return (result_var);
31241
31242 if (predicate_chain == NULL_TREE)
31243 {
31244 gimple_seq_add_stmt (&gseq, convert_stmt);
31245 gimple_seq_add_stmt (&gseq, return_stmt);
31246 set_bb_seq (new_bb, gseq);
31247 gimple_set_bb (convert_stmt, new_bb);
31248 gimple_set_bb (return_stmt, new_bb);
31249 pop_cfun ();
31250 return new_bb;
31251 }
31252
31253 while (predicate_chain != NULL)
31254 {
31255 cond_var = create_tmp_var (integer_type_node, NULL);
31256 predicate_decl = TREE_PURPOSE (predicate_chain);
31257 predicate_arg = TREE_VALUE (predicate_chain);
31258 call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
31259 gimple_call_set_lhs (call_cond_stmt, cond_var);
31260
31261 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
31262 gimple_set_bb (call_cond_stmt, new_bb);
31263 gimple_seq_add_stmt (&gseq, call_cond_stmt);
31264
31265 predicate_chain = TREE_CHAIN (predicate_chain);
31266
31267 if (and_expr_var == NULL)
31268 and_expr_var = cond_var;
31269 else
31270 {
31271 gimple assign_stmt;
31272 /* Use MIN_EXPR to check whether any integer in the chain is zero:
31273 and_expr_var = min_expr <cond_var, and_expr_var>. */
31274 assign_stmt = gimple_build_assign (and_expr_var,
31275 build2 (MIN_EXPR, integer_type_node,
31276 cond_var, and_expr_var));
31277
31278 gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
31279 gimple_set_bb (assign_stmt, new_bb);
31280 gimple_seq_add_stmt (&gseq, assign_stmt);
31281 }
31282 }
31283
31284 if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
31285 integer_zero_node,
31286 NULL_TREE, NULL_TREE);
31287 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
31288 gimple_set_bb (if_else_stmt, new_bb);
31289 gimple_seq_add_stmt (&gseq, if_else_stmt);
31290
31291 gimple_seq_add_stmt (&gseq, convert_stmt);
31292 gimple_seq_add_stmt (&gseq, return_stmt);
31293 set_bb_seq (new_bb, gseq);
31294
31295 bb1 = new_bb;
31296 e12 = split_block (bb1, if_else_stmt);
31297 bb2 = e12->dest;
31298 e12->flags &= ~EDGE_FALLTHRU;
31299 e12->flags |= EDGE_TRUE_VALUE;
31300
31301 e23 = split_block (bb2, return_stmt);
31302
31303 gimple_set_bb (convert_stmt, bb2);
31304 gimple_set_bb (return_stmt, bb2);
31305
31306 bb3 = e23->dest;
31307 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
31308
31309 remove_edge (e23);
31310 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
31311
31312 pop_cfun ();
31313
31314 return bb3;
31315 }
31316
31317 /* This parses the attribute arguments to target in DECL and determines
31318 the right builtin to use to match the platform specification.
31319 It returns the priority value for this version decl. If PREDICATE_LIST
31320 is not NULL, it stores the list of cpu features that need to be checked
31321 before dispatching this function. */
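/* For instance, a version declared with
   __attribute__ ((target ("arch=core2,popcnt"))) would, assuming both
   dispatchers exist, come back with PREDICATE_LIST holding a
   __builtin_cpu_is ("core2") check followed by a
   __builtin_cpu_supports ("popcnt") check, and with priority P_POPCNT,
   the highest feature named.  */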
31322
31323 static unsigned int
31324 get_builtin_code_for_version (tree decl, tree *predicate_list)
31325 {
31326 tree attrs;
31327 struct cl_target_option cur_target;
31328 tree target_node;
31329 struct cl_target_option *new_target;
31330 const char *arg_str = NULL;
31331 const char *attrs_str = NULL;
31332 char *tok_str = NULL;
31333 char *token;
31334
31335 /* Priority of i386 features, greater value is higher priority. This is
31336 used to decide the order in which function dispatch must happen. For
31337 instance, a version specialized for SSE4.2 should be checked for dispatch
31338 before a version for SSE3, as SSE4.2 implies SSE3. */
31339 enum feature_priority
31340 {
31341 P_ZERO = 0,
31342 P_MMX,
31343 P_SSE,
31344 P_SSE2,
31345 P_SSE3,
31346 P_SSSE3,
31347 P_PROC_SSSE3,
31348 P_SSE4_A,
31349 P_PROC_SSE4_A,
31350 P_SSE4_1,
31351 P_SSE4_2,
31352 P_PROC_SSE4_2,
31353 P_POPCNT,
31354 P_AVX,
31355 P_PROC_AVX,
31356 P_FMA4,
31357 P_XOP,
31358 P_PROC_XOP,
31359 P_FMA,
31360 P_PROC_FMA,
31361 P_AVX2,
31362 P_PROC_AVX2
31363 };
31364
31365 enum feature_priority priority = P_ZERO;
31366
31367 /* These are the target attribute strings for which a dispatcher is
31368 available, from fold_builtin_cpu. */
31369
31370 static struct _feature_list
31371 {
31372 const char *const name;
31373 const enum feature_priority priority;
31374 }
31375 const feature_list[] =
31376 {
31377 {"mmx", P_MMX},
31378 {"sse", P_SSE},
31379 {"sse2", P_SSE2},
31380 {"sse3", P_SSE3},
31381 {"sse4a", P_SSE4_A},
31382 {"ssse3", P_SSSE3},
31383 {"sse4.1", P_SSE4_1},
31384 {"sse4.2", P_SSE4_2},
31385 {"popcnt", P_POPCNT},
31386 {"avx", P_AVX},
31387 {"fma4", P_FMA4},
31388 {"xop", P_XOP},
31389 {"fma", P_FMA},
31390 {"avx2", P_AVX2}
31391 };
31392
31393
31394 static unsigned int NUM_FEATURES
31395 = sizeof (feature_list) / sizeof (struct _feature_list);
31396
31397 unsigned int i;
31398
31399 tree predicate_chain = NULL_TREE;
31400 tree predicate_decl, predicate_arg;
31401
31402 attrs = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
31403 gcc_assert (attrs != NULL);
31404
31405 attrs = TREE_VALUE (TREE_VALUE (attrs));
31406
31407 gcc_assert (TREE_CODE (attrs) == STRING_CST);
31408 attrs_str = TREE_STRING_POINTER (attrs);
31409
31410 /* Return priority zero for default function. */
31411 if (strcmp (attrs_str, "default") == 0)
31412 return 0;
31413
31414 /* Handle arch= if specified. For priority, set it to be 1 more than
31415 the best instruction set the processor can handle. For instance, if
31416 there is a version for atom and a version for ssse3 (the highest ISA
31417 priority for atom), the atom version must be checked for dispatch
31418 before the ssse3 version. */
31419 if (strstr (attrs_str, "arch=") != NULL)
31420 {
31421 cl_target_option_save (&cur_target, &global_options);
31422 target_node = ix86_valid_target_attribute_tree (attrs, &global_options,
31423 &global_options_set);
31424
31425 gcc_assert (target_node);
31426 new_target = TREE_TARGET_OPTION (target_node);
31427 gcc_assert (new_target);
31428
31429 if (new_target->arch_specified && new_target->arch > 0)
31430 {
31431 switch (new_target->arch)
31432 {
31433 case PROCESSOR_CORE2:
31434 arg_str = "core2";
31435 priority = P_PROC_SSSE3;
31436 break;
31437 case PROCESSOR_NEHALEM:
31438 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AES)
31439 arg_str = "westmere";
31440 else
31441 /* We translate "arch=corei7" and "arch=nehalem" to
31442 "corei7" so that it will be mapped to M_INTEL_COREI7
31443 as cpu type to cover all M_INTEL_COREI7_XXXs. */
31444 arg_str = "corei7";
31445 priority = P_PROC_SSE4_2;
31446 break;
31447 case PROCESSOR_SANDYBRIDGE:
31448 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_F16C)
31449 arg_str = "ivybridge";
31450 else
31451 arg_str = "sandybridge";
31452 priority = P_PROC_AVX;
31453 break;
31454 case PROCESSOR_HASWELL:
31455 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_ADX)
31456 arg_str = "broadwell";
31457 else
31458 arg_str = "haswell";
31459 priority = P_PROC_AVX2;
31460 break;
31461 case PROCESSOR_BONNELL:
31462 arg_str = "bonnell";
31463 priority = P_PROC_SSSE3;
31464 break;
31465 case PROCESSOR_SILVERMONT:
31466 arg_str = "silvermont";
31467 priority = P_PROC_SSE4_2;
31468 break;
31469 case PROCESSOR_AMDFAM10:
31470 arg_str = "amdfam10h";
31471 priority = P_PROC_SSE4_A;
31472 break;
31473 case PROCESSOR_BTVER1:
31474 arg_str = "btver1";
31475 priority = P_PROC_SSE4_A;
31476 break;
31477 case PROCESSOR_BTVER2:
31478 arg_str = "btver2";
31479 priority = P_PROC_AVX;
31480 break;
31481 case PROCESSOR_BDVER1:
31482 arg_str = "bdver1";
31483 priority = P_PROC_XOP;
31484 break;
31485 case PROCESSOR_BDVER2:
31486 arg_str = "bdver2";
31487 priority = P_PROC_FMA;
31488 break;
31489 case PROCESSOR_BDVER3:
31490 arg_str = "bdver3";
31491 priority = P_PROC_FMA;
31492 break;
31493 case PROCESSOR_BDVER4:
31494 arg_str = "bdver4";
31495 priority = P_PROC_AVX2;
31496 break;
31497 }
31498 }
31499
31500 cl_target_option_restore (&global_options, &cur_target);
31501
31502 if (predicate_list && arg_str == NULL)
31503 {
31504 error_at (DECL_SOURCE_LOCATION (decl),
31505 "No dispatcher found for the versioning attributes");
31506 return 0;
31507 }
31508
31509 if (predicate_list)
31510 {
31511 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_IS];
31512 /* For a C string literal the length includes the trailing NULL. */
31513 predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
31514 predicate_chain = tree_cons (predicate_decl, predicate_arg,
31515 predicate_chain);
31516 }
31517 }
31518
31519 /* Process feature name. */
31520 tok_str = (char *) xmalloc (strlen (attrs_str) + 1);
31521 strcpy (tok_str, attrs_str);
31522 token = strtok (tok_str, ",");
31523 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_SUPPORTS];
31524
31525 while (token != NULL)
31526 {
31527 /* Do not process "arch=" */
31528 if (strncmp (token, "arch=", 5) == 0)
31529 {
31530 token = strtok (NULL, ",");
31531 continue;
31532 }
31533 for (i = 0; i < NUM_FEATURES; ++i)
31534 {
31535 if (strcmp (token, feature_list[i].name) == 0)
31536 {
31537 if (predicate_list)
31538 {
31539 predicate_arg = build_string_literal (
31540 strlen (feature_list[i].name) + 1,
31541 feature_list[i].name);
31542 predicate_chain = tree_cons (predicate_decl, predicate_arg,
31543 predicate_chain);
31544 }
31545 /* Find the maximum priority feature. */
31546 if (feature_list[i].priority > priority)
31547 priority = feature_list[i].priority;
31548
31549 break;
31550 }
31551 }
31552 if (predicate_list && i == NUM_FEATURES)
31553 {
31554 error_at (DECL_SOURCE_LOCATION (decl),
31555 "No dispatcher found for %s", token);
31556 return 0;
31557 }
31558 token = strtok (NULL, ",");
31559 }
31560 free (tok_str);
31561
31562 if (predicate_list && predicate_chain == NULL_TREE)
31563 {
31564 error_at (DECL_SOURCE_LOCATION (decl),
31565 "No dispatcher found for the versioning attributes : %s",
31566 attrs_str);
31567 return 0;
31568 }
31569 else if (predicate_list)
31570 {
31571 predicate_chain = nreverse (predicate_chain);
31572 *predicate_list = predicate_chain;
31573 }
31574
31575 return priority;
31576 }
31577
31578 /* This compares the priority of target features in function DECL1
31579 and DECL2. It returns positive value if DECL1 is higher priority,
31580 negative value if DECL2 is higher priority and 0 if they are the
31581 same. */
31582
31583 static int
31584 ix86_compare_version_priority (tree decl1, tree decl2)
31585 {
31586 unsigned int priority1 = get_builtin_code_for_version (decl1, NULL);
31587 unsigned int priority2 = get_builtin_code_for_version (decl2, NULL);
31588
31589 return (int)priority1 - (int)priority2;
31590 }
31591
31592 /* V1 and V2 point to function versions with different priorities
31593 based on the target ISA. This function compares their priorities. */
31594
31595 static int
31596 feature_compare (const void *v1, const void *v2)
31597 {
31598 typedef struct _function_version_info
31599 {
31600 tree version_decl;
31601 tree predicate_chain;
31602 unsigned int dispatch_priority;
31603 } function_version_info;
31604
31605 const function_version_info c1 = *(const function_version_info *)v1;
31606 const function_version_info c2 = *(const function_version_info *)v2;
31607 return (c2.dispatch_priority - c1.dispatch_priority);
31608 }
31609
31610 /* This function generates the dispatch function for
31611 multi-versioned functions. DISPATCH_DECL is the function which will
31612 contain the dispatch logic. FNDECLS is a vector of the function
31613 choices for dispatch. EMPTY_BB is the basic block pointer
31614 in DISPATCH_DECL in which the dispatch code is generated. */
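/* The resolver body assembled here is, roughly:

     __builtin_cpu_init ();
     if (<predicates of the highest-priority version hold>)
       return &foo.<highest>;
     ...
     return &foo;   <- the default version, dispatched last

   with each condition block produced by add_condition_to_bb above.  */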
31615
31616 static int
31617 dispatch_function_versions (tree dispatch_decl,
31618 void *fndecls_p,
31619 basic_block *empty_bb)
31620 {
31621 tree default_decl;
31622 gimple ifunc_cpu_init_stmt;
31623 gimple_seq gseq;
31624 int ix;
31625 tree ele;
31626 vec<tree> *fndecls;
31627 unsigned int num_versions = 0;
31628 unsigned int actual_versions = 0;
31629 unsigned int i;
31630
31631 struct _function_version_info
31632 {
31633 tree version_decl;
31634 tree predicate_chain;
31635 unsigned int dispatch_priority;
31636 }*function_version_info;
31637
31638 gcc_assert (dispatch_decl != NULL
31639 && fndecls_p != NULL
31640 && empty_bb != NULL);
31641
31642 /* fndecls_p is actually a vector. */
31643 fndecls = static_cast<vec<tree> *> (fndecls_p);
31644
31645 /* At least one more version other than the default. */
31646 num_versions = fndecls->length ();
31647 gcc_assert (num_versions >= 2);
31648
31649 function_version_info = (struct _function_version_info *)
31650 XNEWVEC (struct _function_version_info, (num_versions - 1));
31651
31652 /* The first version in the vector is the default decl. */
31653 default_decl = (*fndecls)[0];
31654
31655 push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));
31656
31657 gseq = bb_seq (*empty_bb);
31658 /* Function version dispatch is via IFUNC. IFUNC resolvers fire before
31659 constructors, so explicitly call __builtin_cpu_init here. */
31660 ifunc_cpu_init_stmt = gimple_build_call_vec (
31661 ix86_builtins [(int) IX86_BUILTIN_CPU_INIT], vNULL);
31662 gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
31663 gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
31664 set_bb_seq (*empty_bb, gseq);
31665
31666 pop_cfun ();
31667
31668
31669 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
31670 {
31671 tree version_decl = ele;
31672 tree predicate_chain = NULL_TREE;
31673 unsigned int priority;
31674 /* Get attribute string, parse it and find the right predicate decl.
31675 The predicate function could be a lengthy combination of many
31676 features, like arch-type and various isa-variants. */
31677 priority = get_builtin_code_for_version (version_decl,
31678 &predicate_chain);
31679
31680 if (predicate_chain == NULL_TREE)
31681 continue;
31682
31683 function_version_info [actual_versions].version_decl = version_decl;
31684 function_version_info [actual_versions].predicate_chain
31685 = predicate_chain;
31686 function_version_info [actual_versions].dispatch_priority = priority;
31687 actual_versions++;
31688 }
31689
31690 /* Sort the versions according to descending order of dispatch priority. The
31691 priority is based on the ISA. This is not a perfect solution. There
31692 could still be ambiguity. If more than one function version is suitable
31693 to execute, which one should be dispatched? In the future, allow the user
31694 to specify a dispatch priority next to the version. */
31695 qsort (function_version_info, actual_versions,
31696 sizeof (struct _function_version_info), feature_compare);
31697
31698 for (i = 0; i < actual_versions; ++i)
31699 *empty_bb = add_condition_to_bb (dispatch_decl,
31700 function_version_info[i].version_decl,
31701 function_version_info[i].predicate_chain,
31702 *empty_bb);
31703
31704 /* dispatch default version at the end. */
31705 *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
31706 NULL, *empty_bb);
31707
31708 free (function_version_info);
31709 return 0;
31710 }
31711
31712 /* Comparator function to be used in qsort routine to sort attribute
31713 specification strings to "target". */
31714
31715 static int
31716 attr_strcmp (const void *v1, const void *v2)
31717 {
31718 const char *c1 = *(char *const*)v1;
31719 const char *c2 = *(char *const*)v2;
31720 return strcmp (c1, c2);
31721 }
31722
31723 /* ARGLIST is the argument to the target attribute. This function tokenizes
31724 the comma-separated arguments, sorts them and returns a string which
31725 is a unique identifier for the comma-separated arguments. It also
31726 replaces non-identifier characters "=,-" with "_". */
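/* For example, the attribute arguments "sse4.2,arch=slm" become
   "sse4.2,arch_slm" after the '=' replacement, tokenize to "sse4.2" and
   "arch_slm", and sort and join to the identifier "arch_slm_sse4.2"
   (the '.' is kept as-is).  */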
31727
31728 static char *
31729 sorted_attr_string (tree arglist)
31730 {
31731 tree arg;
31732 size_t str_len_sum = 0;
31733 char **args = NULL;
31734 char *attr_str, *ret_str;
31735 char *attr = NULL;
31736 unsigned int argnum = 1;
31737 unsigned int i;
31738
31739 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
31740 {
31741 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
31742 size_t len = strlen (str);
31743 str_len_sum += len + 1;
31744 if (arg != arglist)
31745 argnum++;
31746 for (i = 0; i < strlen (str); i++)
31747 if (str[i] == ',')
31748 argnum++;
31749 }
31750
31751 attr_str = XNEWVEC (char, str_len_sum);
31752 str_len_sum = 0;
31753 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
31754 {
31755 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
31756 size_t len = strlen (str);
31757 memcpy (attr_str + str_len_sum, str, len);
31758 attr_str[str_len_sum + len] = TREE_CHAIN (arg) ? ',' : '\0';
31759 str_len_sum += len + 1;
31760 }
31761
31762 /* Replace "=,-" with "_". */
31763 for (i = 0; i < strlen (attr_str); i++)
31764 if (attr_str[i] == '=' || attr_str[i] == '-')
31765 attr_str[i] = '_';
31766
31767 if (argnum == 1)
31768 return attr_str;
31769
31770 args = XNEWVEC (char *, argnum);
31771
31772 i = 0;
31773 attr = strtok (attr_str, ",");
31774 while (attr != NULL)
31775 {
31776 args[i] = attr;
31777 i++;
31778 attr = strtok (NULL, ",");
31779 }
31780
31781 qsort (args, argnum, sizeof (char *), attr_strcmp);
31782
31783 ret_str = XNEWVEC (char, str_len_sum);
31784 str_len_sum = 0;
31785 for (i = 0; i < argnum; i++)
31786 {
31787 size_t len = strlen (args[i]);
31788 memcpy (ret_str + str_len_sum, args[i], len);
31789 ret_str[str_len_sum + len] = i < argnum - 1 ? '_' : '\0';
31790 str_len_sum += len + 1;
31791 }
31792
31793 XDELETEVEC (args);
31794 XDELETEVEC (attr_str);
31795 return ret_str;
31796 }
31797
31798 /* This function changes the assembler name for functions that are
31799 versions. If DECL is a function version and has a "target"
31800 attribute, it appends the attribute string to its assembler name. */
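/* For example, a version of foo declared with
   __attribute__ ((target ("arch=core2"))) gets the assembler name
   "foo.arch_core2", while the "default" version keeps its original
   name.  */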
31801
31802 static tree
31803 ix86_mangle_function_version_assembler_name (tree decl, tree id)
31804 {
31805 tree version_attr;
31806 const char *orig_name, *version_string;
31807 char *attr_str, *assembler_name;
31808
31809 if (DECL_DECLARED_INLINE_P (decl)
31810 && lookup_attribute ("gnu_inline",
31811 DECL_ATTRIBUTES (decl)))
31812 error_at (DECL_SOURCE_LOCATION (decl),
31813 "Function versions cannot be marked as gnu_inline,"
31814 " bodies have to be generated");
31815
31816 if (DECL_VIRTUAL_P (decl)
31817 || DECL_VINDEX (decl))
31818 sorry ("Virtual function multiversioning not supported");
31819
31820 version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
31821
31822 /* target attribute string cannot be NULL. */
31823 gcc_assert (version_attr != NULL_TREE);
31824
31825 orig_name = IDENTIFIER_POINTER (id);
31826 version_string
31827 = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr)));
31828
31829 if (strcmp (version_string, "default") == 0)
31830 return id;
31831
31832 attr_str = sorted_attr_string (TREE_VALUE (version_attr));
31833 assembler_name = XNEWVEC (char, strlen (orig_name) + strlen (attr_str) + 2);
31834
31835 sprintf (assembler_name, "%s.%s", orig_name, attr_str);
31836
31837 /* Allow assembler name to be modified if already set. */
31838 if (DECL_ASSEMBLER_NAME_SET_P (decl))
31839 SET_DECL_RTL (decl, NULL);
31840
31841 tree ret = get_identifier (assembler_name);
31842 XDELETEVEC (attr_str);
31843 XDELETEVEC (assembler_name);
31844 return ret;
31845 }
31846
31847 /* This function returns true if FN1 and FN2 are versions of the same function,
31848 that is, the target strings of the function decls are different. This assumes
31849 that FN1 and FN2 have the same signature. */
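/* For example, two declarations of the same function carrying
   target ("avx2") and target ("sse4.2") are treated as distinct
   versions, whereas target ("sse4.2,popcnt") and target ("popcnt,sse4.2")
   compare equal once sorted and therefore are not.  */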
31850
31851 static bool
31852 ix86_function_versions (tree fn1, tree fn2)
31853 {
31854 tree attr1, attr2;
31855 char *target1, *target2;
31856 bool result;
31857
31858 if (TREE_CODE (fn1) != FUNCTION_DECL
31859 || TREE_CODE (fn2) != FUNCTION_DECL)
31860 return false;
31861
31862 attr1 = lookup_attribute ("target", DECL_ATTRIBUTES (fn1));
31863 attr2 = lookup_attribute ("target", DECL_ATTRIBUTES (fn2));
31864
31865 /* At least one function decl should have the target attribute specified. */
31866 if (attr1 == NULL_TREE && attr2 == NULL_TREE)
31867 return false;
31868
31869 /* Diagnose missing target attribute if one of the decls is already
31870 multi-versioned. */
31871 if (attr1 == NULL_TREE || attr2 == NULL_TREE)
31872 {
31873 if (DECL_FUNCTION_VERSIONED (fn1) || DECL_FUNCTION_VERSIONED (fn2))
31874 {
31875 if (attr2 != NULL_TREE)
31876 {
31877 tree tem = fn1;
31878 fn1 = fn2;
31879 fn2 = tem;
31880 attr1 = attr2;
31881 }
31882 error_at (DECL_SOURCE_LOCATION (fn2),
31883 "missing %<target%> attribute for multi-versioned %D",
31884 fn2);
31885 inform (DECL_SOURCE_LOCATION (fn1),
31886 "previous declaration of %D", fn1);
31887 /* Prevent diagnosing of the same error multiple times. */
31888 DECL_ATTRIBUTES (fn2)
31889 = tree_cons (get_identifier ("target"),
31890 copy_node (TREE_VALUE (attr1)),
31891 DECL_ATTRIBUTES (fn2));
31892 }
31893 return false;
31894 }
31895
31896 target1 = sorted_attr_string (TREE_VALUE (attr1));
31897 target2 = sorted_attr_string (TREE_VALUE (attr2));
31898
31899 /* The sorted target strings must be different for fn1 and fn2
31900 to be versions. */
31901 if (strcmp (target1, target2) == 0)
31902 result = false;
31903 else
31904 result = true;
31905
31906 XDELETEVEC (target1);
31907 XDELETEVEC (target2);
31908
31909 return result;
31910 }
31911
31912 static tree
31913 ix86_mangle_decl_assembler_name (tree decl, tree id)
31914 {
31915 /* For function version, add the target suffix to the assembler name. */
31916 if (TREE_CODE (decl) == FUNCTION_DECL
31917 && DECL_FUNCTION_VERSIONED (decl))
31918 id = ix86_mangle_function_version_assembler_name (decl, id);
31919 #ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
31920 id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id);
31921 #endif
31922
31923 return id;
31924 }
31925
31926 /* Return a new name by appending SUFFIX to the DECL name. If make_unique
31927 is true, append the full path name of the source file. */
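/* For example, for a public function foo this yields "foo.resolver";
   with MAKE_UNIQUE set, a per-file identifier is spliced in as well,
   giving a name of the form "foo.<file-id>.resolver".  */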
31928
31929 static char *
31930 make_name (tree decl, const char *suffix, bool make_unique)
31931 {
31932 char *global_var_name;
31933 int name_len;
31934 const char *name;
31935 const char *unique_name = NULL;
31936
31937 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
31938
31939 /* Get a unique name that can be used globally without any chances
31940 of collision at link time. */
31941 if (make_unique)
31942 unique_name = IDENTIFIER_POINTER (get_file_function_name ("\0"));
31943
31944 name_len = strlen (name) + strlen (suffix) + 2;
31945
31946 if (make_unique)
31947 name_len += strlen (unique_name) + 1;
31948 global_var_name = XNEWVEC (char, name_len);
31949
31950 /* Use '.' to concatenate names as it is demangler friendly. */
31951 if (make_unique)
31952 snprintf (global_var_name, name_len, "%s.%s.%s", name, unique_name,
31953 suffix);
31954 else
31955 snprintf (global_var_name, name_len, "%s.%s", name, suffix);
31956
31957 return global_var_name;
31958 }
31959
31960 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
31961
31962 /* Make a dispatcher declaration for the multi-versioned function DECL.
31963 Calls to DECL function will be replaced with calls to the dispatcher
31964 by the front-end. Return the decl created. */
31965
31966 static tree
31967 make_dispatcher_decl (const tree decl)
31968 {
31969 tree func_decl;
31970 char *func_name;
31971 tree fn_type, func_type;
31972 bool is_uniq = false;
31973
31974 if (TREE_PUBLIC (decl) == 0)
31975 is_uniq = true;
31976
31977 func_name = make_name (decl, "ifunc", is_uniq);
31978
31979 fn_type = TREE_TYPE (decl);
31980 func_type = build_function_type (TREE_TYPE (fn_type),
31981 TYPE_ARG_TYPES (fn_type));
31982
31983 func_decl = build_fn_decl (func_name, func_type);
31984 XDELETEVEC (func_name);
31985 TREE_USED (func_decl) = 1;
31986 DECL_CONTEXT (func_decl) = NULL_TREE;
31987 DECL_INITIAL (func_decl) = error_mark_node;
31988 DECL_ARTIFICIAL (func_decl) = 1;
31989 /* Mark this func as external; the resolver will flip it again if
31990 it gets generated. */
31991 DECL_EXTERNAL (func_decl) = 1;
31992 /* IFUNCs have to be externally visible. */
31993 TREE_PUBLIC (func_decl) = 1;
31994
31995 return func_decl;
31996 }
31997
31998 #endif
31999
32000 /* Returns true if DECL is multi-versioned and is the default function,
32001 that is, it is not tagged with a target-specific optimization. */
32002
32003 static bool
32004 is_function_default_version (const tree decl)
32005 {
32006 if (TREE_CODE (decl) != FUNCTION_DECL
32007 || !DECL_FUNCTION_VERSIONED (decl))
32008 return false;
32009 tree attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
32010 gcc_assert (attr);
32011 attr = TREE_VALUE (TREE_VALUE (attr));
32012 return (TREE_CODE (attr) == STRING_CST
32013 && strcmp (TREE_STRING_POINTER (attr), "default") == 0);
32014 }
32015
32016 /* Make a dispatcher declaration for the multi-versioned function DECL.
32017 Calls to DECL function will be replaced with calls to the dispatcher
32018 by the front-end. Returns the decl of the dispatcher function. */
32019
32020 static tree
32021 ix86_get_function_versions_dispatcher (void *decl)
32022 {
32023 tree fn = (tree) decl;
32024 struct cgraph_node *node = NULL;
32025 struct cgraph_node *default_node = NULL;
32026 struct cgraph_function_version_info *node_v = NULL;
32027 struct cgraph_function_version_info *first_v = NULL;
32028
32029 tree dispatch_decl = NULL;
32030
32031 struct cgraph_function_version_info *default_version_info = NULL;
32032
32033 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
32034
32035 node = cgraph_node::get (fn);
32036 gcc_assert (node != NULL);
32037
32038 node_v = node->function_version ();
32039 gcc_assert (node_v != NULL);
32040
32041 if (node_v->dispatcher_resolver != NULL)
32042 return node_v->dispatcher_resolver;
32043
32044 /* Find the default version and make it the first node. */
32045 first_v = node_v;
32046 /* Go to the beginning of the chain. */
32047 while (first_v->prev != NULL)
32048 first_v = first_v->prev;
32049 default_version_info = first_v;
32050 while (default_version_info != NULL)
32051 {
32052 if (is_function_default_version
32053 (default_version_info->this_node->decl))
32054 break;
32055 default_version_info = default_version_info->next;
32056 }
32057
32058 /* If there is no default node, just return NULL. */
32059 if (default_version_info == NULL)
32060 return NULL;
32061
32062 /* Make default info the first node. */
32063 if (first_v != default_version_info)
32064 {
32065 default_version_info->prev->next = default_version_info->next;
32066 if (default_version_info->next)
32067 default_version_info->next->prev = default_version_info->prev;
32068 first_v->prev = default_version_info;
32069 default_version_info->next = first_v;
32070 default_version_info->prev = NULL;
32071 }
32072
32073 default_node = default_version_info->this_node;
32074
32075 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
32076 if (targetm.has_ifunc_p ())
32077 {
32078 struct cgraph_function_version_info *it_v = NULL;
32079 struct cgraph_node *dispatcher_node = NULL;
32080 struct cgraph_function_version_info *dispatcher_version_info = NULL;
32081
32082 /* Right now, the dispatching is done via ifunc. */
32083 dispatch_decl = make_dispatcher_decl (default_node->decl);
32084
32085 dispatcher_node = cgraph_node::get_create (dispatch_decl);
32086 gcc_assert (dispatcher_node != NULL);
32087 dispatcher_node->dispatcher_function = 1;
32088 dispatcher_version_info
32089 = dispatcher_node->insert_new_function_version ();
32090 dispatcher_version_info->next = default_version_info;
32091 dispatcher_node->definition = 1;
32092
32093 /* Set the dispatcher for all the versions. */
32094 it_v = default_version_info;
32095 while (it_v != NULL)
32096 {
32097 it_v->dispatcher_resolver = dispatch_decl;
32098 it_v = it_v->next;
32099 }
32100 }
32101 else
32102 #endif
32103 {
32104 error_at (DECL_SOURCE_LOCATION (default_node->decl),
32105 "multiversioning needs ifunc which is not supported "
32106 "on this target");
32107 }
32108
32109 return dispatch_decl;
32110 }
32111
32112 /* Makes a function attribute of the form NAME(ARG_NAME) and chains
32113 it to CHAIN. */
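/* For example, make_attribute ("ifunc", "foo.resolver", NULL_TREE)
   builds the tree form of __attribute__ ((ifunc ("foo.resolver"))),
   which is how make_resolver_func below ties the dispatcher to its
   resolver.  */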
32114
32115 static tree
32116 make_attribute (const char *name, const char *arg_name, tree chain)
32117 {
32118 tree attr_name;
32119 tree attr_arg_name;
32120 tree attr_args;
32121 tree attr;
32122
32123 attr_name = get_identifier (name);
32124 attr_arg_name = build_string (strlen (arg_name), arg_name);
32125 attr_args = tree_cons (NULL_TREE, attr_arg_name, NULL_TREE);
32126 attr = tree_cons (attr_name, attr_args, chain);
32127 return attr;
32128 }
32129
32130 /* Make the resolver function decl to dispatch the versions of
32131 a multi-versioned function, DEFAULT_DECL. Create an
32132 empty basic block in the resolver and store the pointer in
32133 EMPTY_BB. Return the decl of the resolver function. */
32134
32135 static tree
32136 make_resolver_func (const tree default_decl,
32137 const tree dispatch_decl,
32138 basic_block *empty_bb)
32139 {
32140 char *resolver_name;
32141 tree decl, type, decl_name, t;
32142 bool is_uniq = false;
32143
32144 /* IFUNCs have to be globally visible. So, if the default_decl is
32145 not, then the name of the IFUNC should be made unique. */
32146 if (TREE_PUBLIC (default_decl) == 0)
32147 is_uniq = true;
32148
32149 /* Append the filename to the resolver function if the versions are
32150 not externally visible. This is because the resolver function has
32151 to be externally visible for the loader to find it. So, appending
32152 the filename will prevent conflicts with a resolver function from
32153 another module which is based on the same version name. */
32154 resolver_name = make_name (default_decl, "resolver", is_uniq);
32155
32156 /* The resolver function should return a (void *). */
32157 type = build_function_type_list (ptr_type_node, NULL_TREE);
32158
32159 decl = build_fn_decl (resolver_name, type);
32160 decl_name = get_identifier (resolver_name);
32161 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
32162
32163 DECL_NAME (decl) = decl_name;
32164 TREE_USED (decl) = 1;
32165 DECL_ARTIFICIAL (decl) = 1;
32166 DECL_IGNORED_P (decl) = 0;
32167 /* IFUNC resolvers have to be externally visible. */
32168 TREE_PUBLIC (decl) = 1;
32169 DECL_UNINLINABLE (decl) = 1;
32170
32171 /* Resolver is not external, body is generated. */
32172 DECL_EXTERNAL (decl) = 0;
32173 DECL_EXTERNAL (dispatch_decl) = 0;
32174
32175 DECL_CONTEXT (decl) = NULL_TREE;
32176 DECL_INITIAL (decl) = make_node (BLOCK);
32177 DECL_STATIC_CONSTRUCTOR (decl) = 0;
32178
32179 if (DECL_COMDAT_GROUP (default_decl)
32180 || TREE_PUBLIC (default_decl))
32181 {
32182 /* In this case, each translation unit with a call to this
32183 versioned function will put out a resolver. Ensure it
32184 is comdat to keep just one copy. */
32185 DECL_COMDAT (decl) = 1;
32186 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
32187 }
32188 /* Build result decl and add to function_decl. */
32189 t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
32190 DECL_ARTIFICIAL (t) = 1;
32191 DECL_IGNORED_P (t) = 1;
32192 DECL_RESULT (decl) = t;
32193
32194 gimplify_function_tree (decl);
32195 push_cfun (DECL_STRUCT_FUNCTION (decl));
32196 *empty_bb = init_lowered_empty_function (decl, false);
32197
32198 cgraph_node::add_new_function (decl, true);
32199 cgraph_node::get_create (decl)->call_function_insertion_hooks ();
32200
32201 pop_cfun ();
32202
32203 gcc_assert (dispatch_decl != NULL);
32204 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
32205 DECL_ATTRIBUTES (dispatch_decl)
32206 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
32207
32208 /* Create the alias for dispatch to resolver here. */
32209 /*cgraph_create_function_alias (dispatch_decl, decl);*/
32210 cgraph_node::create_same_body_alias (dispatch_decl, decl);
32211 XDELETEVEC (resolver_name);
32212 return decl;
32213 }
32214
32215 /* Generate the dispatching code body to dispatch multi-versioned function
32216 DECL. The target hook is called to process the "target" attributes and
32217 provide the code to dispatch the right function at run-time. NODE points
32218 to the dispatcher decl whose body will be created. */
32219
32220 static tree
32221 ix86_generate_version_dispatcher_body (void *node_p)
32222 {
32223 tree resolver_decl;
32224 basic_block empty_bb;
32225 tree default_ver_decl;
32226 struct cgraph_node *versn;
32227 struct cgraph_node *node;
32228
32229 struct cgraph_function_version_info *node_version_info = NULL;
32230 struct cgraph_function_version_info *versn_info = NULL;
32231
32232 node = (cgraph_node *)node_p;
32233
32234 node_version_info = node->function_version ();
32235 gcc_assert (node->dispatcher_function
32236 && node_version_info != NULL);
32237
32238 if (node_version_info->dispatcher_resolver)
32239 return node_version_info->dispatcher_resolver;
32240
32241 /* The first version in the chain corresponds to the default version. */
32242 default_ver_decl = node_version_info->next->this_node->decl;
32243
32244 /* node is going to be an alias, so remove the finalized bit. */
32245 node->definition = false;
32246
32247 resolver_decl = make_resolver_func (default_ver_decl,
32248 node->decl, &empty_bb);
32249
32250 node_version_info->dispatcher_resolver = resolver_decl;
32251
32252 push_cfun (DECL_STRUCT_FUNCTION (resolver_decl));
32253
32254 auto_vec<tree, 2> fn_ver_vec;
32255
32256 for (versn_info = node_version_info->next; versn_info;
32257 versn_info = versn_info->next)
32258 {
32259 versn = versn_info->this_node;
32260 /* Check for virtual functions here again, as by this time it should
32261 have been determined if this function needs a vtable index or
32262 not. This happens for methods in derived classes that override
32263 virtual methods in base classes but are not explicitly marked as
32264 virtual. */
32265 if (DECL_VINDEX (versn->decl))
32266 sorry ("Virtual function multiversioning not supported");
32267
32268 fn_ver_vec.safe_push (versn->decl);
32269 }
32270
32271 dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
32272 rebuild_cgraph_edges ();
32273 pop_cfun ();
32274 return resolver_decl;
32275 }
32276 /* This builds the processor_model struct type defined in
32277 libgcc/config/i386/cpuinfo.c */
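/* Its layout roughly mirrors the declaration in cpuinfo.c:

     struct __processor_model
     {
       unsigned int __cpu_vendor;
       unsigned int __cpu_type;
       unsigned int __cpu_subtype;
       unsigned int __cpu_features[1];
     };

   so that __builtin_cpu_is and __builtin_cpu_supports can be folded into
   direct reads of the cpu-model data that __builtin_cpu_init fills in.  */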
32278
32279 static tree
32280 build_processor_model_struct (void)
32281 {
32282 const char *field_name[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
32283 "__cpu_features"};
32284 tree field = NULL_TREE, field_chain = NULL_TREE;
32285 int i;
32286 tree type = make_node (RECORD_TYPE);
32287
32288 /* The first 3 fields are unsigned int. */
32289 for (i = 0; i < 3; ++i)
32290 {
32291 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
32292 get_identifier (field_name[i]), unsigned_type_node);
32293 if (field_chain != NULL_TREE)
32294 DECL_CHAIN (field) = field_chain;
32295 field_chain = field;
32296 }
32297
32298 /* The last field is an array of unsigned integers of size one. */
32299 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
32300 get_identifier (field_name[3]),
32301 build_array_type (unsigned_type_node,
32302 build_index_type (size_one_node)));
32303 if (field_chain != NULL_TREE)
32304 DECL_CHAIN (field) = field_chain;
32305 field_chain = field;
32306
32307 finish_builtin_struct (type, "__processor_model", field_chain, NULL_TREE);
32308 return type;
32309 }
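/* For reference (a sketch of what libgcc/config/i386/cpuinfo.c defines),
   the record built above corresponds to:

     struct __processor_model
     {
       unsigned int __cpu_vendor;
       unsigned int __cpu_type;
       unsigned int __cpu_subtype;
       unsigned int __cpu_features[1];
     };  */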
32310
32311 /* Returns an extern, comdat VAR_DECL of type TYPE and name NAME. */
32312
32313 static tree
32314 make_var_decl (tree type, const char *name)
32315 {
32316 tree new_decl;
32317
32318 new_decl = build_decl (UNKNOWN_LOCATION,
32319 VAR_DECL,
32320 get_identifier(name),
32321 type);
32322
32323 DECL_EXTERNAL (new_decl) = 1;
32324 TREE_STATIC (new_decl) = 1;
32325 TREE_PUBLIC (new_decl) = 1;
32326 DECL_INITIAL (new_decl) = 0;
32327 DECL_ARTIFICIAL (new_decl) = 0;
32328 DECL_PRESERVE_P (new_decl) = 1;
32329
32330 make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl));
32331 assemble_variable (new_decl, 0, 0, 0);
32332
32333 return new_decl;
32334 }
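/* For instance (illustrative), fold_builtin_cpu below in effect calls
     make_var_decl (build_processor_model_struct (), "__cpu_model");
   to obtain an extern reference to the __cpu_model object that is
   defined and initialized in libgcc's cpuinfo.c.  */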
32335
32336 /* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded
32337 into an integer defined in libgcc/config/i386/cpuinfo.c */
32338
32339 static tree
32340 fold_builtin_cpu (tree fndecl, tree *args)
32341 {
32342 unsigned int i;
32343 enum ix86_builtins fn_code = (enum ix86_builtins)
32344 DECL_FUNCTION_CODE (fndecl);
32345 tree param_string_cst = NULL;
32346
32347 /* This is the order of bit-fields in __processor_features in cpuinfo.c */
32348 enum processor_features
32349 {
32350 F_CMOV = 0,
32351 F_MMX,
32352 F_POPCNT,
32353 F_SSE,
32354 F_SSE2,
32355 F_SSE3,
32356 F_SSSE3,
32357 F_SSE4_1,
32358 F_SSE4_2,
32359 F_AVX,
32360 F_AVX2,
32361 F_SSE4_A,
32362 F_FMA4,
32363 F_XOP,
32364 F_FMA,
32365 F_MAX
32366 };
32367
32368 /* These are the values for vendor types and cpu types and subtypes
32369 in cpuinfo.c. Cpu types and subtypes should have the corresponding
32370 start value subtracted from them. */
32371 enum processor_model
32372 {
32373 M_INTEL = 1,
32374 M_AMD,
32375 M_CPU_TYPE_START,
32376 M_INTEL_BONNELL,
32377 M_INTEL_CORE2,
32378 M_INTEL_COREI7,
32379 M_AMDFAM10H,
32380 M_AMDFAM15H,
32381 M_INTEL_SILVERMONT,
32382 M_AMD_BTVER1,
32383 M_AMD_BTVER2,
32384 M_CPU_SUBTYPE_START,
32385 M_INTEL_COREI7_NEHALEM,
32386 M_INTEL_COREI7_WESTMERE,
32387 M_INTEL_COREI7_SANDYBRIDGE,
32388 M_AMDFAM10H_BARCELONA,
32389 M_AMDFAM10H_SHANGHAI,
32390 M_AMDFAM10H_ISTANBUL,
32391 M_AMDFAM15H_BDVER1,
32392 M_AMDFAM15H_BDVER2,
32393 M_AMDFAM15H_BDVER3,
32394 M_AMDFAM15H_BDVER4,
32395 M_INTEL_COREI7_IVYBRIDGE,
32396 M_INTEL_COREI7_HASWELL
32397 };
32398
32399 static struct _arch_names_table
32400 {
32401 const char *const name;
32402 const enum processor_model model;
32403 }
32404 const arch_names_table[] =
32405 {
32406 {"amd", M_AMD},
32407 {"intel", M_INTEL},
32408 {"atom", M_INTEL_BONNELL},
32409 {"slm", M_INTEL_SILVERMONT},
32410 {"core2", M_INTEL_CORE2},
32411 {"corei7", M_INTEL_COREI7},
32412 {"nehalem", M_INTEL_COREI7_NEHALEM},
32413 {"westmere", M_INTEL_COREI7_WESTMERE},
32414 {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE},
32415 {"ivybridge", M_INTEL_COREI7_IVYBRIDGE},
32416 {"haswell", M_INTEL_COREI7_HASWELL},
32417 {"bonnell", M_INTEL_BONNELL},
32418 {"silvermont", M_INTEL_SILVERMONT},
32419 {"amdfam10h", M_AMDFAM10H},
32420 {"barcelona", M_AMDFAM10H_BARCELONA},
32421 {"shanghai", M_AMDFAM10H_SHANGHAI},
32422 {"istanbul", M_AMDFAM10H_ISTANBUL},
32423 {"btver1", M_AMD_BTVER1},
32424 {"amdfam15h", M_AMDFAM15H},
32425 {"bdver1", M_AMDFAM15H_BDVER1},
32426 {"bdver2", M_AMDFAM15H_BDVER2},
32427 {"bdver3", M_AMDFAM15H_BDVER3},
32428 {"bdver4", M_AMDFAM15H_BDVER4},
32429 {"btver2", M_AMD_BTVER2},
32430 };
32431
32432 static struct _isa_names_table
32433 {
32434 const char *const name;
32435 const enum processor_features feature;
32436 }
32437 const isa_names_table[] =
32438 {
32439 {"cmov", F_CMOV},
32440 {"mmx", F_MMX},
32441 {"popcnt", F_POPCNT},
32442 {"sse", F_SSE},
32443 {"sse2", F_SSE2},
32444 {"sse3", F_SSE3},
32445 {"ssse3", F_SSSE3},
32446 {"sse4a", F_SSE4_A},
32447 {"sse4.1", F_SSE4_1},
32448 {"sse4.2", F_SSE4_2},
32449 {"avx", F_AVX},
32450 {"fma4", F_FMA4},
32451 {"xop", F_XOP},
32452 {"fma", F_FMA},
32453 {"avx2", F_AVX2}
32454 };
32455
32456 tree __processor_model_type = build_processor_model_struct ();
32457 tree __cpu_model_var = make_var_decl (__processor_model_type,
32458 "__cpu_model");
32459
32460
32461 varpool_add_new_variable (__cpu_model_var);
32462
32463 gcc_assert ((args != NULL) && (*args != NULL));
32464
32465 param_string_cst = *args;
32466 while (param_string_cst
32467 && TREE_CODE (param_string_cst) != STRING_CST)
32468 {
32469 /* *args must be an expr that can contain other EXPRs leading to a
32470 STRING_CST. */
32471 if (!EXPR_P (param_string_cst))
32472 {
32473 error ("Parameter to builtin must be a string constant or literal");
32474 return integer_zero_node;
32475 }
32476 param_string_cst = TREE_OPERAND (EXPR_CHECK (param_string_cst), 0);
32477 }
32478
32479 gcc_assert (param_string_cst);
32480
32481 if (fn_code == IX86_BUILTIN_CPU_IS)
32482 {
32483 tree ref;
32484 tree field;
32485 tree final;
32486
32487 unsigned int field_val = 0;
32488 unsigned int NUM_ARCH_NAMES
32489 = sizeof (arch_names_table) / sizeof (struct _arch_names_table);
32490
32491 for (i = 0; i < NUM_ARCH_NAMES; i++)
32492 if (strcmp (arch_names_table[i].name,
32493 TREE_STRING_POINTER (param_string_cst)) == 0)
32494 break;
32495
32496 if (i == NUM_ARCH_NAMES)
32497 {
32498 error ("Parameter to builtin not valid: %s",
32499 TREE_STRING_POINTER (param_string_cst));
32500 return integer_zero_node;
32501 }
32502
32503 field = TYPE_FIELDS (__processor_model_type);
32504 field_val = arch_names_table[i].model;
32505
32506 /* CPU types are stored in the next field. */
32507 if (field_val > M_CPU_TYPE_START
32508 && field_val < M_CPU_SUBTYPE_START)
32509 {
32510 field = DECL_CHAIN (field);
32511 field_val -= M_CPU_TYPE_START;
32512 }
32513
32514 /* CPU subtypes are stored in the next field. */
32515 if (field_val > M_CPU_SUBTYPE_START)
32516 {
32517 field = DECL_CHAIN (DECL_CHAIN (field));
32518 field_val -= M_CPU_SUBTYPE_START;
32519 }
32520
32521 /* Get the appropriate field in __cpu_model. */
32522 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
32523 field, NULL_TREE);
32524
32525 /* Check the value. */
32526 final = build2 (EQ_EXPR, unsigned_type_node, ref,
32527 build_int_cstu (unsigned_type_node, field_val));
32528 return build1 (CONVERT_EXPR, integer_type_node, final);
32529 }
32530 else if (fn_code == IX86_BUILTIN_CPU_SUPPORTS)
32531 {
32532 tree ref;
32533 tree array_elt;
32534 tree field;
32535 tree final;
32536
32537 unsigned int field_val = 0;
32538 unsigned int NUM_ISA_NAMES
32539 = sizeof (isa_names_table) / sizeof (struct _isa_names_table);
32540
32541 for (i = 0; i < NUM_ISA_NAMES; i++)
32542 if (strcmp (isa_names_table[i].name,
32543 TREE_STRING_POINTER (param_string_cst)) == 0)
32544 break;
32545
32546 if (i == NUM_ISA_NAMES)
32547 {
32548 error ("Parameter to builtin not valid: %s",
32549 TREE_STRING_POINTER (param_string_cst));
32550 return integer_zero_node;
32551 }
32552
32553 field = TYPE_FIELDS (__processor_model_type);
32554 /* Get the last field, which is __cpu_features. */
32555 while (DECL_CHAIN (field))
32556 field = DECL_CHAIN (field);
32557
32558 /* Get the appropriate field: __cpu_model.__cpu_features */
32559 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
32560 field, NULL_TREE);
32561
32562 /* Access the 0th element of __cpu_features array. */
32563 array_elt = build4 (ARRAY_REF, unsigned_type_node, ref,
32564 integer_zero_node, NULL_TREE, NULL_TREE);
32565
32566 field_val = (1 << isa_names_table[i].feature);
32567 /* Return __cpu_model.__cpu_features[0] & field_val */
32568 final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
32569 build_int_cstu (unsigned_type_node, field_val));
32570 return build1 (CONVERT_EXPR, integer_type_node, final);
32571 }
32572 gcc_unreachable ();
32573 }
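/* For illustration (approximate, using the local enums above):
   __builtin_cpu_supports ("avx2") is folded to

     (int) (__cpu_model.__cpu_features[0] & (1 << F_AVX2))

   and __builtin_cpu_is ("bdver4") to

     (int) (__cpu_model.__cpu_subtype
            == M_AMDFAM15H_BDVER4 - M_CPU_SUBTYPE_START)

   where __cpu_model is the variable defined in libgcc's cpuinfo.c.  */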
32574
32575 static tree
32576 ix86_fold_builtin (tree fndecl, int n_args,
32577 tree *args, bool ignore ATTRIBUTE_UNUSED)
32578 {
32579 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
32580 {
32581 enum ix86_builtins fn_code = (enum ix86_builtins)
32582 DECL_FUNCTION_CODE (fndecl);
32583 if (fn_code == IX86_BUILTIN_CPU_IS
32584 || fn_code == IX86_BUILTIN_CPU_SUPPORTS)
32585 {
32586 gcc_assert (n_args == 1);
32587 return fold_builtin_cpu (fndecl, args);
32588 }
32589 }
32590
32591 #ifdef SUBTARGET_FOLD_BUILTIN
32592 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
32593 #endif
32594
32595 return NULL_TREE;
32596 }
32597
32598 /* Make builtins to detect cpu type and features supported. NAME is
32599 the builtin name, CODE is the builtin code, FTYPE is the function type
32600 of the builtin, and IS_CONST marks the builtin as TREE_READONLY. */
32601
32602 static void
32603 make_cpu_type_builtin (const char* name, int code,
32604 enum ix86_builtin_func_type ftype, bool is_const)
32605 {
32606 tree decl;
32607 tree type;
32608
32609 type = ix86_get_builtin_func_type (ftype);
32610 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
32611 NULL, NULL_TREE);
32612 gcc_assert (decl != NULL_TREE);
32613 ix86_builtins[(int) code] = decl;
32614 TREE_READONLY (decl) = is_const;
32615 }
32616
32617 /* Make builtins to get CPU type and features supported. The created
32618 builtins are:
32619
32620 __builtin_cpu_init (), to detect cpu type and features,
32621 __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
32622 __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>
32623 */
32624
32625 static void
32626 ix86_init_platform_type_builtins (void)
32627 {
32628 make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT,
32629 INT_FTYPE_VOID, false);
32630 make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS,
32631 INT_FTYPE_PCCHAR, true);
32632 make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS,
32633 INT_FTYPE_PCCHAR, true);
32634 }
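/* Illustrative user-level usage of the builtins created above (the
   arch and feature strings come from the tables in fold_builtin_cpu):

     __builtin_cpu_init ();
     if (__builtin_cpu_is ("intel") && __builtin_cpu_supports ("sse4.2"))
       use_sse42_path ();

   use_sse42_path is a placeholder; see the GCC manual for when an
   explicit __builtin_cpu_init call is actually required.  */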
32635
32636 /* Internal method for ix86_init_builtins. */
32637
32638 static void
32639 ix86_init_builtins_va_builtins_abi (void)
32640 {
32641 tree ms_va_ref, sysv_va_ref;
32642 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
32643 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
32644 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
32645 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
32646
32647 if (!TARGET_64BIT)
32648 return;
32649 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
32650 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
32651 ms_va_ref = build_reference_type (ms_va_list_type_node);
32652 sysv_va_ref =
32653 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
32654
32655 fnvoid_va_end_ms =
32656 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
32657 fnvoid_va_start_ms =
32658 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
32659 fnvoid_va_end_sysv =
32660 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
32661 fnvoid_va_start_sysv =
32662 build_varargs_function_type_list (void_type_node, sysv_va_ref,
32663 NULL_TREE);
32664 fnvoid_va_copy_ms =
32665 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
32666 NULL_TREE);
32667 fnvoid_va_copy_sysv =
32668 build_function_type_list (void_type_node, sysv_va_ref,
32669 sysv_va_ref, NULL_TREE);
32670
32671 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
32672 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
32673 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
32674 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
32675 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
32676 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
32677 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
32678 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
32679 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
32680 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
32681 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
32682 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
32683 }
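/* Illustrative note (not taken from this file): on x86-64 a function may
   opt into the other calling convention, e.g.

     void __attribute__ ((ms_abi)) f (int n, ...);

   and va_start/va_end/va_copy inside such a function then expand to the
   __builtin_ms_va_* variants registered above instead of the SysV ones
   (and correspondingly for sysv_abi functions under the MS default).  */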
32684
32685 static void
32686 ix86_init_builtin_types (void)
32687 {
32688 tree float128_type_node, float80_type_node;
32689
32690 /* The __float80 type. */
32691 float80_type_node = long_double_type_node;
32692 if (TYPE_MODE (float80_type_node) != XFmode)
32693 {
32694 /* The __float80 type. */
32695 float80_type_node = make_node (REAL_TYPE);
32696
32697 TYPE_PRECISION (float80_type_node) = 80;
32698 layout_type (float80_type_node);
32699 }
32700 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
32701
32702 /* The __float128 type. */
32703 float128_type_node = make_node (REAL_TYPE);
32704 TYPE_PRECISION (float128_type_node) = 128;
32705 layout_type (float128_type_node);
32706 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
32707
32708 /* This macro is built by i386-builtin-types.awk. */
32709 DEFINE_BUILTIN_PRIMITIVE_TYPES;
32710 }
32711
32712 static void
32713 ix86_init_builtins (void)
32714 {
32715 tree t;
32716
32717 ix86_init_builtin_types ();
32718
32719 /* Builtins to get CPU type and features. */
32720 ix86_init_platform_type_builtins ();
32721
32722 /* TFmode support builtins. */
32723 def_builtin_const (0, "__builtin_infq",
32724 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
32725 def_builtin_const (0, "__builtin_huge_valq",
32726 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
32727
32728 /* We will expand them to a normal call if SSE isn't available since
32729 they are used by libgcc. */
32730 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
32731 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
32732 BUILT_IN_MD, "__fabstf2", NULL_TREE);
32733 TREE_READONLY (t) = 1;
32734 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
32735
32736 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
32737 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
32738 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
32739 TREE_READONLY (t) = 1;
32740 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
32741
32742 ix86_init_tm_builtins ();
32743 ix86_init_mmx_sse_builtins ();
32744
32745 if (TARGET_LP64)
32746 ix86_init_builtins_va_builtins_abi ();
32747
32748 #ifdef SUBTARGET_INIT_BUILTINS
32749 SUBTARGET_INIT_BUILTINS;
32750 #endif
32751 }
32752
32753 /* Return the ix86 builtin for CODE. */
32754
32755 static tree
32756 ix86_builtin_decl (unsigned code, bool)
32757 {
32758 if (code >= IX86_BUILTIN_MAX)
32759 return error_mark_node;
32760
32761 return ix86_builtins[code];
32762 }
32763
32764 /* Errors in the source file can cause expand_expr to return const0_rtx
32765 where we expect a vector. To avoid crashing, use one of the vector
32766 clear instructions. */
32767 static rtx
32768 safe_vector_operand (rtx x, enum machine_mode mode)
32769 {
32770 if (x == const0_rtx)
32771 x = CONST0_RTX (mode);
32772 return x;
32773 }
32774
32775 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
32776
32777 static rtx
32778 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
32779 {
32780 rtx pat;
32781 tree arg0 = CALL_EXPR_ARG (exp, 0);
32782 tree arg1 = CALL_EXPR_ARG (exp, 1);
32783 rtx op0 = expand_normal (arg0);
32784 rtx op1 = expand_normal (arg1);
32785 enum machine_mode tmode = insn_data[icode].operand[0].mode;
32786 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
32787 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
32788
32789 if (VECTOR_MODE_P (mode0))
32790 op0 = safe_vector_operand (op0, mode0);
32791 if (VECTOR_MODE_P (mode1))
32792 op1 = safe_vector_operand (op1, mode1);
32793
32794 if (optimize || !target
32795 || GET_MODE (target) != tmode
32796 || !insn_data[icode].operand[0].predicate (target, tmode))
32797 target = gen_reg_rtx (tmode);
32798
32799 if (GET_MODE (op1) == SImode && mode1 == TImode)
32800 {
32801 rtx x = gen_reg_rtx (V4SImode);
32802 emit_insn (gen_sse2_loadd (x, op1));
32803 op1 = gen_lowpart (TImode, x);
32804 }
32805
32806 if (!insn_data[icode].operand[1].predicate (op0, mode0))
32807 op0 = copy_to_mode_reg (mode0, op0);
32808 if (!insn_data[icode].operand[2].predicate (op1, mode1))
32809 op1 = copy_to_mode_reg (mode1, op1);
32810
32811 pat = GEN_FCN (icode) (target, op0, op1);
32812 if (! pat)
32813 return 0;
32814
32815 emit_insn (pat);
32816
32817 return target;
32818 }
32819
32820 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
32821
32822 static rtx
32823 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
32824 enum ix86_builtin_func_type m_type,
32825 enum rtx_code sub_code)
32826 {
32827 rtx pat;
32828 int i;
32829 int nargs;
32830 bool comparison_p = false;
32831 bool tf_p = false;
32832 bool last_arg_constant = false;
32833 int num_memory = 0;
32834 struct {
32835 rtx op;
32836 enum machine_mode mode;
32837 } args[4];
32838
32839 enum machine_mode tmode = insn_data[icode].operand[0].mode;
32840
32841 switch (m_type)
32842 {
32843 case MULTI_ARG_4_DF2_DI_I:
32844 case MULTI_ARG_4_DF2_DI_I1:
32845 case MULTI_ARG_4_SF2_SI_I:
32846 case MULTI_ARG_4_SF2_SI_I1:
32847 nargs = 4;
32848 last_arg_constant = true;
32849 break;
32850
32851 case MULTI_ARG_3_SF:
32852 case MULTI_ARG_3_DF:
32853 case MULTI_ARG_3_SF2:
32854 case MULTI_ARG_3_DF2:
32855 case MULTI_ARG_3_DI:
32856 case MULTI_ARG_3_SI:
32857 case MULTI_ARG_3_SI_DI:
32858 case MULTI_ARG_3_HI:
32859 case MULTI_ARG_3_HI_SI:
32860 case MULTI_ARG_3_QI:
32861 case MULTI_ARG_3_DI2:
32862 case MULTI_ARG_3_SI2:
32863 case MULTI_ARG_3_HI2:
32864 case MULTI_ARG_3_QI2:
32865 nargs = 3;
32866 break;
32867
32868 case MULTI_ARG_2_SF:
32869 case MULTI_ARG_2_DF:
32870 case MULTI_ARG_2_DI:
32871 case MULTI_ARG_2_SI:
32872 case MULTI_ARG_2_HI:
32873 case MULTI_ARG_2_QI:
32874 nargs = 2;
32875 break;
32876
32877 case MULTI_ARG_2_DI_IMM:
32878 case MULTI_ARG_2_SI_IMM:
32879 case MULTI_ARG_2_HI_IMM:
32880 case MULTI_ARG_2_QI_IMM:
32881 nargs = 2;
32882 last_arg_constant = true;
32883 break;
32884
32885 case MULTI_ARG_1_SF:
32886 case MULTI_ARG_1_DF:
32887 case MULTI_ARG_1_SF2:
32888 case MULTI_ARG_1_DF2:
32889 case MULTI_ARG_1_DI:
32890 case MULTI_ARG_1_SI:
32891 case MULTI_ARG_1_HI:
32892 case MULTI_ARG_1_QI:
32893 case MULTI_ARG_1_SI_DI:
32894 case MULTI_ARG_1_HI_DI:
32895 case MULTI_ARG_1_HI_SI:
32896 case MULTI_ARG_1_QI_DI:
32897 case MULTI_ARG_1_QI_SI:
32898 case MULTI_ARG_1_QI_HI:
32899 nargs = 1;
32900 break;
32901
32902 case MULTI_ARG_2_DI_CMP:
32903 case MULTI_ARG_2_SI_CMP:
32904 case MULTI_ARG_2_HI_CMP:
32905 case MULTI_ARG_2_QI_CMP:
32906 nargs = 2;
32907 comparison_p = true;
32908 break;
32909
32910 case MULTI_ARG_2_SF_TF:
32911 case MULTI_ARG_2_DF_TF:
32912 case MULTI_ARG_2_DI_TF:
32913 case MULTI_ARG_2_SI_TF:
32914 case MULTI_ARG_2_HI_TF:
32915 case MULTI_ARG_2_QI_TF:
32916 nargs = 2;
32917 tf_p = true;
32918 break;
32919
32920 default:
32921 gcc_unreachable ();
32922 }
32923
32924 if (optimize || !target
32925 || GET_MODE (target) != tmode
32926 || !insn_data[icode].operand[0].predicate (target, tmode))
32927 target = gen_reg_rtx (tmode);
32928
32929 gcc_assert (nargs <= 4);
32930
32931 for (i = 0; i < nargs; i++)
32932 {
32933 tree arg = CALL_EXPR_ARG (exp, i);
32934 rtx op = expand_normal (arg);
32935 int adjust = (comparison_p) ? 1 : 0;
32936 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
32937
32938 if (last_arg_constant && i == nargs - 1)
32939 {
32940 if (!insn_data[icode].operand[i + 1].predicate (op, mode))
32941 {
32942 enum insn_code new_icode = icode;
32943 switch (icode)
32944 {
32945 case CODE_FOR_xop_vpermil2v2df3:
32946 case CODE_FOR_xop_vpermil2v4sf3:
32947 case CODE_FOR_xop_vpermil2v4df3:
32948 case CODE_FOR_xop_vpermil2v8sf3:
32949 error ("the last argument must be a 2-bit immediate");
32950 return gen_reg_rtx (tmode);
32951 case CODE_FOR_xop_rotlv2di3:
32952 new_icode = CODE_FOR_rotlv2di3;
32953 goto xop_rotl;
32954 case CODE_FOR_xop_rotlv4si3:
32955 new_icode = CODE_FOR_rotlv4si3;
32956 goto xop_rotl;
32957 case CODE_FOR_xop_rotlv8hi3:
32958 new_icode = CODE_FOR_rotlv8hi3;
32959 goto xop_rotl;
32960 case CODE_FOR_xop_rotlv16qi3:
32961 new_icode = CODE_FOR_rotlv16qi3;
32962 xop_rotl:
32963 if (CONST_INT_P (op))
32964 {
32965 int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
32966 op = GEN_INT (INTVAL (op) & mask);
32967 gcc_checking_assert
32968 (insn_data[icode].operand[i + 1].predicate (op, mode));
32969 }
32970 else
32971 {
32972 gcc_checking_assert
32973 (nargs == 2
32974 && insn_data[new_icode].operand[0].mode == tmode
32975 && insn_data[new_icode].operand[1].mode == tmode
32976 && insn_data[new_icode].operand[2].mode == mode
32977 && insn_data[new_icode].operand[0].predicate
32978 == insn_data[icode].operand[0].predicate
32979 && insn_data[new_icode].operand[1].predicate
32980 == insn_data[icode].operand[1].predicate);
32981 icode = new_icode;
32982 goto non_constant;
32983 }
32984 break;
32985 default:
32986 gcc_unreachable ();
32987 }
32988 }
32989 }
32990 else
32991 {
32992 non_constant:
32993 if (VECTOR_MODE_P (mode))
32994 op = safe_vector_operand (op, mode);
32995
32996 /* If we aren't optimizing, only allow one memory operand to be
32997 generated. */
32998 if (memory_operand (op, mode))
32999 num_memory++;
33000
33001 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
33002
33003 if (optimize
33004 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
33005 || num_memory > 1)
33006 op = force_reg (mode, op);
33007 }
33008
33009 args[i].op = op;
33010 args[i].mode = mode;
33011 }
33012
33013 switch (nargs)
33014 {
33015 case 1:
33016 pat = GEN_FCN (icode) (target, args[0].op);
33017 break;
33018
33019 case 2:
33020 if (tf_p)
33021 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
33022 GEN_INT ((int)sub_code));
33023 else if (! comparison_p)
33024 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
33025 else
33026 {
33027 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
33028 args[0].op,
33029 args[1].op);
33030
33031 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
33032 }
33033 break;
33034
33035 case 3:
33036 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
33037 break;
33038
33039 case 4:
33040 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
33041 break;
33042
33043 default:
33044 gcc_unreachable ();
33045 }
33046
33047 if (! pat)
33048 return 0;
33049
33050 emit_insn (pat);
33051 return target;
33052 }
33053
33054 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
33055 insns with vec_merge. */
33056
33057 static rtx
33058 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
33059 rtx target)
33060 {
33061 rtx pat;
33062 tree arg0 = CALL_EXPR_ARG (exp, 0);
33063 rtx op1, op0 = expand_normal (arg0);
33064 enum machine_mode tmode = insn_data[icode].operand[0].mode;
33065 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
33066
33067 if (optimize || !target
33068 || GET_MODE (target) != tmode
33069 || !insn_data[icode].operand[0].predicate (target, tmode))
33070 target = gen_reg_rtx (tmode);
33071
33072 if (VECTOR_MODE_P (mode0))
33073 op0 = safe_vector_operand (op0, mode0);
33074
33075 if ((optimize && !register_operand (op0, mode0))
33076 || !insn_data[icode].operand[1].predicate (op0, mode0))
33077 op0 = copy_to_mode_reg (mode0, op0);
33078
33079 op1 = op0;
33080 if (!insn_data[icode].operand[2].predicate (op1, mode0))
33081 op1 = copy_to_mode_reg (mode0, op1);
33082
33083 pat = GEN_FCN (icode) (target, op0, op1);
33084 if (! pat)
33085 return 0;
33086 emit_insn (pat);
33087 return target;
33088 }
33089
33090 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
33091
33092 static rtx
33093 ix86_expand_sse_compare (const struct builtin_description *d,
33094 tree exp, rtx target, bool swap)
33095 {
33096 rtx pat;
33097 tree arg0 = CALL_EXPR_ARG (exp, 0);
33098 tree arg1 = CALL_EXPR_ARG (exp, 1);
33099 rtx op0 = expand_normal (arg0);
33100 rtx op1 = expand_normal (arg1);
33101 rtx op2;
33102 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
33103 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
33104 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
33105 enum rtx_code comparison = d->comparison;
33106
33107 if (VECTOR_MODE_P (mode0))
33108 op0 = safe_vector_operand (op0, mode0);
33109 if (VECTOR_MODE_P (mode1))
33110 op1 = safe_vector_operand (op1, mode1);
33111
33112 /* Swap operands if we have a comparison that isn't available in
33113 hardware. */
33114 if (swap)
33115 {
33116 rtx tmp = gen_reg_rtx (mode1);
33117 emit_move_insn (tmp, op1);
33118 op1 = op0;
33119 op0 = tmp;
33120 }
33121
33122 if (optimize || !target
33123 || GET_MODE (target) != tmode
33124 || !insn_data[d->icode].operand[0].predicate (target, tmode))
33125 target = gen_reg_rtx (tmode);
33126
33127 if ((optimize && !register_operand (op0, mode0))
33128 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
33129 op0 = copy_to_mode_reg (mode0, op0);
33130 if ((optimize && !register_operand (op1, mode1))
33131 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
33132 op1 = copy_to_mode_reg (mode1, op1);
33133
33134 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
33135 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
33136 if (! pat)
33137 return 0;
33138 emit_insn (pat);
33139 return target;
33140 }
33141
33142 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
33143
33144 static rtx
33145 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
33146 rtx target)
33147 {
33148 rtx pat;
33149 tree arg0 = CALL_EXPR_ARG (exp, 0);
33150 tree arg1 = CALL_EXPR_ARG (exp, 1);
33151 rtx op0 = expand_normal (arg0);
33152 rtx op1 = expand_normal (arg1);
33153 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
33154 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
33155 enum rtx_code comparison = d->comparison;
33156
33157 if (VECTOR_MODE_P (mode0))
33158 op0 = safe_vector_operand (op0, mode0);
33159 if (VECTOR_MODE_P (mode1))
33160 op1 = safe_vector_operand (op1, mode1);
33161
33162 /* Swap operands if we have a comparison that isn't available in
33163 hardware. */
33164 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
33165 {
33166 rtx tmp = op1;
33167 op1 = op0;
33168 op0 = tmp;
33169 }
33170
33171 target = gen_reg_rtx (SImode);
33172 emit_move_insn (target, const0_rtx);
33173 target = gen_rtx_SUBREG (QImode, target, 0);
33174
33175 if ((optimize && !register_operand (op0, mode0))
33176 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33177 op0 = copy_to_mode_reg (mode0, op0);
33178 if ((optimize && !register_operand (op1, mode1))
33179 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
33180 op1 = copy_to_mode_reg (mode1, op1);
33181
33182 pat = GEN_FCN (d->icode) (op0, op1);
33183 if (! pat)
33184 return 0;
33185 emit_insn (pat);
33186 emit_insn (gen_rtx_SET (VOIDmode,
33187 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33188 gen_rtx_fmt_ee (comparison, QImode,
33189 SET_DEST (pat),
33190 const0_rtx)));
33191
33192 return SUBREG_REG (target);
33193 }
33194
33195 /* Subroutines of ix86_expand_args_builtin to take care of round insns. */
33196
33197 static rtx
33198 ix86_expand_sse_round (const struct builtin_description *d, tree exp,
33199 rtx target)
33200 {
33201 rtx pat;
33202 tree arg0 = CALL_EXPR_ARG (exp, 0);
33203 rtx op1, op0 = expand_normal (arg0);
33204 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
33205 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
33206
33207 if (optimize || target == 0
33208 || GET_MODE (target) != tmode
33209 || !insn_data[d->icode].operand[0].predicate (target, tmode))
33210 target = gen_reg_rtx (tmode);
33211
33212 if (VECTOR_MODE_P (mode0))
33213 op0 = safe_vector_operand (op0, mode0);
33214
33215 if ((optimize && !register_operand (op0, mode0))
33216 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33217 op0 = copy_to_mode_reg (mode0, op0);
33218
33219 op1 = GEN_INT (d->comparison);
33220
33221 pat = GEN_FCN (d->icode) (target, op0, op1);
33222 if (! pat)
33223 return 0;
33224 emit_insn (pat);
33225 return target;
33226 }
33227
33228 static rtx
33229 ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d,
33230 tree exp, rtx target)
33231 {
33232 rtx pat;
33233 tree arg0 = CALL_EXPR_ARG (exp, 0);
33234 tree arg1 = CALL_EXPR_ARG (exp, 1);
33235 rtx op0 = expand_normal (arg0);
33236 rtx op1 = expand_normal (arg1);
33237 rtx op2;
33238 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
33239 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
33240 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
33241
33242 if (optimize || target == 0
33243 || GET_MODE (target) != tmode
33244 || !insn_data[d->icode].operand[0].predicate (target, tmode))
33245 target = gen_reg_rtx (tmode);
33246
33247 op0 = safe_vector_operand (op0, mode0);
33248 op1 = safe_vector_operand (op1, mode1);
33249
33250 if ((optimize && !register_operand (op0, mode0))
33251 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33252 op0 = copy_to_mode_reg (mode0, op0);
33253 if ((optimize && !register_operand (op1, mode1))
33254 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
33255 op1 = copy_to_mode_reg (mode1, op1);
33256
33257 op2 = GEN_INT (d->comparison);
33258
33259 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
33260 if (! pat)
33261 return 0;
33262 emit_insn (pat);
33263 return target;
33264 }
33265
33266 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
33267
33268 static rtx
33269 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
33270 rtx target)
33271 {
33272 rtx pat;
33273 tree arg0 = CALL_EXPR_ARG (exp, 0);
33274 tree arg1 = CALL_EXPR_ARG (exp, 1);
33275 rtx op0 = expand_normal (arg0);
33276 rtx op1 = expand_normal (arg1);
33277 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
33278 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
33279 enum rtx_code comparison = d->comparison;
33280
33281 if (VECTOR_MODE_P (mode0))
33282 op0 = safe_vector_operand (op0, mode0);
33283 if (VECTOR_MODE_P (mode1))
33284 op1 = safe_vector_operand (op1, mode1);
33285
33286 target = gen_reg_rtx (SImode);
33287 emit_move_insn (target, const0_rtx);
33288 target = gen_rtx_SUBREG (QImode, target, 0);
33289
33290 if ((optimize && !register_operand (op0, mode0))
33291 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33292 op0 = copy_to_mode_reg (mode0, op0);
33293 if ((optimize && !register_operand (op1, mode1))
33294 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
33295 op1 = copy_to_mode_reg (mode1, op1);
33296
33297 pat = GEN_FCN (d->icode) (op0, op1);
33298 if (! pat)
33299 return 0;
33300 emit_insn (pat);
33301 emit_insn (gen_rtx_SET (VOIDmode,
33302 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33303 gen_rtx_fmt_ee (comparison, QImode,
33304 SET_DEST (pat),
33305 const0_rtx)));
33306
33307 return SUBREG_REG (target);
33308 }
33309
33310 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
33311
33312 static rtx
33313 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
33314 tree exp, rtx target)
33315 {
33316 rtx pat;
33317 tree arg0 = CALL_EXPR_ARG (exp, 0);
33318 tree arg1 = CALL_EXPR_ARG (exp, 1);
33319 tree arg2 = CALL_EXPR_ARG (exp, 2);
33320 tree arg3 = CALL_EXPR_ARG (exp, 3);
33321 tree arg4 = CALL_EXPR_ARG (exp, 4);
33322 rtx scratch0, scratch1;
33323 rtx op0 = expand_normal (arg0);
33324 rtx op1 = expand_normal (arg1);
33325 rtx op2 = expand_normal (arg2);
33326 rtx op3 = expand_normal (arg3);
33327 rtx op4 = expand_normal (arg4);
33328 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
33329
33330 tmode0 = insn_data[d->icode].operand[0].mode;
33331 tmode1 = insn_data[d->icode].operand[1].mode;
33332 modev2 = insn_data[d->icode].operand[2].mode;
33333 modei3 = insn_data[d->icode].operand[3].mode;
33334 modev4 = insn_data[d->icode].operand[4].mode;
33335 modei5 = insn_data[d->icode].operand[5].mode;
33336 modeimm = insn_data[d->icode].operand[6].mode;
33337
33338 if (VECTOR_MODE_P (modev2))
33339 op0 = safe_vector_operand (op0, modev2);
33340 if (VECTOR_MODE_P (modev4))
33341 op2 = safe_vector_operand (op2, modev4);
33342
33343 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
33344 op0 = copy_to_mode_reg (modev2, op0);
33345 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
33346 op1 = copy_to_mode_reg (modei3, op1);
33347 if ((optimize && !register_operand (op2, modev4))
33348 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
33349 op2 = copy_to_mode_reg (modev4, op2);
33350 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
33351 op3 = copy_to_mode_reg (modei5, op3);
33352
33353 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
33354 {
33355 error ("the fifth argument must be an 8-bit immediate");
33356 return const0_rtx;
33357 }
33358
33359 if (d->code == IX86_BUILTIN_PCMPESTRI128)
33360 {
33361 if (optimize || !target
33362 || GET_MODE (target) != tmode0
33363 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
33364 target = gen_reg_rtx (tmode0);
33365
33366 scratch1 = gen_reg_rtx (tmode1);
33367
33368 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
33369 }
33370 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
33371 {
33372 if (optimize || !target
33373 || GET_MODE (target) != tmode1
33374 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
33375 target = gen_reg_rtx (tmode1);
33376
33377 scratch0 = gen_reg_rtx (tmode0);
33378
33379 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
33380 }
33381 else
33382 {
33383 gcc_assert (d->flag);
33384
33385 scratch0 = gen_reg_rtx (tmode0);
33386 scratch1 = gen_reg_rtx (tmode1);
33387
33388 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
33389 }
33390
33391 if (! pat)
33392 return 0;
33393
33394 emit_insn (pat);
33395
33396 if (d->flag)
33397 {
33398 target = gen_reg_rtx (SImode);
33399 emit_move_insn (target, const0_rtx);
33400 target = gen_rtx_SUBREG (QImode, target, 0);
33401
33402 emit_insn
33403 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33404 gen_rtx_fmt_ee (EQ, QImode,
33405 gen_rtx_REG ((enum machine_mode) d->flag,
33406 FLAGS_REG),
33407 const0_rtx)));
33408 return SUBREG_REG (target);
33409 }
33410 else
33411 return target;
33412 }
33413
33414
33415 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
33416
33417 static rtx
33418 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
33419 tree exp, rtx target)
33420 {
33421 rtx pat;
33422 tree arg0 = CALL_EXPR_ARG (exp, 0);
33423 tree arg1 = CALL_EXPR_ARG (exp, 1);
33424 tree arg2 = CALL_EXPR_ARG (exp, 2);
33425 rtx scratch0, scratch1;
33426 rtx op0 = expand_normal (arg0);
33427 rtx op1 = expand_normal (arg1);
33428 rtx op2 = expand_normal (arg2);
33429 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
33430
33431 tmode0 = insn_data[d->icode].operand[0].mode;
33432 tmode1 = insn_data[d->icode].operand[1].mode;
33433 modev2 = insn_data[d->icode].operand[2].mode;
33434 modev3 = insn_data[d->icode].operand[3].mode;
33435 modeimm = insn_data[d->icode].operand[4].mode;
33436
33437 if (VECTOR_MODE_P (modev2))
33438 op0 = safe_vector_operand (op0, modev2);
33439 if (VECTOR_MODE_P (modev3))
33440 op1 = safe_vector_operand (op1, modev3);
33441
33442 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
33443 op0 = copy_to_mode_reg (modev2, op0);
33444 if ((optimize && !register_operand (op1, modev3))
33445 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
33446 op1 = copy_to_mode_reg (modev3, op1);
33447
33448 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
33449 {
33450 error ("the third argument must be an 8-bit immediate");
33451 return const0_rtx;
33452 }
33453
33454 if (d->code == IX86_BUILTIN_PCMPISTRI128)
33455 {
33456 if (optimize || !target
33457 || GET_MODE (target) != tmode0
33458 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
33459 target = gen_reg_rtx (tmode0);
33460
33461 scratch1 = gen_reg_rtx (tmode1);
33462
33463 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
33464 }
33465 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
33466 {
33467 if (optimize || !target
33468 || GET_MODE (target) != tmode1
33469 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
33470 target = gen_reg_rtx (tmode1);
33471
33472 scratch0 = gen_reg_rtx (tmode0);
33473
33474 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
33475 }
33476 else
33477 {
33478 gcc_assert (d->flag);
33479
33480 scratch0 = gen_reg_rtx (tmode0);
33481 scratch1 = gen_reg_rtx (tmode1);
33482
33483 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
33484 }
33485
33486 if (! pat)
33487 return 0;
33488
33489 emit_insn (pat);
33490
33491 if (d->flag)
33492 {
33493 target = gen_reg_rtx (SImode);
33494 emit_move_insn (target, const0_rtx);
33495 target = gen_rtx_SUBREG (QImode, target, 0);
33496
33497 emit_insn
33498 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33499 gen_rtx_fmt_ee (EQ, QImode,
33500 gen_rtx_REG ((enum machine_mode) d->flag,
33501 FLAGS_REG),
33502 const0_rtx)));
33503 return SUBREG_REG (target);
33504 }
33505 else
33506 return target;
33507 }
33508
33509 /* Subroutine of ix86_expand_builtin to take care of insns with
33510 variable number of operands. */
33511
33512 static rtx
33513 ix86_expand_args_builtin (const struct builtin_description *d,
33514 tree exp, rtx target)
33515 {
33516 rtx pat, real_target;
33517 unsigned int i, nargs;
33518 unsigned int nargs_constant = 0;
33519 unsigned int mask_pos = 0;
33520 int num_memory = 0;
33521 struct
33522 {
33523 rtx op;
33524 enum machine_mode mode;
33525 } args[6];
33526 bool last_arg_count = false;
33527 enum insn_code icode = d->icode;
33528 const struct insn_data_d *insn_p = &insn_data[icode];
33529 enum machine_mode tmode = insn_p->operand[0].mode;
33530 enum machine_mode rmode = VOIDmode;
33531 bool swap = false;
33532 enum rtx_code comparison = d->comparison;
33533
33534 switch ((enum ix86_builtin_func_type) d->flag)
33535 {
33536 case V2DF_FTYPE_V2DF_ROUND:
33537 case V4DF_FTYPE_V4DF_ROUND:
33538 case V4SF_FTYPE_V4SF_ROUND:
33539 case V8SF_FTYPE_V8SF_ROUND:
33540 case V4SI_FTYPE_V4SF_ROUND:
33541 case V8SI_FTYPE_V8SF_ROUND:
33542 return ix86_expand_sse_round (d, exp, target);
33543 case V4SI_FTYPE_V2DF_V2DF_ROUND:
33544 case V8SI_FTYPE_V4DF_V4DF_ROUND:
33545 case V16SI_FTYPE_V8DF_V8DF_ROUND:
33546 return ix86_expand_sse_round_vec_pack_sfix (d, exp, target);
33547 case INT_FTYPE_V8SF_V8SF_PTEST:
33548 case INT_FTYPE_V4DI_V4DI_PTEST:
33549 case INT_FTYPE_V4DF_V4DF_PTEST:
33550 case INT_FTYPE_V4SF_V4SF_PTEST:
33551 case INT_FTYPE_V2DI_V2DI_PTEST:
33552 case INT_FTYPE_V2DF_V2DF_PTEST:
33553 return ix86_expand_sse_ptest (d, exp, target);
33554 case FLOAT128_FTYPE_FLOAT128:
33555 case FLOAT_FTYPE_FLOAT:
33556 case INT_FTYPE_INT:
33557 case UINT64_FTYPE_INT:
33558 case UINT16_FTYPE_UINT16:
33559 case INT64_FTYPE_INT64:
33560 case INT64_FTYPE_V4SF:
33561 case INT64_FTYPE_V2DF:
33562 case INT_FTYPE_V16QI:
33563 case INT_FTYPE_V8QI:
33564 case INT_FTYPE_V8SF:
33565 case INT_FTYPE_V4DF:
33566 case INT_FTYPE_V4SF:
33567 case INT_FTYPE_V2DF:
33568 case INT_FTYPE_V32QI:
33569 case V16QI_FTYPE_V16QI:
33570 case V8SI_FTYPE_V8SF:
33571 case V8SI_FTYPE_V4SI:
33572 case V8HI_FTYPE_V8HI:
33573 case V8HI_FTYPE_V16QI:
33574 case V8QI_FTYPE_V8QI:
33575 case V8SF_FTYPE_V8SF:
33576 case V8SF_FTYPE_V8SI:
33577 case V8SF_FTYPE_V4SF:
33578 case V8SF_FTYPE_V8HI:
33579 case V4SI_FTYPE_V4SI:
33580 case V4SI_FTYPE_V16QI:
33581 case V4SI_FTYPE_V4SF:
33582 case V4SI_FTYPE_V8SI:
33583 case V4SI_FTYPE_V8HI:
33584 case V4SI_FTYPE_V4DF:
33585 case V4SI_FTYPE_V2DF:
33586 case V4HI_FTYPE_V4HI:
33587 case V4DF_FTYPE_V4DF:
33588 case V4DF_FTYPE_V4SI:
33589 case V4DF_FTYPE_V4SF:
33590 case V4DF_FTYPE_V2DF:
33591 case V4SF_FTYPE_V4SF:
33592 case V4SF_FTYPE_V4SI:
33593 case V4SF_FTYPE_V8SF:
33594 case V4SF_FTYPE_V4DF:
33595 case V4SF_FTYPE_V8HI:
33596 case V4SF_FTYPE_V2DF:
33597 case V2DI_FTYPE_V2DI:
33598 case V2DI_FTYPE_V16QI:
33599 case V2DI_FTYPE_V8HI:
33600 case V2DI_FTYPE_V4SI:
33601 case V2DF_FTYPE_V2DF:
33602 case V2DF_FTYPE_V4SI:
33603 case V2DF_FTYPE_V4DF:
33604 case V2DF_FTYPE_V4SF:
33605 case V2DF_FTYPE_V2SI:
33606 case V2SI_FTYPE_V2SI:
33607 case V2SI_FTYPE_V4SF:
33608 case V2SI_FTYPE_V2SF:
33609 case V2SI_FTYPE_V2DF:
33610 case V2SF_FTYPE_V2SF:
33611 case V2SF_FTYPE_V2SI:
33612 case V32QI_FTYPE_V32QI:
33613 case V32QI_FTYPE_V16QI:
33614 case V16HI_FTYPE_V16HI:
33615 case V16HI_FTYPE_V8HI:
33616 case V8SI_FTYPE_V8SI:
33617 case V16HI_FTYPE_V16QI:
33618 case V8SI_FTYPE_V16QI:
33619 case V4DI_FTYPE_V16QI:
33620 case V8SI_FTYPE_V8HI:
33621 case V4DI_FTYPE_V8HI:
33622 case V4DI_FTYPE_V4SI:
33623 case V4DI_FTYPE_V2DI:
33624 case HI_FTYPE_HI:
33625 case UINT_FTYPE_V2DF:
33626 case UINT_FTYPE_V4SF:
33627 case UINT64_FTYPE_V2DF:
33628 case UINT64_FTYPE_V4SF:
33629 case V16QI_FTYPE_V8DI:
33630 case V16HI_FTYPE_V16SI:
33631 case V16SI_FTYPE_HI:
33632 case V16SI_FTYPE_V16SI:
33633 case V16SI_FTYPE_INT:
33634 case V16SF_FTYPE_FLOAT:
33635 case V16SF_FTYPE_V4SF:
33636 case V16SF_FTYPE_V16SF:
33637 case V8HI_FTYPE_V8DI:
33638 case V8UHI_FTYPE_V8UHI:
33639 case V8SI_FTYPE_V8DI:
33640 case V8USI_FTYPE_V8USI:
33641 case V8SF_FTYPE_V8DF:
33642 case V8DI_FTYPE_QI:
33643 case V8DI_FTYPE_INT64:
33644 case V8DI_FTYPE_V4DI:
33645 case V8DI_FTYPE_V8DI:
33646 case V8DF_FTYPE_DOUBLE:
33647 case V8DF_FTYPE_V4DF:
33648 case V8DF_FTYPE_V8DF:
33649 case V8DF_FTYPE_V8SI:
33650 nargs = 1;
33651 break;
33652 case V4SF_FTYPE_V4SF_VEC_MERGE:
33653 case V2DF_FTYPE_V2DF_VEC_MERGE:
33654 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
33655 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
33656 case V16QI_FTYPE_V16QI_V16QI:
33657 case V16QI_FTYPE_V8HI_V8HI:
33658 case V16SI_FTYPE_V16SI_V16SI:
33659 case V16SF_FTYPE_V16SF_V16SF:
33660 case V16SF_FTYPE_V16SF_V16SI:
33661 case V8QI_FTYPE_V8QI_V8QI:
33662 case V8QI_FTYPE_V4HI_V4HI:
33663 case V8HI_FTYPE_V8HI_V8HI:
33664 case V8HI_FTYPE_V16QI_V16QI:
33665 case V8HI_FTYPE_V4SI_V4SI:
33666 case V8SF_FTYPE_V8SF_V8SF:
33667 case V8SF_FTYPE_V8SF_V8SI:
33668 case V8DI_FTYPE_V8DI_V8DI:
33669 case V8DF_FTYPE_V8DF_V8DF:
33670 case V8DF_FTYPE_V8DF_V8DI:
33671 case V4SI_FTYPE_V4SI_V4SI:
33672 case V4SI_FTYPE_V8HI_V8HI:
33673 case V4SI_FTYPE_V4SF_V4SF:
33674 case V4SI_FTYPE_V2DF_V2DF:
33675 case V4HI_FTYPE_V4HI_V4HI:
33676 case V4HI_FTYPE_V8QI_V8QI:
33677 case V4HI_FTYPE_V2SI_V2SI:
33678 case V4DF_FTYPE_V4DF_V4DF:
33679 case V4DF_FTYPE_V4DF_V4DI:
33680 case V4SF_FTYPE_V4SF_V4SF:
33681 case V4SF_FTYPE_V4SF_V4SI:
33682 case V4SF_FTYPE_V4SF_V2SI:
33683 case V4SF_FTYPE_V4SF_V2DF:
33684 case V4SF_FTYPE_V4SF_UINT:
33685 case V4SF_FTYPE_V4SF_UINT64:
33686 case V4SF_FTYPE_V4SF_DI:
33687 case V4SF_FTYPE_V4SF_SI:
33688 case V2DI_FTYPE_V2DI_V2DI:
33689 case V2DI_FTYPE_V16QI_V16QI:
33690 case V2DI_FTYPE_V4SI_V4SI:
33691 case V2UDI_FTYPE_V4USI_V4USI:
33692 case V2DI_FTYPE_V2DI_V16QI:
33693 case V2DI_FTYPE_V2DF_V2DF:
33694 case V2SI_FTYPE_V2SI_V2SI:
33695 case V2SI_FTYPE_V4HI_V4HI:
33696 case V2SI_FTYPE_V2SF_V2SF:
33697 case V2DF_FTYPE_V2DF_V2DF:
33698 case V2DF_FTYPE_V2DF_V4SF:
33699 case V2DF_FTYPE_V2DF_V2DI:
33700 case V2DF_FTYPE_V2DF_DI:
33701 case V2DF_FTYPE_V2DF_SI:
33702 case V2DF_FTYPE_V2DF_UINT:
33703 case V2DF_FTYPE_V2DF_UINT64:
33704 case V2SF_FTYPE_V2SF_V2SF:
33705 case V1DI_FTYPE_V1DI_V1DI:
33706 case V1DI_FTYPE_V8QI_V8QI:
33707 case V1DI_FTYPE_V2SI_V2SI:
33708 case V32QI_FTYPE_V16HI_V16HI:
33709 case V16HI_FTYPE_V8SI_V8SI:
33710 case V32QI_FTYPE_V32QI_V32QI:
33711 case V16HI_FTYPE_V32QI_V32QI:
33712 case V16HI_FTYPE_V16HI_V16HI:
33713 case V8SI_FTYPE_V4DF_V4DF:
33714 case V8SI_FTYPE_V8SI_V8SI:
33715 case V8SI_FTYPE_V16HI_V16HI:
33716 case V4DI_FTYPE_V4DI_V4DI:
33717 case V4DI_FTYPE_V8SI_V8SI:
33718 case V4UDI_FTYPE_V8USI_V8USI:
33719 case QI_FTYPE_V8DI_V8DI:
33720 case HI_FTYPE_V16SI_V16SI:
33721 if (comparison == UNKNOWN)
33722 return ix86_expand_binop_builtin (icode, exp, target);
33723 nargs = 2;
33724 break;
33725 case V4SF_FTYPE_V4SF_V4SF_SWAP:
33726 case V2DF_FTYPE_V2DF_V2DF_SWAP:
33727 gcc_assert (comparison != UNKNOWN);
33728 nargs = 2;
33729 swap = true;
33730 break;
33731 case V16HI_FTYPE_V16HI_V8HI_COUNT:
33732 case V16HI_FTYPE_V16HI_SI_COUNT:
33733 case V8SI_FTYPE_V8SI_V4SI_COUNT:
33734 case V8SI_FTYPE_V8SI_SI_COUNT:
33735 case V4DI_FTYPE_V4DI_V2DI_COUNT:
33736 case V4DI_FTYPE_V4DI_INT_COUNT:
33737 case V8HI_FTYPE_V8HI_V8HI_COUNT:
33738 case V8HI_FTYPE_V8HI_SI_COUNT:
33739 case V4SI_FTYPE_V4SI_V4SI_COUNT:
33740 case V4SI_FTYPE_V4SI_SI_COUNT:
33741 case V4HI_FTYPE_V4HI_V4HI_COUNT:
33742 case V4HI_FTYPE_V4HI_SI_COUNT:
33743 case V2DI_FTYPE_V2DI_V2DI_COUNT:
33744 case V2DI_FTYPE_V2DI_SI_COUNT:
33745 case V2SI_FTYPE_V2SI_V2SI_COUNT:
33746 case V2SI_FTYPE_V2SI_SI_COUNT:
33747 case V1DI_FTYPE_V1DI_V1DI_COUNT:
33748 case V1DI_FTYPE_V1DI_SI_COUNT:
33749 nargs = 2;
33750 last_arg_count = true;
33751 break;
33752 case UINT64_FTYPE_UINT64_UINT64:
33753 case UINT_FTYPE_UINT_UINT:
33754 case UINT_FTYPE_UINT_USHORT:
33755 case UINT_FTYPE_UINT_UCHAR:
33756 case UINT16_FTYPE_UINT16_INT:
33757 case UINT8_FTYPE_UINT8_INT:
33758 case HI_FTYPE_HI_HI:
33759 case V16SI_FTYPE_V8DF_V8DF:
33760 nargs = 2;
33761 break;
33762 case V2DI_FTYPE_V2DI_INT_CONVERT:
33763 nargs = 2;
33764 rmode = V1TImode;
33765 nargs_constant = 1;
33766 break;
33767 case V4DI_FTYPE_V4DI_INT_CONVERT:
33768 nargs = 2;
33769 rmode = V2TImode;
33770 nargs_constant = 1;
33771 break;
33772 case V8HI_FTYPE_V8HI_INT:
33773 case V8HI_FTYPE_V8SF_INT:
33774 case V16HI_FTYPE_V16SF_INT:
33775 case V8HI_FTYPE_V4SF_INT:
33776 case V8SF_FTYPE_V8SF_INT:
33777 case V4SF_FTYPE_V16SF_INT:
33778 case V16SF_FTYPE_V16SF_INT:
33779 case V4SI_FTYPE_V4SI_INT:
33780 case V4SI_FTYPE_V8SI_INT:
33781 case V4HI_FTYPE_V4HI_INT:
33782 case V4DF_FTYPE_V4DF_INT:
33783 case V4DF_FTYPE_V8DF_INT:
33784 case V4SF_FTYPE_V4SF_INT:
33785 case V4SF_FTYPE_V8SF_INT:
33786 case V2DI_FTYPE_V2DI_INT:
33787 case V2DF_FTYPE_V2DF_INT:
33788 case V2DF_FTYPE_V4DF_INT:
33789 case V16HI_FTYPE_V16HI_INT:
33790 case V8SI_FTYPE_V8SI_INT:
33791 case V16SI_FTYPE_V16SI_INT:
33792 case V4SI_FTYPE_V16SI_INT:
33793 case V4DI_FTYPE_V4DI_INT:
33794 case V2DI_FTYPE_V4DI_INT:
33795 case V4DI_FTYPE_V8DI_INT:
33796 case HI_FTYPE_HI_INT:
33797 nargs = 2;
33798 nargs_constant = 1;
33799 break;
33800 case V16QI_FTYPE_V16QI_V16QI_V16QI:
33801 case V8SF_FTYPE_V8SF_V8SF_V8SF:
33802 case V4DF_FTYPE_V4DF_V4DF_V4DF:
33803 case V4SF_FTYPE_V4SF_V4SF_V4SF:
33804 case V2DF_FTYPE_V2DF_V2DF_V2DF:
33805 case V32QI_FTYPE_V32QI_V32QI_V32QI:
33806 case HI_FTYPE_V16SI_V16SI_HI:
33807 case QI_FTYPE_V8DI_V8DI_QI:
33808 case V16HI_FTYPE_V16SI_V16HI_HI:
33809 case V16QI_FTYPE_V16SI_V16QI_HI:
33810 case V16QI_FTYPE_V8DI_V16QI_QI:
33811 case V16SF_FTYPE_V16SF_V16SF_HI:
33812 case V16SF_FTYPE_V16SF_V16SF_V16SF:
33813 case V16SF_FTYPE_V16SF_V16SI_V16SF:
33814 case V16SF_FTYPE_V16SI_V16SF_HI:
33815 case V16SF_FTYPE_V16SI_V16SF_V16SF:
33816 case V16SF_FTYPE_V4SF_V16SF_HI:
33817 case V16SI_FTYPE_SI_V16SI_HI:
33818 case V16SI_FTYPE_V16HI_V16SI_HI:
33819 case V16SI_FTYPE_V16QI_V16SI_HI:
33820 case V16SI_FTYPE_V16SF_V16SI_HI:
33821 case V16SI_FTYPE_V16SI_V16SI_HI:
33822 case V16SI_FTYPE_V16SI_V16SI_V16SI:
33823 case V16SI_FTYPE_V4SI_V16SI_HI:
33824 case V2DI_FTYPE_V2DI_V2DI_V2DI:
33825 case V4DI_FTYPE_V4DI_V4DI_V4DI:
33826 case V8DF_FTYPE_V2DF_V8DF_QI:
33827 case V8DF_FTYPE_V4DF_V8DF_QI:
33828 case V8DF_FTYPE_V8DF_V8DF_QI:
33829 case V8DF_FTYPE_V8DF_V8DF_V8DF:
33830 case V8DF_FTYPE_V8DF_V8DI_V8DF:
33831 case V8DF_FTYPE_V8DI_V8DF_V8DF:
33832 case V8DF_FTYPE_V8SF_V8DF_QI:
33833 case V8DF_FTYPE_V8SI_V8DF_QI:
33834 case V8DI_FTYPE_DI_V8DI_QI:
33835 case V8DI_FTYPE_V16QI_V8DI_QI:
33836 case V8DI_FTYPE_V2DI_V8DI_QI:
33837 case V8DI_FTYPE_V4DI_V8DI_QI:
33838 case V8DI_FTYPE_V8DI_V8DI_QI:
33839 case V8DI_FTYPE_V8DI_V8DI_V8DI:
33840 case V8DI_FTYPE_V8HI_V8DI_QI:
33841 case V8DI_FTYPE_V8SI_V8DI_QI:
33842 case V8HI_FTYPE_V8DI_V8HI_QI:
33843 case V8SF_FTYPE_V8DF_V8SF_QI:
33844 case V8SI_FTYPE_V8DF_V8SI_QI:
33845 case V8SI_FTYPE_V8DI_V8SI_QI:
33846 case V4SI_FTYPE_V4SI_V4SI_V4SI:
33847 nargs = 3;
33848 break;
33849 case V32QI_FTYPE_V32QI_V32QI_INT:
33850 case V16HI_FTYPE_V16HI_V16HI_INT:
33851 case V16QI_FTYPE_V16QI_V16QI_INT:
33852 case V4DI_FTYPE_V4DI_V4DI_INT:
33853 case V8HI_FTYPE_V8HI_V8HI_INT:
33854 case V8SI_FTYPE_V8SI_V8SI_INT:
33855 case V8SI_FTYPE_V8SI_V4SI_INT:
33856 case V8SF_FTYPE_V8SF_V8SF_INT:
33857 case V8SF_FTYPE_V8SF_V4SF_INT:
33858 case V4SI_FTYPE_V4SI_V4SI_INT:
33859 case V4DF_FTYPE_V4DF_V4DF_INT:
33860 case V16SF_FTYPE_V16SF_V16SF_INT:
33861 case V16SF_FTYPE_V16SF_V4SF_INT:
33862 case V16SI_FTYPE_V16SI_V4SI_INT:
33863 case V4DF_FTYPE_V4DF_V2DF_INT:
33864 case V4SF_FTYPE_V4SF_V4SF_INT:
33865 case V2DI_FTYPE_V2DI_V2DI_INT:
33866 case V4DI_FTYPE_V4DI_V2DI_INT:
33867 case V2DF_FTYPE_V2DF_V2DF_INT:
33868 case QI_FTYPE_V8DI_V8DI_INT:
33869 case QI_FTYPE_V8DF_V8DF_INT:
33870 case QI_FTYPE_V2DF_V2DF_INT:
33871 case QI_FTYPE_V4SF_V4SF_INT:
33872 case HI_FTYPE_V16SI_V16SI_INT:
33873 case HI_FTYPE_V16SF_V16SF_INT:
33874 nargs = 3;
33875 nargs_constant = 1;
33876 break;
33877 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
33878 nargs = 3;
33879 rmode = V4DImode;
33880 nargs_constant = 1;
33881 break;
33882 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
33883 nargs = 3;
33884 rmode = V2DImode;
33885 nargs_constant = 1;
33886 break;
33887 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
33888 nargs = 3;
33889 rmode = DImode;
33890 nargs_constant = 1;
33891 break;
33892 case V2DI_FTYPE_V2DI_UINT_UINT:
33893 nargs = 3;
33894 nargs_constant = 2;
33895 break;
33896 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI:
33897 case V16SF_FTYPE_V16SF_V16SI_V16SF_HI:
33898 case V16SF_FTYPE_V16SI_V16SF_V16SF_HI:
33899 case V16SI_FTYPE_V16SI_V16SI_V16SI_HI:
33900 case V16SI_FTYPE_V16SI_V4SI_V16SI_HI:
33901 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI:
33902 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI:
33903 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI:
33904 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI:
33905 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI:
33906 case V8DF_FTYPE_V8DF_V8DI_V8DF_QI:
33907 case V8DF_FTYPE_V8DI_V8DF_V8DF_QI:
33908 case V8DI_FTYPE_V16SI_V16SI_V8DI_QI:
33909 case V8DI_FTYPE_V8DI_SI_V8DI_V8DI:
33910 case V8DI_FTYPE_V8DI_V2DI_V8DI_QI:
33911 case V8DI_FTYPE_V8DI_V8DI_V8DI_QI:
33912 nargs = 4;
33913 break;
33914 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
33915 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
33916 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
33917 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
33918 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT:
33919 nargs = 4;
33920 nargs_constant = 1;
33921 break;
33922 case QI_FTYPE_V2DF_V2DF_INT_QI:
33923 case QI_FTYPE_V4SF_V4SF_INT_QI:
33924 nargs = 4;
33925 mask_pos = 1;
33926 nargs_constant = 1;
33927 break;
33928 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
33929 nargs = 4;
33930 nargs_constant = 2;
33931 break;
33932 case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED:
33933 case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG:
33934 nargs = 4;
33935 break;
33936 case QI_FTYPE_V8DI_V8DI_INT_QI:
33937 case HI_FTYPE_V16SI_V16SI_INT_HI:
33938 case QI_FTYPE_V8DF_V8DF_INT_QI:
33939 case HI_FTYPE_V16SF_V16SF_INT_HI:
33940 mask_pos = 1;
33941 nargs = 4;
33942 nargs_constant = 1;
33943 break;
33944 case V8DF_FTYPE_V8DF_INT_V8DF_QI:
33945 case V16SF_FTYPE_V16SF_INT_V16SF_HI:
33946 case V16HI_FTYPE_V16SF_INT_V16HI_HI:
33947 case V16SI_FTYPE_V16SI_INT_V16SI_HI:
33948 case V4SI_FTYPE_V16SI_INT_V4SI_QI:
33949 case V4DI_FTYPE_V8DI_INT_V4DI_QI:
33950 case V4DF_FTYPE_V8DF_INT_V4DF_QI:
33951 case V4SF_FTYPE_V16SF_INT_V4SF_QI:
33952 case V8DI_FTYPE_V8DI_INT_V8DI_QI:
33953 nargs = 4;
33954 mask_pos = 2;
33955 nargs_constant = 1;
33956 break;
33957 case V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI:
33958 case V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI:
33959 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI:
33960 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI:
33961 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI:
33962 case V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI:
33963 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI:
33964 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI:
33965 case V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI:
33966 case V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI:
33967 nargs = 5;
33968 mask_pos = 2;
33969 nargs_constant = 1;
33970 break;
33971 case V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI:
33972 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI:
33973 case V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI:
33974 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI:
33975 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI:
33976 nargs = 5;
33977 mask_pos = 1;
33978 nargs_constant = 1;
33979 break;
33980
33981 default:
33982 gcc_unreachable ();
33983 }
33984
33985 gcc_assert (nargs <= ARRAY_SIZE (args));
33986
33987 if (comparison != UNKNOWN)
33988 {
33989 gcc_assert (nargs == 2);
33990 return ix86_expand_sse_compare (d, exp, target, swap);
33991 }
33992
33993 if (rmode == VOIDmode || rmode == tmode)
33994 {
33995 if (optimize
33996 || target == 0
33997 || GET_MODE (target) != tmode
33998 || !insn_p->operand[0].predicate (target, tmode))
33999 target = gen_reg_rtx (tmode);
34000 real_target = target;
34001 }
34002 else
34003 {
34004 real_target = gen_reg_rtx (tmode);
34005 target = simplify_gen_subreg (rmode, real_target, tmode, 0);
34006 }
34007
34008 for (i = 0; i < nargs; i++)
34009 {
34010 tree arg = CALL_EXPR_ARG (exp, i);
34011 rtx op = expand_normal (arg);
34012 enum machine_mode mode = insn_p->operand[i + 1].mode;
34013 bool match = insn_p->operand[i + 1].predicate (op, mode);
34014
34015 if (last_arg_count && (i + 1) == nargs)
34016 {
34017 /* SIMD shift insns take either an 8-bit immediate or a
34018 register as count. But builtin functions take int as
34019 count. If count doesn't match, we put it in a register. */
34020 if (!match)
34021 {
34022 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
34023 if (!insn_p->operand[i + 1].predicate (op, mode))
34024 op = copy_to_reg (op);
34025 }
34026 }
34027 else if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
34028 (!mask_pos && (nargs - i) <= nargs_constant))
34029 {
34030 if (!match)
34031 switch (icode)
34032 {
34033 case CODE_FOR_avx2_inserti128:
34034 case CODE_FOR_avx2_extracti128:
34035 error ("the last argument must be an 1-bit immediate");
34036 return const0_rtx;
34037
34038 case CODE_FOR_avx512f_cmpv8di3_mask:
34039 case CODE_FOR_avx512f_cmpv16si3_mask:
34040 case CODE_FOR_avx512f_ucmpv8di3_mask:
34041 case CODE_FOR_avx512f_ucmpv16si3_mask:
34042 error ("the last argument must be a 3-bit immediate");
34043 return const0_rtx;
34044
34045 case CODE_FOR_sse4_1_roundsd:
34046 case CODE_FOR_sse4_1_roundss:
34047
34048 case CODE_FOR_sse4_1_roundpd:
34049 case CODE_FOR_sse4_1_roundps:
34050 case CODE_FOR_avx_roundpd256:
34051 case CODE_FOR_avx_roundps256:
34052
34053 case CODE_FOR_sse4_1_roundpd_vec_pack_sfix:
34054 case CODE_FOR_sse4_1_roundps_sfix:
34055 case CODE_FOR_avx_roundpd_vec_pack_sfix256:
34056 case CODE_FOR_avx_roundps_sfix256:
34057
34058 case CODE_FOR_sse4_1_blendps:
34059 case CODE_FOR_avx_blendpd256:
34060 case CODE_FOR_avx_vpermilv4df:
34061 case CODE_FOR_avx512f_getmantv8df_mask:
34062 case CODE_FOR_avx512f_getmantv16sf_mask:
34063 error ("the last argument must be a 4-bit immediate");
34064 return const0_rtx;
34065
34066 case CODE_FOR_sha1rnds4:
34067 case CODE_FOR_sse4_1_blendpd:
34068 case CODE_FOR_avx_vpermilv2df:
34069 case CODE_FOR_xop_vpermil2v2df3:
34070 case CODE_FOR_xop_vpermil2v4sf3:
34071 case CODE_FOR_xop_vpermil2v4df3:
34072 case CODE_FOR_xop_vpermil2v8sf3:
34073 case CODE_FOR_avx512f_vinsertf32x4_mask:
34074 case CODE_FOR_avx512f_vinserti32x4_mask:
34075 case CODE_FOR_avx512f_vextractf32x4_mask:
34076 case CODE_FOR_avx512f_vextracti32x4_mask:
34077 error ("the last argument must be a 2-bit immediate");
34078 return const0_rtx;
34079
34080 case CODE_FOR_avx_vextractf128v4df:
34081 case CODE_FOR_avx_vextractf128v8sf:
34082 case CODE_FOR_avx_vextractf128v8si:
34083 case CODE_FOR_avx_vinsertf128v4df:
34084 case CODE_FOR_avx_vinsertf128v8sf:
34085 case CODE_FOR_avx_vinsertf128v8si:
34086 case CODE_FOR_avx512f_vinsertf64x4_mask:
34087 case CODE_FOR_avx512f_vinserti64x4_mask:
34088 case CODE_FOR_avx512f_vextractf64x4_mask:
34089 case CODE_FOR_avx512f_vextracti64x4_mask:
34090 error ("the last argument must be a 1-bit immediate");
34091 return const0_rtx;
34092
34093 case CODE_FOR_avx_vmcmpv2df3:
34094 case CODE_FOR_avx_vmcmpv4sf3:
34095 case CODE_FOR_avx_cmpv2df3:
34096 case CODE_FOR_avx_cmpv4sf3:
34097 case CODE_FOR_avx_cmpv4df3:
34098 case CODE_FOR_avx_cmpv8sf3:
34099 case CODE_FOR_avx512f_cmpv8df3_mask:
34100 case CODE_FOR_avx512f_cmpv16sf3_mask:
34101 case CODE_FOR_avx512f_vmcmpv2df3_mask:
34102 case CODE_FOR_avx512f_vmcmpv4sf3_mask:
34103 error ("the last argument must be a 5-bit immediate");
34104 return const0_rtx;
34105
34106 default:
34107 switch (nargs_constant)
34108 {
34109 case 2:
34110 			if ((mask_pos && (nargs - i - mask_pos) == nargs_constant)
34111 			    || (!mask_pos && (nargs - i) == nargs_constant))
34112 {
34113 error ("the next to last argument must be an 8-bit immediate");
34114 break;
34115 }
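			/* FALLTHRU */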
34116 case 1:
34117 error ("the last argument must be an 8-bit immediate");
34118 break;
34119 default:
34120 gcc_unreachable ();
34121 }
34122 return const0_rtx;
34123 }
34124 }
34125 else
34126 {
34127 if (VECTOR_MODE_P (mode))
34128 op = safe_vector_operand (op, mode);
34129
34130 /* If we aren't optimizing, only allow one memory operand to
34131 be generated. */
34132 if (memory_operand (op, mode))
34133 num_memory++;
34134
34135 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
34136 {
34137 if (optimize || !match || num_memory > 1)
34138 op = copy_to_mode_reg (mode, op);
34139 }
34140 else
34141 {
34142 op = copy_to_reg (op);
34143 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
34144 }
34145 }
34146
34147 args[i].op = op;
34148 args[i].mode = mode;
34149 }
34150
34151 switch (nargs)
34152 {
34153 case 1:
34154 pat = GEN_FCN (icode) (real_target, args[0].op);
34155 break;
34156 case 2:
34157 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
34158 break;
34159 case 3:
34160 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34161 args[2].op);
34162 break;
34163 case 4:
34164 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34165 args[2].op, args[3].op);
34166 break;
34167 case 5:
34168 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34169 			     args[2].op, args[3].op, args[4].op);
      break;
34170 case 6:
34171 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34172 args[2].op, args[3].op, args[4].op,
34173 args[5].op);
34174 break;
34175 default:
34176 gcc_unreachable ();
34177 }
34178
34179 if (! pat)
34180 return 0;
34181
34182 emit_insn (pat);
34183 return target;
34184 }
34185
34186 /* Transform a pattern of the following layout:
34187 	 (parallel [
34188 	   (set (A B))
34189 	   (unspec [C] UNSPEC_EMBEDDED_ROUNDING)
34190 	 ])
34191    into:
34192 	 (set (A B))
34193 
34194    Or:
34195 	 (parallel [ A B
34196 	 ...
34197 	 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)
34198 	 ...
34199 	 ])
34200    into:
34201 	 (parallel [ A B ... ])  */
34202
34203 static rtx
34204 ix86_erase_embedded_rounding (rtx pat)
34205 {
34206 if (GET_CODE (pat) == INSN)
34207 pat = PATTERN (pat);
34208
34209 gcc_assert (GET_CODE (pat) == PARALLEL);
34210
34211 if (XVECLEN (pat, 0) == 2)
34212 {
34213 rtx p0 = XVECEXP (pat, 0, 0);
34214 rtx p1 = XVECEXP (pat, 0, 1);
34215
34216 gcc_assert (GET_CODE (p0) == SET
34217 && GET_CODE (p1) == UNSPEC
34218 && XINT (p1, 1) == UNSPEC_EMBEDDED_ROUNDING);
34219
34220 return p0;
34221 }
34222 else
34223 {
34224 rtx *res = XALLOCAVEC (rtx, XVECLEN (pat, 0));
34225 int i = 0;
34226 int j = 0;
34227
34228 for (; i < XVECLEN (pat, 0); ++i)
34229 {
34230 rtx elem = XVECEXP (pat, 0, i);
34231 if (GET_CODE (elem) != UNSPEC
34232 || XINT (elem, 1) != UNSPEC_EMBEDDED_ROUNDING)
34233 res [j++] = elem;
34234 }
34235
34236       /* No more than 1 occurrence was removed.  */
34237 gcc_assert (j >= XVECLEN (pat, 0) - 1);
34238
34239 return gen_rtx_PARALLEL (GET_MODE (pat), gen_rtvec_v (j, res));
34240 }
34241 }
34242
34243 /* Subroutine of ix86_expand_round_builtin to take care of comi insns
34244 with rounding. */
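/* For illustration only (assuming the usual avx512f intrinsic wrappers):
   a call such as
     int r = _mm_comi_round_ss (a, b, _CMP_GE_OQ, _MM_FROUND_NO_EXC);
   reaches this routine with arg0/arg1 the two vectors, arg2 the
   comparison predicate (0..31, see avxintrin.h) and arg3 the
   SAE/rounding selector.  */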
34245 static rtx
34246 ix86_expand_sse_comi_round (const struct builtin_description *d,
34247 tree exp, rtx target)
34248 {
34249 rtx pat, set_dst;
34250 tree arg0 = CALL_EXPR_ARG (exp, 0);
34251 tree arg1 = CALL_EXPR_ARG (exp, 1);
34252 tree arg2 = CALL_EXPR_ARG (exp, 2);
34253 tree arg3 = CALL_EXPR_ARG (exp, 3);
34254 rtx op0 = expand_normal (arg0);
34255 rtx op1 = expand_normal (arg1);
34256 rtx op2 = expand_normal (arg2);
34257 rtx op3 = expand_normal (arg3);
34258 enum insn_code icode = d->icode;
34259 const struct insn_data_d *insn_p = &insn_data[icode];
34260 enum machine_mode mode0 = insn_p->operand[0].mode;
34261 enum machine_mode mode1 = insn_p->operand[1].mode;
34262 enum rtx_code comparison = UNEQ;
34263 bool need_ucomi = false;
34264
34265 /* See avxintrin.h for values. */
34266 enum rtx_code comi_comparisons[32] =
34267 {
34268 UNEQ, GT, GE, UNORDERED, LTGT, UNLE, UNLT, ORDERED, UNEQ, UNLT,
34269 UNLE, LT, LTGT, GE, GT, LT, UNEQ, GT, GE, UNORDERED, LTGT, UNLE,
34270 UNLT, ORDERED, UNEQ, UNLT, UNLE, LT, LTGT, GE, GT, LT
34271 };
34272 bool need_ucomi_values[32] =
34273 {
34274 true, false, false, true, true, false, false, true,
34275 true, false, false, true, true, false, false, true,
34276 false, true, true, false, false, true, true, false,
34277 false, true, true, false, false, true, true, false
34278 };
34279
34280 if (!CONST_INT_P (op2))
34281 {
34282       error ("the third argument must be a comparison constant");
34283 return const0_rtx;
34284 }
34285 if (INTVAL (op2) < 0 || INTVAL (op2) >= 32)
34286 {
34287       error ("incorrect comparison mode");
34288 return const0_rtx;
34289 }
34290
34291 if (!insn_p->operand[2].predicate (op3, SImode))
34292 {
34293 error ("incorrect rounding operand");
34294 return const0_rtx;
34295 }
34296
34297 comparison = comi_comparisons[INTVAL (op2)];
34298 need_ucomi = need_ucomi_values[INTVAL (op2)];
34299
34300 if (VECTOR_MODE_P (mode0))
34301 op0 = safe_vector_operand (op0, mode0);
34302 if (VECTOR_MODE_P (mode1))
34303 op1 = safe_vector_operand (op1, mode1);
34304
34305 target = gen_reg_rtx (SImode);
34306 emit_move_insn (target, const0_rtx);
34307 target = gen_rtx_SUBREG (QImode, target, 0);
34308
34309 if ((optimize && !register_operand (op0, mode0))
34310 || !insn_p->operand[0].predicate (op0, mode0))
34311 op0 = copy_to_mode_reg (mode0, op0);
34312 if ((optimize && !register_operand (op1, mode1))
34313 || !insn_p->operand[1].predicate (op1, mode1))
34314 op1 = copy_to_mode_reg (mode1, op1);
34315
34316 if (need_ucomi)
34317 icode = icode == CODE_FOR_sse_comi_round
34318 ? CODE_FOR_sse_ucomi_round
34319 : CODE_FOR_sse2_ucomi_round;
34320
34321 pat = GEN_FCN (icode) (op0, op1, op3);
34322 if (! pat)
34323 return 0;
34324
34325 /* Rounding operand can be either NO_ROUND or ROUND_SAE at this point. */
34326 if (INTVAL (op3) == NO_ROUND)
34327 {
34328 pat = ix86_erase_embedded_rounding (pat);
34329 if (! pat)
34330 return 0;
34331
34332 set_dst = SET_DEST (pat);
34333 }
34334 else
34335 {
34336 gcc_assert (GET_CODE (XVECEXP (pat, 0, 0)) == SET);
34337 set_dst = SET_DEST (XVECEXP (pat, 0, 0));
34338 }
34339
34340 emit_insn (pat);
34341 emit_insn (gen_rtx_SET (VOIDmode,
34342 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
34343 gen_rtx_fmt_ee (comparison, QImode,
34344 set_dst,
34345 const0_rtx)));
34346
34347 return SUBREG_REG (target);
34348 }
34349
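/* Subroutine of ix86_expand_builtin to take care of insns with embedded
   rounding: the last argument of such a builtin selects the rounding
   mode (or SAE); when it is NO_ROUND, the embedded-rounding unspec is
   erased again via ix86_erase_embedded_rounding.  */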
34350 static rtx
34351 ix86_expand_round_builtin (const struct builtin_description *d,
34352 tree exp, rtx target)
34353 {
34354 rtx pat;
34355 unsigned int i, nargs;
34356 struct
34357 {
34358 rtx op;
34359 enum machine_mode mode;
34360 } args[6];
34361 enum insn_code icode = d->icode;
34362 const struct insn_data_d *insn_p = &insn_data[icode];
34363 enum machine_mode tmode = insn_p->operand[0].mode;
34364 unsigned int nargs_constant = 0;
34365 unsigned int redundant_embed_rnd = 0;
34366
34367 switch ((enum ix86_builtin_func_type) d->flag)
34368 {
34369 case UINT64_FTYPE_V2DF_INT:
34370 case UINT64_FTYPE_V4SF_INT:
34371 case UINT_FTYPE_V2DF_INT:
34372 case UINT_FTYPE_V4SF_INT:
34373 case INT64_FTYPE_V2DF_INT:
34374 case INT64_FTYPE_V4SF_INT:
34375 case INT_FTYPE_V2DF_INT:
34376 case INT_FTYPE_V4SF_INT:
34377 nargs = 2;
34378 break;
34379 case V4SF_FTYPE_V4SF_UINT_INT:
34380 case V4SF_FTYPE_V4SF_UINT64_INT:
34381 case V2DF_FTYPE_V2DF_UINT64_INT:
34382 case V4SF_FTYPE_V4SF_INT_INT:
34383 case V4SF_FTYPE_V4SF_INT64_INT:
34384 case V2DF_FTYPE_V2DF_INT64_INT:
34385 case V4SF_FTYPE_V4SF_V4SF_INT:
34386 case V2DF_FTYPE_V2DF_V2DF_INT:
34387 case V4SF_FTYPE_V4SF_V2DF_INT:
34388 case V2DF_FTYPE_V2DF_V4SF_INT:
34389 nargs = 3;
34390 break;
34391 case V8SF_FTYPE_V8DF_V8SF_QI_INT:
34392 case V8DF_FTYPE_V8DF_V8DF_QI_INT:
34393 case V8SI_FTYPE_V8DF_V8SI_QI_INT:
34394 case V16SF_FTYPE_V16SF_V16SF_HI_INT:
34395 case V16SF_FTYPE_V16SI_V16SF_HI_INT:
34396 case V16SI_FTYPE_V16SF_V16SI_HI_INT:
34397 case V8DF_FTYPE_V8SF_V8DF_QI_INT:
34398 case V16SF_FTYPE_V16HI_V16SF_HI_INT:
34399 case V2DF_FTYPE_V2DF_V2DF_V2DF_INT:
34400 case V4SF_FTYPE_V4SF_V4SF_V4SF_INT:
34401 nargs = 4;
34402 break;
34403 case V4SF_FTYPE_V4SF_V4SF_INT_INT:
34404 case V2DF_FTYPE_V2DF_V2DF_INT_INT:
34405 nargs_constant = 2;
34406 nargs = 4;
34407 break;
34408 case INT_FTYPE_V4SF_V4SF_INT_INT:
34409 case INT_FTYPE_V2DF_V2DF_INT_INT:
34410 return ix86_expand_sse_comi_round (d, exp, target);
34411 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT:
34412 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT:
34413 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT:
34414 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI_INT:
34415 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT:
34416 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI_INT:
34417 nargs = 5;
34418 break;
34419 case V16SF_FTYPE_V16SF_INT_V16SF_HI_INT:
34420 case V8DF_FTYPE_V8DF_INT_V8DF_QI_INT:
34421 nargs_constant = 4;
34422 nargs = 5;
34423 break;
34424 case QI_FTYPE_V8DF_V8DF_INT_QI_INT:
34425 case QI_FTYPE_V2DF_V2DF_INT_QI_INT:
34426 case HI_FTYPE_V16SF_V16SF_INT_HI_INT:
34427 case QI_FTYPE_V4SF_V4SF_INT_QI_INT:
34428 nargs_constant = 3;
34429 nargs = 5;
34430 break;
34431 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI_INT:
34432 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI_INT:
34433 nargs = 6;
34434 nargs_constant = 4;
34435 break;
34436 case V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT:
34437 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT:
34438 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT:
34439 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT:
34440 nargs = 6;
34441 nargs_constant = 3;
34442 break;
34443 default:
34444 gcc_unreachable ();
34445 }
34446 gcc_assert (nargs <= ARRAY_SIZE (args));
34447
34448 if (optimize
34449 || target == 0
34450 || GET_MODE (target) != tmode
34451 || !insn_p->operand[0].predicate (target, tmode))
34452 target = gen_reg_rtx (tmode);
34453
34454 for (i = 0; i < nargs; i++)
34455 {
34456 tree arg = CALL_EXPR_ARG (exp, i);
34457 rtx op = expand_normal (arg);
34458 enum machine_mode mode = insn_p->operand[i + 1].mode;
34459 bool match = insn_p->operand[i + 1].predicate (op, mode);
34460
34461 if (i == nargs - nargs_constant)
34462 {
34463 if (!match)
34464 {
34465 switch (icode)
34466 {
34467 case CODE_FOR_avx512f_getmantv8df_mask_round:
34468 case CODE_FOR_avx512f_getmantv16sf_mask_round:
34469 case CODE_FOR_avx512f_getmantv2df_round:
34470 case CODE_FOR_avx512f_getmantv4sf_round:
34471 error ("the immediate argument must be a 4-bit immediate");
34472 return const0_rtx;
34473 case CODE_FOR_avx512f_cmpv8df3_mask_round:
34474 case CODE_FOR_avx512f_cmpv16sf3_mask_round:
34475 case CODE_FOR_avx512f_vmcmpv2df3_mask_round:
34476 case CODE_FOR_avx512f_vmcmpv4sf3_mask_round:
34477 error ("the immediate argument must be a 5-bit immediate");
34478 return const0_rtx;
34479 default:
34480 error ("the immediate argument must be an 8-bit immediate");
34481 return const0_rtx;
34482 }
34483 }
34484 }
34485       else if (i == nargs - 1)
34486 {
34487 if (!insn_p->operand[nargs].predicate (op, SImode))
34488 {
34489 error ("incorrect rounding operand");
34490 return const0_rtx;
34491 }
34492
34493 	  /* If there is no rounding, use the normal version of the pattern.  */
34494 if (INTVAL (op) == NO_ROUND)
34495 redundant_embed_rnd = 1;
34496 }
34497 else
34498 {
34499 if (VECTOR_MODE_P (mode))
34500 op = safe_vector_operand (op, mode);
34501
34502 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
34503 {
34504 if (optimize || !match)
34505 op = copy_to_mode_reg (mode, op);
34506 }
34507 else
34508 {
34509 op = copy_to_reg (op);
34510 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
34511 }
34512 }
34513
34514 args[i].op = op;
34515 args[i].mode = mode;
34516 }
34517
34518 switch (nargs)
34519 {
34520 case 1:
34521 pat = GEN_FCN (icode) (target, args[0].op);
34522 break;
34523 case 2:
34524 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
34525 break;
34526 case 3:
34527 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34528 args[2].op);
34529 break;
34530 case 4:
34531 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34532 args[2].op, args[3].op);
34533 break;
34534 case 5:
34535 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34536 			     args[2].op, args[3].op, args[4].op);
      break;
34537 case 6:
34538 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34539 args[2].op, args[3].op, args[4].op,
34540 args[5].op);
34541 break;
34542 default:
34543 gcc_unreachable ();
34544 }
34545
34546 if (!pat)
34547 return 0;
34548
34549 if (redundant_embed_rnd)
34550 pat = ix86_erase_embedded_rounding (pat);
34551
34552 emit_insn (pat);
34553 return target;
34554 }
34555
34556 /* Subroutine of ix86_expand_builtin to take care of special insns
34557 with variable number of operands. */
34558
34559 static rtx
34560 ix86_expand_special_args_builtin (const struct builtin_description *d,
34561 tree exp, rtx target)
34562 {
34563 tree arg;
34564 rtx pat, op;
34565 unsigned int i, nargs, arg_adjust, memory;
34566 bool aligned_mem = false;
34567 struct
34568 {
34569 rtx op;
34570 enum machine_mode mode;
34571 } args[3];
34572 enum insn_code icode = d->icode;
34573 bool last_arg_constant = false;
34574 const struct insn_data_d *insn_p = &insn_data[icode];
34575 enum machine_mode tmode = insn_p->operand[0].mode;
34576 enum { load, store } klass;
34577
34578 switch ((enum ix86_builtin_func_type) d->flag)
34579 {
34580 case VOID_FTYPE_VOID:
34581 emit_insn (GEN_FCN (icode) (target));
34582 return 0;
34583 case VOID_FTYPE_UINT64:
34584 case VOID_FTYPE_UNSIGNED:
34585 nargs = 0;
34586 klass = store;
34587 memory = 0;
34588 break;
34589
34590 case INT_FTYPE_VOID:
34591 case USHORT_FTYPE_VOID:
34592 case UINT64_FTYPE_VOID:
34593 case UNSIGNED_FTYPE_VOID:
34594 nargs = 0;
34595 klass = load;
34596 memory = 0;
34597 break;
34598 case UINT64_FTYPE_PUNSIGNED:
34599 case V2DI_FTYPE_PV2DI:
34600 case V4DI_FTYPE_PV4DI:
34601 case V32QI_FTYPE_PCCHAR:
34602 case V16QI_FTYPE_PCCHAR:
34603 case V8SF_FTYPE_PCV4SF:
34604 case V8SF_FTYPE_PCFLOAT:
34605 case V4SF_FTYPE_PCFLOAT:
34606 case V4DF_FTYPE_PCV2DF:
34607 case V4DF_FTYPE_PCDOUBLE:
34608 case V2DF_FTYPE_PCDOUBLE:
34609 case VOID_FTYPE_PVOID:
34610 case V16SI_FTYPE_PV4SI:
34611 case V16SF_FTYPE_PV4SF:
34612 case V8DI_FTYPE_PV4DI:
34613 case V8DI_FTYPE_PV8DI:
34614 case V8DF_FTYPE_PV4DF:
34615 nargs = 1;
34616 klass = load;
34617 memory = 0;
34618 switch (icode)
34619 {
34620 case CODE_FOR_sse4_1_movntdqa:
34621 case CODE_FOR_avx2_movntdqa:
34622 case CODE_FOR_avx512f_movntdqa:
34623 aligned_mem = true;
34624 break;
34625 default:
34626 break;
34627 }
34628 break;
34629 case VOID_FTYPE_PV2SF_V4SF:
34630 case VOID_FTYPE_PV8DI_V8DI:
34631 case VOID_FTYPE_PV4DI_V4DI:
34632 case VOID_FTYPE_PV2DI_V2DI:
34633 case VOID_FTYPE_PCHAR_V32QI:
34634 case VOID_FTYPE_PCHAR_V16QI:
34635 case VOID_FTYPE_PFLOAT_V16SF:
34636 case VOID_FTYPE_PFLOAT_V8SF:
34637 case VOID_FTYPE_PFLOAT_V4SF:
34638 case VOID_FTYPE_PDOUBLE_V8DF:
34639 case VOID_FTYPE_PDOUBLE_V4DF:
34640 case VOID_FTYPE_PDOUBLE_V2DF:
34641 case VOID_FTYPE_PLONGLONG_LONGLONG:
34642 case VOID_FTYPE_PULONGLONG_ULONGLONG:
34643 case VOID_FTYPE_PINT_INT:
34644 nargs = 1;
34645 klass = store;
34646 /* Reserve memory operand for target. */
34647 memory = ARRAY_SIZE (args);
34648 switch (icode)
34649 {
34650 /* These builtins and instructions require the memory
34651 to be properly aligned. */
34652 case CODE_FOR_avx_movntv4di:
34653 case CODE_FOR_sse2_movntv2di:
34654 case CODE_FOR_avx_movntv8sf:
34655 case CODE_FOR_sse_movntv4sf:
34656 case CODE_FOR_sse4a_vmmovntv4sf:
34657 case CODE_FOR_avx_movntv4df:
34658 case CODE_FOR_sse2_movntv2df:
34659 case CODE_FOR_sse4a_vmmovntv2df:
34660 case CODE_FOR_sse2_movntidi:
34661 case CODE_FOR_sse_movntq:
34662 case CODE_FOR_sse2_movntisi:
34663 case CODE_FOR_avx512f_movntv16sf:
34664 case CODE_FOR_avx512f_movntv8df:
34665 case CODE_FOR_avx512f_movntv8di:
34666 aligned_mem = true;
34667 break;
34668 default:
34669 break;
34670 }
34671 break;
34672 case V4SF_FTYPE_V4SF_PCV2SF:
34673 case V2DF_FTYPE_V2DF_PCDOUBLE:
34674 nargs = 2;
34675 klass = load;
34676 memory = 1;
34677 break;
34678 case V8SF_FTYPE_PCV8SF_V8SI:
34679 case V4DF_FTYPE_PCV4DF_V4DI:
34680 case V4SF_FTYPE_PCV4SF_V4SI:
34681 case V2DF_FTYPE_PCV2DF_V2DI:
34682 case V8SI_FTYPE_PCV8SI_V8SI:
34683 case V4DI_FTYPE_PCV4DI_V4DI:
34684 case V4SI_FTYPE_PCV4SI_V4SI:
34685 case V2DI_FTYPE_PCV2DI_V2DI:
34686 nargs = 2;
34687 klass = load;
34688 memory = 0;
34689 break;
34690 case VOID_FTYPE_PV8DF_V8DF_QI:
34691 case VOID_FTYPE_PV16SF_V16SF_HI:
34692 case VOID_FTYPE_PV8DI_V8DI_QI:
34693 case VOID_FTYPE_PV16SI_V16SI_HI:
34694 switch (icode)
34695 {
34696 /* These builtins and instructions require the memory
34697 to be properly aligned. */
34698 case CODE_FOR_avx512f_storev16sf_mask:
34699 case CODE_FOR_avx512f_storev16si_mask:
34700 case CODE_FOR_avx512f_storev8df_mask:
34701 case CODE_FOR_avx512f_storev8di_mask:
34702 aligned_mem = true;
34703 break;
34704 default:
34705 break;
34706 }
34707 /* FALLTHRU */
34708 case VOID_FTYPE_PV8SF_V8SI_V8SF:
34709 case VOID_FTYPE_PV4DF_V4DI_V4DF:
34710 case VOID_FTYPE_PV4SF_V4SI_V4SF:
34711 case VOID_FTYPE_PV2DF_V2DI_V2DF:
34712 case VOID_FTYPE_PV8SI_V8SI_V8SI:
34713 case VOID_FTYPE_PV4DI_V4DI_V4DI:
34714 case VOID_FTYPE_PV4SI_V4SI_V4SI:
34715 case VOID_FTYPE_PV2DI_V2DI_V2DI:
34716 case VOID_FTYPE_PDOUBLE_V2DF_QI:
34717 case VOID_FTYPE_PFLOAT_V4SF_QI:
34718 case VOID_FTYPE_PV8SI_V8DI_QI:
34719 case VOID_FTYPE_PV8HI_V8DI_QI:
34720 case VOID_FTYPE_PV16HI_V16SI_HI:
34721 case VOID_FTYPE_PV16QI_V8DI_QI:
34722 case VOID_FTYPE_PV16QI_V16SI_HI:
34723 nargs = 2;
34724 klass = store;
34725 /* Reserve memory operand for target. */
34726 memory = ARRAY_SIZE (args);
34727 break;
34728 case V16SF_FTYPE_PCV16SF_V16SF_HI:
34729 case V16SI_FTYPE_PCV16SI_V16SI_HI:
34730 case V8DF_FTYPE_PCV8DF_V8DF_QI:
34731 case V8DI_FTYPE_PCV8DI_V8DI_QI:
34732 case V2DF_FTYPE_PCDOUBLE_V2DF_QI:
34733 case V4SF_FTYPE_PCFLOAT_V4SF_QI:
34734 nargs = 3;
34735 klass = load;
34736 memory = 0;
34737 switch (icode)
34738 {
34739 /* These builtins and instructions require the memory
34740 to be properly aligned. */
34741 case CODE_FOR_avx512f_loadv16sf_mask:
34742 case CODE_FOR_avx512f_loadv16si_mask:
34743 case CODE_FOR_avx512f_loadv8df_mask:
34744 case CODE_FOR_avx512f_loadv8di_mask:
34745 aligned_mem = true;
34746 break;
34747 default:
34748 break;
34749 }
34750 break;
34751 case VOID_FTYPE_UINT_UINT_UINT:
34752 case VOID_FTYPE_UINT64_UINT_UINT:
34753 case UCHAR_FTYPE_UINT_UINT_UINT:
34754 case UCHAR_FTYPE_UINT64_UINT_UINT:
34755 nargs = 3;
34756 klass = load;
34757 memory = ARRAY_SIZE (args);
34758 last_arg_constant = true;
34759 break;
34760 default:
34761 gcc_unreachable ();
34762 }
34763
34764 gcc_assert (nargs <= ARRAY_SIZE (args));
34765
34766 if (klass == store)
34767 {
34768 arg = CALL_EXPR_ARG (exp, 0);
34769 op = expand_normal (arg);
34770 gcc_assert (target == 0);
34771 if (memory)
34772 {
34773 op = ix86_zero_extend_to_Pmode (op);
34774 target = gen_rtx_MEM (tmode, op);
34775 /* target at this point has just BITS_PER_UNIT MEM_ALIGN
34776 on it. Try to improve it using get_pointer_alignment,
34777 and if the special builtin is one that requires strict
34778 	     mode alignment, also from its GET_MODE_ALIGNMENT.
34779 Failure to do so could lead to ix86_legitimate_combined_insn
34780 rejecting all changes to such insns. */
34781 unsigned int align = get_pointer_alignment (arg);
34782 if (aligned_mem && align < GET_MODE_ALIGNMENT (tmode))
34783 align = GET_MODE_ALIGNMENT (tmode);
34784 if (MEM_ALIGN (target) < align)
34785 set_mem_align (target, align);
34786 }
34787 else
34788 target = force_reg (tmode, op);
34789 arg_adjust = 1;
34790 }
34791 else
34792 {
34793 arg_adjust = 0;
34794 if (optimize
34795 || target == 0
34796 || !register_operand (target, tmode)
34797 || GET_MODE (target) != tmode)
34798 target = gen_reg_rtx (tmode);
34799 }
34800
34801 for (i = 0; i < nargs; i++)
34802 {
34803 enum machine_mode mode = insn_p->operand[i + 1].mode;
34804 bool match;
34805
34806 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
34807 op = expand_normal (arg);
34808 match = insn_p->operand[i + 1].predicate (op, mode);
34809
34810 if (last_arg_constant && (i + 1) == nargs)
34811 {
34812 if (!match)
34813 {
34814 if (icode == CODE_FOR_lwp_lwpvalsi3
34815 || icode == CODE_FOR_lwp_lwpinssi3
34816 || icode == CODE_FOR_lwp_lwpvaldi3
34817 || icode == CODE_FOR_lwp_lwpinsdi3)
34818 error ("the last argument must be a 32-bit immediate");
34819 else
34820 error ("the last argument must be an 8-bit immediate");
34821 return const0_rtx;
34822 }
34823 }
34824 else
34825 {
34826 if (i == memory)
34827 {
34828 /* This must be the memory operand. */
34829 op = ix86_zero_extend_to_Pmode (op);
34830 op = gen_rtx_MEM (mode, op);
34831 /* op at this point has just BITS_PER_UNIT MEM_ALIGN
34832 on it. Try to improve it using get_pointer_alignment,
34833 and if the special builtin is one that requires strict
34834 		 mode alignment, also from its GET_MODE_ALIGNMENT.
34835 Failure to do so could lead to ix86_legitimate_combined_insn
34836 rejecting all changes to such insns. */
34837 unsigned int align = get_pointer_alignment (arg);
34838 if (aligned_mem && align < GET_MODE_ALIGNMENT (mode))
34839 align = GET_MODE_ALIGNMENT (mode);
34840 if (MEM_ALIGN (op) < align)
34841 set_mem_align (op, align);
34842 }
34843 else
34844 {
34845 /* This must be register. */
34846 if (VECTOR_MODE_P (mode))
34847 op = safe_vector_operand (op, mode);
34848
34849 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
34850 op = copy_to_mode_reg (mode, op);
34851 else
34852 {
34853 op = copy_to_reg (op);
34854 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
34855 }
34856 }
34857 }
34858
34859 args[i].op = op;
34860 args[i].mode = mode;
34861 }
34862
34863 switch (nargs)
34864 {
34865 case 0:
34866 pat = GEN_FCN (icode) (target);
34867 break;
34868 case 1:
34869 pat = GEN_FCN (icode) (target, args[0].op);
34870 break;
34871 case 2:
34872 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
34873 break;
34874 case 3:
34875 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
34876 break;
34877 default:
34878 gcc_unreachable ();
34879 }
34880
34881 if (! pat)
34882 return 0;
34883 emit_insn (pat);
34884 return klass == store ? 0 : target;
34885 }
34886
34887 /* Return the integer constant in ARG. Constrain it to be in the range
34888 of the subparts of VEC_TYPE; issue an error if not. */
34889
34890 static int
34891 get_element_number (tree vec_type, tree arg)
34892 {
34893 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
34894
34895 if (!tree_fits_uhwi_p (arg)
34896 || (elt = tree_to_uhwi (arg), elt > max))
34897 {
34898 error ("selector must be an integer constant in the range 0..%wi", max);
34899 return 0;
34900 }
34901
34902 return elt;
34903 }
34904
34905 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
34906 ix86_expand_vector_init. We DO have language-level syntax for this, in
34907 the form of (type){ init-list }. Except that since we can't place emms
34908 instructions from inside the compiler, we can't allow the use of MMX
34909 registers unless the user explicitly asks for it. So we do *not* define
34910 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
34911    we have builtins invoked by mmintrin.h that give us license to emit
34912 these sorts of instructions. */
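/* For illustration only (assuming the usual mmintrin.h wrappers): a call
   like
     __m64 v = _mm_set_pi32 (hi, lo);
   is implemented in terms of __builtin_ia32_vec_init_v2si and ends up
   here with one call argument per vector element.  */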
34913
34914 static rtx
34915 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
34916 {
34917 enum machine_mode tmode = TYPE_MODE (type);
34918 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
34919 int i, n_elt = GET_MODE_NUNITS (tmode);
34920 rtvec v = rtvec_alloc (n_elt);
34921
34922 gcc_assert (VECTOR_MODE_P (tmode));
34923 gcc_assert (call_expr_nargs (exp) == n_elt);
34924
34925 for (i = 0; i < n_elt; ++i)
34926 {
34927 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
34928 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
34929 }
34930
34931 if (!target || !register_operand (target, tmode))
34932 target = gen_reg_rtx (tmode);
34933
34934 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
34935 return target;
34936 }
34937
34938 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
34939 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
34940 had a language-level syntax for referencing vector elements. */
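/* For illustration only (assuming the usual xmmintrin.h wrappers): a call
   like
     int w = _mm_extract_pi16 (v, 2);
   goes through __builtin_ia32_vec_ext_v4hi and ends up here, with the
   element selector validated by get_element_number above.  */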
34941
34942 static rtx
34943 ix86_expand_vec_ext_builtin (tree exp, rtx target)
34944 {
34945 enum machine_mode tmode, mode0;
34946 tree arg0, arg1;
34947 int elt;
34948 rtx op0;
34949
34950 arg0 = CALL_EXPR_ARG (exp, 0);
34951 arg1 = CALL_EXPR_ARG (exp, 1);
34952
34953 op0 = expand_normal (arg0);
34954 elt = get_element_number (TREE_TYPE (arg0), arg1);
34955
34956 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
34957 mode0 = TYPE_MODE (TREE_TYPE (arg0));
34958 gcc_assert (VECTOR_MODE_P (mode0));
34959
34960 op0 = force_reg (mode0, op0);
34961
34962 if (optimize || !target || !register_operand (target, tmode))
34963 target = gen_reg_rtx (tmode);
34964
34965 ix86_expand_vector_extract (true, target, op0, elt);
34966
34967 return target;
34968 }
34969
34970 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
34971 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
34972 a language-level syntax for referencing vector elements. */
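/* For illustration only (assuming the usual xmmintrin.h wrappers): a call
   like
     __m64 v2 = _mm_insert_pi16 (v, 42, 2);
   goes through __builtin_ia32_vec_set_v4hi and ends up here.  */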
34973
34974 static rtx
34975 ix86_expand_vec_set_builtin (tree exp)
34976 {
34977 enum machine_mode tmode, mode1;
34978 tree arg0, arg1, arg2;
34979 int elt;
34980 rtx op0, op1, target;
34981
34982 arg0 = CALL_EXPR_ARG (exp, 0);
34983 arg1 = CALL_EXPR_ARG (exp, 1);
34984 arg2 = CALL_EXPR_ARG (exp, 2);
34985
34986 tmode = TYPE_MODE (TREE_TYPE (arg0));
34987 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
34988 gcc_assert (VECTOR_MODE_P (tmode));
34989
34990 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
34991 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
34992 elt = get_element_number (TREE_TYPE (arg0), arg2);
34993
34994 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
34995 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
34996
34997 op0 = force_reg (tmode, op0);
34998 op1 = force_reg (mode1, op1);
34999
35000 /* OP0 is the source of these builtin functions and shouldn't be
35001 modified. Create a copy, use it and return it as target. */
35002 target = gen_reg_rtx (tmode);
35003 emit_move_insn (target, op0);
35004 ix86_expand_vector_set (true, target, op1, elt);
35005
35006 return target;
35007 }
35008
35009 /* Expand an expression EXP that calls a built-in function,
35010 with result going to TARGET if that's convenient
35011 (and in mode MODE if that's convenient).
35012 SUBTARGET may be used as the target for computing one of EXP's operands.
35013 IGNORE is nonzero if the value is to be ignored. */
35014
35015 static rtx
35016 ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
35017 enum machine_mode mode, int ignore)
35018 {
35019 const struct builtin_description *d;
35020 size_t i;
35021 enum insn_code icode;
35022 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
35023 tree arg0, arg1, arg2, arg3, arg4;
35024 rtx op0, op1, op2, op3, op4, pat, insn;
35025 enum machine_mode mode0, mode1, mode2, mode3, mode4;
35026 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
35027
35028 /* For CPU builtins that can be folded, fold first and expand the fold. */
35029 switch (fcode)
35030 {
35031 case IX86_BUILTIN_CPU_INIT:
35032 {
35033 /* Make it call __cpu_indicator_init in libgcc. */
35034 tree call_expr, fndecl, type;
35035 type = build_function_type_list (integer_type_node, NULL_TREE);
35036 fndecl = build_fn_decl ("__cpu_indicator_init", type);
35037 call_expr = build_call_expr (fndecl, 0);
35038 return expand_expr (call_expr, target, mode, EXPAND_NORMAL);
35039 }
35040 case IX86_BUILTIN_CPU_IS:
35041 case IX86_BUILTIN_CPU_SUPPORTS:
35042 {
35043 tree arg0 = CALL_EXPR_ARG (exp, 0);
35044 tree fold_expr = fold_builtin_cpu (fndecl, &arg0);
35045 gcc_assert (fold_expr != NULL_TREE);
35046 return expand_expr (fold_expr, target, mode, EXPAND_NORMAL);
35047 }
35048 }
35049
35050 /* Determine whether the builtin function is available under the current ISA.
35051 Originally the builtin was not created if it wasn't applicable to the
35052 current ISA based on the command line switches. With function specific
35053 options, we need to check in the context of the function making the call
35054 whether it is supported. */
35055 if (ix86_builtins_isa[fcode].isa
35056 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
35057 {
35058 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
35059 NULL, (enum fpmath_unit) 0, false);
35060
35061 if (!opts)
35062 error ("%qE needs unknown isa option", fndecl);
35063 else
35064 {
35065 gcc_assert (opts != NULL);
35066 error ("%qE needs isa option %s", fndecl, opts);
35067 free (opts);
35068 }
35069 return const0_rtx;
35070 }
35071
35072 switch (fcode)
35073 {
35074 case IX86_BUILTIN_MASKMOVQ:
35075 case IX86_BUILTIN_MASKMOVDQU:
35076 icode = (fcode == IX86_BUILTIN_MASKMOVQ
35077 ? CODE_FOR_mmx_maskmovq
35078 : CODE_FOR_sse2_maskmovdqu);
35079 /* Note the arg order is different from the operand order. */
35080 arg1 = CALL_EXPR_ARG (exp, 0);
35081 arg2 = CALL_EXPR_ARG (exp, 1);
35082 arg0 = CALL_EXPR_ARG (exp, 2);
35083 op0 = expand_normal (arg0);
35084 op1 = expand_normal (arg1);
35085 op2 = expand_normal (arg2);
35086 mode0 = insn_data[icode].operand[0].mode;
35087 mode1 = insn_data[icode].operand[1].mode;
35088 mode2 = insn_data[icode].operand[2].mode;
35089
35090 op0 = ix86_zero_extend_to_Pmode (op0);
35091 op0 = gen_rtx_MEM (mode1, op0);
35092
35093 if (!insn_data[icode].operand[0].predicate (op0, mode0))
35094 op0 = copy_to_mode_reg (mode0, op0);
35095 if (!insn_data[icode].operand[1].predicate (op1, mode1))
35096 op1 = copy_to_mode_reg (mode1, op1);
35097 if (!insn_data[icode].operand[2].predicate (op2, mode2))
35098 op2 = copy_to_mode_reg (mode2, op2);
35099 pat = GEN_FCN (icode) (op0, op1, op2);
35100 if (! pat)
35101 return 0;
35102 emit_insn (pat);
35103 return 0;
35104
35105 case IX86_BUILTIN_LDMXCSR:
35106 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
35107 target = assign_386_stack_local (SImode, SLOT_TEMP);
35108 emit_move_insn (target, op0);
35109 emit_insn (gen_sse_ldmxcsr (target));
35110 return 0;
35111
35112 case IX86_BUILTIN_STMXCSR:
35113 target = assign_386_stack_local (SImode, SLOT_TEMP);
35114 emit_insn (gen_sse_stmxcsr (target));
35115 return copy_to_mode_reg (SImode, target);
35116
35117 case IX86_BUILTIN_CLFLUSH:
35118 arg0 = CALL_EXPR_ARG (exp, 0);
35119 op0 = expand_normal (arg0);
35120 icode = CODE_FOR_sse2_clflush;
35121 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
35122 op0 = ix86_zero_extend_to_Pmode (op0);
35123
35124 emit_insn (gen_sse2_clflush (op0));
35125 return 0;
35126
35127 case IX86_BUILTIN_CLFLUSHOPT:
35128 arg0 = CALL_EXPR_ARG (exp, 0);
35129 op0 = expand_normal (arg0);
35130 icode = CODE_FOR_clflushopt;
35131 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
35132 op0 = ix86_zero_extend_to_Pmode (op0);
35133
35134 emit_insn (gen_clflushopt (op0));
35135 return 0;
35136
35137 case IX86_BUILTIN_MONITOR:
35138 arg0 = CALL_EXPR_ARG (exp, 0);
35139 arg1 = CALL_EXPR_ARG (exp, 1);
35140 arg2 = CALL_EXPR_ARG (exp, 2);
35141 op0 = expand_normal (arg0);
35142 op1 = expand_normal (arg1);
35143 op2 = expand_normal (arg2);
35144 if (!REG_P (op0))
35145 op0 = ix86_zero_extend_to_Pmode (op0);
35146 if (!REG_P (op1))
35147 op1 = copy_to_mode_reg (SImode, op1);
35148 if (!REG_P (op2))
35149 op2 = copy_to_mode_reg (SImode, op2);
35150 emit_insn (ix86_gen_monitor (op0, op1, op2));
35151 return 0;
35152
35153 case IX86_BUILTIN_MWAIT:
35154 arg0 = CALL_EXPR_ARG (exp, 0);
35155 arg1 = CALL_EXPR_ARG (exp, 1);
35156 op0 = expand_normal (arg0);
35157 op1 = expand_normal (arg1);
35158 if (!REG_P (op0))
35159 op0 = copy_to_mode_reg (SImode, op0);
35160 if (!REG_P (op1))
35161 op1 = copy_to_mode_reg (SImode, op1);
35162 emit_insn (gen_sse3_mwait (op0, op1));
35163 return 0;
35164
35165 case IX86_BUILTIN_VEC_INIT_V2SI:
35166 case IX86_BUILTIN_VEC_INIT_V4HI:
35167 case IX86_BUILTIN_VEC_INIT_V8QI:
35168 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
35169
35170 case IX86_BUILTIN_VEC_EXT_V2DF:
35171 case IX86_BUILTIN_VEC_EXT_V2DI:
35172 case IX86_BUILTIN_VEC_EXT_V4SF:
35173 case IX86_BUILTIN_VEC_EXT_V4SI:
35174 case IX86_BUILTIN_VEC_EXT_V8HI:
35175 case IX86_BUILTIN_VEC_EXT_V2SI:
35176 case IX86_BUILTIN_VEC_EXT_V4HI:
35177 case IX86_BUILTIN_VEC_EXT_V16QI:
35178 return ix86_expand_vec_ext_builtin (exp, target);
35179
35180 case IX86_BUILTIN_VEC_SET_V2DI:
35181 case IX86_BUILTIN_VEC_SET_V4SF:
35182 case IX86_BUILTIN_VEC_SET_V4SI:
35183 case IX86_BUILTIN_VEC_SET_V8HI:
35184 case IX86_BUILTIN_VEC_SET_V4HI:
35185 case IX86_BUILTIN_VEC_SET_V16QI:
35186 return ix86_expand_vec_set_builtin (exp);
35187
35188 case IX86_BUILTIN_INFQ:
35189 case IX86_BUILTIN_HUGE_VALQ:
35190 {
35191 REAL_VALUE_TYPE inf;
35192 rtx tmp;
35193
35194 real_inf (&inf);
35195 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
35196
35197 tmp = validize_mem (force_const_mem (mode, tmp));
35198
35199 if (target == 0)
35200 target = gen_reg_rtx (mode);
35201
35202 emit_move_insn (target, tmp);
35203 return target;
35204 }
35205
35206 case IX86_BUILTIN_RDPMC:
35207 case IX86_BUILTIN_RDTSC:
35208 case IX86_BUILTIN_RDTSCP:
35209
35210 op0 = gen_reg_rtx (DImode);
35211 op1 = gen_reg_rtx (DImode);
35212
35213 if (fcode == IX86_BUILTIN_RDPMC)
35214 {
35215 arg0 = CALL_EXPR_ARG (exp, 0);
35216 op2 = expand_normal (arg0);
35217 if (!register_operand (op2, SImode))
35218 op2 = copy_to_mode_reg (SImode, op2);
35219
35220 insn = (TARGET_64BIT
35221 ? gen_rdpmc_rex64 (op0, op1, op2)
35222 : gen_rdpmc (op0, op2));
35223 emit_insn (insn);
35224 }
35225 else if (fcode == IX86_BUILTIN_RDTSC)
35226 {
35227 insn = (TARGET_64BIT
35228 ? gen_rdtsc_rex64 (op0, op1)
35229 : gen_rdtsc (op0));
35230 emit_insn (insn);
35231 }
35232 else
35233 {
35234 op2 = gen_reg_rtx (SImode);
35235
35236 insn = (TARGET_64BIT
35237 ? gen_rdtscp_rex64 (op0, op1, op2)
35238 : gen_rdtscp (op0, op2));
35239 emit_insn (insn);
35240
35241 arg0 = CALL_EXPR_ARG (exp, 0);
35242 op4 = expand_normal (arg0);
35243 if (!address_operand (op4, VOIDmode))
35244 {
35245 op4 = convert_memory_address (Pmode, op4);
35246 op4 = copy_addr_to_reg (op4);
35247 }
35248 emit_move_insn (gen_rtx_MEM (SImode, op4), op2);
35249 }
35250
35251 if (target == 0)
35252 {
35253 /* mode is VOIDmode if __builtin_rd* has been called
35254 without lhs. */
35255 if (mode == VOIDmode)
35256 return target;
35257 target = gen_reg_rtx (mode);
35258 }
35259
35260 if (TARGET_64BIT)
35261 {
35262 op1 = expand_simple_binop (DImode, ASHIFT, op1, GEN_INT (32),
35263 op1, 1, OPTAB_DIRECT);
35264 op0 = expand_simple_binop (DImode, IOR, op0, op1,
35265 op0, 1, OPTAB_DIRECT);
35266 }
35267
35268 emit_move_insn (target, op0);
35269 return target;
35270
35271 case IX86_BUILTIN_FXSAVE:
35272 case IX86_BUILTIN_FXRSTOR:
35273 case IX86_BUILTIN_FXSAVE64:
35274 case IX86_BUILTIN_FXRSTOR64:
35275 case IX86_BUILTIN_FNSTENV:
35276 case IX86_BUILTIN_FLDENV:
35277 mode0 = BLKmode;
35278 switch (fcode)
35279 {
35280 case IX86_BUILTIN_FXSAVE:
35281 icode = CODE_FOR_fxsave;
35282 break;
35283 case IX86_BUILTIN_FXRSTOR:
35284 icode = CODE_FOR_fxrstor;
35285 break;
35286 case IX86_BUILTIN_FXSAVE64:
35287 icode = CODE_FOR_fxsave64;
35288 break;
35289 case IX86_BUILTIN_FXRSTOR64:
35290 icode = CODE_FOR_fxrstor64;
35291 break;
35292 case IX86_BUILTIN_FNSTENV:
35293 icode = CODE_FOR_fnstenv;
35294 break;
35295 case IX86_BUILTIN_FLDENV:
35296 icode = CODE_FOR_fldenv;
35297 break;
35298 default:
35299 gcc_unreachable ();
35300 }
35301
35302 arg0 = CALL_EXPR_ARG (exp, 0);
35303 op0 = expand_normal (arg0);
35304
35305 if (!address_operand (op0, VOIDmode))
35306 {
35307 op0 = convert_memory_address (Pmode, op0);
35308 op0 = copy_addr_to_reg (op0);
35309 }
35310 op0 = gen_rtx_MEM (mode0, op0);
35311
35312 pat = GEN_FCN (icode) (op0);
35313 if (pat)
35314 emit_insn (pat);
35315 return 0;
35316
35317 case IX86_BUILTIN_XSAVE:
35318 case IX86_BUILTIN_XRSTOR:
35319 case IX86_BUILTIN_XSAVE64:
35320 case IX86_BUILTIN_XRSTOR64:
35321 case IX86_BUILTIN_XSAVEOPT:
35322 case IX86_BUILTIN_XSAVEOPT64:
35323 case IX86_BUILTIN_XSAVES:
35324 case IX86_BUILTIN_XRSTORS:
35325 case IX86_BUILTIN_XSAVES64:
35326 case IX86_BUILTIN_XRSTORS64:
35327 case IX86_BUILTIN_XSAVEC:
35328 case IX86_BUILTIN_XSAVEC64:
35329 arg0 = CALL_EXPR_ARG (exp, 0);
35330 arg1 = CALL_EXPR_ARG (exp, 1);
35331 op0 = expand_normal (arg0);
35332 op1 = expand_normal (arg1);
35333
35334 if (!address_operand (op0, VOIDmode))
35335 {
35336 op0 = convert_memory_address (Pmode, op0);
35337 op0 = copy_addr_to_reg (op0);
35338 }
35339 op0 = gen_rtx_MEM (BLKmode, op0);
35340
35341 op1 = force_reg (DImode, op1);
35342
35343 if (TARGET_64BIT)
35344 {
35345 op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32),
35346 NULL, 1, OPTAB_DIRECT);
35347 switch (fcode)
35348 {
35349 case IX86_BUILTIN_XSAVE:
35350 icode = CODE_FOR_xsave_rex64;
35351 break;
35352 case IX86_BUILTIN_XRSTOR:
35353 icode = CODE_FOR_xrstor_rex64;
35354 break;
35355 case IX86_BUILTIN_XSAVE64:
35356 icode = CODE_FOR_xsave64;
35357 break;
35358 case IX86_BUILTIN_XRSTOR64:
35359 icode = CODE_FOR_xrstor64;
35360 break;
35361 case IX86_BUILTIN_XSAVEOPT:
35362 icode = CODE_FOR_xsaveopt_rex64;
35363 break;
35364 case IX86_BUILTIN_XSAVEOPT64:
35365 icode = CODE_FOR_xsaveopt64;
35366 break;
35367 case IX86_BUILTIN_XSAVES:
35368 icode = CODE_FOR_xsaves_rex64;
35369 break;
35370 case IX86_BUILTIN_XRSTORS:
35371 icode = CODE_FOR_xrstors_rex64;
35372 break;
35373 case IX86_BUILTIN_XSAVES64:
35374 icode = CODE_FOR_xsaves64;
35375 break;
35376 case IX86_BUILTIN_XRSTORS64:
35377 icode = CODE_FOR_xrstors64;
35378 break;
35379 case IX86_BUILTIN_XSAVEC:
35380 icode = CODE_FOR_xsavec_rex64;
35381 break;
35382 case IX86_BUILTIN_XSAVEC64:
35383 icode = CODE_FOR_xsavec64;
35384 break;
35385 default:
35386 gcc_unreachable ();
35387 }
35388
35389 op2 = gen_lowpart (SImode, op2);
35390 op1 = gen_lowpart (SImode, op1);
35391 pat = GEN_FCN (icode) (op0, op1, op2);
35392 }
35393 else
35394 {
35395 switch (fcode)
35396 {
35397 case IX86_BUILTIN_XSAVE:
35398 icode = CODE_FOR_xsave;
35399 break;
35400 case IX86_BUILTIN_XRSTOR:
35401 icode = CODE_FOR_xrstor;
35402 break;
35403 case IX86_BUILTIN_XSAVEOPT:
35404 icode = CODE_FOR_xsaveopt;
35405 break;
35406 case IX86_BUILTIN_XSAVES:
35407 icode = CODE_FOR_xsaves;
35408 break;
35409 case IX86_BUILTIN_XRSTORS:
35410 icode = CODE_FOR_xrstors;
35411 break;
35412 case IX86_BUILTIN_XSAVEC:
35413 icode = CODE_FOR_xsavec;
35414 break;
35415 default:
35416 gcc_unreachable ();
35417 }
35418 pat = GEN_FCN (icode) (op0, op1);
35419 }
35420
35421 if (pat)
35422 emit_insn (pat);
35423 return 0;
35424
35425 case IX86_BUILTIN_LLWPCB:
35426 arg0 = CALL_EXPR_ARG (exp, 0);
35427 op0 = expand_normal (arg0);
35428 icode = CODE_FOR_lwp_llwpcb;
35429 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
35430 op0 = ix86_zero_extend_to_Pmode (op0);
35431 emit_insn (gen_lwp_llwpcb (op0));
35432 return 0;
35433
35434 case IX86_BUILTIN_SLWPCB:
35435 icode = CODE_FOR_lwp_slwpcb;
35436 if (!target
35437 || !insn_data[icode].operand[0].predicate (target, Pmode))
35438 target = gen_reg_rtx (Pmode);
35439 emit_insn (gen_lwp_slwpcb (target));
35440 return target;
35441
35442 case IX86_BUILTIN_BEXTRI32:
35443 case IX86_BUILTIN_BEXTRI64:
35444 arg0 = CALL_EXPR_ARG (exp, 0);
35445 arg1 = CALL_EXPR_ARG (exp, 1);
35446 op0 = expand_normal (arg0);
35447 op1 = expand_normal (arg1);
35448 icode = (fcode == IX86_BUILTIN_BEXTRI32
35449 ? CODE_FOR_tbm_bextri_si
35450 : CODE_FOR_tbm_bextri_di);
35451 if (!CONST_INT_P (op1))
35452 {
35453 error ("last argument must be an immediate");
35454 return const0_rtx;
35455 }
35456 else
35457 {
35458 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
35459 unsigned char lsb_index = INTVAL (op1) & 0xFF;
35460 op1 = GEN_INT (length);
35461 op2 = GEN_INT (lsb_index);
35462 pat = GEN_FCN (icode) (target, op0, op1, op2);
35463 if (pat)
35464 emit_insn (pat);
35465 return target;
35466 }
35467
35468 case IX86_BUILTIN_RDRAND16_STEP:
35469 icode = CODE_FOR_rdrandhi_1;
35470 mode0 = HImode;
35471 goto rdrand_step;
35472
35473 case IX86_BUILTIN_RDRAND32_STEP:
35474 icode = CODE_FOR_rdrandsi_1;
35475 mode0 = SImode;
35476 goto rdrand_step;
35477
35478 case IX86_BUILTIN_RDRAND64_STEP:
35479 icode = CODE_FOR_rdranddi_1;
35480 mode0 = DImode;
35481
35482 rdrand_step:
35483 op0 = gen_reg_rtx (mode0);
35484 emit_insn (GEN_FCN (icode) (op0));
35485
35486 arg0 = CALL_EXPR_ARG (exp, 0);
35487 op1 = expand_normal (arg0);
35488 if (!address_operand (op1, VOIDmode))
35489 {
35490 op1 = convert_memory_address (Pmode, op1);
35491 op1 = copy_addr_to_reg (op1);
35492 }
35493 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
35494
35495 op1 = gen_reg_rtx (SImode);
35496 emit_move_insn (op1, CONST1_RTX (SImode));
35497
35498 /* Emit SImode conditional move. */
35499 if (mode0 == HImode)
35500 {
35501 op2 = gen_reg_rtx (SImode);
35502 emit_insn (gen_zero_extendhisi2 (op2, op0));
35503 }
35504 else if (mode0 == SImode)
35505 op2 = op0;
35506 else
35507 op2 = gen_rtx_SUBREG (SImode, op0, 0);
35508
35509 if (target == 0
35510 || !register_operand (target, SImode))
35511 target = gen_reg_rtx (SImode);
35512
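      /* On failure RDRAND clears CF and zeroes the destination, so
	 selecting the (then zero) result register when the GEU
	 (carry-clear) test is true, and the constant 1 otherwise, yields
	 the 0/1 success indicator the *_step builtins return.  */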
35513 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
35514 const0_rtx);
35515 emit_insn (gen_rtx_SET (VOIDmode, target,
35516 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
35517 return target;
35518
35519 case IX86_BUILTIN_RDSEED16_STEP:
35520 icode = CODE_FOR_rdseedhi_1;
35521 mode0 = HImode;
35522 goto rdseed_step;
35523
35524 case IX86_BUILTIN_RDSEED32_STEP:
35525 icode = CODE_FOR_rdseedsi_1;
35526 mode0 = SImode;
35527 goto rdseed_step;
35528
35529 case IX86_BUILTIN_RDSEED64_STEP:
35530 icode = CODE_FOR_rdseeddi_1;
35531 mode0 = DImode;
35532
35533 rdseed_step:
35534 op0 = gen_reg_rtx (mode0);
35535 emit_insn (GEN_FCN (icode) (op0));
35536
35537 arg0 = CALL_EXPR_ARG (exp, 0);
35538 op1 = expand_normal (arg0);
35539 if (!address_operand (op1, VOIDmode))
35540 {
35541 op1 = convert_memory_address (Pmode, op1);
35542 op1 = copy_addr_to_reg (op1);
35543 }
35544 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
35545
35546 op2 = gen_reg_rtx (QImode);
35547
35548 pat = gen_rtx_LTU (QImode, gen_rtx_REG (CCCmode, FLAGS_REG),
35549 const0_rtx);
35550 emit_insn (gen_rtx_SET (VOIDmode, op2, pat));
35551
35552 if (target == 0
35553 || !register_operand (target, SImode))
35554 target = gen_reg_rtx (SImode);
35555
35556 emit_insn (gen_zero_extendqisi2 (target, op2));
35557 return target;
35558
35559 case IX86_BUILTIN_ADDCARRYX32:
35560 icode = TARGET_ADX ? CODE_FOR_adcxsi3 : CODE_FOR_addsi3_carry;
35561 mode0 = SImode;
35562 goto addcarryx;
35563
35564 case IX86_BUILTIN_ADDCARRYX64:
35565 icode = TARGET_ADX ? CODE_FOR_adcxdi3 : CODE_FOR_adddi3_carry;
35566 mode0 = DImode;
35567
35568 addcarryx:
35569 arg0 = CALL_EXPR_ARG (exp, 0); /* unsigned char c_in. */
35570 arg1 = CALL_EXPR_ARG (exp, 1); /* unsigned int src1. */
35571 arg2 = CALL_EXPR_ARG (exp, 2); /* unsigned int src2. */
35572 arg3 = CALL_EXPR_ARG (exp, 3); /* unsigned int *sum_out. */
35573
35574 op0 = gen_reg_rtx (QImode);
35575
35576 /* Generate CF from input operand. */
35577 op1 = expand_normal (arg0);
35578 op1 = copy_to_mode_reg (QImode, convert_to_mode (QImode, op1, 1));
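      /* op1 + 0xff overflows the byte exactly when the carry-in value is
	 nonzero, so this add recreates CF from the incoming carry.  */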
35579 emit_insn (gen_addqi3_cc (op0, op1, constm1_rtx));
35580
35581       /* Generate an ADCX (or ADC) instruction to compute X+Y+CF.  */
35582 op2 = expand_normal (arg1);
35583 op3 = expand_normal (arg2);
35584
35585 if (!REG_P (op2))
35586 op2 = copy_to_mode_reg (mode0, op2);
35587 if (!REG_P (op3))
35588 op3 = copy_to_mode_reg (mode0, op3);
35589
35590 op0 = gen_reg_rtx (mode0);
35591
35592 op4 = gen_rtx_REG (CCCmode, FLAGS_REG);
35593 pat = gen_rtx_LTU (VOIDmode, op4, const0_rtx);
35594 emit_insn (GEN_FCN (icode) (op0, op2, op3, op4, pat));
35595
35596 /* Store the result. */
35597 op4 = expand_normal (arg3);
35598 if (!address_operand (op4, VOIDmode))
35599 {
35600 op4 = convert_memory_address (Pmode, op4);
35601 op4 = copy_addr_to_reg (op4);
35602 }
35603 emit_move_insn (gen_rtx_MEM (mode0, op4), op0);
35604
35605 /* Return current CF value. */
35606 if (target == 0)
35607 target = gen_reg_rtx (QImode);
35608
35609 PUT_MODE (pat, QImode);
35610 emit_insn (gen_rtx_SET (VOIDmode, target, pat));
35611 return target;
35612
35613 case IX86_BUILTIN_READ_FLAGS:
35614 emit_insn (gen_push (gen_rtx_REG (word_mode, FLAGS_REG)));
35615
35616 if (optimize
35617 || target == NULL_RTX
35618 || !nonimmediate_operand (target, word_mode)
35619 || GET_MODE (target) != word_mode)
35620 target = gen_reg_rtx (word_mode);
35621
35622 emit_insn (gen_pop (target));
35623 return target;
35624
35625 case IX86_BUILTIN_WRITE_FLAGS:
35626
35627 arg0 = CALL_EXPR_ARG (exp, 0);
35628 op0 = expand_normal (arg0);
35629 if (!general_no_elim_operand (op0, word_mode))
35630 op0 = copy_to_mode_reg (word_mode, op0);
35631
35632 emit_insn (gen_push (op0));
35633 emit_insn (gen_pop (gen_rtx_REG (word_mode, FLAGS_REG)));
35634 return 0;
35635
35636 case IX86_BUILTIN_KORTESTC16:
35637 icode = CODE_FOR_kortestchi;
35638 mode0 = HImode;
35639 mode1 = CCCmode;
35640 goto kortest;
35641
35642 case IX86_BUILTIN_KORTESTZ16:
35643 icode = CODE_FOR_kortestzhi;
35644 mode0 = HImode;
35645 mode1 = CCZmode;
35646
35647 kortest:
35648 arg0 = CALL_EXPR_ARG (exp, 0); /* Mask reg src1. */
35649 arg1 = CALL_EXPR_ARG (exp, 1); /* Mask reg src2. */
35650 op0 = expand_normal (arg0);
35651 op1 = expand_normal (arg1);
35652
35653 op0 = copy_to_reg (op0);
35654 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
35655 op1 = copy_to_reg (op1);
35656 op1 = simplify_gen_subreg (mode0, op1, GET_MODE (op1), 0);
35657
35658 target = gen_reg_rtx (QImode);
35659       emit_insn (gen_rtx_SET (VOIDmode, target, const0_rtx));
35660
35661 /* Emit kortest. */
35662 emit_insn (GEN_FCN (icode) (op0, op1));
35663 /* And use setcc to return result from flags. */
35664 ix86_expand_setcc (target, EQ,
35665 gen_rtx_REG (mode1, FLAGS_REG), const0_rtx);
35666 return target;
35667
35668 case IX86_BUILTIN_GATHERSIV2DF:
35669 icode = CODE_FOR_avx2_gathersiv2df;
35670 goto gather_gen;
35671 case IX86_BUILTIN_GATHERSIV4DF:
35672 icode = CODE_FOR_avx2_gathersiv4df;
35673 goto gather_gen;
35674 case IX86_BUILTIN_GATHERDIV2DF:
35675 icode = CODE_FOR_avx2_gatherdiv2df;
35676 goto gather_gen;
35677 case IX86_BUILTIN_GATHERDIV4DF:
35678 icode = CODE_FOR_avx2_gatherdiv4df;
35679 goto gather_gen;
35680 case IX86_BUILTIN_GATHERSIV4SF:
35681 icode = CODE_FOR_avx2_gathersiv4sf;
35682 goto gather_gen;
35683 case IX86_BUILTIN_GATHERSIV8SF:
35684 icode = CODE_FOR_avx2_gathersiv8sf;
35685 goto gather_gen;
35686 case IX86_BUILTIN_GATHERDIV4SF:
35687 icode = CODE_FOR_avx2_gatherdiv4sf;
35688 goto gather_gen;
35689 case IX86_BUILTIN_GATHERDIV8SF:
35690 icode = CODE_FOR_avx2_gatherdiv8sf;
35691 goto gather_gen;
35692 case IX86_BUILTIN_GATHERSIV2DI:
35693 icode = CODE_FOR_avx2_gathersiv2di;
35694 goto gather_gen;
35695 case IX86_BUILTIN_GATHERSIV4DI:
35696 icode = CODE_FOR_avx2_gathersiv4di;
35697 goto gather_gen;
35698 case IX86_BUILTIN_GATHERDIV2DI:
35699 icode = CODE_FOR_avx2_gatherdiv2di;
35700 goto gather_gen;
35701 case IX86_BUILTIN_GATHERDIV4DI:
35702 icode = CODE_FOR_avx2_gatherdiv4di;
35703 goto gather_gen;
35704 case IX86_BUILTIN_GATHERSIV4SI:
35705 icode = CODE_FOR_avx2_gathersiv4si;
35706 goto gather_gen;
35707 case IX86_BUILTIN_GATHERSIV8SI:
35708 icode = CODE_FOR_avx2_gathersiv8si;
35709 goto gather_gen;
35710 case IX86_BUILTIN_GATHERDIV4SI:
35711 icode = CODE_FOR_avx2_gatherdiv4si;
35712 goto gather_gen;
35713 case IX86_BUILTIN_GATHERDIV8SI:
35714 icode = CODE_FOR_avx2_gatherdiv8si;
35715 goto gather_gen;
35716 case IX86_BUILTIN_GATHERALTSIV4DF:
35717 icode = CODE_FOR_avx2_gathersiv4df;
35718 goto gather_gen;
35719 case IX86_BUILTIN_GATHERALTDIV8SF:
35720 icode = CODE_FOR_avx2_gatherdiv8sf;
35721 goto gather_gen;
35722 case IX86_BUILTIN_GATHERALTSIV4DI:
35723 icode = CODE_FOR_avx2_gathersiv4di;
35724 goto gather_gen;
35725 case IX86_BUILTIN_GATHERALTDIV8SI:
35726 icode = CODE_FOR_avx2_gatherdiv8si;
35727 goto gather_gen;
35728 case IX86_BUILTIN_GATHER3SIV16SF:
35729 icode = CODE_FOR_avx512f_gathersiv16sf;
35730 goto gather_gen;
35731 case IX86_BUILTIN_GATHER3SIV8DF:
35732 icode = CODE_FOR_avx512f_gathersiv8df;
35733 goto gather_gen;
35734 case IX86_BUILTIN_GATHER3DIV16SF:
35735 icode = CODE_FOR_avx512f_gatherdiv16sf;
35736 goto gather_gen;
35737 case IX86_BUILTIN_GATHER3DIV8DF:
35738 icode = CODE_FOR_avx512f_gatherdiv8df;
35739 goto gather_gen;
35740 case IX86_BUILTIN_GATHER3SIV16SI:
35741 icode = CODE_FOR_avx512f_gathersiv16si;
35742 goto gather_gen;
35743 case IX86_BUILTIN_GATHER3SIV8DI:
35744 icode = CODE_FOR_avx512f_gathersiv8di;
35745 goto gather_gen;
35746 case IX86_BUILTIN_GATHER3DIV16SI:
35747 icode = CODE_FOR_avx512f_gatherdiv16si;
35748 goto gather_gen;
35749 case IX86_BUILTIN_GATHER3DIV8DI:
35750 icode = CODE_FOR_avx512f_gatherdiv8di;
35751 goto gather_gen;
35752 case IX86_BUILTIN_GATHER3ALTSIV8DF:
35753 icode = CODE_FOR_avx512f_gathersiv8df;
35754 goto gather_gen;
35755 case IX86_BUILTIN_GATHER3ALTDIV16SF:
35756 icode = CODE_FOR_avx512f_gatherdiv16sf;
35757 goto gather_gen;
35758 case IX86_BUILTIN_GATHER3ALTSIV8DI:
35759 icode = CODE_FOR_avx512f_gathersiv8di;
35760 goto gather_gen;
35761 case IX86_BUILTIN_GATHER3ALTDIV16SI:
35762 icode = CODE_FOR_avx512f_gatherdiv16si;
35763 goto gather_gen;
35764 case IX86_BUILTIN_SCATTERSIV16SF:
35765 icode = CODE_FOR_avx512f_scattersiv16sf;
35766 goto scatter_gen;
35767 case IX86_BUILTIN_SCATTERSIV8DF:
35768 icode = CODE_FOR_avx512f_scattersiv8df;
35769 goto scatter_gen;
35770 case IX86_BUILTIN_SCATTERDIV16SF:
35771 icode = CODE_FOR_avx512f_scatterdiv16sf;
35772 goto scatter_gen;
35773 case IX86_BUILTIN_SCATTERDIV8DF:
35774 icode = CODE_FOR_avx512f_scatterdiv8df;
35775 goto scatter_gen;
35776 case IX86_BUILTIN_SCATTERSIV16SI:
35777 icode = CODE_FOR_avx512f_scattersiv16si;
35778 goto scatter_gen;
35779 case IX86_BUILTIN_SCATTERSIV8DI:
35780 icode = CODE_FOR_avx512f_scattersiv8di;
35781 goto scatter_gen;
35782 case IX86_BUILTIN_SCATTERDIV16SI:
35783 icode = CODE_FOR_avx512f_scatterdiv16si;
35784 goto scatter_gen;
35785 case IX86_BUILTIN_SCATTERDIV8DI:
35786 icode = CODE_FOR_avx512f_scatterdiv8di;
35787 goto scatter_gen;
35788
35789 case IX86_BUILTIN_GATHERPFDPD:
35790 icode = CODE_FOR_avx512pf_gatherpfv8sidf;
35791 goto vec_prefetch_gen;
35792 case IX86_BUILTIN_GATHERPFDPS:
35793 icode = CODE_FOR_avx512pf_gatherpfv16sisf;
35794 goto vec_prefetch_gen;
35795 case IX86_BUILTIN_GATHERPFQPD:
35796 icode = CODE_FOR_avx512pf_gatherpfv8didf;
35797 goto vec_prefetch_gen;
35798 case IX86_BUILTIN_GATHERPFQPS:
35799 icode = CODE_FOR_avx512pf_gatherpfv8disf;
35800 goto vec_prefetch_gen;
35801 case IX86_BUILTIN_SCATTERPFDPD:
35802 icode = CODE_FOR_avx512pf_scatterpfv8sidf;
35803 goto vec_prefetch_gen;
35804 case IX86_BUILTIN_SCATTERPFDPS:
35805 icode = CODE_FOR_avx512pf_scatterpfv16sisf;
35806 goto vec_prefetch_gen;
35807 case IX86_BUILTIN_SCATTERPFQPD:
35808 icode = CODE_FOR_avx512pf_scatterpfv8didf;
35809 goto vec_prefetch_gen;
35810 case IX86_BUILTIN_SCATTERPFQPS:
35811 icode = CODE_FOR_avx512pf_scatterpfv8disf;
35812 goto vec_prefetch_gen;
35813
35814 gather_gen:
35815 rtx half;
35816 rtx (*gen) (rtx, rtx);
35817
35818 arg0 = CALL_EXPR_ARG (exp, 0);
35819 arg1 = CALL_EXPR_ARG (exp, 1);
35820 arg2 = CALL_EXPR_ARG (exp, 2);
35821 arg3 = CALL_EXPR_ARG (exp, 3);
35822 arg4 = CALL_EXPR_ARG (exp, 4);
35823 op0 = expand_normal (arg0);
35824 op1 = expand_normal (arg1);
35825 op2 = expand_normal (arg2);
35826 op3 = expand_normal (arg3);
35827 op4 = expand_normal (arg4);
35828 /* Note the arg order is different from the operand order. */
35829 mode0 = insn_data[icode].operand[1].mode;
35830 mode2 = insn_data[icode].operand[3].mode;
35831 mode3 = insn_data[icode].operand[4].mode;
35832 mode4 = insn_data[icode].operand[5].mode;
35833
35834 if (target == NULL_RTX
35835 || GET_MODE (target) != insn_data[icode].operand[0].mode
35836 || !insn_data[icode].operand[0].predicate (target,
35837 GET_MODE (target)))
35838 subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
35839 else
35840 subtarget = target;
35841
35842 switch (fcode)
35843 {
35844 case IX86_BUILTIN_GATHER3ALTSIV8DF:
35845 case IX86_BUILTIN_GATHER3ALTSIV8DI:
35846 half = gen_reg_rtx (V8SImode);
35847 if (!nonimmediate_operand (op2, V16SImode))
35848 op2 = copy_to_mode_reg (V16SImode, op2);
35849 emit_insn (gen_vec_extract_lo_v16si (half, op2));
35850 op2 = half;
35851 break;
35852 case IX86_BUILTIN_GATHERALTSIV4DF:
35853 case IX86_BUILTIN_GATHERALTSIV4DI:
35854 half = gen_reg_rtx (V4SImode);
35855 if (!nonimmediate_operand (op2, V8SImode))
35856 op2 = copy_to_mode_reg (V8SImode, op2);
35857 emit_insn (gen_vec_extract_lo_v8si (half, op2));
35858 op2 = half;
35859 break;
35860 case IX86_BUILTIN_GATHER3ALTDIV16SF:
35861 case IX86_BUILTIN_GATHER3ALTDIV16SI:
35862 half = gen_reg_rtx (mode0);
35863 if (mode0 == V8SFmode)
35864 gen = gen_vec_extract_lo_v16sf;
35865 else
35866 gen = gen_vec_extract_lo_v16si;
35867 if (!nonimmediate_operand (op0, GET_MODE (op0)))
35868 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
35869 emit_insn (gen (half, op0));
35870 op0 = half;
35871 if (GET_MODE (op3) != VOIDmode)
35872 {
35873 if (!nonimmediate_operand (op3, GET_MODE (op3)))
35874 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
35875 emit_insn (gen (half, op3));
35876 op3 = half;
35877 }
35878 break;
35879 case IX86_BUILTIN_GATHERALTDIV8SF:
35880 case IX86_BUILTIN_GATHERALTDIV8SI:
35881 half = gen_reg_rtx (mode0);
35882 if (mode0 == V4SFmode)
35883 gen = gen_vec_extract_lo_v8sf;
35884 else
35885 gen = gen_vec_extract_lo_v8si;
35886 if (!nonimmediate_operand (op0, GET_MODE (op0)))
35887 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
35888 emit_insn (gen (half, op0));
35889 op0 = half;
35890 if (GET_MODE (op3) != VOIDmode)
35891 {
35892 if (!nonimmediate_operand (op3, GET_MODE (op3)))
35893 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
35894 emit_insn (gen (half, op3));
35895 op3 = half;
35896 }
35897 break;
35898 default:
35899 break;
35900 }
35901
35902       /* Force the memory operand to use only a base register here; we
35903 	 don't want to do that for the memory operands of other builtin
35904 	 functions.  */
35905 op1 = ix86_zero_extend_to_Pmode (op1);
35906
35907 if (!insn_data[icode].operand[1].predicate (op0, mode0))
35908 op0 = copy_to_mode_reg (mode0, op0);
35909 if (!insn_data[icode].operand[2].predicate (op1, Pmode))
35910 op1 = copy_to_mode_reg (Pmode, op1);
35911 if (!insn_data[icode].operand[3].predicate (op2, mode2))
35912 op2 = copy_to_mode_reg (mode2, op2);
35913 if (GET_MODE (op3) == mode3 || GET_MODE (op3) == VOIDmode)
35914 {
35915 if (!insn_data[icode].operand[4].predicate (op3, mode3))
35916 op3 = copy_to_mode_reg (mode3, op3);
35917 }
35918 else
35919 {
35920 op3 = copy_to_reg (op3);
35921 op3 = simplify_gen_subreg (mode3, op3, GET_MODE (op3), 0);
35922 }
35923 if (!insn_data[icode].operand[5].predicate (op4, mode4))
35924 {
35925 error ("the last argument must be scale 1, 2, 4, 8");
35926 return const0_rtx;
35927 }
35928
35929 /* Optimize. If mask is known to have all high bits set,
35930 replace op0 with pc_rtx to signal that the instruction
35931 overwrites the whole destination and doesn't use its
35932 previous contents. */
35933 if (optimize)
35934 {
35935 if (TREE_CODE (arg3) == INTEGER_CST)
35936 {
35937 if (integer_all_onesp (arg3))
35938 op0 = pc_rtx;
35939 }
35940 else if (TREE_CODE (arg3) == VECTOR_CST)
35941 {
35942 unsigned int negative = 0;
35943 for (i = 0; i < VECTOR_CST_NELTS (arg3); ++i)
35944 {
35945 tree cst = VECTOR_CST_ELT (arg3, i);
35946 if (TREE_CODE (cst) == INTEGER_CST
35947 && tree_int_cst_sign_bit (cst))
35948 negative++;
35949 else if (TREE_CODE (cst) == REAL_CST
35950 && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
35951 negative++;
35952 }
35953 if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
35954 op0 = pc_rtx;
35955 }
35956 else if (TREE_CODE (arg3) == SSA_NAME
35957 && TREE_CODE (TREE_TYPE (arg3)) == VECTOR_TYPE)
35958 {
35959 /* Recognize also when mask is like:
35960 __v2df src = _mm_setzero_pd ();
35961 __v2df mask = _mm_cmpeq_pd (src, src);
35962 or
35963 __v8sf src = _mm256_setzero_ps ();
35964 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
35965 as that is a cheaper way to load all ones into
35966 a register than having to load a constant from
35967 memory. */
35968 gimple def_stmt = SSA_NAME_DEF_STMT (arg3);
35969 if (is_gimple_call (def_stmt))
35970 {
35971 tree fndecl = gimple_call_fndecl (def_stmt);
35972 if (fndecl
35973 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
35974 switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
35975 {
35976 case IX86_BUILTIN_CMPPD:
35977 case IX86_BUILTIN_CMPPS:
35978 case IX86_BUILTIN_CMPPD256:
35979 case IX86_BUILTIN_CMPPS256:
35980 if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
35981 break;
35982 /* FALLTHRU */
35983 case IX86_BUILTIN_CMPEQPD:
35984 case IX86_BUILTIN_CMPEQPS:
35985 if (initializer_zerop (gimple_call_arg (def_stmt, 0))
35986 && initializer_zerop (gimple_call_arg (def_stmt,
35987 1)))
35988 op0 = pc_rtx;
35989 break;
35990 default:
35991 break;
35992 }
35993 }
35994 }
35995 }
35996
35997 pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
35998 if (! pat)
35999 return const0_rtx;
36000 emit_insn (pat);
36001
36002 switch (fcode)
36003 {
36004 case IX86_BUILTIN_GATHER3DIV16SF:
36005 if (target == NULL_RTX)
36006 target = gen_reg_rtx (V8SFmode);
36007 emit_insn (gen_vec_extract_lo_v16sf (target, subtarget));
36008 break;
36009 case IX86_BUILTIN_GATHER3DIV16SI:
36010 if (target == NULL_RTX)
36011 target = gen_reg_rtx (V8SImode);
36012 emit_insn (gen_vec_extract_lo_v16si (target, subtarget));
36013 break;
36014 case IX86_BUILTIN_GATHERDIV8SF:
36015 if (target == NULL_RTX)
36016 target = gen_reg_rtx (V4SFmode);
36017 emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
36018 break;
36019 case IX86_BUILTIN_GATHERDIV8SI:
36020 if (target == NULL_RTX)
36021 target = gen_reg_rtx (V4SImode);
36022 emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
36023 break;
36024 default:
36025 target = subtarget;
36026 break;
36027 }
36028 return target;
36029
36030 scatter_gen:
36031 arg0 = CALL_EXPR_ARG (exp, 0);
36032 arg1 = CALL_EXPR_ARG (exp, 1);
36033 arg2 = CALL_EXPR_ARG (exp, 2);
36034 arg3 = CALL_EXPR_ARG (exp, 3);
36035 arg4 = CALL_EXPR_ARG (exp, 4);
36036 op0 = expand_normal (arg0);
36037 op1 = expand_normal (arg1);
36038 op2 = expand_normal (arg2);
36039 op3 = expand_normal (arg3);
36040 op4 = expand_normal (arg4);
36041 mode1 = insn_data[icode].operand[1].mode;
36042 mode2 = insn_data[icode].operand[2].mode;
36043 mode3 = insn_data[icode].operand[3].mode;
36044 mode4 = insn_data[icode].operand[4].mode;
36045
36046 /* Force the memory operand to be addressed with a base register only.
36047 We don't want to do this for the memory operands of other builtin
36048 functions. */
36049 op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1));
36050
36051 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
36052 op0 = copy_to_mode_reg (Pmode, op0);
36053
36054 if (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
36055 {
36056 if (!insn_data[icode].operand[1].predicate (op1, mode1))
36057 op1 = copy_to_mode_reg (mode1, op1);
36058 }
36059 else
36060 {
36061 op1 = copy_to_reg (op1);
36062 op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0);
36063 }
36064
36065 if (!insn_data[icode].operand[2].predicate (op2, mode2))
36066 op2 = copy_to_mode_reg (mode2, op2);
36067
36068 if (!insn_data[icode].operand[3].predicate (op3, mode3))
36069 op3 = copy_to_mode_reg (mode3, op3);
36070
36071 if (!insn_data[icode].operand[4].predicate (op4, mode4))
36072 {
36073 error ("the last argument must be scale 1, 2, 4, 8");
36074 return const0_rtx;
36075 }
36076
36077 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
36078 if (! pat)
36079 return const0_rtx;
36080
36081 emit_insn (pat);
36082 return 0;
36083
36084 vec_prefetch_gen:
36085 arg0 = CALL_EXPR_ARG (exp, 0);
36086 arg1 = CALL_EXPR_ARG (exp, 1);
36087 arg2 = CALL_EXPR_ARG (exp, 2);
36088 arg3 = CALL_EXPR_ARG (exp, 3);
36089 arg4 = CALL_EXPR_ARG (exp, 4);
36090 op0 = expand_normal (arg0);
36091 op1 = expand_normal (arg1);
36092 op2 = expand_normal (arg2);
36093 op3 = expand_normal (arg3);
36094 op4 = expand_normal (arg4);
36095 mode0 = insn_data[icode].operand[0].mode;
36096 mode1 = insn_data[icode].operand[1].mode;
36097 mode3 = insn_data[icode].operand[3].mode;
36098 mode4 = insn_data[icode].operand[4].mode;
36099
36100 if (GET_MODE (op0) == mode0
36101 || (GET_MODE (op0) == VOIDmode && op0 != constm1_rtx))
36102 {
36103 if (!insn_data[icode].operand[0].predicate (op0, mode0))
36104 op0 = copy_to_mode_reg (mode0, op0);
36105 }
36106 else if (op0 != constm1_rtx)
36107 {
36108 op0 = copy_to_reg (op0);
36109 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
36110 }
36111
36112 if (!insn_data[icode].operand[1].predicate (op1, mode1))
36113 op1 = copy_to_mode_reg (mode1, op1);
36114
36115 /* Force the memory operand to be addressed with a base register only.
36116 We don't want to do this for the memory operands of other builtin
36117 functions. */
36118 op2 = force_reg (Pmode, convert_to_mode (Pmode, op2, 1));
36119
36120 if (!insn_data[icode].operand[2].predicate (op2, Pmode))
36121 op2 = copy_to_mode_reg (Pmode, op2);
36122
36123 if (!insn_data[icode].operand[3].predicate (op3, mode3))
36124 {
36125 error ("the forth argument must be scale 1, 2, 4, 8");
36126 return const0_rtx;
36127 }
36128
36129 if (!insn_data[icode].operand[4].predicate (op4, mode4))
36130 {
36131 error ("incorrect hint operand");
36132 return const0_rtx;
36133 }
36134
36135 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
36136 if (! pat)
36137 return const0_rtx;
36138
36139 emit_insn (pat);
36140
36141 return 0;
36142
36143 case IX86_BUILTIN_XABORT:
36144 icode = CODE_FOR_xabort;
36145 arg0 = CALL_EXPR_ARG (exp, 0);
36146 op0 = expand_normal (arg0);
36147 mode0 = insn_data[icode].operand[0].mode;
36148 if (!insn_data[icode].operand[0].predicate (op0, mode0))
36149 {
36150 error ("the xabort's argument must be an 8-bit immediate");
36151 return const0_rtx;
36152 }
36153 emit_insn (gen_xabort (op0));
36154 return 0;
36155
36156 default:
36157 break;
36158 }
36159
36160 for (i = 0, d = bdesc_special_args;
36161 i < ARRAY_SIZE (bdesc_special_args);
36162 i++, d++)
36163 if (d->code == fcode)
36164 return ix86_expand_special_args_builtin (d, exp, target);
36165
36166 for (i = 0, d = bdesc_args;
36167 i < ARRAY_SIZE (bdesc_args);
36168 i++, d++)
36169 if (d->code == fcode)
36170 switch (fcode)
36171 {
36172 case IX86_BUILTIN_FABSQ:
36173 case IX86_BUILTIN_COPYSIGNQ:
36174 if (!TARGET_SSE)
36175 /* Emit a normal call if SSE isn't available. */
36176 return expand_call (exp, target, ignore);
36177 default:
36178 return ix86_expand_args_builtin (d, exp, target);
36179 }
36180
36181 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
36182 if (d->code == fcode)
36183 return ix86_expand_sse_comi (d, exp, target);
36184
36185 for (i = 0, d = bdesc_round_args; i < ARRAY_SIZE (bdesc_round_args); i++, d++)
36186 if (d->code == fcode)
36187 return ix86_expand_round_builtin (d, exp, target);
36188
36189 for (i = 0, d = bdesc_pcmpestr;
36190 i < ARRAY_SIZE (bdesc_pcmpestr);
36191 i++, d++)
36192 if (d->code == fcode)
36193 return ix86_expand_sse_pcmpestr (d, exp, target);
36194
36195 for (i = 0, d = bdesc_pcmpistr;
36196 i < ARRAY_SIZE (bdesc_pcmpistr);
36197 i++, d++)
36198 if (d->code == fcode)
36199 return ix86_expand_sse_pcmpistr (d, exp, target);
36200
36201 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
36202 if (d->code == fcode)
36203 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
36204 (enum ix86_builtin_func_type)
36205 d->flag, d->comparison);
36206
36207 gcc_unreachable ();
36208 }
36209
36210 /* This returns the target-specific builtin with code CODE if
36211 current_function_decl has visibility on this builtin, which is checked
36212 using isa flags. Returns NULL_TREE otherwise. */
36213
36214 static tree ix86_get_builtin (enum ix86_builtins code)
36215 {
36216 struct cl_target_option *opts;
36217 tree target_tree = NULL_TREE;
36218
36219 /* Determine the isa flags of current_function_decl. */
36220
36221 if (current_function_decl)
36222 target_tree = DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl);
36223
36224 if (target_tree == NULL)
36225 target_tree = target_option_default_node;
36226
36227 opts = TREE_TARGET_OPTION (target_tree);
36228
36229 if (ix86_builtins_isa[(int) code].isa & opts->x_ix86_isa_flags)
36230 return ix86_builtin_decl (code, true);
36231 else
36232 return NULL_TREE;
36233 }
36234
36235 /* Returns a function decl for a vectorized version of the builtin function
36236 FNDECL, with result vector type TYPE_OUT and argument vector type TYPE_IN,
36237 or NULL_TREE if it is not available. */
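/* For example, BUILT_IN_SQRT on a two-element DFmode vector maps to
   IX86_BUILTIN_SQRTPD below, and the four-element variant maps to
   IX86_BUILTIN_SQRTPD256. */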
36238
36239 static tree
36240 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
36241 tree type_in)
36242 {
36243 enum machine_mode in_mode, out_mode;
36244 int in_n, out_n;
36245 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
36246
36247 if (TREE_CODE (type_out) != VECTOR_TYPE
36248 || TREE_CODE (type_in) != VECTOR_TYPE
36249 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
36250 return NULL_TREE;
36251
36252 out_mode = TYPE_MODE (TREE_TYPE (type_out));
36253 out_n = TYPE_VECTOR_SUBPARTS (type_out);
36254 in_mode = TYPE_MODE (TREE_TYPE (type_in));
36255 in_n = TYPE_VECTOR_SUBPARTS (type_in);
36256
36257 switch (fn)
36258 {
36259 case BUILT_IN_SQRT:
36260 if (out_mode == DFmode && in_mode == DFmode)
36261 {
36262 if (out_n == 2 && in_n == 2)
36263 return ix86_get_builtin (IX86_BUILTIN_SQRTPD);
36264 else if (out_n == 4 && in_n == 4)
36265 return ix86_get_builtin (IX86_BUILTIN_SQRTPD256);
36266 else if (out_n == 8 && in_n == 8)
36267 return ix86_get_builtin (IX86_BUILTIN_SQRTPD512);
36268 }
36269 break;
36270
36271 case BUILT_IN_EXP2F:
36272 if (out_mode == SFmode && in_mode == SFmode)
36273 {
36274 if (out_n == 16 && in_n == 16)
36275 return ix86_get_builtin (IX86_BUILTIN_EXP2PS);
36276 }
36277 break;
36278
36279 case BUILT_IN_SQRTF:
36280 if (out_mode == SFmode && in_mode == SFmode)
36281 {
36282 if (out_n == 4 && in_n == 4)
36283 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR);
36284 else if (out_n == 8 && in_n == 8)
36285 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR256);
36286 else if (out_n == 16 && in_n == 16)
36287 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR512);
36288 }
36289 break;
36290
36291 case BUILT_IN_IFLOOR:
36292 case BUILT_IN_LFLOOR:
36293 case BUILT_IN_LLFLOOR:
36294 /* The round insn does not trap on denormals. */
36295 if (flag_trapping_math || !TARGET_ROUND)
36296 break;
36297
36298 if (out_mode == SImode && in_mode == DFmode)
36299 {
36300 if (out_n == 4 && in_n == 2)
36301 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX);
36302 else if (out_n == 8 && in_n == 4)
36303 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256);
36304 else if (out_n == 16 && in_n == 8)
36305 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512);
36306 }
36307 break;
36308
36309 case BUILT_IN_IFLOORF:
36310 case BUILT_IN_LFLOORF:
36311 case BUILT_IN_LLFLOORF:
36312 /* The round insn does not trap on denormals. */
36313 if (flag_trapping_math || !TARGET_ROUND)
36314 break;
36315
36316 if (out_mode == SImode && in_mode == SFmode)
36317 {
36318 if (out_n == 4 && in_n == 4)
36319 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX);
36320 else if (out_n == 8 && in_n == 8)
36321 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX256);
36322 }
36323 break;
36324
36325 case BUILT_IN_ICEIL:
36326 case BUILT_IN_LCEIL:
36327 case BUILT_IN_LLCEIL:
36328 /* The round insn does not trap on denormals. */
36329 if (flag_trapping_math || !TARGET_ROUND)
36330 break;
36331
36332 if (out_mode == SImode && in_mode == DFmode)
36333 {
36334 if (out_n == 4 && in_n == 2)
36335 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX);
36336 else if (out_n == 8 && in_n == 4)
36337 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256);
36338 else if (out_n == 16 && in_n == 8)
36339 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512);
36340 }
36341 break;
36342
36343 case BUILT_IN_ICEILF:
36344 case BUILT_IN_LCEILF:
36345 case BUILT_IN_LLCEILF:
36346 /* The round insn does not trap on denormals. */
36347 if (flag_trapping_math || !TARGET_ROUND)
36348 break;
36349
36350 if (out_mode == SImode && in_mode == SFmode)
36351 {
36352 if (out_n == 4 && in_n == 4)
36353 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX);
36354 else if (out_n == 8 && in_n == 8)
36355 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX256);
36356 }
36357 break;
36358
36359 case BUILT_IN_IRINT:
36360 case BUILT_IN_LRINT:
36361 case BUILT_IN_LLRINT:
36362 if (out_mode == SImode && in_mode == DFmode)
36363 {
36364 if (out_n == 4 && in_n == 2)
36365 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX);
36366 else if (out_n == 8 && in_n == 4)
36367 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX256);
36368 }
36369 break;
36370
36371 case BUILT_IN_IRINTF:
36372 case BUILT_IN_LRINTF:
36373 case BUILT_IN_LLRINTF:
36374 if (out_mode == SImode && in_mode == SFmode)
36375 {
36376 if (out_n == 4 && in_n == 4)
36377 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ);
36378 else if (out_n == 8 && in_n == 8)
36379 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ256);
36380 }
36381 break;
36382
36383 case BUILT_IN_IROUND:
36384 case BUILT_IN_LROUND:
36385 case BUILT_IN_LLROUND:
36386 /* The round insn does not trap on denormals. */
36387 if (flag_trapping_math || !TARGET_ROUND)
36388 break;
36389
36390 if (out_mode == SImode && in_mode == DFmode)
36391 {
36392 if (out_n == 4 && in_n == 2)
36393 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX);
36394 else if (out_n == 8 && in_n == 4)
36395 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256);
36396 else if (out_n == 16 && in_n == 8)
36397 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512);
36398 }
36399 break;
36400
36401 case BUILT_IN_IROUNDF:
36402 case BUILT_IN_LROUNDF:
36403 case BUILT_IN_LLROUNDF:
36404 /* The round insn does not trap on denormals. */
36405 if (flag_trapping_math || !TARGET_ROUND)
36406 break;
36407
36408 if (out_mode == SImode && in_mode == SFmode)
36409 {
36410 if (out_n == 4 && in_n == 4)
36411 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX);
36412 else if (out_n == 8 && in_n == 8)
36413 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX256);
36414 }
36415 break;
36416
36417 case BUILT_IN_COPYSIGN:
36418 if (out_mode == DFmode && in_mode == DFmode)
36419 {
36420 if (out_n == 2 && in_n == 2)
36421 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD);
36422 else if (out_n == 4 && in_n == 4)
36423 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD256);
36424 else if (out_n == 8 && in_n == 8)
36425 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD512);
36426 }
36427 break;
36428
36429 case BUILT_IN_COPYSIGNF:
36430 if (out_mode == SFmode && in_mode == SFmode)
36431 {
36432 if (out_n == 4 && in_n == 4)
36433 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS);
36434 else if (out_n == 8 && in_n == 8)
36435 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS256);
36436 else if (out_n == 16 && in_n == 16)
36437 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS512);
36438 }
36439 break;
36440
36441 case BUILT_IN_FLOOR:
36442 /* The round insn does not trap on denormals. */
36443 if (flag_trapping_math || !TARGET_ROUND)
36444 break;
36445
36446 if (out_mode == DFmode && in_mode == DFmode)
36447 {
36448 if (out_n == 2 && in_n == 2)
36449 return ix86_get_builtin (IX86_BUILTIN_FLOORPD);
36450 else if (out_n == 4 && in_n == 4)
36451 return ix86_get_builtin (IX86_BUILTIN_FLOORPD256);
36452 }
36453 break;
36454
36455 case BUILT_IN_FLOORF:
36456 /* The round insn does not trap on denormals. */
36457 if (flag_trapping_math || !TARGET_ROUND)
36458 break;
36459
36460 if (out_mode == SFmode && in_mode == SFmode)
36461 {
36462 if (out_n == 4 && in_n == 4)
36463 return ix86_get_builtin (IX86_BUILTIN_FLOORPS);
36464 else if (out_n == 8 && in_n == 8)
36465 return ix86_get_builtin (IX86_BUILTIN_FLOORPS256);
36466 }
36467 break;
36468
36469 case BUILT_IN_CEIL:
36470 /* The round insn does not trap on denormals. */
36471 if (flag_trapping_math || !TARGET_ROUND)
36472 break;
36473
36474 if (out_mode == DFmode && in_mode == DFmode)
36475 {
36476 if (out_n == 2 && in_n == 2)
36477 return ix86_get_builtin (IX86_BUILTIN_CEILPD);
36478 else if (out_n == 4 && in_n == 4)
36479 return ix86_get_builtin (IX86_BUILTIN_CEILPD256);
36480 }
36481 break;
36482
36483 case BUILT_IN_CEILF:
36484 /* The round insn does not trap on denormals. */
36485 if (flag_trapping_math || !TARGET_ROUND)
36486 break;
36487
36488 if (out_mode == SFmode && in_mode == SFmode)
36489 {
36490 if (out_n == 4 && in_n == 4)
36491 return ix86_get_builtin (IX86_BUILTIN_CEILPS);
36492 else if (out_n == 8 && in_n == 8)
36493 return ix86_get_builtin (IX86_BUILTIN_CEILPS256);
36494 }
36495 break;
36496
36497 case BUILT_IN_TRUNC:
36498 /* The round insn does not trap on denormals. */
36499 if (flag_trapping_math || !TARGET_ROUND)
36500 break;
36501
36502 if (out_mode == DFmode && in_mode == DFmode)
36503 {
36504 if (out_n == 2 && in_n == 2)
36505 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD);
36506 else if (out_n == 4 && in_n == 4)
36507 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD256);
36508 }
36509 break;
36510
36511 case BUILT_IN_TRUNCF:
36512 /* The round insn does not trap on denormals. */
36513 if (flag_trapping_math || !TARGET_ROUND)
36514 break;
36515
36516 if (out_mode == SFmode && in_mode == SFmode)
36517 {
36518 if (out_n == 4 && in_n == 4)
36519 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS);
36520 else if (out_n == 8 && in_n == 8)
36521 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS256);
36522 }
36523 break;
36524
36525 case BUILT_IN_RINT:
36526 /* The round insn does not trap on denormals. */
36527 if (flag_trapping_math || !TARGET_ROUND)
36528 break;
36529
36530 if (out_mode == DFmode && in_mode == DFmode)
36531 {
36532 if (out_n == 2 && in_n == 2)
36533 return ix86_get_builtin (IX86_BUILTIN_RINTPD);
36534 else if (out_n == 4 && in_n == 4)
36535 return ix86_get_builtin (IX86_BUILTIN_RINTPD256);
36536 }
36537 break;
36538
36539 case BUILT_IN_RINTF:
36540 /* The round insn does not trap on denormals. */
36541 if (flag_trapping_math || !TARGET_ROUND)
36542 break;
36543
36544 if (out_mode == SFmode && in_mode == SFmode)
36545 {
36546 if (out_n == 4 && in_n == 4)
36547 return ix86_get_builtin (IX86_BUILTIN_RINTPS);
36548 else if (out_n == 8 && in_n == 8)
36549 return ix86_get_builtin (IX86_BUILTIN_RINTPS256);
36550 }
36551 break;
36552
36553 case BUILT_IN_ROUND:
36554 /* The round insn does not trap on denormals. */
36555 if (flag_trapping_math || !TARGET_ROUND)
36556 break;
36557
36558 if (out_mode == DFmode && in_mode == DFmode)
36559 {
36560 if (out_n == 2 && in_n == 2)
36561 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ);
36562 else if (out_n == 4 && in_n == 4)
36563 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ256);
36564 }
36565 break;
36566
36567 case BUILT_IN_ROUNDF:
36568 /* The round insn does not trap on denormals. */
36569 if (flag_trapping_math || !TARGET_ROUND)
36570 break;
36571
36572 if (out_mode == SFmode && in_mode == SFmode)
36573 {
36574 if (out_n == 4 && in_n == 4)
36575 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ);
36576 else if (out_n == 8 && in_n == 8)
36577 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ256);
36578 }
36579 break;
36580
36581 case BUILT_IN_FMA:
36582 if (out_mode == DFmode && in_mode == DFmode)
36583 {
36584 if (out_n == 2 && in_n == 2)
36585 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD);
36586 if (out_n == 4 && in_n == 4)
36587 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD256);
36588 }
36589 break;
36590
36591 case BUILT_IN_FMAF:
36592 if (out_mode == SFmode && in_mode == SFmode)
36593 {
36594 if (out_n == 4 && in_n == 4)
36595 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS);
36596 if (out_n == 8 && in_n == 8)
36597 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS256);
36598 }
36599 break;
36600
36601 default:
36602 break;
36603 }
36604
36605 /* Dispatch to a handler for a vectorization library. */
36606 if (ix86_veclib_handler)
36607 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
36608 type_in);
36609
36610 return NULL_TREE;
36611 }
36612
36613 /* Handler for an SVML-style interface to
36614 a library with vectorized intrinsics. */
36615
36616 static tree
36617 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
36618 {
36619 char name[20];
36620 tree fntype, new_fndecl, args;
36621 unsigned arity;
36622 const char *bname;
36623 enum machine_mode el_mode, in_mode;
36624 int n, in_n;
36625
36626 /* The SVML is suitable for unsafe math only. */
36627 if (!flag_unsafe_math_optimizations)
36628 return NULL_TREE;
36629
36630 el_mode = TYPE_MODE (TREE_TYPE (type_out));
36631 n = TYPE_VECTOR_SUBPARTS (type_out);
36632 in_mode = TYPE_MODE (TREE_TYPE (type_in));
36633 in_n = TYPE_VECTOR_SUBPARTS (type_in);
36634 if (el_mode != in_mode
36635 || n != in_n)
36636 return NULL_TREE;
36637
36638 switch (fn)
36639 {
36640 case BUILT_IN_EXP:
36641 case BUILT_IN_LOG:
36642 case BUILT_IN_LOG10:
36643 case BUILT_IN_POW:
36644 case BUILT_IN_TANH:
36645 case BUILT_IN_TAN:
36646 case BUILT_IN_ATAN:
36647 case BUILT_IN_ATAN2:
36648 case BUILT_IN_ATANH:
36649 case BUILT_IN_CBRT:
36650 case BUILT_IN_SINH:
36651 case BUILT_IN_SIN:
36652 case BUILT_IN_ASINH:
36653 case BUILT_IN_ASIN:
36654 case BUILT_IN_COSH:
36655 case BUILT_IN_COS:
36656 case BUILT_IN_ACOSH:
36657 case BUILT_IN_ACOS:
36658 if (el_mode != DFmode || n != 2)
36659 return NULL_TREE;
36660 break;
36661
36662 case BUILT_IN_EXPF:
36663 case BUILT_IN_LOGF:
36664 case BUILT_IN_LOG10F:
36665 case BUILT_IN_POWF:
36666 case BUILT_IN_TANHF:
36667 case BUILT_IN_TANF:
36668 case BUILT_IN_ATANF:
36669 case BUILT_IN_ATAN2F:
36670 case BUILT_IN_ATANHF:
36671 case BUILT_IN_CBRTF:
36672 case BUILT_IN_SINHF:
36673 case BUILT_IN_SINF:
36674 case BUILT_IN_ASINHF:
36675 case BUILT_IN_ASINF:
36676 case BUILT_IN_COSHF:
36677 case BUILT_IN_COSF:
36678 case BUILT_IN_ACOSHF:
36679 case BUILT_IN_ACOSF:
36680 if (el_mode != SFmode || n != 4)
36681 return NULL_TREE;
36682 break;
36683
36684 default:
36685 return NULL_TREE;
36686 }
36687
36688 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
36689
36690 if (fn == BUILT_IN_LOGF)
36691 strcpy (name, "vmlsLn4");
36692 else if (fn == BUILT_IN_LOG)
36693 strcpy (name, "vmldLn2");
36694 else if (n == 4)
36695 {
36696 sprintf (name, "vmls%s", bname+10);
36697 name[strlen (name)-1] = '4';
36698 }
36699 else
36700 sprintf (name, "vmld%s2", bname+10);
36701
36702 /* Convert to uppercase. */
36703 name[4] &= ~0x20;
36704
36705 arity = 0;
36706 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
36707 args;
36708 args = TREE_CHAIN (args))
36709 arity++;
36710
36711 if (arity == 1)
36712 fntype = build_function_type_list (type_out, type_in, NULL);
36713 else
36714 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
36715
36716 /* Build a function declaration for the vectorized function. */
36717 new_fndecl = build_decl (BUILTINS_LOCATION,
36718 FUNCTION_DECL, get_identifier (name), fntype);
36719 TREE_PUBLIC (new_fndecl) = 1;
36720 DECL_EXTERNAL (new_fndecl) = 1;
36721 DECL_IS_NOVOPS (new_fndecl) = 1;
36722 TREE_READONLY (new_fndecl) = 1;
36723
36724 return new_fndecl;
36725 }
36726
36727 /* Handler for an ACML-style interface to
36728 a library with vectorized intrinsics. */
36729
36730 static tree
36731 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
36732 {
36733 char name[20] = "__vr.._";
36734 tree fntype, new_fndecl, args;
36735 unsigned arity;
36736 const char *bname;
36737 enum machine_mode el_mode, in_mode;
36738 int n, in_n;
36739
36740 /* The ACML is 64-bit only and suitable for unsafe math only, as
36741 it does not correctly support parts of IEEE with the required
36742 precision, such as denormals. */
36743 if (!TARGET_64BIT
36744 || !flag_unsafe_math_optimizations)
36745 return NULL_TREE;
36746
36747 el_mode = TYPE_MODE (TREE_TYPE (type_out));
36748 n = TYPE_VECTOR_SUBPARTS (type_out);
36749 in_mode = TYPE_MODE (TREE_TYPE (type_in));
36750 in_n = TYPE_VECTOR_SUBPARTS (type_in);
36751 if (el_mode != in_mode
36752 || n != in_n)
36753 return NULL_TREE;
36754
36755 switch (fn)
36756 {
36757 case BUILT_IN_SIN:
36758 case BUILT_IN_COS:
36759 case BUILT_IN_EXP:
36760 case BUILT_IN_LOG:
36761 case BUILT_IN_LOG2:
36762 case BUILT_IN_LOG10:
36763 name[4] = 'd';
36764 name[5] = '2';
36765 if (el_mode != DFmode
36766 || n != 2)
36767 return NULL_TREE;
36768 break;
36769
36770 case BUILT_IN_SINF:
36771 case BUILT_IN_COSF:
36772 case BUILT_IN_EXPF:
36773 case BUILT_IN_POWF:
36774 case BUILT_IN_LOGF:
36775 case BUILT_IN_LOG2F:
36776 case BUILT_IN_LOG10F:
36777 name[4] = 's';
36778 name[5] = '4';
36779 if (el_mode != SFmode
36780 || n != 4)
36781 return NULL_TREE;
36782 break;
36783
36784 default:
36785 return NULL_TREE;
36786 }
36787
36788 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
36789 sprintf (name + 7, "%s", bname+10);
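  /* For example, BUILT_IN_SIN yields the name "__vrd2_sin" and
     BUILT_IN_SINF yields "__vrs4_sinf". */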
36790
36791 arity = 0;
36792 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
36793 args;
36794 args = TREE_CHAIN (args))
36795 arity++;
36796
36797 if (arity == 1)
36798 fntype = build_function_type_list (type_out, type_in, NULL);
36799 else
36800 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
36801
36802 /* Build a function declaration for the vectorized function. */
36803 new_fndecl = build_decl (BUILTINS_LOCATION,
36804 FUNCTION_DECL, get_identifier (name), fntype);
36805 TREE_PUBLIC (new_fndecl) = 1;
36806 DECL_EXTERNAL (new_fndecl) = 1;
36807 DECL_IS_NOVOPS (new_fndecl) = 1;
36808 TREE_READONLY (new_fndecl) = 1;
36809
36810 return new_fndecl;
36811 }
36812
36813 /* Returns a decl of a function that implements a gather load with
36814 memory vector type MEM_VECTYPE, index type INDEX_TYPE and scale SCALE.
36815 Returns NULL_TREE if it is not available. */
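/* For example, gathering V4SFmode elements with an SImode index maps to
   IX86_BUILTIN_GATHERSIV4SF, while a DImode index maps to
   IX86_BUILTIN_GATHERDIV4SF. */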
36816
36817 static tree
36818 ix86_vectorize_builtin_gather (const_tree mem_vectype,
36819 const_tree index_type, int scale)
36820 {
36821 bool si;
36822 enum ix86_builtins code;
36823
36824 if (! TARGET_AVX2)
36825 return NULL_TREE;
36826
36827 if ((TREE_CODE (index_type) != INTEGER_TYPE
36828 && !POINTER_TYPE_P (index_type))
36829 || (TYPE_MODE (index_type) != SImode
36830 && TYPE_MODE (index_type) != DImode))
36831 return NULL_TREE;
36832
36833 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
36834 return NULL_TREE;
36835
36836 /* v*gather* insn sign extends index to pointer mode. */
36837 if (TYPE_PRECISION (index_type) < POINTER_SIZE
36838 && TYPE_UNSIGNED (index_type))
36839 return NULL_TREE;
36840
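  /* The gather instructions only support scales of 1, 2, 4 and 8. */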
36841 if (scale <= 0
36842 || scale > 8
36843 || (scale & (scale - 1)) != 0)
36844 return NULL_TREE;
36845
36846 si = TYPE_MODE (index_type) == SImode;
36847 switch (TYPE_MODE (mem_vectype))
36848 {
36849 case V2DFmode:
36850 code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
36851 break;
36852 case V4DFmode:
36853 code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
36854 break;
36855 case V2DImode:
36856 code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
36857 break;
36858 case V4DImode:
36859 code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
36860 break;
36861 case V4SFmode:
36862 code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
36863 break;
36864 case V8SFmode:
36865 code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
36866 break;
36867 case V4SImode:
36868 code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
36869 break;
36870 case V8SImode:
36871 code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
36872 break;
36873 case V8DFmode:
36874 if (TARGET_AVX512F)
36875 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DF : IX86_BUILTIN_GATHER3DIV8DF;
36876 else
36877 return NULL_TREE;
36878 break;
36879 case V8DImode:
36880 if (TARGET_AVX512F)
36881 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DI : IX86_BUILTIN_GATHER3DIV8DI;
36882 else
36883 return NULL_TREE;
36884 break;
36885 case V16SFmode:
36886 if (TARGET_AVX512F)
36887 code = si ? IX86_BUILTIN_GATHER3SIV16SF : IX86_BUILTIN_GATHER3ALTDIV16SF;
36888 else
36889 return NULL_TREE;
36890 break;
36891 case V16SImode:
36892 if (TARGET_AVX512F)
36893 code = si ? IX86_BUILTIN_GATHER3SIV16SI : IX86_BUILTIN_GATHER3ALTDIV16SI;
36894 else
36895 return NULL_TREE;
36896 break;
36897 default:
36898 return NULL_TREE;
36899 }
36900
36901 return ix86_get_builtin (code);
36902 }
36903
36904 /* Returns a decl for a target-specific builtin that implements the
36905 reciprocal of the function FN, or NULL_TREE if not available. */
36906
36907 static tree
36908 ix86_builtin_reciprocal (unsigned int fn, bool md_fn, bool)
36909 {
36910 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
36911 && flag_finite_math_only && !flag_trapping_math
36912 && flag_unsafe_math_optimizations))
36913 return NULL_TREE;
36914
36915 if (md_fn)
36916 /* Machine dependent builtins. */
36917 switch (fn)
36918 {
36919 /* Vectorized version of sqrt to rsqrt conversion. */
36920 case IX86_BUILTIN_SQRTPS_NR:
36921 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR);
36922
36923 case IX86_BUILTIN_SQRTPS_NR256:
36924 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR256);
36925
36926 default:
36927 return NULL_TREE;
36928 }
36929 else
36930 /* Normal builtins. */
36931 switch (fn)
36932 {
36933 /* Sqrt to rsqrt conversion. */
36934 case BUILT_IN_SQRTF:
36935 return ix86_get_builtin (IX86_BUILTIN_RSQRTF);
36936
36937 default:
36938 return NULL_TREE;
36939 }
36940 }
36941 \f
36942 /* Helper for avx_vpermilps256_operand et al. This is also used by
36943 the expansion functions to turn the parallel back into a mask.
36944 The return value is 0 for no match and the imm8+1 for a match. */
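/* For example, for V4SFmode the parallel (2 3 0 1) encodes the imm8 0x4e,
   so this function returns 0x4f. */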
36945
36946 int
36947 avx_vpermilp_parallel (rtx par, enum machine_mode mode)
36948 {
36949 unsigned i, nelt = GET_MODE_NUNITS (mode);
36950 unsigned mask = 0;
36951 unsigned char ipar[16] = {}; /* Silence -Wuninitialized warning. */
36952
36953 if (XVECLEN (par, 0) != (int) nelt)
36954 return 0;
36955
36956 /* Validate that all of the elements are constants, and not totally
36957 out of range. Copy the data into an integral array to make the
36958 subsequent checks easier. */
36959 for (i = 0; i < nelt; ++i)
36960 {
36961 rtx er = XVECEXP (par, 0, i);
36962 unsigned HOST_WIDE_INT ei;
36963
36964 if (!CONST_INT_P (er))
36965 return 0;
36966 ei = INTVAL (er);
36967 if (ei >= nelt)
36968 return 0;
36969 ipar[i] = ei;
36970 }
36971
36972 switch (mode)
36973 {
36974 case V8DFmode:
36975 /* In the 512-bit DFmode case, we can only move elements within
36976 a 128-bit lane. First fill the second part of the mask,
36977 then fallthru. */
36978 for (i = 4; i < 6; ++i)
36979 {
36980 if (ipar[i] < 4 || ipar[i] >= 6)
36981 return 0;
36982 mask |= (ipar[i] - 4) << i;
36983 }
36984 for (i = 6; i < 8; ++i)
36985 {
36986 if (ipar[i] < 6)
36987 return 0;
36988 mask |= (ipar[i] - 6) << i;
36989 }
36990 /* FALLTHRU */
36991
36992 case V4DFmode:
36993 /* In the 256-bit DFmode case, we can only move elements within
36994 a 128-bit lane. */
36995 for (i = 0; i < 2; ++i)
36996 {
36997 if (ipar[i] >= 2)
36998 return 0;
36999 mask |= ipar[i] << i;
37000 }
37001 for (i = 2; i < 4; ++i)
37002 {
37003 if (ipar[i] < 2)
37004 return 0;
37005 mask |= (ipar[i] - 2) << i;
37006 }
37007 break;
37008
37009 case V16SFmode:
37010 /* In the 512-bit SFmode case, the permutation in the upper 256 bits
37011 must mirror the permutation in the lower 256 bits. */
37012 for (i = 0; i < 8; ++i)
37013 if (ipar[i] + 8 != ipar[i + 8])
37014 return 0;
37015 /* FALLTHRU */
37016
37017 case V8SFmode:
37018 /* In the 256-bit SFmode case, we have full freedom of
37019 movement within the low 128-bit lane, but the high 128-bit
37020 lane must mirror the exact same pattern. */
37021 for (i = 0; i < 4; ++i)
37022 if (ipar[i] + 4 != ipar[i + 4])
37023 return 0;
37024 nelt = 4;
37025 /* FALLTHRU */
37026
37027 case V2DFmode:
37028 case V4SFmode:
37029 /* In the 128-bit case, we have full freedom in the placement of
37030 the elements from the source operand. */
37031 for (i = 0; i < nelt; ++i)
37032 mask |= ipar[i] << (i * (nelt / 2));
37033 break;
37034
37035 default:
37036 gcc_unreachable ();
37037 }
37038
37039 /* Make sure success has a non-zero value by adding one. */
37040 return mask + 1;
37041 }
37042
37043 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
37044 the expansion functions to turn the parallel back into a mask.
37045 The return value is 0 for no match and the imm8+1 for a match. */
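/* For example, for V8SFmode a parallel selecting elements 4..7 followed by
   elements 8..11 encodes the imm8 0x21 (the high lane of the first operand
   followed by the low lane of the second), so this function returns 0x22. */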
37046
37047 int
37048 avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
37049 {
37050 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
37051 unsigned mask = 0;
37052 unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */
37053
37054 if (XVECLEN (par, 0) != (int) nelt)
37055 return 0;
37056
37057 /* Validate that all of the elements are constants, and not totally
37058 out of range. Copy the data into an integral array to make the
37059 subsequent checks easier. */
37060 for (i = 0; i < nelt; ++i)
37061 {
37062 rtx er = XVECEXP (par, 0, i);
37063 unsigned HOST_WIDE_INT ei;
37064
37065 if (!CONST_INT_P (er))
37066 return 0;
37067 ei = INTVAL (er);
37068 if (ei >= 2 * nelt)
37069 return 0;
37070 ipar[i] = ei;
37071 }
37072
37073 /* Validate that each half of the permute selects consecutive elements. */
37074 for (i = 0; i < nelt2 - 1; ++i)
37075 if (ipar[i] + 1 != ipar[i + 1])
37076 return 0;
37077 for (i = nelt2; i < nelt - 1; ++i)
37078 if (ipar[i] + 1 != ipar[i + 1])
37079 return 0;
37080
37081 /* Reconstruct the mask. */
37082 for (i = 0; i < 2; ++i)
37083 {
37084 unsigned e = ipar[i * nelt2];
37085 if (e % nelt2)
37086 return 0;
37087 e /= nelt2;
37088 mask |= e << (i * 4);
37089 }
37090
37091 /* Make sure success has a non-zero value by adding one. */
37092 return mask + 1;
37093 }
37094 \f
37095 /* Return a register priority for hard reg REGNO. */
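/* Higher values are preferred by the allocator: eax gets 4, the REX integer
   and SSE registers 2, ebp 1, r12/r13 0, and every other register 3. */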
37096 static int
37097 ix86_register_priority (int hard_regno)
37098 {
37099 /* ebp and r13 as the base always want a displacement, and r12 as the
37100 base always wants an index. So discourage their usage in an
37101 address. */
37102 if (hard_regno == R12_REG || hard_regno == R13_REG)
37103 return 0;
37104 if (hard_regno == BP_REG)
37105 return 1;
37106 /* New x86-64 int registers result in bigger code size. Discourage
37107 them. */
37108 if (FIRST_REX_INT_REG <= hard_regno && hard_regno <= LAST_REX_INT_REG)
37109 return 2;
37110 /* New x86-64 SSE registers result in bigger code size. Discourage
37111 them. */
37112 if (FIRST_REX_SSE_REG <= hard_regno && hard_regno <= LAST_REX_SSE_REG)
37113 return 2;
37114 /* Usage of AX register results in smaller code. Prefer it. */
37115 if (hard_regno == 0)
37116 return 4;
37117 return 3;
37118 }
37119
37120 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
37121
37122 Put float CONST_DOUBLE in the constant pool instead of fp regs.
37123 QImode must go into class Q_REGS.
37124 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
37125 movdf to do mem-to-mem moves through integer regs. */
37126
37127 static reg_class_t
37128 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
37129 {
37130 enum machine_mode mode = GET_MODE (x);
37131
37132 /* We're only allowed to return a subclass of CLASS. Many of the
37133 following checks fail for NO_REGS, so eliminate that early. */
37134 if (regclass == NO_REGS)
37135 return NO_REGS;
37136
37137 /* All classes can load zeros. */
37138 if (x == CONST0_RTX (mode))
37139 return regclass;
37140
37141 /* Force constants into memory if we are loading a (nonzero) constant into
37142 an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
37143 instructions to load from a constant. */
37144 if (CONSTANT_P (x)
37145 && (MAYBE_MMX_CLASS_P (regclass)
37146 || MAYBE_SSE_CLASS_P (regclass)
37147 || MAYBE_MASK_CLASS_P (regclass)))
37148 return NO_REGS;
37149
37150 /* Prefer SSE regs only, if we can use them for math. */
37151 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
37152 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
37153
37154 /* Floating-point constants need more complex checks. */
37155 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
37156 {
37157 /* General regs can load everything. */
37158 if (reg_class_subset_p (regclass, GENERAL_REGS))
37159 return regclass;
37160
37161 /* Floats can load 0 and 1 plus some others. Note that we eliminated
37162 zero above. We only want to wind up preferring 80387 registers if
37163 we plan on doing computation with them. */
37164 if (TARGET_80387
37165 && standard_80387_constant_p (x) > 0)
37166 {
37167 /* Limit class to non-sse. */
37168 if (regclass == FLOAT_SSE_REGS)
37169 return FLOAT_REGS;
37170 if (regclass == FP_TOP_SSE_REGS)
37171 return FP_TOP_REG;
37172 if (regclass == FP_SECOND_SSE_REGS)
37173 return FP_SECOND_REG;
37174 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
37175 return regclass;
37176 }
37177
37178 return NO_REGS;
37179 }
37180
37181 /* Generally when we see PLUS here, it's the function invariant
37182 (plus soft-fp const_int), which can only be computed into general
37183 regs. */
37184 if (GET_CODE (x) == PLUS)
37185 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
37186
37187 /* QImode constants are easy to load, but non-constant QImode data
37188 must go into Q_REGS. */
37189 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
37190 {
37191 if (reg_class_subset_p (regclass, Q_REGS))
37192 return regclass;
37193 if (reg_class_subset_p (Q_REGS, regclass))
37194 return Q_REGS;
37195 return NO_REGS;
37196 }
37197
37198 return regclass;
37199 }
37200
37201 /* Discourage putting floating-point values in SSE registers unless
37202 SSE math is being used, and likewise for the 387 registers. */
37203 static reg_class_t
37204 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
37205 {
37206 enum machine_mode mode = GET_MODE (x);
37207
37208 /* Restrict the output reload class to the register bank that we are doing
37209 math on. If we would like not to return a subset of CLASS, reject this
37210 alternative: if reload cannot do this, it will still use its choice. */
37211 mode = GET_MODE (x);
37212 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
37213 return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
37214
37215 if (X87_FLOAT_MODE_P (mode))
37216 {
37217 if (regclass == FP_TOP_SSE_REGS)
37218 return FP_TOP_REG;
37219 else if (regclass == FP_SECOND_SSE_REGS)
37220 return FP_SECOND_REG;
37221 else
37222 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
37223 }
37224
37225 return regclass;
37226 }
37227
37228 static reg_class_t
37229 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
37230 enum machine_mode mode, secondary_reload_info *sri)
37231 {
37232 /* Double-word spills from general registers to non-offsettable memory
37233 references (zero-extended addresses) require special handling. */
37234 if (TARGET_64BIT
37235 && MEM_P (x)
37236 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
37237 && INTEGER_CLASS_P (rclass)
37238 && !offsettable_memref_p (x))
37239 {
37240 sri->icode = (in_p
37241 ? CODE_FOR_reload_noff_load
37242 : CODE_FOR_reload_noff_store);
37243 /* Add the cost of moving address to a temporary. */
37244 sri->extra_cost = 1;
37245
37246 return NO_REGS;
37247 }
37248
37249 /* QImode spills from non-QI registers require an
37250 intermediate register on 32-bit targets. */
37251 if (mode == QImode
37252 && (MAYBE_MASK_CLASS_P (rclass)
37253 || (!TARGET_64BIT && !in_p
37254 && INTEGER_CLASS_P (rclass)
37255 && MAYBE_NON_Q_CLASS_P (rclass))))
37256 {
37257 int regno;
37258
37259 if (REG_P (x))
37260 regno = REGNO (x);
37261 else
37262 regno = -1;
37263
37264 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
37265 regno = true_regnum (x);
37266
37267 /* Return Q_REGS if the operand is in memory. */
37268 if (regno == -1)
37269 return Q_REGS;
37270 }
37271
37272 /* This condition handles the corner case where an expression involving
37273 pointers gets vectorized. We're trying to use the address of a
37274 stack slot as a vector initializer.
37275
37276 (set (reg:V2DI 74 [ vect_cst_.2 ])
37277 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
37278
37279 Eventually frame gets turned into sp+offset like this:
37280
37281 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37282 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
37283 (const_int 392 [0x188]))))
37284
37285 That later gets turned into:
37286
37287 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37288 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
37289 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
37290
37291 We'll have the following reload recorded:
37292
37293 Reload 0: reload_in (DI) =
37294 (plus:DI (reg/f:DI 7 sp)
37295 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
37296 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37297 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
37298 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
37299 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37300 reload_reg_rtx: (reg:V2DI 22 xmm1)
37301
37302 Which isn't going to work since SSE instructions can't handle scalar
37303 additions. Returning GENERAL_REGS forces the addition into integer
37304 register and reload can handle subsequent reloads without problems. */
37305
37306 if (in_p && GET_CODE (x) == PLUS
37307 && SSE_CLASS_P (rclass)
37308 && SCALAR_INT_MODE_P (mode))
37309 return GENERAL_REGS;
37310
37311 return NO_REGS;
37312 }
37313
37314 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
37315
37316 static bool
37317 ix86_class_likely_spilled_p (reg_class_t rclass)
37318 {
37319 switch (rclass)
37320 {
37321 case AREG:
37322 case DREG:
37323 case CREG:
37324 case BREG:
37325 case AD_REGS:
37326 case SIREG:
37327 case DIREG:
37328 case SSE_FIRST_REG:
37329 case FP_TOP_REG:
37330 case FP_SECOND_REG:
37331 return true;
37332
37333 default:
37334 break;
37335 }
37336
37337 return false;
37338 }
37339
37340 /* If we are copying between general and FP registers, we need a memory
37341 location. The same is true for SSE and MMX registers.
37342
37343 To optimize register_move_cost performance, allow an inline variant.
37344
37345 The macro can't work reliably when one of the CLASSES is a class containing
37346 registers from multiple units (SSE, MMX, integer). We avoid this by never
37347 combining those units in a single alternative in the machine description.
37348 Ensure that this constraint holds to avoid unexpected surprises.
37349
37350 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
37351 enforce these sanity checks. */
37352
37353 static inline bool
37354 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
37355 enum machine_mode mode, int strict)
37356 {
37357 if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
37358 return false;
37359 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
37360 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
37361 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
37362 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
37363 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
37364 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
37365 {
37366 gcc_assert (!strict || lra_in_progress);
37367 return true;
37368 }
37369
37370 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
37371 return true;
37372
37373 /* ??? This is a lie. We do have moves between mmx/general, and between
37374 mmx/sse2. But by saying we need secondary memory we discourage the
37375 register allocator from using the mmx registers unless needed. */
37376 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
37377 return true;
37378
37379 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
37380 {
37381 /* SSE1 doesn't have any direct moves from other classes. */
37382 if (!TARGET_SSE2)
37383 return true;
37384
37385 /* If the target says that inter-unit moves are more expensive
37386 than moving through memory, then don't generate them. */
37387 if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
37388 || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
37389 return true;
37390
37391 /* Between SSE and general, we have moves no larger than word size. */
37392 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
37393 return true;
37394 }
37395
37396 return false;
37397 }
37398
37399 bool
37400 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
37401 enum machine_mode mode, int strict)
37402 {
37403 return inline_secondary_memory_needed (class1, class2, mode, strict);
37404 }
37405
37406 /* Implement the TARGET_CLASS_MAX_NREGS hook.
37407
37408 On the 80386, this is the size of MODE in words,
37409 except in the FP regs, where a single reg is always enough. */
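/* For example, on a 32-bit target XFmode needs three general registers,
   but only a single register of any floating-point or SSE class. */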
37410
37411 static unsigned char
37412 ix86_class_max_nregs (reg_class_t rclass, enum machine_mode mode)
37413 {
37414 if (MAYBE_INTEGER_CLASS_P (rclass))
37415 {
37416 if (mode == XFmode)
37417 return (TARGET_64BIT ? 2 : 3);
37418 else if (mode == XCmode)
37419 return (TARGET_64BIT ? 4 : 6);
37420 else
37421 return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
37422 }
37423 else
37424 {
37425 if (COMPLEX_MODE_P (mode))
37426 return 2;
37427 else
37428 return 1;
37429 }
37430 }
37431
37432 /* Return true if the registers in CLASS cannot represent the change from
37433 modes FROM to TO. */
37434
37435 bool
37436 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
37437 enum reg_class regclass)
37438 {
37439 if (from == to)
37440 return false;
37441
37442 /* x87 registers can't do subreg at all, as all values are reformatted
37443 to extended precision. */
37444 if (MAYBE_FLOAT_CLASS_P (regclass))
37445 return true;
37446
37447 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
37448 {
37449 /* Vector registers do not support QI or HImode loads. If we don't
37450 disallow a change to these modes, reload will assume it's ok to
37451 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
37452 the vec_dupv4hi pattern. */
37453 if (GET_MODE_SIZE (from) < 4)
37454 return true;
37455
37456 /* Vector registers do not support subreg with nonzero offsets, which
37457 are otherwise valid for integer registers. Since we can't see
37458 whether we have a nonzero offset from here, prohibit all
37459 nonparadoxical subregs changing size. */
37460 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
37461 return true;
37462 }
37463
37464 return false;
37465 }
37466
37467 /* Return the cost of moving data of mode M between a
37468 register and memory. A value of 2 is the default; this cost is
37469 relative to those in `REGISTER_MOVE_COST'.
37470
37471 This function is used extensively by register_move_cost, which is used to
37472 build tables at startup. Make it inline in this case.
37473 When IN is 2, return maximum of in and out move cost.
37474
37475 If moving between registers and memory is more expensive than
37476 between two registers, you should define this macro to express the
37477 relative cost.
37478
37479 Also model the increased cost of moving QImode registers in non
37480 Q_REGS classes.
37481 */
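/* For example, with IN == 2 an SFmode value in FLOAT_REGS is costed as the
   larger of fp_load[0] and fp_store[0]. */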
37482 static inline int
37483 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
37484 int in)
37485 {
37486 int cost;
37487 if (FLOAT_CLASS_P (regclass))
37488 {
37489 int index;
37490 switch (mode)
37491 {
37492 case SFmode:
37493 index = 0;
37494 break;
37495 case DFmode:
37496 index = 1;
37497 break;
37498 case XFmode:
37499 index = 2;
37500 break;
37501 default:
37502 return 100;
37503 }
37504 if (in == 2)
37505 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
37506 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
37507 }
37508 if (SSE_CLASS_P (regclass))
37509 {
37510 int index;
37511 switch (GET_MODE_SIZE (mode))
37512 {
37513 case 4:
37514 index = 0;
37515 break;
37516 case 8:
37517 index = 1;
37518 break;
37519 case 16:
37520 index = 2;
37521 break;
37522 default:
37523 return 100;
37524 }
37525 if (in == 2)
37526 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
37527 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
37528 }
37529 if (MMX_CLASS_P (regclass))
37530 {
37531 int index;
37532 switch (GET_MODE_SIZE (mode))
37533 {
37534 case 4:
37535 index = 0;
37536 break;
37537 case 8:
37538 index = 1;
37539 break;
37540 default:
37541 return 100;
37542 }
37543 if (in == 2)
37544 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
37545 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
37546 }
37547 switch (GET_MODE_SIZE (mode))
37548 {
37549 case 1:
37550 if (Q_CLASS_P (regclass) || TARGET_64BIT)
37551 {
37552 if (!in)
37553 return ix86_cost->int_store[0];
37554 if (TARGET_PARTIAL_REG_DEPENDENCY
37555 && optimize_function_for_speed_p (cfun))
37556 cost = ix86_cost->movzbl_load;
37557 else
37558 cost = ix86_cost->int_load[0];
37559 if (in == 2)
37560 return MAX (cost, ix86_cost->int_store[0]);
37561 return cost;
37562 }
37563 else
37564 {
37565 if (in == 2)
37566 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
37567 if (in)
37568 return ix86_cost->movzbl_load;
37569 else
37570 return ix86_cost->int_store[0] + 4;
37571 }
37572 break;
37573 case 2:
37574 if (in == 2)
37575 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
37576 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
37577 default:
37578 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
37579 if (mode == TFmode)
37580 mode = XFmode;
37581 if (in == 2)
37582 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
37583 else if (in)
37584 cost = ix86_cost->int_load[2];
37585 else
37586 cost = ix86_cost->int_store[2];
37587 return (cost * (((int) GET_MODE_SIZE (mode)
37588 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
37589 }
37590 }
37591
37592 static int
37593 ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
37594 bool in)
37595 {
37596 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
37597 }
37598
37599
37600 /* Return the cost of moving data from a register in class CLASS1 to
37601 one in class CLASS2.
37602
37603 It is not required that the cost always equal 2 when FROM is the same as TO;
37604 on some machines it is expensive to move between registers if they are not
37605 general registers. */
37606
37607 static int
37608 ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
37609 reg_class_t class2_i)
37610 {
37611 enum reg_class class1 = (enum reg_class) class1_i;
37612 enum reg_class class2 = (enum reg_class) class2_i;
37613
37614 /* In case we require secondary memory, compute the cost of the store
37615 followed by the load. In order to avoid bad register allocation choices,
37616 we need this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
37617
37618 if (inline_secondary_memory_needed (class1, class2, mode, 0))
37619 {
37620 int cost = 1;
37621
37622 cost += inline_memory_move_cost (mode, class1, 2);
37623 cost += inline_memory_move_cost (mode, class2, 2);
37624
37625 /* In case of copying from a general purpose register we may emit multiple
37626 stores followed by a single load, causing a memory size mismatch stall.
37627 Count this as an arbitrarily high cost of 20. */
37628 if (targetm.class_max_nregs (class1, mode)
37629 > targetm.class_max_nregs (class2, mode))
37630 cost += 20;
37631
37632 /* In the case of FP/MMX moves, the registers actually overlap, and we
37633 have to switch modes in order to treat them differently. */
37634 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
37635 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
37636 cost += 20;
37637
37638 return cost;
37639 }
37640
37641 /* Moves between SSE/MMX and integer unit are expensive. */
37642 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
37643 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
37644
37645 /* ??? By keeping the returned value relatively high, we limit the number
37646 of moves between integer and MMX/SSE registers for all targets.
37647 Additionally, the high value prevents a problem with x86_modes_tieable_p(),
37648 where integer modes in MMX/SSE registers are not tieable
37649 because of missing QImode and HImode moves to, from or between
37650 MMX/SSE registers. */
37651 return MAX (8, ix86_cost->mmxsse_to_integer);
37652
37653 if (MAYBE_FLOAT_CLASS_P (class1))
37654 return ix86_cost->fp_move;
37655 if (MAYBE_SSE_CLASS_P (class1))
37656 return ix86_cost->sse_move;
37657 if (MAYBE_MMX_CLASS_P (class1))
37658 return ix86_cost->mmx_move;
37659 return 2;
37660 }
37661
37662 /* Return TRUE if hard register REGNO can hold a value of machine-mode
37663 MODE. */
37664
37665 bool
37666 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
37667 {
37668 /* The flags register, and only the flags register, can hold CCmode values. */
37669 if (CC_REGNO_P (regno))
37670 return GET_MODE_CLASS (mode) == MODE_CC;
37671 if (GET_MODE_CLASS (mode) == MODE_CC
37672 || GET_MODE_CLASS (mode) == MODE_RANDOM
37673 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
37674 return false;
37675 if (STACK_REGNO_P (regno))
37676 return VALID_FP_MODE_P (mode);
37677 if (MASK_REGNO_P (regno))
37678 return VALID_MASK_REG_MODE (mode);
37679 if (SSE_REGNO_P (regno))
37680 {
37681 /* We implement the move patterns for all vector modes into and
37682 out of SSE registers, even when no operation instructions
37683 are available. */
37684
37685 /* For AVX-512 we allow, regardless of regno:
37686 - XI mode
37687 - any 512-bit wide vector mode
37688 - any scalar mode. */
37689 if (TARGET_AVX512F
37690 && (mode == XImode
37691 || VALID_AVX512F_REG_MODE (mode)
37692 || VALID_AVX512F_SCALAR_MODE (mode)))
37693 return true;
37694
37695 /* xmm16-xmm31 are only available for AVX-512. */
37696 if (EXT_REX_SSE_REGNO_P (regno))
37697 return false;
37698
37699 /* OImode and AVX modes are available only when AVX is enabled. */
37700 return ((TARGET_AVX
37701 && VALID_AVX256_REG_OR_OI_MODE (mode))
37702 || VALID_SSE_REG_MODE (mode)
37703 || VALID_SSE2_REG_MODE (mode)
37704 || VALID_MMX_REG_MODE (mode)
37705 || VALID_MMX_REG_MODE_3DNOW (mode));
37706 }
37707 if (MMX_REGNO_P (regno))
37708 {
37709 /* We implement the move patterns for 3DNOW modes even in MMX mode,
37710 so if the register is available at all, then we can move data of
37711 the given mode into or out of it. */
37712 return (VALID_MMX_REG_MODE (mode)
37713 || VALID_MMX_REG_MODE_3DNOW (mode));
37714 }
37715
37716 if (mode == QImode)
37717 {
37718 /* Take care with QImode values - they can be in non-QI regs,
37719 but then they do cause partial register stalls. */
37720 if (ANY_QI_REGNO_P (regno))
37721 return true;
37722 if (!TARGET_PARTIAL_REG_STALL)
37723 return true;
37724 /* LRA checks if the hard register is OK for the given mode.
37725 QImode values can live in non-QI regs, so we allow all
37726 registers here. */
37727 if (lra_in_progress)
37728 return true;
37729 return !can_create_pseudo_p ();
37730 }
37731 /* We handle both integer and floats in the general purpose registers. */
37732 else if (VALID_INT_MODE_P (mode))
37733 return true;
37734 else if (VALID_FP_MODE_P (mode))
37735 return true;
37736 else if (VALID_DFP_MODE_P (mode))
37737 return true;
37738 /* Lots of MMX code casts 8-byte vector modes to DImode. If we then go
37739 on to use that value in smaller contexts, this can easily force a
37740 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
37741 supporting DImode, allow it. */
37742 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
37743 return true;
37744
37745 return false;
37746 }
37747
37748 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
37749 tieable integer mode. */
37750
37751 static bool
37752 ix86_tieable_integer_mode_p (enum machine_mode mode)
37753 {
37754 switch (mode)
37755 {
37756 case HImode:
37757 case SImode:
37758 return true;
37759
37760 case QImode:
37761 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
37762
37763 case DImode:
37764 return TARGET_64BIT;
37765
37766 default:
37767 return false;
37768 }
37769 }
37770
37771 /* Return true if MODE1 is accessible in a register that can hold MODE2
37772 without copying. That is, all register classes that can hold MODE2
37773 can also hold MODE1. */
37774
37775 bool
37776 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
37777 {
37778 if (mode1 == mode2)
37779 return true;
37780
37781 if (ix86_tieable_integer_mode_p (mode1)
37782 && ix86_tieable_integer_mode_p (mode2))
37783 return true;
37784
37785 /* MODE2 being XFmode implies fp stack or general regs, which means we
37786 can tie any smaller floating point modes to it. Note that we do not
37787 tie this with TFmode. */
37788 if (mode2 == XFmode)
37789 return mode1 == SFmode || mode1 == DFmode;
37790
37791 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
37792 that we can tie it with SFmode. */
37793 if (mode2 == DFmode)
37794 return mode1 == SFmode;
37795
37796 /* If MODE2 is only appropriate for an SSE register, then tie with
37797 any other mode acceptable to SSE registers. */
37798 if (GET_MODE_SIZE (mode2) == 32
37799 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
37800 return (GET_MODE_SIZE (mode1) == 32
37801 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
37802 if (GET_MODE_SIZE (mode2) == 16
37803 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
37804 return (GET_MODE_SIZE (mode1) == 16
37805 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
37806
37807 /* If MODE2 is appropriate for an MMX register, then tie
37808 with any other mode acceptable to MMX registers. */
37809 if (GET_MODE_SIZE (mode2) == 8
37810 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
37811 return (GET_MODE_SIZE (mode1) == 8
37812 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
37813
37814 return false;
37815 }
37816
37817 /* Return the cost of moving between two registers of mode MODE. */
37818
37819 static int
37820 ix86_set_reg_reg_cost (enum machine_mode mode)
37821 {
37822 unsigned int units = UNITS_PER_WORD;
37823
37824 switch (GET_MODE_CLASS (mode))
37825 {
37826 default:
37827 break;
37828
37829 case MODE_CC:
37830 units = GET_MODE_SIZE (CCmode);
37831 break;
37832
37833 case MODE_FLOAT:
37834 if ((TARGET_SSE && mode == TFmode)
37835 || (TARGET_80387 && mode == XFmode)
37836 || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
37837 || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
37838 units = GET_MODE_SIZE (mode);
37839 break;
37840
37841 case MODE_COMPLEX_FLOAT:
37842 if ((TARGET_SSE && mode == TCmode)
37843 || (TARGET_80387 && mode == XCmode)
37844 || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
37845 || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
37846 units = GET_MODE_SIZE (mode);
37847 break;
37848
37849 case MODE_VECTOR_INT:
37850 case MODE_VECTOR_FLOAT:
37851 if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
37852 || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
37853 || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
37854 || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
37855 || (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
37856 units = GET_MODE_SIZE (mode);
37857 }
37858
37859 /* Return the cost of moving between two registers of mode MODE,
37860 assuming that the move will be in pieces of at most UNITS bytes. */
37861 return COSTS_N_INSNS ((GET_MODE_SIZE (mode) + units - 1) / units);
37862 }
37863
37864 /* Compute a (partial) cost for rtx X. Return true if the complete
37865 cost has been computed, and false if subexpressions should be
37866 scanned. In either case, *TOTAL contains the cost result. */
37867
37868 static bool
37869 ix86_rtx_costs (rtx x, int code_i, int outer_code_i, int opno, int *total,
37870 bool speed)
37871 {
37872 rtx mask;
37873 enum rtx_code code = (enum rtx_code) code_i;
37874 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
37875 enum machine_mode mode = GET_MODE (x);
37876 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
37877
37878 switch (code)
37879 {
37880 case SET:
37881 if (register_operand (SET_DEST (x), VOIDmode)
37882 && reg_or_0_operand (SET_SRC (x), VOIDmode))
37883 {
37884 *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
37885 return true;
37886 }
37887 return false;
37888
37889 case CONST_INT:
37890 case CONST:
37891 case LABEL_REF:
37892 case SYMBOL_REF:
37893 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
37894 *total = 3;
37895 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
37896 *total = 2;
37897 else if (flag_pic && SYMBOLIC_CONST (x)
37898 && !(TARGET_64BIT
37899 && (GET_CODE (x) == LABEL_REF
37900 || (GET_CODE (x) == SYMBOL_REF
37901 && SYMBOL_REF_LOCAL_P (x)))))
37902 *total = 1;
37903 else
37904 *total = 0;
37905 return true;
37906
37907 case CONST_DOUBLE:
37908 if (mode == VOIDmode)
37909 {
37910 *total = 0;
37911 return true;
37912 }
37913 switch (standard_80387_constant_p (x))
37914 {
37915 case 1: /* 0.0 */
37916 *total = 1;
37917 return true;
37918 default: /* Other constants */
37919 *total = 2;
37920 return true;
37921 case 0:
37922 case -1:
37923 break;
37924 }
37925 if (SSE_FLOAT_MODE_P (mode))
37926 {
37927 case CONST_VECTOR:
37928 switch (standard_sse_constant_p (x))
37929 {
37930 case 0:
37931 break;
37932 case 1: /* 0: xor eliminates false dependency */
37933 *total = 0;
37934 return true;
37935 default: /* -1: cmp contains false dependency */
37936 *total = 1;
37937 return true;
37938 }
37939 }
37940 /* Fall back to (MEM (SYMBOL_REF)), since that's where
37941 it'll probably end up. Add a penalty for size. */
37942 *total = (COSTS_N_INSNS (1)
37943 + (flag_pic != 0 && !TARGET_64BIT)
37944 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
37945 return true;
37946
37947 case ZERO_EXTEND:
37948 /* Zero extension is often completely free on x86_64, so make
37949 it as cheap as possible. */
37950 if (TARGET_64BIT && mode == DImode
37951 && GET_MODE (XEXP (x, 0)) == SImode)
37952 *total = 1;
37953 else if (TARGET_ZERO_EXTEND_WITH_AND)
37954 *total = cost->add;
37955 else
37956 *total = cost->movzx;
37957 return false;
37958
37959 case SIGN_EXTEND:
37960 *total = cost->movsx;
37961 return false;
37962
37963 case ASHIFT:
37964 if (SCALAR_INT_MODE_P (mode)
37965 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
37966 && CONST_INT_P (XEXP (x, 1)))
37967 {
37968 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
37969 if (value == 1)
37970 {
37971 *total = cost->add;
37972 return false;
37973 }
37974 if ((value == 2 || value == 3)
37975 && cost->lea <= cost->shift_const)
37976 {
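	      /* A left shift by 2 or 3 can be done with a single lea using
	         scale 4 or 8, so prefer the lea cost when it is no worse.  */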
37977 *total = cost->lea;
37978 return false;
37979 }
37980 }
37981 /* FALLTHRU */
37982
37983 case ROTATE:
37984 case ASHIFTRT:
37985 case LSHIFTRT:
37986 case ROTATERT:
37987 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
37988 {
37989 /* ??? Should be SSE vector operation cost. */
37990 /* At least for published AMD latencies, this really is the same
37991 as the latency for a simple fpu operation like fabs. */
37992 /* V*QImode is emulated with 1-11 insns. */
37993 if (mode == V16QImode || mode == V32QImode)
37994 {
37995 int count = 11;
37996 if (TARGET_XOP && mode == V16QImode)
37997 {
37998 /* For XOP we use vpshab, which requires a broadcast of the
37999 value to the variable shift insn. For constants this
38000 means a V16QI constant in memory; even when we could perform
38001 the shift with one insn, set the cost so that paddb is preferred. */
38002 if (CONSTANT_P (XEXP (x, 1)))
38003 {
38004 *total = (cost->fabs
38005 + rtx_cost (XEXP (x, 0), code, 0, speed)
38006 + (speed ? 2 : COSTS_N_BYTES (16)));
38007 return true;
38008 }
38009 count = 3;
38010 }
38011 else if (TARGET_SSSE3)
38012 count = 7;
38013 *total = cost->fabs * count;
38014 }
38015 else
38016 *total = cost->fabs;
38017 }
38018 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
38019 {
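	  /* A double-word shift is roughly two single-word shifts plus
	     fix-up code; variable counts are costed higher still unless the
	     count has already been masked with an AND.  */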
38020 if (CONST_INT_P (XEXP (x, 1)))
38021 {
38022 if (INTVAL (XEXP (x, 1)) > 32)
38023 *total = cost->shift_const + COSTS_N_INSNS (2);
38024 else
38025 *total = cost->shift_const * 2;
38026 }
38027 else
38028 {
38029 if (GET_CODE (XEXP (x, 1)) == AND)
38030 *total = cost->shift_var * 2;
38031 else
38032 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
38033 }
38034 }
38035 else
38036 {
38037 if (CONST_INT_P (XEXP (x, 1)))
38038 *total = cost->shift_const;
38039 else if (GET_CODE (XEXP (x, 1)) == SUBREG
38040 && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND)
38041 {
38042 /* Return the cost after shift-and truncation. */
38043 *total = cost->shift_var;
38044 return true;
38045 }
38046 else
38047 *total = cost->shift_var;
38048 }
38049 return false;
38050
38051 case FMA:
38052 {
38053 rtx sub;
38054
38055 gcc_assert (FLOAT_MODE_P (mode));
38056 gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
38057
38058 /* ??? SSE scalar/vector cost should be used here. */
38059 /* ??? Bald assumption that fma has the same cost as fmul. */
38060 *total = cost->fmul;
38061 *total += rtx_cost (XEXP (x, 1), FMA, 1, speed);
38062
38063 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
38064 sub = XEXP (x, 0);
38065 if (GET_CODE (sub) == NEG)
38066 sub = XEXP (sub, 0);
38067 *total += rtx_cost (sub, FMA, 0, speed);
38068
38069 sub = XEXP (x, 2);
38070 if (GET_CODE (sub) == NEG)
38071 sub = XEXP (sub, 0);
38072 *total += rtx_cost (sub, FMA, 2, speed);
38073 return true;
38074 }
38075
38076 case MULT:
38077 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38078 {
38079 /* ??? SSE scalar cost should be used here. */
38080 *total = cost->fmul;
38081 return false;
38082 }
38083 else if (X87_FLOAT_MODE_P (mode))
38084 {
38085 *total = cost->fmul;
38086 return false;
38087 }
38088 else if (FLOAT_MODE_P (mode))
38089 {
38090 /* ??? SSE vector cost should be used here. */
38091 *total = cost->fmul;
38092 return false;
38093 }
38094 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
38095 {
38096 /* V*QImode is emulated with 7-13 insns. */
38097 if (mode == V16QImode || mode == V32QImode)
38098 {
38099 int extra = 11;
38100 if (TARGET_XOP && mode == V16QImode)
38101 extra = 5;
38102 else if (TARGET_SSSE3)
38103 extra = 6;
38104 *total = cost->fmul * 2 + cost->fabs * extra;
38105 }
38106 /* V*DImode is emulated with 5-8 insns. */
38107 else if (mode == V2DImode || mode == V4DImode)
38108 {
38109 if (TARGET_XOP && mode == V2DImode)
38110 *total = cost->fmul * 2 + cost->fabs * 3;
38111 else
38112 *total = cost->fmul * 3 + cost->fabs * 5;
38113 }
38114 /* Without sse4.1, we don't have PMULLD; it's emulated with 7
38115 insns, including two PMULUDQ. */
38116 else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX))
38117 *total = cost->fmul * 2 + cost->fabs * 5;
38118 else
38119 *total = cost->fmul;
38120 return false;
38121 }
38122 else
38123 {
38124 rtx op0 = XEXP (x, 0);
38125 rtx op1 = XEXP (x, 1);
38126 int nbits;
38127 if (CONST_INT_P (XEXP (x, 1)))
38128 {
38129 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
38130 for (nbits = 0; value != 0; value &= value - 1)
38131 nbits++;
38132 }
38133 else
38134 /* This is arbitrary. */
38135 nbits = 7;
38136
38137 /* Compute costs correctly for widening multiplication. */
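	  /* For example, (mult:DI (sign_extend:DI (reg:SI))
	                           (sign_extend:DI (reg:SI)))
	     is a single widening imul, so cost the multiply at the narrower
	     inner mode rather than at the result mode.  */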
38138 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
38139 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
38140 == GET_MODE_SIZE (mode))
38141 {
38142 int is_mulwiden = 0;
38143 enum machine_mode inner_mode = GET_MODE (op0);
38144
38145 if (GET_CODE (op0) == GET_CODE (op1))
38146 is_mulwiden = 1, op1 = XEXP (op1, 0);
38147 else if (CONST_INT_P (op1))
38148 {
38149 if (GET_CODE (op0) == SIGN_EXTEND)
38150 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
38151 == INTVAL (op1);
38152 else
38153 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
38154 }
38155
38156 if (is_mulwiden)
38157 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
38158 }
38159
38160 *total = (cost->mult_init[MODE_INDEX (mode)]
38161 + nbits * cost->mult_bit
38162 + rtx_cost (op0, outer_code, opno, speed)
38163 + rtx_cost (op1, outer_code, opno, speed));
38164
38165 return true;
38166 }
38167
38168 case DIV:
38169 case UDIV:
38170 case MOD:
38171 case UMOD:
38172 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38173 /* ??? SSE cost should be used here. */
38174 *total = cost->fdiv;
38175 else if (X87_FLOAT_MODE_P (mode))
38176 *total = cost->fdiv;
38177 else if (FLOAT_MODE_P (mode))
38178 /* ??? SSE vector cost should be used here. */
38179 *total = cost->fdiv;
38180 else
38181 *total = cost->divide[MODE_INDEX (mode)];
38182 return false;
38183
38184 case PLUS:
38185 if (GET_MODE_CLASS (mode) == MODE_INT
38186 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
38187 {
38188 if (GET_CODE (XEXP (x, 0)) == PLUS
38189 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
38190 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
38191 && CONSTANT_P (XEXP (x, 1)))
38192 {
38193 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
38194 if (val == 2 || val == 4 || val == 8)
38195 {
38196 *total = cost->lea;
38197 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
38198 outer_code, opno, speed);
38199 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
38200 outer_code, opno, speed);
38201 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
38202 return true;
38203 }
38204 }
38205 else if (GET_CODE (XEXP (x, 0)) == MULT
38206 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
38207 {
38208 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
38209 if (val == 2 || val == 4 || val == 8)
38210 {
38211 *total = cost->lea;
38212 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
38213 outer_code, opno, speed);
38214 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
38215 return true;
38216 }
38217 }
38218 else if (GET_CODE (XEXP (x, 0)) == PLUS)
38219 {
38220 *total = cost->lea;
38221 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
38222 outer_code, opno, speed);
38223 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
38224 outer_code, opno, speed);
38225 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
38226 return true;
38227 }
38228 }
38229 /* FALLTHRU */
38230
38231 case MINUS:
38232 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38233 {
38234 /* ??? SSE cost should be used here. */
38235 *total = cost->fadd;
38236 return false;
38237 }
38238 else if (X87_FLOAT_MODE_P (mode))
38239 {
38240 *total = cost->fadd;
38241 return false;
38242 }
38243 else if (FLOAT_MODE_P (mode))
38244 {
38245 /* ??? SSE vector cost should be used here. */
38246 *total = cost->fadd;
38247 return false;
38248 }
38249 /* FALLTHRU */
38250
38251 case AND:
38252 case IOR:
38253 case XOR:
38254 if (GET_MODE_CLASS (mode) == MODE_INT
38255 && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
38256 {
38257 *total = (cost->add * 2
38258 + (rtx_cost (XEXP (x, 0), outer_code, opno, speed)
38259 << (GET_MODE (XEXP (x, 0)) != DImode))
38260 + (rtx_cost (XEXP (x, 1), outer_code, opno, speed)
38261 << (GET_MODE (XEXP (x, 1)) != DImode)));
38262 return true;
38263 }
38264 /* FALLTHRU */
38265
38266 case NEG:
38267 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38268 {
38269 /* ??? SSE cost should be used here. */
38270 *total = cost->fchs;
38271 return false;
38272 }
38273 else if (X87_FLOAT_MODE_P (mode))
38274 {
38275 *total = cost->fchs;
38276 return false;
38277 }
38278 else if (FLOAT_MODE_P (mode))
38279 {
38280 /* ??? SSE vector cost should be used here. */
38281 *total = cost->fchs;
38282 return false;
38283 }
38284 /* FALLTHRU */
38285
38286 case NOT:
38287 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
38288 {
38289 /* ??? Should be SSE vector operation cost. */
38290 /* At least for published AMD latencies, this really is the same
38291 as the latency for a simple fpu operation like fabs. */
38292 *total = cost->fabs;
38293 }
38294 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
38295 *total = cost->add * 2;
38296 else
38297 *total = cost->add;
38298 return false;
38299
38300 case COMPARE:
38301 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
38302 && XEXP (XEXP (x, 0), 1) == const1_rtx
38303 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
38304 && XEXP (x, 1) == const0_rtx)
38305 {
38306 /* This kind of construct is implemented using test[bwl].
38307 Treat it as if we had an AND. */
38308 *total = (cost->add
38309 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, opno, speed)
38310 + rtx_cost (const1_rtx, outer_code, opno, speed));
38311 return true;
38312 }
38313 return false;
38314
38315 case FLOAT_EXTEND:
38316 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
38317 *total = 0;
38318 return false;
38319
38320 case ABS:
38321 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38322 /* ??? SSE cost should be used here. */
38323 *total = cost->fabs;
38324 else if (X87_FLOAT_MODE_P (mode))
38325 *total = cost->fabs;
38326 else if (FLOAT_MODE_P (mode))
38327 /* ??? SSE vector cost should be used here. */
38328 *total = cost->fabs;
38329 return false;
38330
38331 case SQRT:
38332 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38333 /* ??? SSE cost should be used here. */
38334 *total = cost->fsqrt;
38335 else if (X87_FLOAT_MODE_P (mode))
38336 *total = cost->fsqrt;
38337 else if (FLOAT_MODE_P (mode))
38338 /* ??? SSE vector cost should be used here. */
38339 *total = cost->fsqrt;
38340 return false;
38341
38342 case UNSPEC:
38343 if (XINT (x, 1) == UNSPEC_TP)
38344 *total = 0;
38345 return false;
38346
38347 case VEC_SELECT:
38348 case VEC_CONCAT:
38349 case VEC_DUPLICATE:
38350 /* ??? Assume all of these vector manipulation patterns are
38351 recognizable, in which case they all have pretty much the
38352 same cost. */
38353 *total = cost->fabs;
38354 return true;
38355 case VEC_MERGE:
38356 mask = XEXP (x, 2);
38357 /* This is a masked instruction; assume the same cost
38358 as the non-masked variant. */
38359 if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
38360 *total = rtx_cost (XEXP (x, 0), outer_code, opno, speed);
38361 else
38362 *total = cost->fabs;
38363 return true;
38364
38365 default:
38366 return false;
38367 }
38368 }
38369
38370 #if TARGET_MACHO
38371
38372 static int current_machopic_label_num;
38373
38374 /* Given a symbol name and its associated stub, write out the
38375 definition of the stub. */
38376
38377 void
38378 machopic_output_stub (FILE *file, const char *symb, const char *stub)
38379 {
38380 unsigned int length;
38381 char *binder_name, *symbol_name, lazy_ptr_name[32];
38382 int label = ++current_machopic_label_num;
38383
38384 /* For 64-bit we shouldn't get here. */
38385 gcc_assert (!TARGET_64BIT);
38386
38387 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
38388 symb = targetm.strip_name_encoding (symb);
38389
38390 length = strlen (stub);
38391 binder_name = XALLOCAVEC (char, length + 32);
38392 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
38393
38394 length = strlen (symb);
38395 symbol_name = XALLOCAVEC (char, length + 32);
38396 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
38397
38398 sprintf (lazy_ptr_name, "L%d$lz", label);
38399
38400 if (MACHOPIC_ATT_STUB)
38401 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
38402 else if (MACHOPIC_PURE)
38403 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
38404 else
38405 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
38406
38407 fprintf (file, "%s:\n", stub);
38408 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
38409
38410 if (MACHOPIC_ATT_STUB)
38411 {
38412 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
38413 }
38414 else if (MACHOPIC_PURE)
38415 {
38416 /* PIC stub. */
38417 /* 25-byte PIC stub using "CALL get_pc_thunk". */
38418 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
38419 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
38420 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
38421 label, lazy_ptr_name, label);
38422 fprintf (file, "\tjmp\t*%%ecx\n");
38423 }
38424 else
38425 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
38426
38427 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
38428 it needs no stub-binding-helper. */
38429 if (MACHOPIC_ATT_STUB)
38430 return;
38431
38432 fprintf (file, "%s:\n", binder_name);
38433
38434 if (MACHOPIC_PURE)
38435 {
38436 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
38437 fprintf (file, "\tpushl\t%%ecx\n");
38438 }
38439 else
38440 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
38441
38442 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
38443
38444 /* N.B. Keep the correspondence of these
38445 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
38446 old-pic/new-pic/non-pic stubs; altering this will break
38447 compatibility with existing dylibs. */
38448 if (MACHOPIC_PURE)
38449 {
38450 /* 25-byte PIC stub using "CALL get_pc_thunk". */
38451 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
38452 }
38453 else
38454 /* 16-byte -mdynamic-no-pic stub. */
38455 switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
38456
38457 fprintf (file, "%s:\n", lazy_ptr_name);
38458 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
38459 fprintf (file, ASM_LONG "%s\n", binder_name);
38460 }
38461 #endif /* TARGET_MACHO */
38462
38463 /* Order the registers for register allocator. */
38464
38465 void
38466 x86_order_regs_for_local_alloc (void)
38467 {
38468 int pos = 0;
38469 int i;
38470
38471 /* First allocate the local general purpose registers. */
38472 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
38473 if (GENERAL_REGNO_P (i) && call_used_regs[i])
38474 reg_alloc_order [pos++] = i;
38475
38476 /* Global general purpose registers. */
38477 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
38478 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
38479 reg_alloc_order [pos++] = i;
38480
38481 /* x87 registers come first in case we are doing FP math
38482 using them. */
38483 if (!TARGET_SSE_MATH)
38484 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
38485 reg_alloc_order [pos++] = i;
38486
38487 /* SSE registers. */
38488 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
38489 reg_alloc_order [pos++] = i;
38490 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
38491 reg_alloc_order [pos++] = i;
38492
38493 /* Extended REX SSE registers. */
38494 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
38495 reg_alloc_order [pos++] = i;
38496
38497 /* Mask register. */
38498 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
38499 reg_alloc_order [pos++] = i;
38500
38501 /* x87 registers. */
38502 if (TARGET_SSE_MATH)
38503 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
38504 reg_alloc_order [pos++] = i;
38505
38506 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
38507 reg_alloc_order [pos++] = i;
38508
38509 /* Initialize the rest of the array, as we do not allocate some
38510 registers at all. */
38511 while (pos < FIRST_PSEUDO_REGISTER)
38512 reg_alloc_order [pos++] = 0;
38513 }
38514
38515 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
38516 in struct attribute_spec handler. */
38517 static tree
38518 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
38519 tree args,
38520 int,
38521 bool *no_add_attrs)
38522 {
38523 if (TREE_CODE (*node) != FUNCTION_TYPE
38524 && TREE_CODE (*node) != METHOD_TYPE
38525 && TREE_CODE (*node) != FIELD_DECL
38526 && TREE_CODE (*node) != TYPE_DECL)
38527 {
38528 warning (OPT_Wattributes, "%qE attribute only applies to functions",
38529 name);
38530 *no_add_attrs = true;
38531 return NULL_TREE;
38532 }
38533 if (TARGET_64BIT)
38534 {
38535 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
38536 name);
38537 *no_add_attrs = true;
38538 return NULL_TREE;
38539 }
38540 if (is_attribute_p ("callee_pop_aggregate_return", name))
38541 {
38542 tree cst;
38543
38544 cst = TREE_VALUE (args);
38545 if (TREE_CODE (cst) != INTEGER_CST)
38546 {
38547 warning (OPT_Wattributes,
38548 "%qE attribute requires an integer constant argument",
38549 name);
38550 *no_add_attrs = true;
38551 }
38552 else if (compare_tree_int (cst, 0) != 0
38553 && compare_tree_int (cst, 1) != 0)
38554 {
38555 warning (OPT_Wattributes,
38556 "argument to %qE attribute is neither zero, nor one",
38557 name);
38558 *no_add_attrs = true;
38559 }
38560
38561 return NULL_TREE;
38562 }
38563
38564 return NULL_TREE;
38565 }
38566
38567 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
38568 struct attribute_spec.handler. */
38569 static tree
38570 ix86_handle_abi_attribute (tree *node, tree name, tree, int,
38571 bool *no_add_attrs)
38572 {
38573 if (TREE_CODE (*node) != FUNCTION_TYPE
38574 && TREE_CODE (*node) != METHOD_TYPE
38575 && TREE_CODE (*node) != FIELD_DECL
38576 && TREE_CODE (*node) != TYPE_DECL)
38577 {
38578 warning (OPT_Wattributes, "%qE attribute only applies to functions",
38579 name);
38580 *no_add_attrs = true;
38581 return NULL_TREE;
38582 }
38583
38584 /* Can combine regparm with all attributes but fastcall. */
38585 if (is_attribute_p ("ms_abi", name))
38586 {
38587 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
38588 {
38589 error ("ms_abi and sysv_abi attributes are not compatible");
38590 }
38591
38592 return NULL_TREE;
38593 }
38594 else if (is_attribute_p ("sysv_abi", name))
38595 {
38596 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
38597 {
38598 error ("ms_abi and sysv_abi attributes are not compatible");
38599 }
38600
38601 return NULL_TREE;
38602 }
38603
38604 return NULL_TREE;
38605 }
38606
38607 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
38608 struct attribute_spec.handler. */
38609 static tree
38610 ix86_handle_struct_attribute (tree *node, tree name, tree, int,
38611 bool *no_add_attrs)
38612 {
38613 tree *type = NULL;
38614 if (DECL_P (*node))
38615 {
38616 if (TREE_CODE (*node) == TYPE_DECL)
38617 type = &TREE_TYPE (*node);
38618 }
38619 else
38620 type = node;
38621
38622 if (!(type && RECORD_OR_UNION_TYPE_P (*type)))
38623 {
38624 warning (OPT_Wattributes, "%qE attribute ignored",
38625 name);
38626 *no_add_attrs = true;
38627 }
38628
38629 else if ((is_attribute_p ("ms_struct", name)
38630 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
38631 || ((is_attribute_p ("gcc_struct", name)
38632 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
38633 {
38634 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
38635 name);
38636 *no_add_attrs = true;
38637 }
38638
38639 return NULL_TREE;
38640 }
38641
38642 static tree
38643 ix86_handle_fndecl_attribute (tree *node, tree name, tree, int,
38644 bool *no_add_attrs)
38645 {
38646 if (TREE_CODE (*node) != FUNCTION_DECL)
38647 {
38648 warning (OPT_Wattributes, "%qE attribute only applies to functions",
38649 name);
38650 *no_add_attrs = true;
38651 }
38652 return NULL_TREE;
38653 }
38654
38655 static bool
38656 ix86_ms_bitfield_layout_p (const_tree record_type)
38657 {
38658 return ((TARGET_MS_BITFIELD_LAYOUT
38659 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
38660 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
38661 }
38662
38663 /* Returns an expression indicating where the this parameter is
38664 located on entry to the FUNCTION. */
38665
38666 static rtx
38667 x86_this_parameter (tree function)
38668 {
38669 tree type = TREE_TYPE (function);
38670 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
38671 int nregs;
38672
38673 if (TARGET_64BIT)
38674 {
38675 const int *parm_regs;
38676
38677 if (ix86_function_type_abi (type) == MS_ABI)
38678 parm_regs = x86_64_ms_abi_int_parameter_registers;
38679 else
38680 parm_regs = x86_64_int_parameter_registers;
38681 return gen_rtx_REG (Pmode, parm_regs[aggr]);
38682 }
38683
38684 nregs = ix86_function_regparm (type, function);
38685
38686 if (nregs > 0 && !stdarg_p (type))
38687 {
38688 int regno;
38689 unsigned int ccvt = ix86_get_callcvt (type);
38690
38691 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
38692 regno = aggr ? DX_REG : CX_REG;
38693 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
38694 {
38695 regno = CX_REG;
38696 if (aggr)
38697 return gen_rtx_MEM (SImode,
38698 plus_constant (Pmode, stack_pointer_rtx, 4));
38699 }
38700 else
38701 {
38702 regno = AX_REG;
38703 if (aggr)
38704 {
38705 regno = DX_REG;
38706 if (nregs == 1)
38707 return gen_rtx_MEM (SImode,
38708 plus_constant (Pmode,
38709 stack_pointer_rtx, 4));
38710 }
38711 }
38712 return gen_rtx_REG (SImode, regno);
38713 }
38714
38715 return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
38716 aggr ? 8 : 4));
38717 }
38718
38719 /* Determine whether x86_output_mi_thunk can succeed. */
38720
38721 static bool
38722 x86_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
38723 const_tree function)
38724 {
38725 /* 64-bit can handle anything. */
38726 if (TARGET_64BIT)
38727 return true;
38728
38729 /* For 32-bit, everything's fine if we have one free register. */
38730 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
38731 return true;
38732
38733 /* Need a free register for vcall_offset. */
38734 if (vcall_offset)
38735 return false;
38736
38737 /* Need a free register for GOT references. */
38738 if (flag_pic && !targetm.binds_local_p (function))
38739 return false;
38740
38741 /* Otherwise ok. */
38742 return true;
38743 }
38744
38745 /* Output the assembler code for a thunk function. THUNK_DECL is the
38746 declaration for the thunk function itself, FUNCTION is the decl for
38747 the target function. DELTA is an immediate constant offset to be
38748 added to THIS. If VCALL_OFFSET is nonzero, the word at
38749 *(*this + vcall_offset) should be added to THIS. */
38750
38751 static void
38752 x86_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
38753 HOST_WIDE_INT vcall_offset, tree function)
38754 {
38755 rtx this_param = x86_this_parameter (function);
38756 rtx this_reg, tmp, fnaddr;
38757 unsigned int tmp_regno;
38758
38759 if (TARGET_64BIT)
38760 tmp_regno = R10_REG;
38761 else
38762 {
38763 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
38764 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
38765 tmp_regno = AX_REG;
38766 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
38767 tmp_regno = DX_REG;
38768 else
38769 tmp_regno = CX_REG;
38770 }
38771
38772 emit_note (NOTE_INSN_PROLOGUE_END);
38773
38774 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
38775 pull it in now and let DELTA benefit. */
38776 if (REG_P (this_param))
38777 this_reg = this_param;
38778 else if (vcall_offset)
38779 {
38780 /* Put the this parameter into %eax. */
38781 this_reg = gen_rtx_REG (Pmode, AX_REG);
38782 emit_move_insn (this_reg, this_param);
38783 }
38784 else
38785 this_reg = NULL_RTX;
38786
38787 /* Adjust the this parameter by a fixed constant. */
38788 if (delta)
38789 {
38790 rtx delta_rtx = GEN_INT (delta);
38791 rtx delta_dst = this_reg ? this_reg : this_param;
38792
38793 if (TARGET_64BIT)
38794 {
38795 if (!x86_64_general_operand (delta_rtx, Pmode))
38796 {
38797 tmp = gen_rtx_REG (Pmode, tmp_regno);
38798 emit_move_insn (tmp, delta_rtx);
38799 delta_rtx = tmp;
38800 }
38801 }
38802
38803 ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
38804 }
38805
38806 /* Adjust the this parameter by a value stored in the vtable. */
38807 if (vcall_offset)
38808 {
38809 rtx vcall_addr, vcall_mem, this_mem;
38810
38811 tmp = gen_rtx_REG (Pmode, tmp_regno);
38812
38813 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
38814 if (Pmode != ptr_mode)
38815 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
38816 emit_move_insn (tmp, this_mem);
38817
38818 /* Adjust the this parameter. */
38819 vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
38820 if (TARGET_64BIT
38821 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
38822 {
38823 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
38824 emit_move_insn (tmp2, GEN_INT (vcall_offset));
38825 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
38826 }
38827
38828 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
38829 if (Pmode != ptr_mode)
38830 emit_insn (gen_addsi_1_zext (this_reg,
38831 gen_rtx_REG (ptr_mode,
38832 REGNO (this_reg)),
38833 vcall_mem));
38834 else
38835 ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
38836 }
38837
38838 /* If necessary, drop THIS back to its stack slot. */
38839 if (this_reg && this_reg != this_param)
38840 emit_move_insn (this_param, this_reg);
38841
38842 fnaddr = XEXP (DECL_RTL (function), 0);
38843 if (TARGET_64BIT)
38844 {
38845 if (!flag_pic || targetm.binds_local_p (function)
38846 || TARGET_PECOFF)
38847 ;
38848 else
38849 {
38850 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
38851 tmp = gen_rtx_CONST (Pmode, tmp);
38852 fnaddr = gen_const_mem (Pmode, tmp);
38853 }
38854 }
38855 else
38856 {
38857 if (!flag_pic || targetm.binds_local_p (function))
38858 ;
38859 #if TARGET_MACHO
38860 else if (TARGET_MACHO)
38861 {
38862 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
38863 fnaddr = XEXP (fnaddr, 0);
38864 }
38865 #endif /* TARGET_MACHO */
38866 else
38867 {
38868 tmp = gen_rtx_REG (Pmode, CX_REG);
38869 output_set_got (tmp, NULL_RTX);
38870
38871 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
38872 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
38873 fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
38874 fnaddr = gen_const_mem (Pmode, fnaddr);
38875 }
38876 }
38877
38878 /* Our sibling call patterns do not allow memories, because we have no
38879 predicate that can distinguish between frame and non-frame memory.
38880 For our purposes here, we can get away with (ab)using a jump pattern,
38881 because we're going to do no optimization. */
38882 if (MEM_P (fnaddr))
38883 {
38884 if (sibcall_insn_operand (fnaddr, word_mode))
38885 {
38886 tmp = gen_rtx_CALL (VOIDmode, fnaddr, const0_rtx);
38887 tmp = emit_call_insn (tmp);
38888 SIBLING_CALL_P (tmp) = 1;
38889 }
38890 else
38891 emit_jump_insn (gen_indirect_jump (fnaddr));
38892 }
38893 else
38894 {
38895 if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
38896 fnaddr = legitimize_pic_address (fnaddr,
38897 gen_rtx_REG (Pmode, tmp_regno));
38898
38899 if (!sibcall_insn_operand (fnaddr, word_mode))
38900 {
38901 tmp = gen_rtx_REG (word_mode, tmp_regno);
38902 if (GET_MODE (fnaddr) != word_mode)
38903 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
38904 emit_move_insn (tmp, fnaddr);
38905 fnaddr = tmp;
38906 }
38907
38908 tmp = gen_rtx_MEM (QImode, fnaddr);
38909 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
38910 tmp = emit_call_insn (tmp);
38911 SIBLING_CALL_P (tmp) = 1;
38912 }
38913 emit_barrier ();
38914
38915 /* Emit just enough of rest_of_compilation to get the insns emitted.
38916 Note that use_thunk calls assemble_start_function et al. */
38917 tmp = get_insns ();
38918 shorten_branches (tmp);
38919 final_start_function (tmp, file, 1);
38920 final (tmp, file, 1);
38921 final_end_function ();
38922 }
38923
38924 static void
38925 x86_file_start (void)
38926 {
38927 default_file_start ();
38928 if (TARGET_16BIT)
38929 fputs ("\t.code16gcc\n", asm_out_file);
38930 #if TARGET_MACHO
38931 darwin_file_start ();
38932 #endif
38933 if (X86_FILE_START_VERSION_DIRECTIVE)
38934 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
38935 if (X86_FILE_START_FLTUSED)
38936 fputs ("\t.global\t__fltused\n", asm_out_file);
38937 if (ix86_asm_dialect == ASM_INTEL)
38938 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
38939 }
38940
38941 int
38942 x86_field_alignment (tree field, int computed)
38943 {
38944 enum machine_mode mode;
38945 tree type = TREE_TYPE (field);
38946
38947 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
38948 return computed;
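  /* Without -malign-double, the traditional ia32 ABI caps the in-struct
     alignment of double, long long and similar fields at 32 bits.  */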
38949 mode = TYPE_MODE (strip_array_types (type));
38950 if (mode == DFmode || mode == DCmode
38951 || GET_MODE_CLASS (mode) == MODE_INT
38952 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
38953 return MIN (32, computed);
38954 return computed;
38955 }
38956
38957 /* Output assembler code to FILE to increment profiler label # LABELNO
38958 for profiling a function entry. */
38959 void
38960 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
38961 {
38962 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
38963 : MCOUNT_NAME);
38964
38965 if (TARGET_64BIT)
38966 {
38967 #ifndef NO_PROFILE_COUNTERS
38968 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
38969 #endif
38970
38971 if (!TARGET_PECOFF && flag_pic)
38972 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
38973 else
38974 fprintf (file, "\tcall\t%s\n", mcount_name);
38975 }
38976 else if (flag_pic)
38977 {
38978 #ifndef NO_PROFILE_COUNTERS
38979 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
38980 LPREFIX, labelno);
38981 #endif
38982 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
38983 }
38984 else
38985 {
38986 #ifndef NO_PROFILE_COUNTERS
38987 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
38988 LPREFIX, labelno);
38989 #endif
38990 fprintf (file, "\tcall\t%s\n", mcount_name);
38991 }
38992 }
38993
38994 /* We don't have exact information about the insn sizes, but we may quite
38995 safely assume that we are informed about all 1-byte insns and memory
38996 address sizes. This is enough to eliminate unnecessary padding in
38997 99% of cases. */
38998
38999 static int
39000 min_insn_size (rtx insn)
39001 {
39002 int l = 0, len;
39003
39004 if (!INSN_P (insn) || !active_insn_p (insn))
39005 return 0;
39006
39007 /* Discard alignments we've emitted and jump instructions. */
39008 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
39009 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
39010 return 0;
39011
39012 /* Important case - calls are always 5 bytes.
39013 It is common to have many calls in a row. */
39014 if (CALL_P (insn)
39015 && symbolic_reference_mentioned_p (PATTERN (insn))
39016 && !SIBLING_CALL_P (insn))
39017 return 5;
39018 len = get_attr_length (insn);
39019 if (len <= 1)
39020 return 1;
39021
39022 /* For normal instructions we rely on get_attr_length being exact,
39023 with a few exceptions. */
39024 if (!JUMP_P (insn))
39025 {
39026 enum attr_type type = get_attr_type (insn);
39027
39028 switch (type)
39029 {
39030 case TYPE_MULTI:
39031 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
39032 || asm_noperands (PATTERN (insn)) >= 0)
39033 return 0;
39034 break;
39035 case TYPE_OTHER:
39036 case TYPE_FCMP:
39037 break;
39038 default:
39039 /* Otherwise trust get_attr_length. */
39040 return len;
39041 }
39042
39043 l = get_attr_length_address (insn);
39044 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
39045 l = 4;
39046 }
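  /* Assume at least one opcode byte on top of the address bytes computed
     above; otherwise fall back to a conservative two-byte minimum.  */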
39047 if (l)
39048 return 1+l;
39049 else
39050 return 2;
39051 }
39052
39053 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
39054
39055 /* The AMD K8 core mispredicts jumps when there are more than 3 jumps in a
39056 16-byte window. */
39057
39058 static void
39059 ix86_avoid_jump_mispredicts (void)
39060 {
39061 rtx insn, start = get_insns ();
39062 int nbytes = 0, njumps = 0;
39063 int isjump = 0;
39064
39065 /* Look for all minimal intervals of instructions containing 4 jumps.
39066 The intervals are bounded by START and INSN. NBYTES is the total
39067 size of the instructions in the interval, including INSN but not
39068 including START. When NBYTES is smaller than 16, it is possible
39069 that the end of START and INSN end up in the same 16-byte window.
39070
39071 The smallest offset in the window at which INSN can start is the case
39072 where START ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
39073 We add a p2align to the 16-byte window with maxskip 15 - NBYTES + sizeof (INSN).
39074
39075 Don't consider an asm goto as a jump: while it can contain a jump, it doesn't
39076 have to, control transfer to its label(s) can be performed through other
39077 means, and we also estimate the minimum length of all asm stmts as 0. */
39078 for (insn = start; insn; insn = NEXT_INSN (insn))
39079 {
39080 int min_size;
39081
39082 if (LABEL_P (insn))
39083 {
39084 int align = label_to_alignment (insn);
39085 int max_skip = label_to_max_skip (insn);
39086
39087 if (max_skip > 15)
39088 max_skip = 15;
39089 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
39090 already in the current 16 byte page, because otherwise
39091 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
39092 bytes to reach 16 byte boundary. */
39093 if (align <= 0
39094 || (align <= 3 && max_skip != (1 << align) - 1))
39095 max_skip = 0;
39096 if (dump_file)
39097 fprintf (dump_file, "Label %i with max_skip %i\n",
39098 INSN_UID (insn), max_skip);
39099 if (max_skip)
39100 {
39101 while (nbytes + max_skip >= 16)
39102 {
39103 start = NEXT_INSN (start);
39104 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
39105 || CALL_P (start))
39106 njumps--, isjump = 1;
39107 else
39108 isjump = 0;
39109 nbytes -= min_insn_size (start);
39110 }
39111 }
39112 continue;
39113 }
39114
39115 min_size = min_insn_size (insn);
39116 nbytes += min_size;
39117 if (dump_file)
39118 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
39119 INSN_UID (insn), min_size);
39120 if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
39121 || CALL_P (insn))
39122 njumps++;
39123 else
39124 continue;
39125
39126 while (njumps > 3)
39127 {
39128 start = NEXT_INSN (start);
39129 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
39130 || CALL_P (start))
39131 njumps--, isjump = 1;
39132 else
39133 isjump = 0;
39134 nbytes -= min_insn_size (start);
39135 }
39136 gcc_assert (njumps >= 0);
39137 if (dump_file)
39138 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
39139 INSN_UID (start), INSN_UID (insn), nbytes);
39140
39141 if (njumps == 3 && isjump && nbytes < 16)
39142 {
39143 int padsize = 15 - nbytes + min_insn_size (insn);
39144
39145 if (dump_file)
39146 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
39147 INSN_UID (insn), padsize);
39148 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
39149 }
39150 }
39151 }
39152 #endif
39153
39154 /* AMD Athlon works faster
39155 when RET is not the destination of a conditional jump or directly preceded
39156 by another jump instruction. We avoid the penalty by inserting a NOP just
39157 before the RET instructions in such cases. */
39158 static void
39159 ix86_pad_returns (void)
39160 {
39161 edge e;
39162 edge_iterator ei;
39163
39164 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
39165 {
39166 basic_block bb = e->src;
39167 rtx ret = BB_END (bb);
39168 rtx prev;
39169 bool replace = false;
39170
39171 if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
39172 || optimize_bb_for_size_p (bb))
39173 continue;
39174 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
39175 if (active_insn_p (prev) || LABEL_P (prev))
39176 break;
39177 if (prev && LABEL_P (prev))
39178 {
39179 edge e;
39180 edge_iterator ei;
39181
39182 FOR_EACH_EDGE (e, ei, bb->preds)
39183 if (EDGE_FREQUENCY (e) && e->src->index >= 0
39184 && !(e->flags & EDGE_FALLTHRU))
39185 {
39186 replace = true;
39187 break;
39188 }
39189 }
39190 if (!replace)
39191 {
39192 prev = prev_active_insn (ret);
39193 if (prev
39194 && ((JUMP_P (prev) && any_condjump_p (prev))
39195 || CALL_P (prev)))
39196 replace = true;
39197 /* Empty functions get a branch mispredict even when
39198 the jump destination is not visible to us. */
39199 if (!prev && !optimize_function_for_size_p (cfun))
39200 replace = true;
39201 }
39202 if (replace)
39203 {
39204 emit_jump_insn_before (gen_simple_return_internal_long (), ret);
39205 delete_insn (ret);
39206 }
39207 }
39208 }
39209
39210 /* Count the minimum number of instructions in BB. Return 4 if the
39211 number of instructions >= 4. */
39212
39213 static int
39214 ix86_count_insn_bb (basic_block bb)
39215 {
39216 rtx insn;
39217 int insn_count = 0;
39218
39219 /* Count number of instructions in this block. Return 4 if the number
39220 of instructions >= 4. */
39221 FOR_BB_INSNS (bb, insn)
39222 {
39223 /* Only happens in exit blocks. */
39224 if (JUMP_P (insn)
39225 && ANY_RETURN_P (PATTERN (insn)))
39226 break;
39227
39228 if (NONDEBUG_INSN_P (insn)
39229 && GET_CODE (PATTERN (insn)) != USE
39230 && GET_CODE (PATTERN (insn)) != CLOBBER)
39231 {
39232 insn_count++;
39233 if (insn_count >= 4)
39234 return insn_count;
39235 }
39236 }
39237
39238 return insn_count;
39239 }
39240
39241
39242 /* Count the minimum number of instructions in code path in BB.
39243 Return 4 if the number of instructions >= 4. */
39244
39245 static int
39246 ix86_count_insn (basic_block bb)
39247 {
39248 edge e;
39249 edge_iterator ei;
39250 int min_prev_count;
39251
39252 /* Only bother counting instructions along paths with no
39253 more than 2 basic blocks between entry and exit. Given
39254 that BB has an edge to exit, determine if a predecessor
39255 of BB has an edge from entry. If so, compute the number
39256 of instructions in the predecessor block. If there
39257 happen to be multiple such blocks, compute the minimum. */
39258 min_prev_count = 4;
39259 FOR_EACH_EDGE (e, ei, bb->preds)
39260 {
39261 edge prev_e;
39262 edge_iterator prev_ei;
39263
39264 if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
39265 {
39266 min_prev_count = 0;
39267 break;
39268 }
39269 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
39270 {
39271 if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
39272 {
39273 int count = ix86_count_insn_bb (e->src);
39274 if (count < min_prev_count)
39275 min_prev_count = count;
39276 break;
39277 }
39278 }
39279 }
39280
39281 if (min_prev_count < 4)
39282 min_prev_count += ix86_count_insn_bb (bb);
39283
39284 return min_prev_count;
39285 }
39286
39287 /* Pad short function to 4 instructions. */
39288
39289 static void
39290 ix86_pad_short_function (void)
39291 {
39292 edge e;
39293 edge_iterator ei;
39294
39295 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
39296 {
39297 rtx ret = BB_END (e->src);
39298 if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
39299 {
39300 int insn_count = ix86_count_insn (e->src);
39301
39302 /* Pad short function. */
39303 if (insn_count < 4)
39304 {
39305 rtx insn = ret;
39306
39307 /* Find epilogue. */
39308 while (insn
39309 && (!NOTE_P (insn)
39310 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
39311 insn = PREV_INSN (insn);
39312
39313 if (!insn)
39314 insn = ret;
39315
39316 /* Two NOPs count as one instruction. */
39317 insn_count = 2 * (4 - insn_count);
39318 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
39319 }
39320 }
39321 }
39322 }
39323
39324 /* Fix up a Windows system unwinder issue. If an EH region falls through into
39325 the epilogue, the Windows system unwinder will apply epilogue logic and
39326 produce incorrect offsets. This can be avoided by adding a nop between
39327 the last insn that can throw and the first insn of the epilogue. */
39328
39329 static void
39330 ix86_seh_fixup_eh_fallthru (void)
39331 {
39332 edge e;
39333 edge_iterator ei;
39334
39335 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
39336 {
39337 rtx insn, next;
39338
39339 /* Find the beginning of the epilogue. */
39340 for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
39341 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
39342 break;
39343 if (insn == NULL)
39344 continue;
39345
39346 /* We only care about preceding insns that can throw. */
39347 insn = prev_active_insn (insn);
39348 if (insn == NULL || !can_throw_internal (insn))
39349 continue;
39350
39351 /* Do not separate calls from their debug information. */
39352 for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
39353 if (NOTE_P (next)
39354 && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
39355 || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION))
39356 insn = next;
39357 else
39358 break;
39359
39360 emit_insn_after (gen_nops (const1_rtx), insn);
39361 }
39362 }
39363
39364 /* Implement machine specific optimizations. We implement padding of returns
39365 for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window. */
39366 static void
39367 ix86_reorg (void)
39368 {
39369 /* We are freeing block_for_insn in the toplev to keep compatibility
39370 with old MDEP_REORGS that are not CFG based. Recompute it now. */
39371 compute_bb_for_insn ();
39372
39373 if (TARGET_SEH && current_function_has_exception_handlers ())
39374 ix86_seh_fixup_eh_fallthru ();
39375
39376 if (optimize && optimize_function_for_speed_p (cfun))
39377 {
39378 if (TARGET_PAD_SHORT_FUNCTION)
39379 ix86_pad_short_function ();
39380 else if (TARGET_PAD_RETURNS)
39381 ix86_pad_returns ();
39382 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
39383 if (TARGET_FOUR_JUMP_LIMIT)
39384 ix86_avoid_jump_mispredicts ();
39385 #endif
39386 }
39387 }
39388
39389 /* Return nonzero when a QImode register that must be represented via a REX
39390 prefix is used. */
39391 bool
39392 x86_extended_QIreg_mentioned_p (rtx insn)
39393 {
39394 int i;
39395 extract_insn_cached (insn);
39396 for (i = 0; i < recog_data.n_operands; i++)
39397 if (GENERAL_REG_P (recog_data.operand[i])
39398 && !QI_REGNO_P (REGNO (recog_data.operand[i])))
39399 return true;
39400 return false;
39401 }
39402
39403 /* Return nonzero when P points to a register encoded via a REX prefix.
39404 Called via for_each_rtx. */
39405 static int
39406 extended_reg_mentioned_1 (rtx *p, void *)
39407 {
39408 unsigned int regno;
39409 if (!REG_P (*p))
39410 return 0;
39411 regno = REGNO (*p);
39412 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
39413 }
39414
39415 /* Return true when INSN mentions register that must be encoded using REX
39416 prefix. */
39417 bool
39418 x86_extended_reg_mentioned_p (rtx insn)
39419 {
39420 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
39421 extended_reg_mentioned_1, NULL);
39422 }
39423
39424 /* If profitable, negate (without causing overflow) integer constant
39425 of mode MODE at location LOC. Return true in this case. */
39426 bool
39427 x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
39428 {
39429 HOST_WIDE_INT val;
39430
39431 if (!CONST_INT_P (*loc))
39432 return false;
39433
39434 switch (mode)
39435 {
39436 case DImode:
39437 /* DImode x86_64 constants must fit in 32 bits. */
39438 gcc_assert (x86_64_immediate_operand (*loc, mode));
39439
39440 mode = SImode;
39441 break;
39442
39443 case SImode:
39444 case HImode:
39445 case QImode:
39446 break;
39447
39448 default:
39449 gcc_unreachable ();
39450 }
39451
39452 /* Avoid overflows. */
39453 if (mode_signbit_p (mode, *loc))
39454 return false;
39455
39456 val = INTVAL (*loc);
39457
39458 /* Make things pretty: emit `subl $4,%eax' rather than `addl $-4,%eax'.
39459 Exceptions: -128 encodes smaller than 128, so swap the sign and the op. */
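  /* For instance, (plus (reg) (const_int -4)) can then be emitted as
     subl $4, while +128 is negated to -128 because -128 still fits in a
     sign-extended 8-bit immediate and +128 does not.  */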
39460 if ((val < 0 && val != -128)
39461 || val == 128)
39462 {
39463 *loc = GEN_INT (-val);
39464 return true;
39465 }
39466
39467 return false;
39468 }
39469
39470 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
39471 optabs would emit if we didn't have TFmode patterns. */
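/* For an input with the sign bit set we compute i0 = (in >> 1) | (in & 1),
   which is nonnegative and keeps the discarded low bit sticky, convert that
   with a signed conversion, and then double the result; a plain signed
   conversion handles the nonnegative case directly.  */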
39472
39473 void
39474 x86_emit_floatuns (rtx operands[2])
39475 {
39476 rtx neglab, donelab, i0, i1, f0, in, out;
39477 enum machine_mode mode, inmode;
39478
39479 inmode = GET_MODE (operands[1]);
39480 gcc_assert (inmode == SImode || inmode == DImode);
39481
39482 out = operands[0];
39483 in = force_reg (inmode, operands[1]);
39484 mode = GET_MODE (out);
39485 neglab = gen_label_rtx ();
39486 donelab = gen_label_rtx ();
39487 f0 = gen_reg_rtx (mode);
39488
39489 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
39490
39491 expand_float (out, in, 0);
39492
39493 emit_jump_insn (gen_jump (donelab));
39494 emit_barrier ();
39495
39496 emit_label (neglab);
39497
39498 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
39499 1, OPTAB_DIRECT);
39500 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
39501 1, OPTAB_DIRECT);
39502 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
39503
39504 expand_float (f0, i0, 0);
39505
39506 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
39507
39508 emit_label (donelab);
39509 }
39510 \f
39511 /* AVX512F supports 64-byte integer vector operations,
39512 so the longest vector we are faced with is V64QImode. */
39513 #define MAX_VECT_LEN 64
39514
39515 struct expand_vec_perm_d
39516 {
39517 rtx target, op0, op1;
39518 unsigned char perm[MAX_VECT_LEN];
39519 enum machine_mode vmode;
39520 unsigned char nelt;
39521 bool one_operand_p;
39522 bool testing_p;
39523 };
39524
39525 static bool canonicalize_perm (struct expand_vec_perm_d *d);
39526 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
39527 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
39528
39529 /* Get a vector mode of the same size as the original but with elements
39530 twice as wide. This is only guaranteed to apply to integral vectors. */
39531
39532 static inline enum machine_mode
39533 get_mode_wider_vector (enum machine_mode o)
39534 {
39535 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
39536 enum machine_mode n = GET_MODE_WIDER_MODE (o);
39537 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
39538 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
39539 return n;
39540 }
39541
39542 /* A subroutine of ix86_expand_vector_init_duplicate. Tries to
39543 fill target with val via vec_duplicate. */
39544
39545 static bool
39546 ix86_vector_duplicate_value (enum machine_mode mode, rtx target, rtx val)
39547 {
39548 bool ok;
39549 rtx insn, dup;
39550
39551 /* First attempt to recognize VAL as-is. */
39552 dup = gen_rtx_VEC_DUPLICATE (mode, val);
39553 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
39554 if (recog_memoized (insn) < 0)
39555 {
39556 rtx seq;
39557 /* If that fails, force VAL into a register. */
39558
39559 start_sequence ();
39560 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
39561 seq = get_insns ();
39562 end_sequence ();
39563 if (seq)
39564 emit_insn_before (seq, insn);
39565
39566 ok = recog_memoized (insn) >= 0;
39567 gcc_assert (ok);
39568 }
39569 return true;
39570 }
39571
39572 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
39573 with all elements equal to VAR. Return true if successful. */
39574
39575 static bool
39576 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
39577 rtx target, rtx val)
39578 {
39579 bool ok;
39580
39581 switch (mode)
39582 {
39583 case V2SImode:
39584 case V2SFmode:
39585 if (!mmx_ok)
39586 return false;
39587 /* FALLTHRU */
39588
39589 case V4DFmode:
39590 case V4DImode:
39591 case V8SFmode:
39592 case V8SImode:
39593 case V2DFmode:
39594 case V2DImode:
39595 case V4SFmode:
39596 case V4SImode:
39597 case V16SImode:
39598 case V8DImode:
39599 case V16SFmode:
39600 case V8DFmode:
39601 return ix86_vector_duplicate_value (mode, target, val);
39602
39603 case V4HImode:
39604 if (!mmx_ok)
39605 return false;
39606 if (TARGET_SSE || TARGET_3DNOW_A)
39607 {
39608 rtx x;
39609
39610 val = gen_lowpart (SImode, val);
39611 x = gen_rtx_TRUNCATE (HImode, val);
39612 x = gen_rtx_VEC_DUPLICATE (mode, x);
39613 emit_insn (gen_rtx_SET (VOIDmode, target, x));
39614 return true;
39615 }
39616 goto widen;
39617
39618 case V8QImode:
39619 if (!mmx_ok)
39620 return false;
39621 goto widen;
39622
39623 case V8HImode:
39624 if (TARGET_SSE2)
39625 {
39626 struct expand_vec_perm_d dperm;
39627 rtx tmp1, tmp2;
39628
39629 permute:
39630 memset (&dperm, 0, sizeof (dperm));
39631 dperm.target = target;
39632 dperm.vmode = mode;
39633 dperm.nelt = GET_MODE_NUNITS (mode);
39634 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
39635 dperm.one_operand_p = true;
39636
39637 /* Extend to SImode using a paradoxical SUBREG. */
39638 tmp1 = gen_reg_rtx (SImode);
39639 emit_move_insn (tmp1, gen_lowpart (SImode, val));
39640
39641 /* Insert the SImode value as low element of a V4SImode vector. */
39642 tmp2 = gen_reg_rtx (V4SImode);
39643 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
39644 emit_move_insn (dperm.op0, gen_lowpart (mode, tmp2));
39645
39646 ok = (expand_vec_perm_1 (&dperm)
39647 || expand_vec_perm_broadcast_1 (&dperm));
39648 gcc_assert (ok);
39649 return ok;
39650 }
39651 goto widen;
39652
39653 case V16QImode:
39654 if (TARGET_SSE2)
39655 goto permute;
39656 goto widen;
39657
39658 widen:
39659 /* Replicate the value once into the next wider mode and recurse. */
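	  /* For example, a V16QImode broadcast of X first forms the HImode
	     value (X << 8) | X and then broadcasts that as V8HImode.  */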
39660 {
39661 enum machine_mode smode, wsmode, wvmode;
39662 rtx x;
39663
39664 smode = GET_MODE_INNER (mode);
39665 wvmode = get_mode_wider_vector (mode);
39666 wsmode = GET_MODE_INNER (wvmode);
39667
39668 val = convert_modes (wsmode, smode, val, true);
39669 x = expand_simple_binop (wsmode, ASHIFT, val,
39670 GEN_INT (GET_MODE_BITSIZE (smode)),
39671 NULL_RTX, 1, OPTAB_LIB_WIDEN);
39672 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
39673
39674 x = gen_reg_rtx (wvmode);
39675 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
39676 gcc_assert (ok);
39677 emit_move_insn (target, gen_lowpart (GET_MODE (target), x));
39678 return ok;
39679 }
39680
39681 case V16HImode:
39682 case V32QImode:
39683 {
39684 enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
39685 rtx x = gen_reg_rtx (hvmode);
39686
39687 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
39688 gcc_assert (ok);
39689
39690 x = gen_rtx_VEC_CONCAT (mode, x, x);
39691 emit_insn (gen_rtx_SET (VOIDmode, target, x));
39692 }
39693 return true;
39694
39695 default:
39696 return false;
39697 }
39698 }
39699
39700 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
39701 whose ONE_VAR element is VAR, and other elements are zero. Return true
39702 if successful. */
39703
39704 static bool
39705 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
39706 rtx target, rtx var, int one_var)
39707 {
39708 enum machine_mode vsimode;
39709 rtx new_target;
39710 rtx x, tmp;
39711 bool use_vector_set = false;
39712
39713 switch (mode)
39714 {
39715 case V2DImode:
39716 /* For SSE4.1, we normally use vector set. But if the second
39717 element is zero and inter-unit moves are OK, we use movq
39718 instead. */
39719 use_vector_set = (TARGET_64BIT && TARGET_SSE4_1
39720 && !(TARGET_INTER_UNIT_MOVES_TO_VEC
39721 && one_var == 0));
39722 break;
39723 case V16QImode:
39724 case V4SImode:
39725 case V4SFmode:
39726 use_vector_set = TARGET_SSE4_1;
39727 break;
39728 case V8HImode:
39729 use_vector_set = TARGET_SSE2;
39730 break;
39731 case V4HImode:
39732 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
39733 break;
39734 case V32QImode:
39735 case V16HImode:
39736 case V8SImode:
39737 case V8SFmode:
39738 case V4DFmode:
39739 use_vector_set = TARGET_AVX;
39740 break;
39741 case V4DImode:
39742 /* Use ix86_expand_vector_set in 64bit mode only. */
39743 use_vector_set = TARGET_AVX && TARGET_64BIT;
39744 break;
39745 default:
39746 break;
39747 }
39748
39749 if (use_vector_set)
39750 {
39751 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
39752 var = force_reg (GET_MODE_INNER (mode), var);
39753 ix86_expand_vector_set (mmx_ok, target, var, one_var);
39754 return true;
39755 }
39756
39757 switch (mode)
39758 {
39759 case V2SFmode:
39760 case V2SImode:
39761 if (!mmx_ok)
39762 return false;
39763 /* FALLTHRU */
39764
39765 case V2DFmode:
39766 case V2DImode:
39767 if (one_var != 0)
39768 return false;
39769 var = force_reg (GET_MODE_INNER (mode), var);
39770 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
39771 emit_insn (gen_rtx_SET (VOIDmode, target, x));
39772 return true;
39773
39774 case V4SFmode:
39775 case V4SImode:
39776 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
39777 new_target = gen_reg_rtx (mode);
39778 else
39779 new_target = target;
39780 var = force_reg (GET_MODE_INNER (mode), var);
39781 x = gen_rtx_VEC_DUPLICATE (mode, var);
39782 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
39783 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
39784 if (one_var != 0)
39785 {
39786 /* We need to shuffle the value to the correct position, so
39787 create a new pseudo to store the intermediate result. */
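          /* For example (position chosen for illustration): with
             one_var == 2 the vector currently holds (X, 0, 0, 0); the
             shuffles below move X into lane 2 and fill the remaining lanes
             from a zero lane, producing (0, 0, X, 0).  */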
39788
39789 /* With SSE2, we can use the integer shuffle insns. */
39790 if (mode != V4SFmode && TARGET_SSE2)
39791 {
39792 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
39793 const1_rtx,
39794 GEN_INT (one_var == 1 ? 0 : 1),
39795 GEN_INT (one_var == 2 ? 0 : 1),
39796 GEN_INT (one_var == 3 ? 0 : 1)));
39797 if (target != new_target)
39798 emit_move_insn (target, new_target);
39799 return true;
39800 }
39801
39802 /* Otherwise convert the intermediate result to V4SFmode and
39803 use the SSE1 shuffle instructions. */
39804 if (mode != V4SFmode)
39805 {
39806 tmp = gen_reg_rtx (V4SFmode);
39807 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
39808 }
39809 else
39810 tmp = new_target;
39811
39812 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
39813 const1_rtx,
39814 GEN_INT (one_var == 1 ? 0 : 1),
39815 GEN_INT (one_var == 2 ? 0+4 : 1+4),
39816 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
39817
39818 if (mode != V4SFmode)
39819 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
39820 else if (tmp != target)
39821 emit_move_insn (target, tmp);
39822 }
39823 else if (target != new_target)
39824 emit_move_insn (target, new_target);
39825 return true;
39826
39827 case V8HImode:
39828 case V16QImode:
39829 vsimode = V4SImode;
39830 goto widen;
39831 case V4HImode:
39832 case V8QImode:
39833 if (!mmx_ok)
39834 return false;
39835 vsimode = V2SImode;
39836 goto widen;
39837 widen:
39838 if (one_var != 0)
39839 return false;
39840
39841 /* Zero extend the variable element to SImode and recurse. */
39842 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
39843
39844 x = gen_reg_rtx (vsimode);
39845 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
39846 var, one_var))
39847 gcc_unreachable ();
39848
39849 emit_move_insn (target, gen_lowpart (mode, x));
39850 return true;
39851
39852 default:
39853 return false;
39854 }
39855 }
39856
39857 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
39858 consisting of the values in VALS. It is known that all elements
39859 except ONE_VAR are constants. Return true if successful. */
39860
39861 static bool
39862 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
39863 rtx target, rtx vals, int one_var)
39864 {
39865 rtx var = XVECEXP (vals, 0, one_var);
39866 enum machine_mode wmode;
39867 rtx const_vec, x;
39868
39869 const_vec = copy_rtx (vals);
39870 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
39871 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
39872
39873 switch (mode)
39874 {
39875 case V2DFmode:
39876 case V2DImode:
39877 case V2SFmode:
39878 case V2SImode:
39879 /* For the two element vectors, it's just as easy to use
39880 the general case. */
39881 return false;
39882
39883 case V4DImode:
39884 /* Use ix86_expand_vector_set in 64bit mode only. */
39885 if (!TARGET_64BIT)
39886 return false;
39887 case V4DFmode:
39888 case V8SFmode:
39889 case V8SImode:
39890 case V16HImode:
39891 case V32QImode:
39892 case V4SFmode:
39893 case V4SImode:
39894 case V8HImode:
39895 case V4HImode:
39896 break;
39897
39898 case V16QImode:
39899 if (TARGET_SSE4_1)
39900 break;
39901 wmode = V8HImode;
39902 goto widen;
39903 case V8QImode:
39904 wmode = V4HImode;
39905 goto widen;
39906 widen:
39907 /* There's no way to set one QImode entry easily. Combine
39908 the variable value with its adjacent constant value, and
39909 promote to an HImode set. */
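      /* For instance (index chosen only for illustration): if the variable
         element sits at index 5 of a V16QImode vector, its constant
         neighbour is index 4; the variable byte is shifted into the high
         half of an HImode value, IORed with the constant byte, and stored
         with a single V8HImode vector-set at element 2 (5 >> 1).  */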
39910 x = XVECEXP (vals, 0, one_var ^ 1);
39911 if (one_var & 1)
39912 {
39913 var = convert_modes (HImode, QImode, var, true);
39914 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
39915 NULL_RTX, 1, OPTAB_LIB_WIDEN);
39916 x = GEN_INT (INTVAL (x) & 0xff);
39917 }
39918 else
39919 {
39920 var = convert_modes (HImode, QImode, var, true);
39921 x = gen_int_mode (INTVAL (x) << 8, HImode);
39922 }
39923 if (x != const0_rtx)
39924 var = expand_simple_binop (HImode, IOR, var, x, var,
39925 1, OPTAB_LIB_WIDEN);
39926
39927 x = gen_reg_rtx (wmode);
39928 emit_move_insn (x, gen_lowpart (wmode, const_vec));
39929 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
39930
39931 emit_move_insn (target, gen_lowpart (mode, x));
39932 return true;
39933
39934 default:
39935 return false;
39936 }
39937
39938 emit_move_insn (target, const_vec);
39939 ix86_expand_vector_set (mmx_ok, target, var, one_var);
39940 return true;
39941 }
39942
39943 /* A subroutine of ix86_expand_vector_init_general. Use vector
39944 concatenate to handle the most general case: all values variable,
39945 and none identical. */
39946
39947 static void
39948 ix86_expand_vector_init_concat (enum machine_mode mode,
39949 rtx target, rtx *ops, int n)
39950 {
39951 enum machine_mode cmode, hmode = VOIDmode, gmode = VOIDmode;
39952 rtx first[16], second[8], third[4];
39953 rtvec v;
39954 int i, j;
39955
39956 switch (n)
39957 {
39958 case 2:
39959 switch (mode)
39960 {
39961 case V16SImode:
39962 cmode = V8SImode;
39963 break;
39964 case V16SFmode:
39965 cmode = V8SFmode;
39966 break;
39967 case V8DImode:
39968 cmode = V4DImode;
39969 break;
39970 case V8DFmode:
39971 cmode = V4DFmode;
39972 break;
39973 case V8SImode:
39974 cmode = V4SImode;
39975 break;
39976 case V8SFmode:
39977 cmode = V4SFmode;
39978 break;
39979 case V4DImode:
39980 cmode = V2DImode;
39981 break;
39982 case V4DFmode:
39983 cmode = V2DFmode;
39984 break;
39985 case V4SImode:
39986 cmode = V2SImode;
39987 break;
39988 case V4SFmode:
39989 cmode = V2SFmode;
39990 break;
39991 case V2DImode:
39992 cmode = DImode;
39993 break;
39994 case V2SImode:
39995 cmode = SImode;
39996 break;
39997 case V2DFmode:
39998 cmode = DFmode;
39999 break;
40000 case V2SFmode:
40001 cmode = SFmode;
40002 break;
40003 default:
40004 gcc_unreachable ();
40005 }
40006
40007 if (!register_operand (ops[1], cmode))
40008 ops[1] = force_reg (cmode, ops[1]);
40009 if (!register_operand (ops[0], cmode))
40010 ops[0] = force_reg (cmode, ops[0]);
40011 emit_insn (gen_rtx_SET (VOIDmode, target,
40012 gen_rtx_VEC_CONCAT (mode, ops[0],
40013 ops[1])));
40014 break;
40015
40016 case 4:
40017 switch (mode)
40018 {
40019 case V4DImode:
40020 cmode = V2DImode;
40021 break;
40022 case V4DFmode:
40023 cmode = V2DFmode;
40024 break;
40025 case V4SImode:
40026 cmode = V2SImode;
40027 break;
40028 case V4SFmode:
40029 cmode = V2SFmode;
40030 break;
40031 default:
40032 gcc_unreachable ();
40033 }
40034 goto half;
40035
40036 case 8:
40037 switch (mode)
40038 {
40039 case V8DImode:
40040 cmode = V2DImode;
40041 hmode = V4DImode;
40042 break;
40043 case V8DFmode:
40044 cmode = V2DFmode;
40045 hmode = V4DFmode;
40046 break;
40047 case V8SImode:
40048 cmode = V2SImode;
40049 hmode = V4SImode;
40050 break;
40051 case V8SFmode:
40052 cmode = V2SFmode;
40053 hmode = V4SFmode;
40054 break;
40055 default:
40056 gcc_unreachable ();
40057 }
40058 goto half;
40059
40060 case 16:
40061 switch (mode)
40062 {
40063 case V16SImode:
40064 cmode = V2SImode;
40065 hmode = V4SImode;
40066 gmode = V8SImode;
40067 break;
40068 case V16SFmode:
40069 cmode = V2SFmode;
40070 hmode = V4SFmode;
40071 gmode = V8SFmode;
40072 break;
40073 default:
40074 gcc_unreachable ();
40075 }
40076 goto half;
40077
40078 half:
40079 /* FIXME: We process inputs backward to help RA. PR 36222. */
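      /* Example (mode and count assumed for illustration): with n == 8 and
         V8SFmode the loop below pairs the eight scalar operands into four
         V2SFmode halves; the later passes then build two V4SFmode quarters
         and concatenate those into the V8SFmode target.  */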
40080 i = n - 1;
40081 j = (n >> 1) - 1;
40082 for (; i > 0; i -= 2, j--)
40083 {
40084 first[j] = gen_reg_rtx (cmode);
40085 v = gen_rtvec (2, ops[i - 1], ops[i]);
40086 ix86_expand_vector_init (false, first[j],
40087 gen_rtx_PARALLEL (cmode, v));
40088 }
40089
40090 n >>= 1;
40091 if (n > 4)
40092 {
40093 gcc_assert (hmode != VOIDmode);
40094 gcc_assert (gmode != VOIDmode);
40095 for (i = j = 0; i < n; i += 2, j++)
40096 {
40097 second[j] = gen_reg_rtx (hmode);
40098 ix86_expand_vector_init_concat (hmode, second [j],
40099 &first [i], 2);
40100 }
40101 n >>= 1;
40102 for (i = j = 0; i < n; i += 2, j++)
40103 {
40104 third[j] = gen_reg_rtx (gmode);
40105 ix86_expand_vector_init_concat (gmode, third[j],
40106 &second[i], 2);
40107 }
40108 n >>= 1;
40109 ix86_expand_vector_init_concat (mode, target, third, n);
40110 }
40111 else if (n > 2)
40112 {
40113 gcc_assert (hmode != VOIDmode);
40114 for (i = j = 0; i < n; i += 2, j++)
40115 {
40116 second[j] = gen_reg_rtx (hmode);
40117 ix86_expand_vector_init_concat (hmode, second [j],
40118 &first [i], 2);
40119 }
40120 n >>= 1;
40121 ix86_expand_vector_init_concat (mode, target, second, n);
40122 }
40123 else
40124 ix86_expand_vector_init_concat (mode, target, first, n);
40125 break;
40126
40127 default:
40128 gcc_unreachable ();
40129 }
40130 }
40131
40132 /* A subroutine of ix86_expand_vector_init_general. Use vector
40133 interleave to handle the most general case: all values variable,
40134 and none identical. */
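/* As an illustration (mode chosen arbitrarily): for V16QImode the routine
   consumes sixteen scalar operands, two per iteration of the first loop;
   the eight partial vectors are then interleaved pairwise through V8HImode,
   V4SImode and finally V2DImode until a single vector remains.  */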
40135
40136 static void
40137 ix86_expand_vector_init_interleave (enum machine_mode mode,
40138 rtx target, rtx *ops, int n)
40139 {
40140 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
40141 int i, j;
40142 rtx op0, op1;
40143 rtx (*gen_load_even) (rtx, rtx, rtx);
40144 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
40145 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
40146
40147 switch (mode)
40148 {
40149 case V8HImode:
40150 gen_load_even = gen_vec_setv8hi;
40151 gen_interleave_first_low = gen_vec_interleave_lowv4si;
40152 gen_interleave_second_low = gen_vec_interleave_lowv2di;
40153 inner_mode = HImode;
40154 first_imode = V4SImode;
40155 second_imode = V2DImode;
40156 third_imode = VOIDmode;
40157 break;
40158 case V16QImode:
40159 gen_load_even = gen_vec_setv16qi;
40160 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
40161 gen_interleave_second_low = gen_vec_interleave_lowv4si;
40162 inner_mode = QImode;
40163 first_imode = V8HImode;
40164 second_imode = V4SImode;
40165 third_imode = V2DImode;
40166 break;
40167 default:
40168 gcc_unreachable ();
40169 }
40170
40171 for (i = 0; i < n; i++)
40172 {
40173 /* Extend the odd element to SImode using a paradoxical SUBREG. */
40174 op0 = gen_reg_rtx (SImode);
40175 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
40176
40177 /* Insert the SImode value as low element of V4SImode vector. */
40178 op1 = gen_reg_rtx (V4SImode);
40179 op0 = gen_rtx_VEC_MERGE (V4SImode,
40180 gen_rtx_VEC_DUPLICATE (V4SImode,
40181 op0),
40182 CONST0_RTX (V4SImode),
40183 const1_rtx);
40184 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
40185
40186 /* Cast the V4SImode vector back to a vector in original mode. */
40187 op0 = gen_reg_rtx (mode);
40188 emit_move_insn (op0, gen_lowpart (mode, op1));
40189
40190 /* Load even elements into the second position. */
40191 emit_insn (gen_load_even (op0,
40192 force_reg (inner_mode,
40193 ops [i + i + 1]),
40194 const1_rtx));
40195
40196 /* Cast vector to FIRST_IMODE vector. */
40197 ops[i] = gen_reg_rtx (first_imode);
40198 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
40199 }
40200
40201 /* Interleave low FIRST_IMODE vectors. */
40202 for (i = j = 0; i < n; i += 2, j++)
40203 {
40204 op0 = gen_reg_rtx (first_imode);
40205 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
40206
40207 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
40208 ops[j] = gen_reg_rtx (second_imode);
40209 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
40210 }
40211
40212 /* Interleave low SECOND_IMODE vectors. */
40213 switch (second_imode)
40214 {
40215 case V4SImode:
40216 for (i = j = 0; i < n / 2; i += 2, j++)
40217 {
40218 op0 = gen_reg_rtx (second_imode);
40219 emit_insn (gen_interleave_second_low (op0, ops[i],
40220 ops[i + 1]));
40221
40222 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
40223 vector. */
40224 ops[j] = gen_reg_rtx (third_imode);
40225 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
40226 }
40227 second_imode = V2DImode;
40228 gen_interleave_second_low = gen_vec_interleave_lowv2di;
40229 /* FALLTHRU */
40230
40231 case V2DImode:
40232 op0 = gen_reg_rtx (second_imode);
40233 emit_insn (gen_interleave_second_low (op0, ops[0],
40234 ops[1]));
40235
40236 /* Cast the SECOND_IMODE vector back to a vector in the original
40237 mode. */
40238 emit_insn (gen_rtx_SET (VOIDmode, target,
40239 gen_lowpart (mode, op0)));
40240 break;
40241
40242 default:
40243 gcc_unreachable ();
40244 }
40245 }
40246
40247 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
40248 all values variable, and none identical. */
40249
40250 static void
40251 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
40252 rtx target, rtx vals)
40253 {
40254 rtx ops[64], op0, op1;
40255 enum machine_mode half_mode = VOIDmode;
40256 int n, i;
40257
40258 switch (mode)
40259 {
40260 case V2SFmode:
40261 case V2SImode:
40262 if (!mmx_ok && !TARGET_SSE)
40263 break;
40264 /* FALLTHRU */
40265
40266 case V16SImode:
40267 case V16SFmode:
40268 case V8DFmode:
40269 case V8DImode:
40270 case V8SFmode:
40271 case V8SImode:
40272 case V4DFmode:
40273 case V4DImode:
40274 case V4SFmode:
40275 case V4SImode:
40276 case V2DFmode:
40277 case V2DImode:
40278 n = GET_MODE_NUNITS (mode);
40279 for (i = 0; i < n; i++)
40280 ops[i] = XVECEXP (vals, 0, i);
40281 ix86_expand_vector_init_concat (mode, target, ops, n);
40282 return;
40283
40284 case V32QImode:
40285 half_mode = V16QImode;
40286 goto half;
40287
40288 case V16HImode:
40289 half_mode = V8HImode;
40290 goto half;
40291
40292 half:
40293 n = GET_MODE_NUNITS (mode);
40294 for (i = 0; i < n; i++)
40295 ops[i] = XVECEXP (vals, 0, i);
40296 op0 = gen_reg_rtx (half_mode);
40297 op1 = gen_reg_rtx (half_mode);
40298 ix86_expand_vector_init_interleave (half_mode, op0, ops,
40299 n >> 2);
40300 ix86_expand_vector_init_interleave (half_mode, op1,
40301 &ops [n >> 1], n >> 2);
40302 emit_insn (gen_rtx_SET (VOIDmode, target,
40303 gen_rtx_VEC_CONCAT (mode, op0, op1)));
40304 return;
40305
40306 case V16QImode:
40307 if (!TARGET_SSE4_1)
40308 break;
40309 /* FALLTHRU */
40310
40311 case V8HImode:
40312 if (!TARGET_SSE2)
40313 break;
40314
40315 /* Don't use ix86_expand_vector_init_interleave if we can't
40316 move from GPR to SSE register directly. */
40317 if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
40318 break;
40319
40320 n = GET_MODE_NUNITS (mode);
40321 for (i = 0; i < n; i++)
40322 ops[i] = XVECEXP (vals, 0, i);
40323 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
40324 return;
40325
40326 case V4HImode:
40327 case V8QImode:
40328 break;
40329
40330 default:
40331 gcc_unreachable ();
40332 }
40333
40334 {
40335 int i, j, n_elts, n_words, n_elt_per_word;
40336 enum machine_mode inner_mode;
40337 rtx words[4], shift;
40338
40339 inner_mode = GET_MODE_INNER (mode);
40340 n_elts = GET_MODE_NUNITS (mode);
40341 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
40342 n_elt_per_word = n_elts / n_words;
40343 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
40344
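    /* Example (modes assumed for illustration): for V4HImode with a 32-bit
       word, n_words is 2 and each SImode word is assembled from two HImode
       elements, the higher-indexed element shifted into the upper half; the
       two words are then stitched into the vector further below.  */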
40345 for (i = 0; i < n_words; ++i)
40346 {
40347 rtx word = NULL_RTX;
40348
40349 for (j = 0; j < n_elt_per_word; ++j)
40350 {
40351 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
40352 elt = convert_modes (word_mode, inner_mode, elt, true);
40353
40354 if (j == 0)
40355 word = elt;
40356 else
40357 {
40358 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
40359 word, 1, OPTAB_LIB_WIDEN);
40360 word = expand_simple_binop (word_mode, IOR, word, elt,
40361 word, 1, OPTAB_LIB_WIDEN);
40362 }
40363 }
40364
40365 words[i] = word;
40366 }
40367
40368 if (n_words == 1)
40369 emit_move_insn (target, gen_lowpart (mode, words[0]));
40370 else if (n_words == 2)
40371 {
40372 rtx tmp = gen_reg_rtx (mode);
40373 emit_clobber (tmp);
40374 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
40375 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
40376 emit_move_insn (target, tmp);
40377 }
40378 else if (n_words == 4)
40379 {
40380 rtx tmp = gen_reg_rtx (V4SImode);
40381 gcc_assert (word_mode == SImode);
40382 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
40383 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
40384 emit_move_insn (target, gen_lowpart (mode, tmp));
40385 }
40386 else
40387 gcc_unreachable ();
40388 }
40389 }
40390
40391 /* Initialize vector TARGET via VALS. Suppress the use of MMX
40392 instructions unless MMX_OK is true. */
40393
40394 void
40395 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
40396 {
40397 enum machine_mode mode = GET_MODE (target);
40398 enum machine_mode inner_mode = GET_MODE_INNER (mode);
40399 int n_elts = GET_MODE_NUNITS (mode);
40400 int n_var = 0, one_var = -1;
40401 bool all_same = true, all_const_zero = true;
40402 int i;
40403 rtx x;
40404
40405 for (i = 0; i < n_elts; ++i)
40406 {
40407 x = XVECEXP (vals, 0, i);
40408 if (!(CONST_INT_P (x)
40409 || GET_CODE (x) == CONST_DOUBLE
40410 || GET_CODE (x) == CONST_FIXED))
40411 n_var++, one_var = i;
40412 else if (x != CONST0_RTX (inner_mode))
40413 all_const_zero = false;
40414 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
40415 all_same = false;
40416 }
40417
40418 /* Constants are best loaded from the constant pool. */
40419 if (n_var == 0)
40420 {
40421 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
40422 return;
40423 }
40424
40425 /* If all values are identical, broadcast the value. */
40426 if (all_same
40427 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
40428 XVECEXP (vals, 0, 0)))
40429 return;
40430
40431 /* Values where only one field is non-constant are best loaded from
40432 the pool and overwritten via move later. */
40433 if (n_var == 1)
40434 {
40435 if (all_const_zero
40436 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
40437 XVECEXP (vals, 0, one_var),
40438 one_var))
40439 return;
40440
40441 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
40442 return;
40443 }
40444
40445 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
40446 }
40447
40448 void
40449 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
40450 {
40451 enum machine_mode mode = GET_MODE (target);
40452 enum machine_mode inner_mode = GET_MODE_INNER (mode);
40453 enum machine_mode half_mode;
40454 bool use_vec_merge = false;
40455 rtx tmp;
40456 static rtx (*gen_extract[6][2]) (rtx, rtx)
40457 = {
40458 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
40459 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
40460 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
40461 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
40462 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
40463 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
40464 };
40465 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
40466 = {
40467 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
40468 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
40469 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
40470 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
40471 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
40472 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
40473 };
40474 int i, j, n;
40475
40476 switch (mode)
40477 {
40478 case V2SFmode:
40479 case V2SImode:
40480 if (mmx_ok)
40481 {
40482 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
40483 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
40484 if (elt == 0)
40485 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
40486 else
40487 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
40488 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40489 return;
40490 }
40491 break;
40492
40493 case V2DImode:
40494 use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
40495 if (use_vec_merge)
40496 break;
40497
40498 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
40499 ix86_expand_vector_extract (false, tmp, target, 1 - elt);
40500 if (elt == 0)
40501 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
40502 else
40503 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
40504 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40505 return;
40506
40507 case V2DFmode:
40508 {
40509 rtx op0, op1;
40510
40511 /* For the two element vectors, we implement a VEC_CONCAT with
40512 the extraction of the other element. */
40513
40514 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
40515 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
40516
40517 if (elt == 0)
40518 op0 = val, op1 = tmp;
40519 else
40520 op0 = tmp, op1 = val;
40521
40522 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
40523 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40524 }
40525 return;
40526
40527 case V4SFmode:
40528 use_vec_merge = TARGET_SSE4_1;
40529 if (use_vec_merge)
40530 break;
40531
40532 switch (elt)
40533 {
40534 case 0:
40535 use_vec_merge = true;
40536 break;
40537
40538 case 1:
40539 /* tmp = target = A B C D */
40540 tmp = copy_to_reg (target);
40541 /* target = A A B B */
40542 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
40543 /* target = X A B B */
40544 ix86_expand_vector_set (false, target, val, 0);
40545 /* target = A X C D */
40546 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
40547 const1_rtx, const0_rtx,
40548 GEN_INT (2+4), GEN_INT (3+4)));
40549 return;
40550
40551 case 2:
40552 /* tmp = target = A B C D */
40553 tmp = copy_to_reg (target);
40554 /* tmp = X B C D */
40555 ix86_expand_vector_set (false, tmp, val, 0);
40556 /* target = A B X D */
40557 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
40558 const0_rtx, const1_rtx,
40559 GEN_INT (0+4), GEN_INT (3+4)));
40560 return;
40561
40562 case 3:
40563 /* tmp = target = A B C D */
40564 tmp = copy_to_reg (target);
40565 /* tmp = X B C D */
40566 ix86_expand_vector_set (false, tmp, val, 0);
40567 /* target = A B C X */
40568 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
40569 const0_rtx, const1_rtx,
40570 GEN_INT (2+4), GEN_INT (0+4)));
40571 return;
40572
40573 default:
40574 gcc_unreachable ();
40575 }
40576 break;
40577
40578 case V4SImode:
40579 use_vec_merge = TARGET_SSE4_1;
40580 if (use_vec_merge)
40581 break;
40582
40583 /* Element 0 handled by vec_merge below. */
40584 if (elt == 0)
40585 {
40586 use_vec_merge = true;
40587 break;
40588 }
40589
40590 if (TARGET_SSE2)
40591 {
40592 /* With SSE2, use integer shuffles to swap element 0 and ELT,
40593 store into element 0, then shuffle them back. */
40594
40595 rtx order[4];
40596
40597 order[0] = GEN_INT (elt);
40598 order[1] = const1_rtx;
40599 order[2] = const2_rtx;
40600 order[3] = GEN_INT (3);
40601 order[elt] = const0_rtx;
40602
40603 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
40604 order[1], order[2], order[3]));
40605
40606 ix86_expand_vector_set (false, target, val, 0);
40607
40608 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
40609 order[1], order[2], order[3]));
40610 }
40611 else
40612 {
40613 /* For SSE1, we have to reuse the V4SF code. */
40614 rtx t = gen_reg_rtx (V4SFmode);
40615 ix86_expand_vector_set (false, t, gen_lowpart (SFmode, val), elt);
40616 emit_move_insn (target, gen_lowpart (mode, t));
40617 }
40618 return;
40619
40620 case V8HImode:
40621 use_vec_merge = TARGET_SSE2;
40622 break;
40623 case V4HImode:
40624 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
40625 break;
40626
40627 case V16QImode:
40628 use_vec_merge = TARGET_SSE4_1;
40629 break;
40630
40631 case V8QImode:
40632 break;
40633
40634 case V32QImode:
40635 half_mode = V16QImode;
40636 j = 0;
40637 n = 16;
40638 goto half;
40639
40640 case V16HImode:
40641 half_mode = V8HImode;
40642 j = 1;
40643 n = 8;
40644 goto half;
40645
40646 case V8SImode:
40647 half_mode = V4SImode;
40648 j = 2;
40649 n = 4;
40650 goto half;
40651
40652 case V4DImode:
40653 half_mode = V2DImode;
40654 j = 3;
40655 n = 2;
40656 goto half;
40657
40658 case V8SFmode:
40659 half_mode = V4SFmode;
40660 j = 4;
40661 n = 4;
40662 goto half;
40663
40664 case V4DFmode:
40665 half_mode = V2DFmode;
40666 j = 5;
40667 n = 2;
40668 goto half;
40669
40670 half:
40671 /* Compute offset. */
40672 i = elt / n;
40673 elt %= n;
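      /* For instance (element index chosen for illustration): setting
         element 6 of a V8SFmode vector gives i = 1 and elt = 2, so the high
         V4SFmode half is extracted, element 2 of that half is overwritten,
         and the half is written back with the matching vec_set_hi pattern.  */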
40674
40675 gcc_assert (i <= 1);
40676
40677 /* Extract the half. */
40678 tmp = gen_reg_rtx (half_mode);
40679 emit_insn (gen_extract[j][i] (tmp, target));
40680
40681 /* Put val in tmp at elt. */
40682 ix86_expand_vector_set (false, tmp, val, elt);
40683
40684 /* Put it back. */
40685 emit_insn (gen_insert[j][i] (target, target, tmp));
40686 return;
40687
40688 default:
40689 break;
40690 }
40691
40692 if (use_vec_merge)
40693 {
40694 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
40695 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
40696 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40697 }
40698 else
40699 {
40700 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
40701
40702 emit_move_insn (mem, target);
40703
40704 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
40705 emit_move_insn (tmp, val);
40706
40707 emit_move_insn (target, mem);
40708 }
40709 }
40710
40711 void
40712 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
40713 {
40714 enum machine_mode mode = GET_MODE (vec);
40715 enum machine_mode inner_mode = GET_MODE_INNER (mode);
40716 bool use_vec_extr = false;
40717 rtx tmp;
40718
40719 switch (mode)
40720 {
40721 case V2SImode:
40722 case V2SFmode:
40723 if (!mmx_ok)
40724 break;
40725 /* FALLTHRU */
40726
40727 case V2DFmode:
40728 case V2DImode:
40729 use_vec_extr = true;
40730 break;
40731
40732 case V4SFmode:
40733 use_vec_extr = TARGET_SSE4_1;
40734 if (use_vec_extr)
40735 break;
40736
40737 switch (elt)
40738 {
40739 case 0:
40740 tmp = vec;
40741 break;
40742
40743 case 1:
40744 case 3:
40745 tmp = gen_reg_rtx (mode);
40746 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
40747 GEN_INT (elt), GEN_INT (elt),
40748 GEN_INT (elt+4), GEN_INT (elt+4)));
40749 break;
40750
40751 case 2:
40752 tmp = gen_reg_rtx (mode);
40753 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
40754 break;
40755
40756 default:
40757 gcc_unreachable ();
40758 }
40759 vec = tmp;
40760 use_vec_extr = true;
40761 elt = 0;
40762 break;
40763
40764 case V4SImode:
40765 use_vec_extr = TARGET_SSE4_1;
40766 if (use_vec_extr)
40767 break;
40768
40769 if (TARGET_SSE2)
40770 {
40771 switch (elt)
40772 {
40773 case 0:
40774 tmp = vec;
40775 break;
40776
40777 case 1:
40778 case 3:
40779 tmp = gen_reg_rtx (mode);
40780 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
40781 GEN_INT (elt), GEN_INT (elt),
40782 GEN_INT (elt), GEN_INT (elt)));
40783 break;
40784
40785 case 2:
40786 tmp = gen_reg_rtx (mode);
40787 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
40788 break;
40789
40790 default:
40791 gcc_unreachable ();
40792 }
40793 vec = tmp;
40794 use_vec_extr = true;
40795 elt = 0;
40796 }
40797 else
40798 {
40799 /* For SSE1, we have to reuse the V4SF code. */
40800 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
40801 gen_lowpart (V4SFmode, vec), elt);
40802 return;
40803 }
40804 break;
40805
40806 case V8HImode:
40807 use_vec_extr = TARGET_SSE2;
40808 break;
40809 case V4HImode:
40810 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
40811 break;
40812
40813 case V16QImode:
40814 use_vec_extr = TARGET_SSE4_1;
40815 break;
40816
40817 case V8SFmode:
40818 if (TARGET_AVX)
40819 {
40820 tmp = gen_reg_rtx (V4SFmode);
40821 if (elt < 4)
40822 emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
40823 else
40824 emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
40825 ix86_expand_vector_extract (false, target, tmp, elt & 3);
40826 return;
40827 }
40828 break;
40829
40830 case V4DFmode:
40831 if (TARGET_AVX)
40832 {
40833 tmp = gen_reg_rtx (V2DFmode);
40834 if (elt < 2)
40835 emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
40836 else
40837 emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
40838 ix86_expand_vector_extract (false, target, tmp, elt & 1);
40839 return;
40840 }
40841 break;
40842
40843 case V32QImode:
40844 if (TARGET_AVX)
40845 {
40846 tmp = gen_reg_rtx (V16QImode);
40847 if (elt < 16)
40848 emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
40849 else
40850 emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
40851 ix86_expand_vector_extract (false, target, tmp, elt & 15);
40852 return;
40853 }
40854 break;
40855
40856 case V16HImode:
40857 if (TARGET_AVX)
40858 {
40859 tmp = gen_reg_rtx (V8HImode);
40860 if (elt < 8)
40861 emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
40862 else
40863 emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
40864 ix86_expand_vector_extract (false, target, tmp, elt & 7);
40865 return;
40866 }
40867 break;
40868
40869 case V8SImode:
40870 if (TARGET_AVX)
40871 {
40872 tmp = gen_reg_rtx (V4SImode);
40873 if (elt < 4)
40874 emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
40875 else
40876 emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
40877 ix86_expand_vector_extract (false, target, tmp, elt & 3);
40878 return;
40879 }
40880 break;
40881
40882 case V4DImode:
40883 if (TARGET_AVX)
40884 {
40885 tmp = gen_reg_rtx (V2DImode);
40886 if (elt < 2)
40887 emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
40888 else
40889 emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
40890 ix86_expand_vector_extract (false, target, tmp, elt & 1);
40891 return;
40892 }
40893 break;
40894
40895 case V16SFmode:
40896 tmp = gen_reg_rtx (V8SFmode);
40897 if (elt < 8)
40898 emit_insn (gen_vec_extract_lo_v16sf (tmp, vec));
40899 else
40900 emit_insn (gen_vec_extract_hi_v16sf (tmp, vec));
40901 ix86_expand_vector_extract (false, target, tmp, elt & 7);
40902 return;
40903
40904 case V8DFmode:
40905 tmp = gen_reg_rtx (V4DFmode);
40906 if (elt < 4)
40907 emit_insn (gen_vec_extract_lo_v8df (tmp, vec));
40908 else
40909 emit_insn (gen_vec_extract_hi_v8df (tmp, vec));
40910 ix86_expand_vector_extract (false, target, tmp, elt & 3);
40911 return;
40912
40913 case V16SImode:
40914 tmp = gen_reg_rtx (V8SImode);
40915 if (elt < 8)
40916 emit_insn (gen_vec_extract_lo_v16si (tmp, vec));
40917 else
40918 emit_insn (gen_vec_extract_hi_v16si (tmp, vec));
40919 ix86_expand_vector_extract (false, target, tmp, elt & 7);
40920 return;
40921
40922 case V8DImode:
40923 tmp = gen_reg_rtx (V4DImode);
40924 if (elt < 4)
40925 emit_insn (gen_vec_extract_lo_v8di (tmp, vec));
40926 else
40927 emit_insn (gen_vec_extract_hi_v8di (tmp, vec));
40928 ix86_expand_vector_extract (false, target, tmp, elt & 3);
40929 return;
40930
40931 case V8QImode:
40932 /* ??? Could extract the appropriate HImode element and shift. */
40933 default:
40934 break;
40935 }
40936
40937 if (use_vec_extr)
40938 {
40939 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
40940 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
40941
40942 /* Let the rtl optimizers know about the zero extension performed. */
40943 if (inner_mode == QImode || inner_mode == HImode)
40944 {
40945 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
40946 target = gen_lowpart (SImode, target);
40947 }
40948
40949 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40950 }
40951 else
40952 {
40953 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
40954
40955 emit_move_insn (mem, vec);
40956
40957 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
40958 emit_move_insn (target, tmp);
40959 }
40960 }
40961
40962 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
40963 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
40964 The upper bits of DEST are undefined, though they shouldn't cause
40965 exceptions (some bits from src or all zeros are ok). */
40966
40967 static void
40968 emit_reduc_half (rtx dest, rtx src, int i)
40969 {
40970 rtx tem, d = dest;
40971 switch (GET_MODE (src))
40972 {
40973 case V4SFmode:
40974 if (i == 128)
40975 tem = gen_sse_movhlps (dest, src, src);
40976 else
40977 tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx,
40978 GEN_INT (1 + 4), GEN_INT (1 + 4));
40979 break;
40980 case V2DFmode:
40981 tem = gen_vec_interleave_highv2df (dest, src, src);
40982 break;
40983 case V16QImode:
40984 case V8HImode:
40985 case V4SImode:
40986 case V2DImode:
40987 d = gen_reg_rtx (V1TImode);
40988 tem = gen_sse2_lshrv1ti3 (d, gen_lowpart (V1TImode, src),
40989 GEN_INT (i / 2));
40990 break;
40991 case V8SFmode:
40992 if (i == 256)
40993 tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx);
40994 else
40995 tem = gen_avx_shufps256 (dest, src, src,
40996 GEN_INT (i == 128 ? 2 + (3 << 2) : 1));
40997 break;
40998 case V4DFmode:
40999 if (i == 256)
41000 tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx);
41001 else
41002 tem = gen_avx_shufpd256 (dest, src, src, const1_rtx);
41003 break;
41004 case V32QImode:
41005 case V16HImode:
41006 case V8SImode:
41007 case V4DImode:
41008 if (i == 256)
41009 {
41010 if (GET_MODE (dest) != V4DImode)
41011 d = gen_reg_rtx (V4DImode);
41012 tem = gen_avx2_permv2ti (d, gen_lowpart (V4DImode, src),
41013 gen_lowpart (V4DImode, src),
41014 const1_rtx);
41015 }
41016 else
41017 {
41018 d = gen_reg_rtx (V2TImode);
41019 tem = gen_avx2_lshrv2ti3 (d, gen_lowpart (V2TImode, src),
41020 GEN_INT (i / 2));
41021 }
41022 break;
41023 case V16SImode:
41024 case V16SFmode:
41025 case V8DImode:
41026 case V8DFmode:
41027 if (i > 128)
41028 tem = gen_avx512f_shuf_i32x4_1 (gen_lowpart (V16SImode, dest),
41029 gen_lowpart (V16SImode, src),
41030 gen_lowpart (V16SImode, src),
41031 GEN_INT (0x4 + (i == 512 ? 4 : 0)),
41032 GEN_INT (0x5 + (i == 512 ? 4 : 0)),
41033 GEN_INT (0x6 + (i == 512 ? 4 : 0)),
41034 GEN_INT (0x7 + (i == 512 ? 4 : 0)),
41035 GEN_INT (0xC), GEN_INT (0xD),
41036 GEN_INT (0xE), GEN_INT (0xF),
41037 GEN_INT (0x10), GEN_INT (0x11),
41038 GEN_INT (0x12), GEN_INT (0x13),
41039 GEN_INT (0x14), GEN_INT (0x15),
41040 GEN_INT (0x16), GEN_INT (0x17));
41041 else
41042 tem = gen_avx512f_pshufd_1 (gen_lowpart (V16SImode, dest),
41043 gen_lowpart (V16SImode, src),
41044 GEN_INT (i == 128 ? 0x2 : 0x1),
41045 GEN_INT (0x3),
41046 GEN_INT (0x3),
41047 GEN_INT (0x3),
41048 GEN_INT (i == 128 ? 0x6 : 0x5),
41049 GEN_INT (0x7),
41050 GEN_INT (0x7),
41051 GEN_INT (0x7),
41052 GEN_INT (i == 128 ? 0xA : 0x9),
41053 GEN_INT (0xB),
41054 GEN_INT (0xB),
41055 GEN_INT (0xB),
41056 GEN_INT (i == 128 ? 0xE : 0xD),
41057 GEN_INT (0xF),
41058 GEN_INT (0xF),
41059 GEN_INT (0xF));
41060 break;
41061 default:
41062 gcc_unreachable ();
41063 }
41064 emit_insn (tem);
41065 if (d != dest)
41066 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
41067 }
41068
41069 /* Expand a vector reduction. FN is the binary pattern to reduce;
41070 DEST is the destination; IN is the input vector. */
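/* As an example (mode and operation assumed for illustration): for a
   V4SFmode input reduced with a maximum pattern the loop runs with i = 128
   and then i = 64; each emit_reduc_half call moves the upper half of the
   live vector into the lower lanes and FN combines the two, leaving the
   reduction of all four elements in element 0 of DEST.  */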
41071
41072 void
41073 ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
41074 {
41075 rtx half, dst, vec = in;
41076 enum machine_mode mode = GET_MODE (in);
41077 int i;
41078
41079 /* SSE4 has a special instruction for V8HImode UMIN reduction. */
41080 if (TARGET_SSE4_1
41081 && mode == V8HImode
41082 && fn == gen_uminv8hi3)
41083 {
41084 emit_insn (gen_sse4_1_phminposuw (dest, in));
41085 return;
41086 }
41087
41088 for (i = GET_MODE_BITSIZE (mode);
41089 i > GET_MODE_BITSIZE (GET_MODE_INNER (mode));
41090 i >>= 1)
41091 {
41092 half = gen_reg_rtx (mode);
41093 emit_reduc_half (half, vec, i);
41094 if (i == GET_MODE_BITSIZE (GET_MODE_INNER (mode)) * 2)
41095 dst = dest;
41096 else
41097 dst = gen_reg_rtx (mode);
41098 emit_insn (fn (dst, half, vec));
41099 vec = dst;
41100 }
41101 }
41102 \f
41103 /* Target hook for scalar_mode_supported_p. */
41104 static bool
41105 ix86_scalar_mode_supported_p (enum machine_mode mode)
41106 {
41107 if (DECIMAL_FLOAT_MODE_P (mode))
41108 return default_decimal_float_supported_p ();
41109 else if (mode == TFmode)
41110 return true;
41111 else
41112 return default_scalar_mode_supported_p (mode);
41113 }
41114
41115 /* Implements target hook vector_mode_supported_p. */
41116 static bool
41117 ix86_vector_mode_supported_p (enum machine_mode mode)
41118 {
41119 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
41120 return true;
41121 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
41122 return true;
41123 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
41124 return true;
41125 if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
41126 return true;
41127 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
41128 return true;
41129 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
41130 return true;
41131 return false;
41132 }
41133
41134 /* Target hook for c_mode_for_suffix. */
41135 static enum machine_mode
41136 ix86_c_mode_for_suffix (char suffix)
41137 {
41138 if (suffix == 'q')
41139 return TFmode;
41140 if (suffix == 'w')
41141 return XFmode;
41142
41143 return VOIDmode;
41144 }
41145
41146 /* Worker function for TARGET_MD_ASM_CLOBBERS.
41147
41148 We do this in the new i386 backend to maintain source compatibility
41149 with the old cc0-based compiler. */
41150
41151 static tree
41152 ix86_md_asm_clobbers (tree, tree, tree clobbers)
41153 {
41154 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
41155 clobbers);
41156 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
41157 clobbers);
41158 return clobbers;
41159 }
41160
41161 /* Implements target vector targetm.asm.encode_section_info. */
41162
41163 static void ATTRIBUTE_UNUSED
41164 ix86_encode_section_info (tree decl, rtx rtl, int first)
41165 {
41166 default_encode_section_info (decl, rtl, first);
41167
41168 if (TREE_CODE (decl) == VAR_DECL
41169 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
41170 && ix86_in_large_data_p (decl))
41171 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
41172 }
41173
41174 /* Worker function for REVERSE_CONDITION. */
41175
41176 enum rtx_code
41177 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
41178 {
41179 return (mode != CCFPmode && mode != CCFPUmode
41180 ? reverse_condition (code)
41181 : reverse_condition_maybe_unordered (code));
41182 }
41183
41184 /* Output code to perform an x87 FP register move, from OPERANDS[1]
41185 to OPERANDS[0]. */
41186
41187 const char *
41188 output_387_reg_move (rtx insn, rtx *operands)
41189 {
41190 if (REG_P (operands[0]))
41191 {
41192 if (REG_P (operands[1])
41193 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
41194 {
41195 if (REGNO (operands[0]) == FIRST_STACK_REG)
41196 return output_387_ffreep (operands, 0);
41197 return "fstp\t%y0";
41198 }
41199 if (STACK_TOP_P (operands[0]))
41200 return "fld%Z1\t%y1";
41201 return "fst\t%y0";
41202 }
41203 else if (MEM_P (operands[0]))
41204 {
41205 gcc_assert (REG_P (operands[1]));
41206 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
41207 return "fstp%Z0\t%y0";
41208 else
41209 {
41210 /* There is no non-popping store to memory for XFmode.
41211 So if we need one, follow the store with a load. */
41212 if (GET_MODE (operands[0]) == XFmode)
41213 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
41214 else
41215 return "fst%Z0\t%y0";
41216 }
41217 }
41218 else
41219 gcc_unreachable();
41220 }
41221
41222 /* Output code to perform a conditional jump to LABEL, if C2 flag in
41223 FP status register is set. */
41224
41225 void
41226 ix86_emit_fp_unordered_jump (rtx label)
41227 {
41228 rtx reg = gen_reg_rtx (HImode);
41229 rtx temp;
41230
41231 emit_insn (gen_x86_fnstsw_1 (reg));
41232
41233 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
41234 {
41235 emit_insn (gen_x86_sahf_1 (reg));
41236
41237 temp = gen_rtx_REG (CCmode, FLAGS_REG);
41238 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
41239 }
41240 else
41241 {
41242 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
41243
41244 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
41245 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
41246 }
41247
41248 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
41249 gen_rtx_LABEL_REF (VOIDmode, label),
41250 pc_rtx);
41251 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
41252
41253 emit_jump_insn (temp);
41254 predict_jump (REG_BR_PROB_BASE * 10 / 100);
41255 }
41256
41257 /* Output code to perform a log1p XFmode calculation. */
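/* The threshold 0.29289... used below is 1 - sqrt(2)/2, the documented
   operand range of the x87 fyl2xp1 instruction; larger magnitudes fall
   back to fyl2x on 1 + op1.  */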
41258
41259 void ix86_emit_i387_log1p (rtx op0, rtx op1)
41260 {
41261 rtx label1 = gen_label_rtx ();
41262 rtx label2 = gen_label_rtx ();
41263
41264 rtx tmp = gen_reg_rtx (XFmode);
41265 rtx tmp2 = gen_reg_rtx (XFmode);
41266 rtx test;
41267
41268 emit_insn (gen_absxf2 (tmp, op1));
41269 test = gen_rtx_GE (VOIDmode, tmp,
41270 CONST_DOUBLE_FROM_REAL_VALUE (
41271 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
41272 XFmode));
41273 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
41274
41275 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
41276 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
41277 emit_jump (label2);
41278
41279 emit_label (label1);
41280 emit_move_insn (tmp, CONST1_RTX (XFmode));
41281 emit_insn (gen_addxf3 (tmp, op1, tmp));
41282 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
41283 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
41284
41285 emit_label (label2);
41286 }
41287
41288 /* Emit code for round calculation. */
41289 void ix86_emit_i387_round (rtx op0, rtx op1)
41290 {
41291 enum machine_mode inmode = GET_MODE (op1);
41292 enum machine_mode outmode = GET_MODE (op0);
41293 rtx e1, e2, res, tmp, tmp1, half;
41294 rtx scratch = gen_reg_rtx (HImode);
41295 rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
41296 rtx jump_label = gen_label_rtx ();
41297 rtx insn;
41298 rtx (*gen_abs) (rtx, rtx);
41299 rtx (*gen_neg) (rtx, rtx);
41300
41301 switch (inmode)
41302 {
41303 case SFmode:
41304 gen_abs = gen_abssf2;
41305 break;
41306 case DFmode:
41307 gen_abs = gen_absdf2;
41308 break;
41309 case XFmode:
41310 gen_abs = gen_absxf2;
41311 break;
41312 default:
41313 gcc_unreachable ();
41314 }
41315
41316 switch (outmode)
41317 {
41318 case SFmode:
41319 gen_neg = gen_negsf2;
41320 break;
41321 case DFmode:
41322 gen_neg = gen_negdf2;
41323 break;
41324 case XFmode:
41325 gen_neg = gen_negxf2;
41326 break;
41327 case HImode:
41328 gen_neg = gen_neghi2;
41329 break;
41330 case SImode:
41331 gen_neg = gen_negsi2;
41332 break;
41333 case DImode:
41334 gen_neg = gen_negdi2;
41335 break;
41336 default:
41337 gcc_unreachable ();
41338 }
41339
41340 e1 = gen_reg_rtx (inmode);
41341 e2 = gen_reg_rtx (inmode);
41342 res = gen_reg_rtx (outmode);
41343
41344 half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, inmode);
41345
41346 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
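  /* Numeric example (input chosen for illustration): for op1 = -2.3 the
     code computes e1 = 2.3, e2 = 2.8 and res = floor (2.8) = 2; the fxam
     sign bit then triggers the negation below, giving -2.  */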
41347
41348 /* scratch = fxam(op1) */
41349 emit_insn (gen_rtx_SET (VOIDmode, scratch,
41350 gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
41351 UNSPEC_FXAM)));
41352 /* e1 = fabs(op1) */
41353 emit_insn (gen_abs (e1, op1));
41354
41355 /* e2 = e1 + 0.5 */
41356 half = force_reg (inmode, half);
41357 emit_insn (gen_rtx_SET (VOIDmode, e2,
41358 gen_rtx_PLUS (inmode, e1, half)));
41359
41360 /* res = floor(e2) */
41361 if (inmode != XFmode)
41362 {
41363 tmp1 = gen_reg_rtx (XFmode);
41364
41365 emit_insn (gen_rtx_SET (VOIDmode, tmp1,
41366 gen_rtx_FLOAT_EXTEND (XFmode, e2)));
41367 }
41368 else
41369 tmp1 = e2;
41370
41371 switch (outmode)
41372 {
41373 case SFmode:
41374 case DFmode:
41375 {
41376 rtx tmp0 = gen_reg_rtx (XFmode);
41377
41378 emit_insn (gen_frndintxf2_floor (tmp0, tmp1));
41379
41380 emit_insn (gen_rtx_SET (VOIDmode, res,
41381 gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
41382 UNSPEC_TRUNC_NOOP)));
41383 }
41384 break;
41385 case XFmode:
41386 emit_insn (gen_frndintxf2_floor (res, tmp1));
41387 break;
41388 case HImode:
41389 emit_insn (gen_lfloorxfhi2 (res, tmp1));
41390 break;
41391 case SImode:
41392 emit_insn (gen_lfloorxfsi2 (res, tmp1));
41393 break;
41394 case DImode:
41395 emit_insn (gen_lfloorxfdi2 (res, tmp1));
41396 break;
41397 default:
41398 gcc_unreachable ();
41399 }
41400
41401 /* flags = signbit(a) */
41402 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02)));
41403
41404 /* if (flags) then res = -res */
41405 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
41406 gen_rtx_EQ (VOIDmode, flags, const0_rtx),
41407 gen_rtx_LABEL_REF (VOIDmode, jump_label),
41408 pc_rtx);
41409 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
41410 predict_jump (REG_BR_PROB_BASE * 50 / 100);
41411 JUMP_LABEL (insn) = jump_label;
41412
41413 emit_insn (gen_neg (res, res));
41414
41415 emit_label (jump_label);
41416 LABEL_NUSES (jump_label) = 1;
41417
41418 emit_move_insn (op0, res);
41419 }
41420
41421 /* Output code to perform a Newton-Raphson approximation of a single precision
41422 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
41423
41424 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
41425 {
41426 rtx x0, x1, e0, e1;
41427
41428 x0 = gen_reg_rtx (mode);
41429 e0 = gen_reg_rtx (mode);
41430 e1 = gen_reg_rtx (mode);
41431 x1 = gen_reg_rtx (mode);
41432
41433 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
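  /* This is one Newton-Raphson refinement of the reciprocal estimate:
     x1 = x0 * (2 - b * x0) = (x0 + x0) - (b * x0 * x0), computed below as
     e1 - e0; multiplying by a then gives the approximate quotient, roughly
     doubling the number of correct bits of the initial estimate.  */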
41434
41435 b = force_reg (mode, b);
41436
41437 /* x0 = rcp(b) estimate */
41438 if (mode == V16SFmode || mode == V8DFmode)
41439 emit_insn (gen_rtx_SET (VOIDmode, x0,
41440 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
41441 UNSPEC_RCP14)));
41442 else
41443 emit_insn (gen_rtx_SET (VOIDmode, x0,
41444 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
41445 UNSPEC_RCP)));
41446
41447 /* e0 = x0 * b */
41448 emit_insn (gen_rtx_SET (VOIDmode, e0,
41449 gen_rtx_MULT (mode, x0, b)));
41450
41451 /* e0 = x0 * e0 */
41452 emit_insn (gen_rtx_SET (VOIDmode, e0,
41453 gen_rtx_MULT (mode, x0, e0)));
41454
41455 /* e1 = x0 + x0 */
41456 emit_insn (gen_rtx_SET (VOIDmode, e1,
41457 gen_rtx_PLUS (mode, x0, x0)));
41458
41459 /* x1 = e1 - e0 */
41460 emit_insn (gen_rtx_SET (VOIDmode, x1,
41461 gen_rtx_MINUS (mode, e1, e0)));
41462
41463 /* res = a * x1 */
41464 emit_insn (gen_rtx_SET (VOIDmode, res,
41465 gen_rtx_MULT (mode, a, x1)));
41466 }
41467
41468 /* Output code to perform a Newton-Raphson approximation of a
41469 single precision floating point [reciprocal] square root. */
41470
41471 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
41472 bool recip)
41473 {
41474 rtx x0, e0, e1, e2, e3, mthree, mhalf;
41475 REAL_VALUE_TYPE r;
41476 int unspec;
41477
41478 x0 = gen_reg_rtx (mode);
41479 e0 = gen_reg_rtx (mode);
41480 e1 = gen_reg_rtx (mode);
41481 e2 = gen_reg_rtx (mode);
41482 e3 = gen_reg_rtx (mode);
41483
41484 real_from_integer (&r, VOIDmode, -3, SIGNED);
41485 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
41486
41487 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
41488 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
41489 unspec = UNSPEC_RSQRT;
41490
41491 if (VECTOR_MODE_P (mode))
41492 {
41493 mthree = ix86_build_const_vector (mode, true, mthree);
41494 mhalf = ix86_build_const_vector (mode, true, mhalf);
41495 /* There is no 512-bit rsqrt. There is however rsqrt14. */
41496 if (GET_MODE_SIZE (mode) == 64)
41497 unspec = UNSPEC_RSQRT14;
41498 }
41499
41500 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
41501 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
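  /* Both follow from the Newton-Raphson step for 1/sqrt(a):
     x1 = x0 * (1.5 - 0.5 * a * x0 * x0) = -0.5 * x0 * (a * x0 * x0 - 3.0);
     multiplying the refined reciprocal root by a yields sqrt(a) itself.  */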
41502
41503 a = force_reg (mode, a);
41504
41505 /* x0 = rsqrt(a) estimate */
41506 emit_insn (gen_rtx_SET (VOIDmode, x0,
41507 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
41508 unspec)));
41509
41510 /* If a == 0.0, filter out the infinite rsqrt estimate to prevent NaN for sqrt(0.0). */
41511 if (!recip)
41512 {
41513 rtx zero, mask;
41514
41515 zero = gen_reg_rtx (mode);
41516 mask = gen_reg_rtx (mode);
41517
41518 zero = force_reg (mode, CONST0_RTX(mode));
41519
41520 /* Handle masked compare. */
41521 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
41522 {
41523 mask = gen_reg_rtx (HImode);
41524 /* Imm value 0x4 corresponds to not-equal comparison. */
41525 emit_insn (gen_avx512f_cmpv16sf3 (mask, zero, a, GEN_INT (0x4)));
41526 emit_insn (gen_avx512f_blendmv16sf (x0, zero, x0, mask));
41527 }
41528 else
41529 {
41530 emit_insn (gen_rtx_SET (VOIDmode, mask,
41531 gen_rtx_NE (mode, zero, a)));
41532
41533 emit_insn (gen_rtx_SET (VOIDmode, x0,
41534 gen_rtx_AND (mode, x0, mask)));
41535 }
41536 }
41537
41538 /* e0 = x0 * a */
41539 emit_insn (gen_rtx_SET (VOIDmode, e0,
41540 gen_rtx_MULT (mode, x0, a)));
41541 /* e1 = e0 * x0 */
41542 emit_insn (gen_rtx_SET (VOIDmode, e1,
41543 gen_rtx_MULT (mode, e0, x0)));
41544
41545 /* e2 = e1 - 3. */
41546 mthree = force_reg (mode, mthree);
41547 emit_insn (gen_rtx_SET (VOIDmode, e2,
41548 gen_rtx_PLUS (mode, e1, mthree)));
41549
41550 mhalf = force_reg (mode, mhalf);
41551 if (recip)
41552 /* e3 = -.5 * x0 */
41553 emit_insn (gen_rtx_SET (VOIDmode, e3,
41554 gen_rtx_MULT (mode, x0, mhalf)));
41555 else
41556 /* e3 = -.5 * e0 */
41557 emit_insn (gen_rtx_SET (VOIDmode, e3,
41558 gen_rtx_MULT (mode, e0, mhalf)));
41559 /* ret = e2 * e3 */
41560 emit_insn (gen_rtx_SET (VOIDmode, res,
41561 gen_rtx_MULT (mode, e2, e3)));
41562 }
41563
41564 #ifdef TARGET_SOLARIS
41565 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
41566
41567 static void
41568 i386_solaris_elf_named_section (const char *name, unsigned int flags,
41569 tree decl)
41570 {
41571 /* With Binutils 2.15, the "@unwind" marker must be specified on
41572 every occurrence of the ".eh_frame" section, not just the first
41573 one. */
41574 if (TARGET_64BIT
41575 && strcmp (name, ".eh_frame") == 0)
41576 {
41577 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
41578 flags & SECTION_WRITE ? "aw" : "a");
41579 return;
41580 }
41581
41582 #ifndef USE_GAS
41583 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
41584 {
41585 solaris_elf_asm_comdat_section (name, flags, decl);
41586 return;
41587 }
41588 #endif
41589
41590 default_elf_asm_named_section (name, flags, decl);
41591 }
41592 #endif /* TARGET_SOLARIS */
41593
41594 /* Return the mangling of TYPE if it is an extended fundamental type. */
41595
41596 static const char *
41597 ix86_mangle_type (const_tree type)
41598 {
41599 type = TYPE_MAIN_VARIANT (type);
41600
41601 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
41602 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
41603 return NULL;
41604
41605 switch (TYPE_MODE (type))
41606 {
41607 case TFmode:
41608 /* __float128 is "g". */
41609 return "g";
41610 case XFmode:
41611 /* "long double" or __float80 is "e". */
41612 return "e";
41613 default:
41614 return NULL;
41615 }
41616 }
41617
41618 /* For 32-bit code we can save PIC register setup by using
41619 __stack_chk_fail_local hidden function instead of calling
41620 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
41621 register, so it is better to call __stack_chk_fail directly. */
41622
41623 static tree ATTRIBUTE_UNUSED
41624 ix86_stack_protect_fail (void)
41625 {
41626 return TARGET_64BIT
41627 ? default_external_stack_protect_fail ()
41628 : default_hidden_stack_protect_fail ();
41629 }
41630
41631 /* Select a format to encode pointers in exception handling data. CODE
41632 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
41633 true if the symbol may be affected by dynamic relocations.
41634
41635 ??? All x86 object file formats are capable of representing this.
41636 After all, the relocation needed is the same as for the call insn.
41637 Whether or not a particular assembler allows us to enter such, I
41638 guess we'll have to see. */
41639 int
41640 asm_preferred_eh_data_format (int code, int global)
41641 {
41642 if (flag_pic)
41643 {
41644 int type = DW_EH_PE_sdata8;
41645 if (!TARGET_64BIT
41646 || ix86_cmodel == CM_SMALL_PIC
41647 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
41648 type = DW_EH_PE_sdata4;
41649 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
41650 }
41651 if (ix86_cmodel == CM_SMALL
41652 || (ix86_cmodel == CM_MEDIUM && code))
41653 return DW_EH_PE_udata4;
41654 return DW_EH_PE_absptr;
41655 }
41656 \f
41657 /* Expand copysign from SIGN to the positive value ABS_VALUE
41658 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
41659 the sign-bit. */
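/* In effect this computes RESULT = ABS_VALUE | (SIGN & SIGNBIT), where
   SIGNBIT is the sign-bit mask, either built here or obtained by inverting
   MASK.  */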
41660 static void
41661 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
41662 {
41663 enum machine_mode mode = GET_MODE (sign);
41664 rtx sgn = gen_reg_rtx (mode);
41665 if (mask == NULL_RTX)
41666 {
41667 enum machine_mode vmode;
41668
41669 if (mode == SFmode)
41670 vmode = V4SFmode;
41671 else if (mode == DFmode)
41672 vmode = V2DFmode;
41673 else
41674 vmode = mode;
41675
41676 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
41677 if (!VECTOR_MODE_P (mode))
41678 {
41679 /* We need to generate a scalar mode mask in this case. */
41680 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
41681 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
41682 mask = gen_reg_rtx (mode);
41683 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
41684 }
41685 }
41686 else
41687 mask = gen_rtx_NOT (mode, mask);
41688 emit_insn (gen_rtx_SET (VOIDmode, sgn,
41689 gen_rtx_AND (mode, mask, sign)));
41690 emit_insn (gen_rtx_SET (VOIDmode, result,
41691 gen_rtx_IOR (mode, abs_value, sgn)));
41692 }
41693
41694 /* Expand fabs (OP0) and return a new rtx that holds the result. The
41695 mask for masking out the sign-bit is stored in *SMASK, if that is
41696 non-null. */
41697 static rtx
41698 ix86_expand_sse_fabs (rtx op0, rtx *smask)
41699 {
41700 enum machine_mode vmode, mode = GET_MODE (op0);
41701 rtx xa, mask;
41702
41703 xa = gen_reg_rtx (mode);
41704 if (mode == SFmode)
41705 vmode = V4SFmode;
41706 else if (mode == DFmode)
41707 vmode = V2DFmode;
41708 else
41709 vmode = mode;
41710 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
41711 if (!VECTOR_MODE_P (mode))
41712 {
41713 /* We need to generate a scalar mode mask in this case. */
41714 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
41715 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
41716 mask = gen_reg_rtx (mode);
41717 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
41718 }
41719 emit_insn (gen_rtx_SET (VOIDmode, xa,
41720 gen_rtx_AND (mode, op0, mask)));
41721
41722 if (smask)
41723 *smask = mask;
41724
41725 return xa;
41726 }
41727
41728 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
41729 swapping the operands if SWAP_OPERANDS is true. The expanded
41730 code is a forward jump to a newly created label in case the
41731 comparison is true. The generated label rtx is returned. */
41732 static rtx
41733 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
41734 bool swap_operands)
41735 {
41736 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
41737 rtx label, tmp;
41738
41739 if (swap_operands)
41740 {
41741 tmp = op0;
41742 op0 = op1;
41743 op1 = tmp;
41744 }
41745
41746 label = gen_label_rtx ();
41747 tmp = gen_rtx_REG (fpcmp_mode, FLAGS_REG);
41748 emit_insn (gen_rtx_SET (VOIDmode, tmp,
41749 gen_rtx_COMPARE (fpcmp_mode, op0, op1)));
41750 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
41751 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
41752 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
41753 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
41754 JUMP_LABEL (tmp) = label;
41755
41756 return label;
41757 }
41758
41759 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
41760 using comparison code CODE. Operands are swapped for the comparison if
41761 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
41762 static rtx
41763 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
41764 bool swap_operands)
41765 {
41766 rtx (*insn)(rtx, rtx, rtx, rtx);
41767 enum machine_mode mode = GET_MODE (op0);
41768 rtx mask = gen_reg_rtx (mode);
41769
41770 if (swap_operands)
41771 {
41772 rtx tmp = op0;
41773 op0 = op1;
41774 op1 = tmp;
41775 }
41776
41777 insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
41778
41779 emit_insn (insn (mask, op0, op1,
41780 gen_rtx_fmt_ee (code, mode, op0, op1)));
41781 return mask;
41782 }
41783
41784 /* Generate and return a rtx of mode MODE for 2**n where n is the number
41785 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
41786 static rtx
41787 ix86_gen_TWO52 (enum machine_mode mode)
41788 {
41789 REAL_VALUE_TYPE TWO52r;
41790 rtx TWO52;
41791
41792 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
41793 TWO52 = const_double_from_real_value (TWO52r, mode);
41794 TWO52 = force_reg (mode, TWO52);
41795
41796 return TWO52;
41797 }
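/* Rationale for the constant above: for 0 <= x < 2**52 (DFmode; 2**23
   for SFmode), x + 2**52 has no fraction bits left in the mantissa, so
   the addition itself rounds x to an integer in the current rounding
   mode, and subtracting 2**52 recovers that integer exactly.  An
   illustrative scalar sketch (not target code; a real C version would
   also have to keep the compiler from folding the two operations):

     double round_to_integer (double x)   // assumes 0 <= x < 0x1p52
     {
       double t = x + 0x1p52;
       return t - 0x1p52;
     }

   The expanders below apply this to fabs of the input and copy the sign
   back afterwards, and only on the path where the TWO52 guard comparison
   did not branch away, so the precondition holds.  */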
41798
41799 /* Expand SSE sequence for computing lround from OP1 storing
41800 into OP0. */
41801 void
41802 ix86_expand_lround (rtx op0, rtx op1)
41803 {
41804 /* C code for the stuff we're doing below:
41805 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
41806 return (long)tmp;
41807 */
41808 enum machine_mode mode = GET_MODE (op1);
41809 const struct real_format *fmt;
41810 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
41811 rtx adj;
41812
41813 /* load nextafter (0.5, 0.0) */
41814 fmt = REAL_MODE_FORMAT (mode);
41815 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
41816 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
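  /* Using the largest representable value below 0.5, rather than 0.5
     itself, avoids rounding up inputs that are just below a half-way
     point: e.g. for the largest double below 0.5, adding exactly 0.5
     would round to 1.0 under round-to-nearest and then truncate to 1,
     whereas the correct lround result is 0.  */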
41817
41818 /* adj = copysign (0.5, op1) */
41819 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
41820 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
41821
41822 /* adj = op1 + adj */
41823 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
41824
41825 /* op0 = (imode)adj */
41826 expand_fix (op0, adj, 0);
41827 }
41828
41829 /* Expand SSE2 sequence for computing lfloor or lceil
41830 from OP1 storing into OP0. */
41831 void
41832 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
41833 {
41834 /* C code for the stuff we're doing below (for do_floor):
41835 xi = (long)op1;
41836 xi -= (double)xi > op1 ? 1 : 0;
41837 return xi;
41838 */
41839 enum machine_mode fmode = GET_MODE (op1);
41840 enum machine_mode imode = GET_MODE (op0);
41841 rtx ireg, freg, label, tmp;
41842
41843 /* reg = (long)op1 */
41844 ireg = gen_reg_rtx (imode);
41845 expand_fix (ireg, op1, 0);
41846
41847 /* freg = (double)reg */
41848 freg = gen_reg_rtx (fmode);
41849 expand_float (freg, ireg, 0);
41850
41851 /* floor: ireg = (freg > op1) ? ireg - 1 : ireg;  ceil: ireg = (freg < op1) ? ireg + 1 : ireg */
41852 label = ix86_expand_sse_compare_and_jump (UNLE,
41853 freg, op1, !do_floor);
41854 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
41855 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
41856 emit_move_insn (ireg, tmp);
41857
41858 emit_label (label);
41859 LABEL_NUSES (label) = 1;
41860
41861 emit_move_insn (op0, ireg);
41862 }
41863
41864 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
41865 result in OPERAND0. */
41866 void
41867 ix86_expand_rint (rtx operand0, rtx operand1)
41868 {
41869 /* C code for the stuff we're doing below:
41870 xa = fabs (operand1);
41871 if (!isless (xa, 2**52))
41872 return operand1;
41873 xa = xa + 2**52 - 2**52;
41874 return copysign (xa, operand1);
41875 */
41876 enum machine_mode mode = GET_MODE (operand0);
41877 rtx res, xa, label, TWO52, mask;
41878
41879 res = gen_reg_rtx (mode);
41880 emit_move_insn (res, operand1);
41881
41882 /* xa = abs (operand1) */
41883 xa = ix86_expand_sse_fabs (res, &mask);
41884
41885 /* if (!isless (xa, TWO52)) goto label; */
41886 TWO52 = ix86_gen_TWO52 (mode);
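  /* UNLE with swapped operands implements the test: the branch is taken
     when TWO52 <= xa or the operands are unordered (NaN), which is
     exactly !isless (xa, TWO52).  */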
41887 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
41888
41889 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
41890 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
41891
41892 ix86_sse_copysign_to_positive (res, xa, res, mask);
41893
41894 emit_label (label);
41895 LABEL_NUSES (label) = 1;
41896
41897 emit_move_insn (operand0, res);
41898 }
41899
41900 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
41901 into OPERAND0. */
41902 void
41903 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
41904 {
41905 /* C code for the stuff we expand below.
41906 double xa = fabs (x), x2;
41907 if (!isless (xa, TWO52))
41908 return x;
41909 xa = xa + TWO52 - TWO52;
41910 x2 = copysign (xa, x);
41911 Compensate. Floor:
41912 if (x2 > x)
41913 x2 -= 1;
41914 Compensate. Ceil:
41915 if (x2 < x)
41916 x2 -= -1;
41917 return x2;
41918 */
41919 enum machine_mode mode = GET_MODE (operand0);
41920 rtx xa, TWO52, tmp, label, one, res, mask;
41921
41922 TWO52 = ix86_gen_TWO52 (mode);
41923
41924 /* Temporary for holding the result, initialized to the input
41925 operand to ease control flow. */
41926 res = gen_reg_rtx (mode);
41927 emit_move_insn (res, operand1);
41928
41929 /* xa = abs (operand1) */
41930 xa = ix86_expand_sse_fabs (res, &mask);
41931
41932 /* if (!isless (xa, TWO52)) goto label; */
41933 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
41934
41935 /* xa = xa + TWO52 - TWO52; */
41936 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
41937 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
41938
41939 /* xa = copysign (xa, operand1) */
41940 ix86_sse_copysign_to_positive (xa, xa, res, mask);
41941
41942 /* generate 1.0 or -1.0 */
41943 one = force_reg (mode,
41944 const_double_from_real_value (do_floor
41945 ? dconst1 : dconstm1, mode));
41946
41947 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
41948 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
41949 emit_insn (gen_rtx_SET (VOIDmode, tmp,
41950 gen_rtx_AND (mode, one, tmp)));
41951 /* We always need to subtract here to preserve signed zero. */
41952 tmp = expand_simple_binop (mode, MINUS,
41953 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
41954 emit_move_insn (res, tmp);
41955
41956 emit_label (label);
41957 LABEL_NUSES (label) = 1;
41958
41959 emit_move_insn (operand0, res);
41960 }
41961
41962 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
41963 into OPERAND0. */
41964 void
41965 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
41966 {
41967 /* C code for the stuff we expand below.
41968 double xa = fabs (x), x2;
41969 if (!isless (xa, TWO52))
41970 return x;
41971 x2 = (double)(long)x;
41972 Compensate. Floor:
41973 if (x2 > x)
41974 x2 -= 1;
41975 Compensate. Ceil:
41976 if (x2 < x)
41977 x2 += 1;
41978 if (HONOR_SIGNED_ZEROS (mode))
41979 return copysign (x2, x);
41980 return x2;
41981 */
41982 enum machine_mode mode = GET_MODE (operand0);
41983 rtx xa, xi, TWO52, tmp, label, one, res, mask;
41984
41985 TWO52 = ix86_gen_TWO52 (mode);
41986
41987 /* Temporary for holding the result, initialized to the input
41988 operand to ease control flow. */
41989 res = gen_reg_rtx (mode);
41990 emit_move_insn (res, operand1);
41991
41992 /* xa = abs (operand1) */
41993 xa = ix86_expand_sse_fabs (res, &mask);
41994
41995 /* if (!isless (xa, TWO52)) goto label; */
41996 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
41997
41998 /* xa = (double)(long)x */
41999 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
42000 expand_fix (xi, res, 0);
42001 expand_float (xa, xi, 0);
42002
42003 /* generate 1.0 */
42004 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
42005
42006 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
42007 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
42008 emit_insn (gen_rtx_SET (VOIDmode, tmp,
42009 gen_rtx_AND (mode, one, tmp)));
42010 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
42011 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
42012 emit_move_insn (res, tmp);
42013
42014 if (HONOR_SIGNED_ZEROS (mode))
42015 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
42016
42017 emit_label (label);
42018 LABEL_NUSES (label) = 1;
42019
42020 emit_move_insn (operand0, res);
42021 }
42022
42023 /* Expand SSE sequence for computing round from OPERAND1 storing
42024 into OPERAND0. The sequence works without relying on DImode truncation
42025 via cvttsd2siq, which is only available on 64-bit targets. */
42026 void
42027 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
42028 {
42029 /* C code for the stuff we expand below.
42030 double xa = fabs (x), xa2, x2;
42031 if (!isless (xa, TWO52))
42032 return x;
42033 Using the absolute value and copying back sign makes
42034 -0.0 -> -0.0 correct.
42035 xa2 = xa + TWO52 - TWO52;
42036 Compensate.
42037 dxa = xa2 - xa;
42038 if (dxa <= -0.5)
42039 xa2 += 1;
42040 else if (dxa > 0.5)
42041 xa2 -= 1;
42042 x2 = copysign (xa2, x);
42043 return x2;
42044 */
42045 enum machine_mode mode = GET_MODE (operand0);
42046 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
42047
42048 TWO52 = ix86_gen_TWO52 (mode);
42049
42050 /* Temporary for holding the result, initialized to the input
42051 operand to ease control flow. */
42052 res = gen_reg_rtx (mode);
42053 emit_move_insn (res, operand1);
42054
42055 /* xa = abs (operand1) */
42056 xa = ix86_expand_sse_fabs (res, &mask);
42057
42058 /* if (!isless (xa, TWO52)) goto label; */
42059 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42060
42061 /* xa2 = xa + TWO52 - TWO52; */
42062 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
42063 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
42064
42065 /* dxa = xa2 - xa; */
42066 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
42067
42068 /* generate 0.5, 1.0 and -0.5 */
42069 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
42070 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
42071 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
42072 0, OPTAB_DIRECT);
42073
42074 /* Compensate. */
42075 tmp = gen_reg_rtx (mode);
42076 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
42077 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
42078 emit_insn (gen_rtx_SET (VOIDmode, tmp,
42079 gen_rtx_AND (mode, one, tmp)));
42080 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
42081 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
42082 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
42083 emit_insn (gen_rtx_SET (VOIDmode, tmp,
42084 gen_rtx_AND (mode, one, tmp)));
42085 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
42086
42087 /* res = copysign (xa2, operand1) */
42088 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
42089
42090 emit_label (label);
42091 LABEL_NUSES (label) = 1;
42092
42093 emit_move_insn (operand0, res);
42094 }
42095
42096 /* Expand SSE sequence for computing trunc from OPERAND1 storing
42097 into OPERAND0. */
42098 void
42099 ix86_expand_trunc (rtx operand0, rtx operand1)
42100 {
42101 /* C code for SSE variant we expand below.
42102 double xa = fabs (x), x2;
42103 if (!isless (xa, TWO52))
42104 return x;
42105 x2 = (double)(long)x;
42106 if (HONOR_SIGNED_ZEROS (mode))
42107 return copysign (x2, x);
42108 return x2;
42109 */
42110 enum machine_mode mode = GET_MODE (operand0);
42111 rtx xa, xi, TWO52, label, res, mask;
42112
42113 TWO52 = ix86_gen_TWO52 (mode);
42114
42115 /* Temporary for holding the result, initialized to the input
42116 operand to ease control flow. */
42117 res = gen_reg_rtx (mode);
42118 emit_move_insn (res, operand1);
42119
42120 /* xa = abs (operand1) */
42121 xa = ix86_expand_sse_fabs (res, &mask);
42122
42123 /* if (!isless (xa, TWO52)) goto label; */
42124 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42125
42126 /* x = (double)(long)x */
42127 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
42128 expand_fix (xi, res, 0);
42129 expand_float (res, xi, 0);
42130
42131 if (HONOR_SIGNED_ZEROS (mode))
42132 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
42133
42134 emit_label (label);
42135 LABEL_NUSES (label) = 1;
42136
42137 emit_move_insn (operand0, res);
42138 }
42139
42140 /* Expand SSE sequence for computing trunc from OPERAND1 storing
42141 into OPERAND0. Like ix86_expand_rounddf_32, this variant avoids DImode truncation and so also works for DFmode on 32-bit targets. */
42142 void
42143 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
42144 {
42145 enum machine_mode mode = GET_MODE (operand0);
42146 rtx xa, mask, TWO52, label, one, res, smask, tmp;
42147
42148 /* C code for SSE variant we expand below.
42149 double xa = fabs (x), xa2, x2;
42150 if (!isless (xa, TWO52))
42151 return x;
42152 xa2 = xa + TWO52 - TWO52;
42153 Compensate:
42154 if (xa2 > xa)
42155 xa2 -= 1.0;
42156 x2 = copysign (xa2, x);
42157 return x2;
42158 */
42159
42160 TWO52 = ix86_gen_TWO52 (mode);
42161
42162 /* Temporary for holding the result, initialized to the input
42163 operand to ease control flow. */
42164 res = gen_reg_rtx (mode);
42165 emit_move_insn (res, operand1);
42166
42167 /* xa = abs (operand1) */
42168 xa = ix86_expand_sse_fabs (res, &smask);
42169
42170 /* if (!isless (xa, TWO52)) goto label; */
42171 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42172
42173 /* res = xa + TWO52 - TWO52; */
42174 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
42175 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
42176 emit_move_insn (res, tmp);
42177
42178 /* generate 1.0 */
42179 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
42180
42181 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
42182 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
42183 emit_insn (gen_rtx_SET (VOIDmode, mask,
42184 gen_rtx_AND (mode, mask, one)));
42185 tmp = expand_simple_binop (mode, MINUS,
42186 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
42187 emit_move_insn (res, tmp);
42188
42189 /* res = copysign (res, operand1) */
42190 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
42191
42192 emit_label (label);
42193 LABEL_NUSES (label) = 1;
42194
42195 emit_move_insn (operand0, res);
42196 }
42197
42198 /* Expand SSE sequence for computing round from OPERAND1 storing
42199 into OPERAND0. */
42200 void
42201 ix86_expand_round (rtx operand0, rtx operand1)
42202 {
42203 /* C code for the stuff we're doing below:
42204 double xa = fabs (x);
42205 if (!isless (xa, TWO52))
42206 return x;
42207 xa = (double)(long)(xa + nextafter (0.5, 0.0));
42208 return copysign (xa, x);
42209 */
42210 enum machine_mode mode = GET_MODE (operand0);
42211 rtx res, TWO52, xa, label, xi, half, mask;
42212 const struct real_format *fmt;
42213 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
42214
42215 /* Temporary for holding the result, initialized to the input
42216 operand to ease control flow. */
42217 res = gen_reg_rtx (mode);
42218 emit_move_insn (res, operand1);
42219
42220 TWO52 = ix86_gen_TWO52 (mode);
42221 xa = ix86_expand_sse_fabs (res, &mask);
42222 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42223
42224 /* load nextafter (0.5, 0.0) */
42225 fmt = REAL_MODE_FORMAT (mode);
42226 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
42227 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
42228
42229 /* xa = xa + nextafter (0.5, 0.0) */
42230 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
42231 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
42232
42233 /* xa = (double)(int64_t)xa */
42234 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
42235 expand_fix (xi, xa, 0);
42236 expand_float (xa, xi, 0);
42237
42238 /* res = copysign (xa, operand1) */
42239 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
42240
42241 emit_label (label);
42242 LABEL_NUSES (label) = 1;
42243
42244 emit_move_insn (operand0, res);
42245 }
42246
42247 /* Expand SSE sequence for computing round
42248 from OP1 storing into OP0 using sse4 round insn. */
42249 void
42250 ix86_expand_round_sse4 (rtx op0, rtx op1)
42251 {
42252 enum machine_mode mode = GET_MODE (op0);
42253 rtx e1, e2, res, half;
42254 const struct real_format *fmt;
42255 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
42256 rtx (*gen_copysign) (rtx, rtx, rtx);
42257 rtx (*gen_round) (rtx, rtx, rtx);
42258
42259 switch (mode)
42260 {
42261 case SFmode:
42262 gen_copysign = gen_copysignsf3;
42263 gen_round = gen_sse4_1_roundsf2;
42264 break;
42265 case DFmode:
42266 gen_copysign = gen_copysigndf3;
42267 gen_round = gen_sse4_1_rounddf2;
42268 break;
42269 default:
42270 gcc_unreachable ();
42271 }
42272
42273 /* round (a) = trunc (a + copysign (0.5, a)) */
42274
42275 /* load nextafter (0.5, 0.0) */
42276 fmt = REAL_MODE_FORMAT (mode);
42277 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
42278 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
42279 half = const_double_from_real_value (pred_half, mode);
42280
42281 /* e1 = copysign (0.5, op1) */
42282 e1 = gen_reg_rtx (mode);
42283 emit_insn (gen_copysign (e1, half, op1));
42284
42285 /* e2 = op1 + e1 */
42286 e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);
42287
42288 /* res = trunc (e2) */
42289 res = gen_reg_rtx (mode);
42290 emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));
42291
42292 emit_move_insn (op0, res);
42293 }
42294 \f
42295
42296 /* Table of valid machine attributes. */
42297 static const struct attribute_spec ix86_attribute_table[] =
42298 {
42299 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
42300 affects_type_identity } */
42301 /* Stdcall attribute says callee is responsible for popping arguments
42302 if they are not variable. */
42303 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42304 true },
42305 /* Fastcall attribute says callee is responsible for popping arguments
42306 if they are not variable. */
42307 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42308 true },
42309 /* Thiscall attribute says callee is responsible for popping arguments
42310 if they are not variable. */
42311 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42312 true },
42313 /* Cdecl attribute says the callee is a normal C declaration */
42314 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42315 true },
42316 /* Regparm attribute specifies how many integer arguments are to be
42317 passed in registers. */
42318 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute,
42319 true },
42320 /* Sseregparm attribute says we are using x86_64 calling conventions
42321 for FP arguments. */
42322 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42323 true },
42324 /* The transactional memory builtins are implicitly regparm or fastcall
42325 depending on the ABI. Override the generic do-nothing attribute that
42326 these builtins were declared with. */
42327 { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute,
42328 true },
42329 /* force_align_arg_pointer says this function realigns the stack at entry. */
42330 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
42331 false, true, true, ix86_handle_cconv_attribute, false },
42332 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
42333 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
42334 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
42335 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute,
42336 false },
42337 #endif
42338 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
42339 false },
42340 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
42341 false },
42342 #ifdef SUBTARGET_ATTRIBUTE_TABLE
42343 SUBTARGET_ATTRIBUTE_TABLE,
42344 #endif
42345 /* ms_abi and sysv_abi calling convention function attributes. */
42346 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
42347 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
42348 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
42349 false },
42350 { "callee_pop_aggregate_return", 1, 1, false, true, true,
42351 ix86_handle_callee_pop_aggregate_return, true },
42352 /* End element. */
42353 { NULL, 0, 0, false, false, false, NULL, false }
42354 };
42355
42356 /* Implement targetm.vectorize.builtin_vectorization_cost. */
42357 static int
42358 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
42359 tree vectype, int)
42360 {
42361 unsigned elements;
42362
42363 switch (type_of_cost)
42364 {
42365 case scalar_stmt:
42366 return ix86_cost->scalar_stmt_cost;
42367
42368 case scalar_load:
42369 return ix86_cost->scalar_load_cost;
42370
42371 case scalar_store:
42372 return ix86_cost->scalar_store_cost;
42373
42374 case vector_stmt:
42375 return ix86_cost->vec_stmt_cost;
42376
42377 case vector_load:
42378 return ix86_cost->vec_align_load_cost;
42379
42380 case vector_store:
42381 return ix86_cost->vec_store_cost;
42382
42383 case vec_to_scalar:
42384 return ix86_cost->vec_to_scalar_cost;
42385
42386 case scalar_to_vec:
42387 return ix86_cost->scalar_to_vec_cost;
42388
42389 case unaligned_load:
42390 case unaligned_store:
42391 return ix86_cost->vec_unalign_load_cost;
42392
42393 case cond_branch_taken:
42394 return ix86_cost->cond_taken_branch_cost;
42395
42396 case cond_branch_not_taken:
42397 return ix86_cost->cond_not_taken_branch_cost;
42398
42399 case vec_perm:
42400 case vec_promote_demote:
42401 return ix86_cost->vec_stmt_cost;
42402
42403 case vec_construct:
42404 elements = TYPE_VECTOR_SUBPARTS (vectype);
42405 return elements / 2 + 1;
42406
42407 default:
42408 gcc_unreachable ();
42409 }
42410 }
42411
42412 /* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
42413 insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
42414 insn every time. */
42415
42416 static GTY(()) rtx vselect_insn;
42417
42418 /* Initialize vselect_insn. */
42419
42420 static void
42421 init_vselect_insn (void)
42422 {
42423 unsigned i;
42424 rtx x;
42425
42426 x = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (MAX_VECT_LEN));
42427 for (i = 0; i < MAX_VECT_LEN; ++i)
42428 XVECEXP (x, 0, i) = const0_rtx;
42429 x = gen_rtx_VEC_SELECT (V2DFmode, gen_rtx_VEC_CONCAT (V4DFmode, const0_rtx,
42430 const0_rtx), x);
42431 x = gen_rtx_SET (VOIDmode, const0_rtx, x);
42432 start_sequence ();
42433 vselect_insn = emit_insn (x);
42434 end_sequence ();
42435 }
42436
42437 /* Construct (set target (vec_select op0 (parallel perm))) and
42438 return true if that's a valid instruction in the active ISA. */
42439
42440 static bool
42441 expand_vselect (rtx target, rtx op0, const unsigned char *perm,
42442 unsigned nelt, bool testing_p)
42443 {
42444 unsigned int i;
42445 rtx x, save_vconcat;
42446 int icode;
42447
42448 if (vselect_insn == NULL_RTX)
42449 init_vselect_insn ();
42450
42451 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 1);
42452 PUT_NUM_ELEM (XVEC (x, 0), nelt);
42453 for (i = 0; i < nelt; ++i)
42454 XVECEXP (x, 0, i) = GEN_INT (perm[i]);
42455 save_vconcat = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
42456 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = op0;
42457 PUT_MODE (SET_SRC (PATTERN (vselect_insn)), GET_MODE (target));
42458 SET_DEST (PATTERN (vselect_insn)) = target;
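  /* recog_memoized returns the insn code of the filled-in pattern, or -1
     if no insn in the active ISA matches; for testing_p callers that
     answer is all we need.  */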
42459 icode = recog_memoized (vselect_insn);
42460
42461 if (icode >= 0 && !testing_p)
42462 emit_insn (copy_rtx (PATTERN (vselect_insn)));
42463
42464 SET_DEST (PATTERN (vselect_insn)) = const0_rtx;
42465 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = save_vconcat;
42466 INSN_CODE (vselect_insn) = -1;
42467
42468 return icode >= 0;
42469 }
42470
42471 /* Similar, but generate a vec_concat from op0 and op1 as well. */
42472
42473 static bool
42474 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
42475 const unsigned char *perm, unsigned nelt,
42476 bool testing_p)
42477 {
42478 enum machine_mode v2mode;
42479 rtx x;
42480 bool ok;
42481
42482 if (vselect_insn == NULL_RTX)
42483 init_vselect_insn ();
42484
42485 v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
42486 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
42487 PUT_MODE (x, v2mode);
42488 XEXP (x, 0) = op0;
42489 XEXP (x, 1) = op1;
42490 ok = expand_vselect (target, x, perm, nelt, testing_p);
42491 XEXP (x, 0) = const0_rtx;
42492 XEXP (x, 1) = const0_rtx;
42493 return ok;
42494 }
42495
42496 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
42497 in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
42498
42499 static bool
42500 expand_vec_perm_blend (struct expand_vec_perm_d *d)
42501 {
42502 enum machine_mode vmode = d->vmode;
42503 unsigned i, mask, nelt = d->nelt;
42504 rtx target, op0, op1, x;
42505 rtx rperm[32], vperm;
42506
42507 if (d->one_operand_p)
42508 return false;
42509 if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
42510 ;
42511 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
42512 ;
42513 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
42514 ;
42515 else
42516 return false;
42517
42518 /* This is a blend, not a permute. Elements must stay in their
42519 respective lanes. */
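  /* I.e. the result must take element I from OP0 when perm[I] == I and
     from OP1 when perm[I] == I + NELT; the immediate or vector mask built
     below records which.  */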
42520 for (i = 0; i < nelt; ++i)
42521 {
42522 unsigned e = d->perm[i];
42523 if (!(e == i || e == i + nelt))
42524 return false;
42525 }
42526
42527 if (d->testing_p)
42528 return true;
42529
42530 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
42531 decision should be extracted elsewhere, so that we only try that
42532 sequence once all budget==3 options have been tried. */
42533 target = d->target;
42534 op0 = d->op0;
42535 op1 = d->op1;
42536 mask = 0;
42537
42538 switch (vmode)
42539 {
42540 case V4DFmode:
42541 case V8SFmode:
42542 case V2DFmode:
42543 case V4SFmode:
42544 case V8HImode:
42545 case V8SImode:
42546 for (i = 0; i < nelt; ++i)
42547 mask |= (d->perm[i] >= nelt) << i;
42548 break;
42549
42550 case V2DImode:
42551 for (i = 0; i < 2; ++i)
42552 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
42553 vmode = V8HImode;
42554 goto do_subreg;
42555
42556 case V4SImode:
42557 for (i = 0; i < 4; ++i)
42558 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
42559 vmode = V8HImode;
42560 goto do_subreg;
42561
42562 case V16QImode:
42563 /* See if bytes move in pairs so we can use pblendw with
42564 an immediate argument, rather than pblendvb with a vector
42565 argument. */
42566 for (i = 0; i < 16; i += 2)
42567 if (d->perm[i] + 1 != d->perm[i + 1])
42568 {
42569 use_pblendvb:
42570 for (i = 0; i < nelt; ++i)
42571 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
42572
42573 finish_pblendvb:
42574 vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
42575 vperm = force_reg (vmode, vperm);
42576
42577 if (GET_MODE_SIZE (vmode) == 16)
42578 emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
42579 else
42580 emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
42581 if (target != d->target)
42582 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
42583 return true;
42584 }
42585
42586 for (i = 0; i < 8; ++i)
42587 mask |= (d->perm[i * 2] >= 16) << i;
42588 vmode = V8HImode;
42589 /* FALLTHRU */
42590
42591 do_subreg:
42592 target = gen_reg_rtx (vmode);
42593 op0 = gen_lowpart (vmode, op0);
42594 op1 = gen_lowpart (vmode, op1);
42595 break;
42596
42597 case V32QImode:
42598 /* See if bytes move in pairs. If not, vpblendvb must be used. */
42599 for (i = 0; i < 32; i += 2)
42600 if (d->perm[i] + 1 != d->perm[i + 1])
42601 goto use_pblendvb;
42602 /* See if bytes move in quadruplets. If yes, vpblendd
42603 with immediate can be used. */
42604 for (i = 0; i < 32; i += 4)
42605 if (d->perm[i] + 2 != d->perm[i + 2])
42606 break;
42607 if (i < 32)
42608 {
42609 /* See if bytes move the same in both lanes. If yes,
42610 vpblendw with immediate can be used. */
42611 for (i = 0; i < 16; i += 2)
42612 if (d->perm[i] + 16 != d->perm[i + 16])
42613 goto use_pblendvb;
42614
42615 /* Use vpblendw. */
42616 for (i = 0; i < 16; ++i)
42617 mask |= (d->perm[i * 2] >= 32) << i;
42618 vmode = V16HImode;
42619 goto do_subreg;
42620 }
42621
42622 /* Use vpblendd. */
42623 for (i = 0; i < 8; ++i)
42624 mask |= (d->perm[i * 4] >= 32) << i;
42625 vmode = V8SImode;
42626 goto do_subreg;
42627
42628 case V16HImode:
42629 /* See if words move in pairs. If yes, vpblendd can be used. */
42630 for (i = 0; i < 16; i += 2)
42631 if (d->perm[i] + 1 != d->perm[i + 1])
42632 break;
42633 if (i < 16)
42634 {
42635 /* See if words move the same in both lanes. If not,
42636 vpblendvb must be used. */
42637 for (i = 0; i < 8; i++)
42638 if (d->perm[i] + 8 != d->perm[i + 8])
42639 {
42640 /* Use vpblendvb. */
42641 for (i = 0; i < 32; ++i)
42642 rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx);
42643
42644 vmode = V32QImode;
42645 nelt = 32;
42646 target = gen_reg_rtx (vmode);
42647 op0 = gen_lowpart (vmode, op0);
42648 op1 = gen_lowpart (vmode, op1);
42649 goto finish_pblendvb;
42650 }
42651
42652 /* Use vpblendw. */
42653 for (i = 0; i < 16; ++i)
42654 mask |= (d->perm[i] >= 16) << i;
42655 break;
42656 }
42657
42658 /* Use vpblendd. */
42659 for (i = 0; i < 8; ++i)
42660 mask |= (d->perm[i * 2] >= 16) << i;
42661 vmode = V8SImode;
42662 goto do_subreg;
42663
42664 case V4DImode:
42665 /* Use vpblendd. */
42666 for (i = 0; i < 4; ++i)
42667 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
42668 vmode = V8SImode;
42669 goto do_subreg;
42670
42671 default:
42672 gcc_unreachable ();
42673 }
42674
42675 /* This matches five different patterns with the different modes. */
42676 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
42677 x = gen_rtx_SET (VOIDmode, target, x);
42678 emit_insn (x);
42679 if (target != d->target)
42680 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
42681
42682 return true;
42683 }
42684
42685 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
42686 in terms of the variable form of vpermilps.
42687
42688 Note that we will already have failed the immediate-operand vpermilps,
42689 which requires that the high and low part shuffles be identical; the
42690 variable form doesn't require that. */
42691
42692 static bool
42693 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
42694 {
42695 rtx rperm[8], vperm;
42696 unsigned i;
42697
42698 if (!TARGET_AVX || d->vmode != V8SFmode || !d->one_operand_p)
42699 return false;
42700
42701 /* We can only permute within the 128-bit lane. */
42702 for (i = 0; i < 8; ++i)
42703 {
42704 unsigned e = d->perm[i];
42705 if (i < 4 ? e >= 4 : e < 4)
42706 return false;
42707 }
42708
42709 if (d->testing_p)
42710 return true;
42711
42712 for (i = 0; i < 8; ++i)
42713 {
42714 unsigned e = d->perm[i];
42715
42716 /* Within each 128-bit lane, the elements of op0 are numbered
42717 from 0 and the elements of op1 are numbered from 4. */
42718 if (e >= 8 + 4)
42719 e -= 8;
42720 else if (e >= 4)
42721 e -= 4;
42722
42723 rperm[i] = GEN_INT (e);
42724 }
42725
42726 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
42727 vperm = force_reg (V8SImode, vperm);
42728 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
42729
42730 return true;
42731 }
42732
42733 /* Return true if permutation D can be performed as VMODE permutation
42734 instead. */
42735
42736 static bool
42737 valid_perm_using_mode_p (enum machine_mode vmode, struct expand_vec_perm_d *d)
42738 {
42739 unsigned int i, j, chunk;
42740
42741 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
42742 || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
42743 || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))
42744 return false;
42745
42746 if (GET_MODE_NUNITS (vmode) >= d->nelt)
42747 return true;
42748
42749 chunk = d->nelt / GET_MODE_NUNITS (vmode);
42750 for (i = 0; i < d->nelt; i += chunk)
42751 if (d->perm[i] & (chunk - 1))
42752 return false;
42753 else
42754 for (j = 1; j < chunk; ++j)
42755 if (d->perm[i] + j != d->perm[i + j])
42756 return false;
42757
42758 return true;
42759 }
42760
42761 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
42762 in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128. */
42763
42764 static bool
42765 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
42766 {
42767 unsigned i, nelt, eltsz, mask;
42768 unsigned char perm[32];
42769 enum machine_mode vmode = V16QImode;
42770 rtx rperm[32], vperm, target, op0, op1;
42771
42772 nelt = d->nelt;
42773
42774 if (!d->one_operand_p)
42775 {
42776 if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
42777 {
42778 if (TARGET_AVX2
42779 && valid_perm_using_mode_p (V2TImode, d))
42780 {
42781 if (d->testing_p)
42782 return true;
42783
42784 /* Use vperm2i128 insn. The pattern uses
42785 V4DImode instead of V2TImode. */
42786 target = d->target;
42787 if (d->vmode != V4DImode)
42788 target = gen_reg_rtx (V4DImode);
42789 op0 = gen_lowpart (V4DImode, d->op0);
42790 op1 = gen_lowpart (V4DImode, d->op1);
42791 rperm[0]
42792 = GEN_INT (((d->perm[0] & (nelt / 2)) ? 1 : 0)
42793 || ((d->perm[nelt / 2] & (nelt / 2)) ? 2 : 0));
42794 emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
42795 if (target != d->target)
42796 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
42797 return true;
42798 }
42799 return false;
42800 }
42801 }
42802 else
42803 {
42804 if (GET_MODE_SIZE (d->vmode) == 16)
42805 {
42806 if (!TARGET_SSSE3)
42807 return false;
42808 }
42809 else if (GET_MODE_SIZE (d->vmode) == 32)
42810 {
42811 if (!TARGET_AVX2)
42812 return false;
42813
42814 /* V4DImode should already be handled through
42815 expand_vselect by the vpermq instruction. */
42816 gcc_assert (d->vmode != V4DImode);
42817
42818 vmode = V32QImode;
42819 if (d->vmode == V8SImode
42820 || d->vmode == V16HImode
42821 || d->vmode == V32QImode)
42822 {
42823 /* First see if vpermq can be used for
42824 V8SImode/V16HImode/V32QImode. */
42825 if (valid_perm_using_mode_p (V4DImode, d))
42826 {
42827 for (i = 0; i < 4; i++)
42828 perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
42829 if (d->testing_p)
42830 return true;
42831 target = gen_reg_rtx (V4DImode);
42832 if (expand_vselect (target, gen_lowpart (V4DImode, d->op0),
42833 perm, 4, false))
42834 {
42835 emit_move_insn (d->target,
42836 gen_lowpart (d->vmode, target));
42837 return true;
42838 }
42839 return false;
42840 }
42841
42842 /* Next see if vpermd can be used. */
42843 if (valid_perm_using_mode_p (V8SImode, d))
42844 vmode = V8SImode;
42845 }
42846 /* Or if vpermps can be used. */
42847 else if (d->vmode == V8SFmode)
42848 vmode = V8SImode;
42849
42850 if (vmode == V32QImode)
42851 {
42852 /* vpshufb only works intra-lane; it is not
42853 possible to shuffle bytes between the lanes. */
42854 for (i = 0; i < nelt; ++i)
42855 if ((d->perm[i] ^ i) & (nelt / 2))
42856 return false;
42857 }
42858 }
42859 else
42860 return false;
42861 }
42862
42863 if (d->testing_p)
42864 return true;
42865
42866 if (vmode == V8SImode)
42867 for (i = 0; i < 8; ++i)
42868 rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
42869 else
42870 {
42871 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
42872 if (!d->one_operand_p)
42873 mask = 2 * nelt - 1;
42874 else if (vmode == V16QImode)
42875 mask = nelt - 1;
42876 else
42877 mask = nelt / 2 - 1;
42878
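      /* pshufb and vpperm select individual bytes, so expand each element
	 index into eltsz consecutive byte indices.  */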
42879 for (i = 0; i < nelt; ++i)
42880 {
42881 unsigned j, e = d->perm[i] & mask;
42882 for (j = 0; j < eltsz; ++j)
42883 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
42884 }
42885 }
42886
42887 vperm = gen_rtx_CONST_VECTOR (vmode,
42888 gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
42889 vperm = force_reg (vmode, vperm);
42890
42891 target = d->target;
42892 if (d->vmode != vmode)
42893 target = gen_reg_rtx (vmode);
42894 op0 = gen_lowpart (vmode, d->op0);
42895 if (d->one_operand_p)
42896 {
42897 if (vmode == V16QImode)
42898 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
42899 else if (vmode == V32QImode)
42900 emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
42901 else if (vmode == V8SFmode)
42902 emit_insn (gen_avx2_permvarv8sf (target, op0, vperm));
42903 else
42904 emit_insn (gen_avx2_permvarv8si (target, op0, vperm));
42905 }
42906 else
42907 {
42908 op1 = gen_lowpart (vmode, d->op1);
42909 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
42910 }
42911 if (target != d->target)
42912 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
42913
42914 return true;
42915 }
42916
42917 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
42918 in a single instruction. */
42919
42920 static bool
42921 expand_vec_perm_1 (struct expand_vec_perm_d *d)
42922 {
42923 unsigned i, nelt = d->nelt;
42924 unsigned char perm2[MAX_VECT_LEN];
42925
42926 /* Check plain VEC_SELECT first, because AVX has instructions that could
42927 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
42928 input where SEL+CONCAT may not. */
42929 if (d->one_operand_p)
42930 {
42931 int mask = nelt - 1;
42932 bool identity_perm = true;
42933 bool broadcast_perm = true;
42934
42935 for (i = 0; i < nelt; i++)
42936 {
42937 perm2[i] = d->perm[i] & mask;
42938 if (perm2[i] != i)
42939 identity_perm = false;
42940 if (perm2[i])
42941 broadcast_perm = false;
42942 }
42943
42944 if (identity_perm)
42945 {
42946 if (!d->testing_p)
42947 emit_move_insn (d->target, d->op0);
42948 return true;
42949 }
42950 else if (broadcast_perm && TARGET_AVX2)
42951 {
42952 /* Use vpbroadcast{b,w,d}. */
42953 rtx (*gen) (rtx, rtx) = NULL;
42954 switch (d->vmode)
42955 {
42956 case V32QImode:
42957 gen = gen_avx2_pbroadcastv32qi_1;
42958 break;
42959 case V16HImode:
42960 gen = gen_avx2_pbroadcastv16hi_1;
42961 break;
42962 case V8SImode:
42963 gen = gen_avx2_pbroadcastv8si_1;
42964 break;
42965 case V16QImode:
42966 gen = gen_avx2_pbroadcastv16qi;
42967 break;
42968 case V8HImode:
42969 gen = gen_avx2_pbroadcastv8hi;
42970 break;
42971 case V8SFmode:
42972 gen = gen_avx2_vec_dupv8sf_1;
42973 break;
42974 /* For other modes, prefer the other shuffles this function creates. */
42975 default: break;
42976 }
42977 if (gen != NULL)
42978 {
42979 if (!d->testing_p)
42980 emit_insn (gen (d->target, d->op0));
42981 return true;
42982 }
42983 }
42984
42985 if (expand_vselect (d->target, d->op0, perm2, nelt, d->testing_p))
42986 return true;
42987
42988 /* There are plenty of patterns in sse.md that are written for
42989 SEL+CONCAT and are not replicated for a single op. Perhaps
42990 that should be changed, to avoid the nastiness here. */
42991
42992 /* Recognize interleave style patterns, which means incrementing
42993 every other permutation operand. */
42994 for (i = 0; i < nelt; i += 2)
42995 {
42996 perm2[i] = d->perm[i] & mask;
42997 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
42998 }
42999 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
43000 d->testing_p))
43001 return true;
43002
43003 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
43004 if (nelt >= 4)
43005 {
43006 for (i = 0; i < nelt; i += 4)
43007 {
43008 perm2[i + 0] = d->perm[i + 0] & mask;
43009 perm2[i + 1] = d->perm[i + 1] & mask;
43010 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
43011 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
43012 }
43013
43014 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
43015 d->testing_p))
43016 return true;
43017 }
43018 }
43019
43020 /* Finally, try the fully general two operand permute. */
43021 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt,
43022 d->testing_p))
43023 return true;
43024
43025 /* Recognize interleave style patterns with reversed operands. */
43026 if (!d->one_operand_p)
43027 {
43028 for (i = 0; i < nelt; ++i)
43029 {
43030 unsigned e = d->perm[i];
43031 if (e >= nelt)
43032 e -= nelt;
43033 else
43034 e += nelt;
43035 perm2[i] = e;
43036 }
43037
43038 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt,
43039 d->testing_p))
43040 return true;
43041 }
43042
43043 /* Try the SSE4.1 blend variable merge instructions. */
43044 if (expand_vec_perm_blend (d))
43045 return true;
43046
43047 /* Try one of the AVX vpermil variable permutations. */
43048 if (expand_vec_perm_vpermil (d))
43049 return true;
43050
43051 /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
43052 vpshufb, vpermd, vpermps or vpermq variable permutation. */
43053 if (expand_vec_perm_pshufb (d))
43054 return true;
43055
43056 /* Try the AVX512F vpermi2 instructions. */
43057 rtx vec[64];
43058 enum machine_mode mode = d->vmode;
43059 if (mode == V8DFmode)
43060 mode = V8DImode;
43061 else if (mode == V16SFmode)
43062 mode = V16SImode;
43063 for (i = 0; i < nelt; ++i)
43064 vec[i] = GEN_INT (d->perm[i]);
43065 rtx mask = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt, vec));
43066 if (ix86_expand_vec_perm_vpermi2 (d->target, d->op0, mask, d->op1))
43067 return true;
43068
43069 return false;
43070 }
43071
43072 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
43073 in terms of a pair of pshuflw + pshufhw instructions. */
43074
43075 static bool
43076 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
43077 {
43078 unsigned char perm2[MAX_VECT_LEN];
43079 unsigned i;
43080 bool ok;
43081
43082 if (d->vmode != V8HImode || !d->one_operand_p)
43083 return false;
43084
43085 /* The two permutations only operate in 64-bit lanes. */
43086 for (i = 0; i < 4; ++i)
43087 if (d->perm[i] >= 4)
43088 return false;
43089 for (i = 4; i < 8; ++i)
43090 if (d->perm[i] < 4)
43091 return false;
43092
43093 if (d->testing_p)
43094 return true;
43095
43096 /* Emit the pshuflw. */
43097 memcpy (perm2, d->perm, 4);
43098 for (i = 4; i < 8; ++i)
43099 perm2[i] = i;
43100 ok = expand_vselect (d->target, d->op0, perm2, 8, d->testing_p);
43101 gcc_assert (ok);
43102
43103 /* Emit the pshufhw. */
43104 memcpy (perm2 + 4, d->perm + 4, 4);
43105 for (i = 0; i < 4; ++i)
43106 perm2[i] = i;
43107 ok = expand_vselect (d->target, d->target, perm2, 8, d->testing_p);
43108 gcc_assert (ok);
43109
43110 return true;
43111 }
43112
43113 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
43114 the permutation using the SSSE3 palignr instruction. This succeeds
43115 when all of the elements in PERM fit within one vector and we merely
43116 need to shift them down so that a single vector permutation has a
43117 chance to succeed. */
43118
43119 static bool
43120 expand_vec_perm_palignr (struct expand_vec_perm_d *d)
43121 {
43122 unsigned i, nelt = d->nelt;
43123 unsigned min, max;
43124 bool in_order, ok;
43125 rtx shift, target;
43126 struct expand_vec_perm_d dcopy;
43127
43128 /* Even with AVX, palignr only operates on 128-bit vectors. */
43129 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
43130 return false;
43131
43132 min = nelt, max = 0;
43133 for (i = 0; i < nelt; ++i)
43134 {
43135 unsigned e = d->perm[i];
43136 if (e < min)
43137 min = e;
43138 if (e > max)
43139 max = e;
43140 }
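  /* A palignr by MIN elements can bring all referenced elements into a
     single vector only if MIN is nonzero (a shift by zero gains nothing
     over expand_vec_perm_1) and the referenced range fits in NELT
     elements.  */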
43141 if (min == 0 || max - min >= nelt)
43142 return false;
43143
43144 /* Given that we have SSSE3, we know we'll be able to implement the
43145 single operand permutation after the palignr with pshufb. */
43146 if (d->testing_p)
43147 return true;
43148
43149 dcopy = *d;
43150 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
43151 target = gen_reg_rtx (TImode);
43152 emit_insn (gen_ssse3_palignrti (target, gen_lowpart (TImode, d->op1),
43153 gen_lowpart (TImode, d->op0), shift));
43154
43155 dcopy.op0 = dcopy.op1 = gen_lowpart (d->vmode, target);
43156 dcopy.one_operand_p = true;
43157
43158 in_order = true;
43159 for (i = 0; i < nelt; ++i)
43160 {
43161 unsigned e = dcopy.perm[i] - min;
43162 if (e != i)
43163 in_order = false;
43164 dcopy.perm[i] = e;
43165 }
43166
43167 /* Test for the degenerate case where the alignment by itself
43168 produces the desired permutation. */
43169 if (in_order)
43170 {
43171 emit_move_insn (d->target, dcopy.op0);
43172 return true;
43173 }
43174
43175 ok = expand_vec_perm_1 (&dcopy);
43176 gcc_assert (ok);
43177
43178 return ok;
43179 }
43180
43181 /* A subroutine of ix86_expand_vec_perm_const_1. Try to simplify
43182 the permutation using the SSE4_1 pblendv instruction. Potentially
43183 reduces the permutation from 2 pshufb insns and an or to 1 pshufb and 1 pblendv. */
43184
43185 static bool
43186 expand_vec_perm_pblendv (struct expand_vec_perm_d *d)
43187 {
43188 unsigned i, which, nelt = d->nelt;
43189 struct expand_vec_perm_d dcopy, dcopy1;
43190 enum machine_mode vmode = d->vmode;
43191 bool ok;
43192
43193 /* Use the same checks as in expand_vec_perm_blend, but skipping
43194 AVX and AVX2 as they require more than 2 instructions. */
43195 if (d->one_operand_p)
43196 return false;
43197 if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
43198 ;
43199 else
43200 return false;
43201
43202 /* Figure out which permutation elements do not stay in their
43203 respective lanes. */
43204 for (i = 0, which = 0; i < nelt; ++i)
43205 {
43206 unsigned e = d->perm[i];
43207 if (e != i)
43208 which |= (e < nelt ? 1 : 2);
43209 }
43210 /* We can pblend the part whose elements do not stay in their
43211 respective lanes only when these elements all come from the same
43212 half of the permutation.
43213 {0 1 8 3 4 5 9 7} is ok: 8 and 9 are not in their respective
43214 lanes, but both are >= 8.
43215 {0 1 8 3 4 5 2 7} is not ok: 2 and 8 are not in their
43216 respective lanes, and 8 >= 8 but 2 is not. */
43217 if (which != 1 && which != 2)
43218 return false;
43219 if (d->testing_p)
43220 return true;
43221
43222 /* First we apply a one-operand permutation to the part whose
43223 elements do not stay in their respective lanes. */
43224 dcopy = *d;
43225 if (which == 2)
43226 dcopy.op0 = dcopy.op1 = d->op1;
43227 else
43228 dcopy.op0 = dcopy.op1 = d->op0;
43229 dcopy.one_operand_p = true;
43230
43231 for (i = 0; i < nelt; ++i)
43232 dcopy.perm[i] = d->perm[i] & (nelt - 1);
43233
43234 ok = expand_vec_perm_1 (&dcopy);
43235 gcc_assert (ok);
43236
43237 /* Next we put permuted elements into their positions. */
43238 dcopy1 = *d;
43239 if (which == 2)
43240 dcopy1.op1 = dcopy.target;
43241 else
43242 dcopy1.op0 = dcopy.target;
43243
43244 for (i = 0; i < nelt; ++i)
43245 dcopy1.perm[i] = ((d->perm[i] >= nelt) ? (nelt + i) : i);
43246
43247 ok = expand_vec_perm_blend (&dcopy1);
43248 gcc_assert (ok);
43249
43250 return true;
43251 }
43252
43253 static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);
43254
43255 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
43256 a two vector permutation into a single vector permutation by using
43257 an interleave operation to merge the vectors. */
43258
43259 static bool
43260 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
43261 {
43262 struct expand_vec_perm_d dremap, dfinal;
43263 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
43264 unsigned HOST_WIDE_INT contents;
43265 unsigned char remap[2 * MAX_VECT_LEN];
43266 rtx seq;
43267 bool ok, same_halves = false;
43268
43269 if (GET_MODE_SIZE (d->vmode) == 16)
43270 {
43271 if (d->one_operand_p)
43272 return false;
43273 }
43274 else if (GET_MODE_SIZE (d->vmode) == 32)
43275 {
43276 if (!TARGET_AVX)
43277 return false;
43278 /* For 32-byte modes allow even d->one_operand_p.
43279 The lack of cross-lane shuffling in some instructions
43280 might prevent a single insn shuffle. */
43281 dfinal = *d;
43282 dfinal.testing_p = true;
43283 /* If expand_vec_perm_interleave3 can expand this into
43284 a 3-insn sequence, give up and let it be expanded that
43285 way. While that is one insn longer, it doesn't need a
43286 memory operand, and in the common case where both the
43287 interleave-low and interleave-high permutations with the
43288 same operands are adjacent, the pair needs only 4 insns
43289 after CSE. */
43290 if (expand_vec_perm_interleave3 (&dfinal))
43291 return false;
43292 }
43293 else
43294 return false;
43295
43296 /* Examine from whence the elements come. */
43297 contents = 0;
43298 for (i = 0; i < nelt; ++i)
43299 contents |= ((unsigned HOST_WIDE_INT) 1) << d->perm[i];
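  /* CONTENTS now has bit E set for every source element index E the
     permutation reads: 0..NELT-1 from op0, NELT..2*NELT-1 from op1.  */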
43300
43301 memset (remap, 0xff, sizeof (remap));
43302 dremap = *d;
43303
43304 if (GET_MODE_SIZE (d->vmode) == 16)
43305 {
43306 unsigned HOST_WIDE_INT h1, h2, h3, h4;
43307
43308 /* Split the two input vectors into 4 halves. */
43309 h1 = (((unsigned HOST_WIDE_INT) 1) << nelt2) - 1;
43310 h2 = h1 << nelt2;
43311 h3 = h2 << nelt2;
43312 h4 = h3 << nelt2;
43313
43314 /* If the elements come from the low halves, use interleave low; similarly,
43315 use interleave high for the high halves. If the elements are from
43316 mismatched halves, we can use shufps for V4SF/V4SI or do a DImode shuffle. */
43317 if ((contents & (h1 | h3)) == contents)
43318 {
43319 /* punpckl* */
43320 for (i = 0; i < nelt2; ++i)
43321 {
43322 remap[i] = i * 2;
43323 remap[i + nelt] = i * 2 + 1;
43324 dremap.perm[i * 2] = i;
43325 dremap.perm[i * 2 + 1] = i + nelt;
43326 }
43327 if (!TARGET_SSE2 && d->vmode == V4SImode)
43328 dremap.vmode = V4SFmode;
43329 }
43330 else if ((contents & (h2 | h4)) == contents)
43331 {
43332 /* punpckh* */
43333 for (i = 0; i < nelt2; ++i)
43334 {
43335 remap[i + nelt2] = i * 2;
43336 remap[i + nelt + nelt2] = i * 2 + 1;
43337 dremap.perm[i * 2] = i + nelt2;
43338 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
43339 }
43340 if (!TARGET_SSE2 && d->vmode == V4SImode)
43341 dremap.vmode = V4SFmode;
43342 }
43343 else if ((contents & (h1 | h4)) == contents)
43344 {
43345 /* shufps */
43346 for (i = 0; i < nelt2; ++i)
43347 {
43348 remap[i] = i;
43349 remap[i + nelt + nelt2] = i + nelt2;
43350 dremap.perm[i] = i;
43351 dremap.perm[i + nelt2] = i + nelt + nelt2;
43352 }
43353 if (nelt != 4)
43354 {
43355 /* shufpd */
43356 dremap.vmode = V2DImode;
43357 dremap.nelt = 2;
43358 dremap.perm[0] = 0;
43359 dremap.perm[1] = 3;
43360 }
43361 }
43362 else if ((contents & (h2 | h3)) == contents)
43363 {
43364 /* shufps */
43365 for (i = 0; i < nelt2; ++i)
43366 {
43367 remap[i + nelt2] = i;
43368 remap[i + nelt] = i + nelt2;
43369 dremap.perm[i] = i + nelt2;
43370 dremap.perm[i + nelt2] = i + nelt;
43371 }
43372 if (nelt != 4)
43373 {
43374 /* shufpd */
43375 dremap.vmode = V2DImode;
43376 dremap.nelt = 2;
43377 dremap.perm[0] = 1;
43378 dremap.perm[1] = 2;
43379 }
43380 }
43381 else
43382 return false;
43383 }
43384 else
43385 {
43386 unsigned int nelt4 = nelt / 4, nzcnt = 0;
43387 unsigned HOST_WIDE_INT q[8];
43388 unsigned int nonzero_halves[4];
43389
43390 /* Split the two input vectors into 8 quarters. */
43391 q[0] = (((unsigned HOST_WIDE_INT) 1) << nelt4) - 1;
43392 for (i = 1; i < 8; ++i)
43393 q[i] = q[0] << (nelt4 * i);
43394 for (i = 0; i < 4; ++i)
43395 if (((q[2 * i] | q[2 * i + 1]) & contents) != 0)
43396 {
43397 nonzero_halves[nzcnt] = i;
43398 ++nzcnt;
43399 }
43400
43401 if (nzcnt == 1)
43402 {
43403 gcc_assert (d->one_operand_p);
43404 nonzero_halves[1] = nonzero_halves[0];
43405 same_halves = true;
43406 }
43407 else if (d->one_operand_p)
43408 {
43409 gcc_assert (nonzero_halves[0] == 0);
43410 gcc_assert (nonzero_halves[1] == 1);
43411 }
43412
43413 if (nzcnt <= 2)
43414 {
43415 if (d->perm[0] / nelt2 == nonzero_halves[1])
43416 {
43417 /* Attempt to increase the likelihood that dfinal
43418 shuffle will be intra-lane. */
43419 char tmph = nonzero_halves[0];
43420 nonzero_halves[0] = nonzero_halves[1];
43421 nonzero_halves[1] = tmph;
43422 }
43423
43424 /* vperm2f128 or vperm2i128. */
43425 for (i = 0; i < nelt2; ++i)
43426 {
43427 remap[i + nonzero_halves[1] * nelt2] = i + nelt2;
43428 remap[i + nonzero_halves[0] * nelt2] = i;
43429 dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2;
43430 dremap.perm[i] = i + nonzero_halves[0] * nelt2;
43431 }
43432
43433 if (d->vmode != V8SFmode
43434 && d->vmode != V4DFmode
43435 && d->vmode != V8SImode)
43436 {
43437 dremap.vmode = V8SImode;
43438 dremap.nelt = 8;
43439 for (i = 0; i < 4; ++i)
43440 {
43441 dremap.perm[i] = i + nonzero_halves[0] * 4;
43442 dremap.perm[i + 4] = i + nonzero_halves[1] * 4;
43443 }
43444 }
43445 }
43446 else if (d->one_operand_p)
43447 return false;
43448 else if (TARGET_AVX2
43449 && (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
43450 {
43451 /* vpunpckl* */
43452 for (i = 0; i < nelt4; ++i)
43453 {
43454 remap[i] = i * 2;
43455 remap[i + nelt] = i * 2 + 1;
43456 remap[i + nelt2] = i * 2 + nelt2;
43457 remap[i + nelt + nelt2] = i * 2 + nelt2 + 1;
43458 dremap.perm[i * 2] = i;
43459 dremap.perm[i * 2 + 1] = i + nelt;
43460 dremap.perm[i * 2 + nelt2] = i + nelt2;
43461 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2;
43462 }
43463 }
43464 else if (TARGET_AVX2
43465 && (contents & (q[1] | q[3] | q[5] | q[7])) == contents)
43466 {
43467 /* vpunpckh* */
43468 for (i = 0; i < nelt4; ++i)
43469 {
43470 remap[i + nelt4] = i * 2;
43471 remap[i + nelt + nelt4] = i * 2 + 1;
43472 remap[i + nelt2 + nelt4] = i * 2 + nelt2;
43473 remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1;
43474 dremap.perm[i * 2] = i + nelt4;
43475 dremap.perm[i * 2 + 1] = i + nelt + nelt4;
43476 dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4;
43477 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4;
43478 }
43479 }
43480 else
43481 return false;
43482 }
43483
43484 /* Use the remapping array set up above to move the elements from their
43485 swizzled locations into their final destinations. */
43486 dfinal = *d;
43487 for (i = 0; i < nelt; ++i)
43488 {
43489 unsigned e = remap[d->perm[i]];
43490 gcc_assert (e < nelt);
43491 /* If same_halves is true, both halves of the remapped vector are the
43492 same. Avoid cross-lane accesses if possible. */
43493 if (same_halves && i >= nelt2)
43494 {
43495 gcc_assert (e < nelt2);
43496 dfinal.perm[i] = e + nelt2;
43497 }
43498 else
43499 dfinal.perm[i] = e;
43500 }
43501 if (!d->testing_p)
43502 {
43503 dremap.target = gen_reg_rtx (dremap.vmode);
43504 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
43505 }
43506 dfinal.op1 = dfinal.op0;
43507 dfinal.one_operand_p = true;
43508
43509 /* Test if the final remap can be done with a single insn. For V4SFmode or
43510 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
43511 start_sequence ();
43512 ok = expand_vec_perm_1 (&dfinal);
43513 seq = get_insns ();
43514 end_sequence ();
43515
43516 if (!ok)
43517 return false;
43518
43519 if (d->testing_p)
43520 return true;
43521
43522 if (dremap.vmode != dfinal.vmode)
43523 {
43524 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
43525 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
43526 }
43527
43528 ok = expand_vec_perm_1 (&dremap);
43529 gcc_assert (ok);
43530
43531 emit_insn (seq);
43532 return true;
43533 }
43534
43535 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
43536 a single vector cross-lane permutation into vpermq followed
43537 by any of the single insn permutations. */
43538
43539 static bool
43540 expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
43541 {
43542 struct expand_vec_perm_d dremap, dfinal;
43543 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4;
43544 unsigned contents[2];
43545 bool ok;
43546
43547 if (!(TARGET_AVX2
43548 && (d->vmode == V32QImode || d->vmode == V16HImode)
43549 && d->one_operand_p))
43550 return false;
43551
43552 contents[0] = 0;
43553 contents[1] = 0;
43554 for (i = 0; i < nelt2; ++i)
43555 {
43556 contents[0] |= 1u << (d->perm[i] / nelt4);
43557 contents[1] |= 1u << (d->perm[i + nelt2] / nelt4);
43558 }
43559
43560 for (i = 0; i < 2; ++i)
43561 {
43562 unsigned int cnt = 0;
43563 for (j = 0; j < 4; ++j)
43564 if ((contents[i] & (1u << j)) != 0 && ++cnt > 2)
43565 return false;
43566 }
43567
43568 if (d->testing_p)
43569 return true;
43570
43571 dremap = *d;
43572 dremap.vmode = V4DImode;
43573 dremap.nelt = 4;
43574 dremap.target = gen_reg_rtx (V4DImode);
43575 dremap.op0 = gen_lowpart (V4DImode, d->op0);
43576 dremap.op1 = dremap.op0;
43577 dremap.one_operand_p = true;
43578 for (i = 0; i < 2; ++i)
43579 {
43580 unsigned int cnt = 0;
43581 for (j = 0; j < 4; ++j)
43582 if ((contents[i] & (1u << j)) != 0)
43583 dremap.perm[2 * i + cnt++] = j;
43584 for (; cnt < 2; ++cnt)
43585 dremap.perm[2 * i + cnt] = 0;
43586 }
43587
43588 dfinal = *d;
43589 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
43590 dfinal.op1 = dfinal.op0;
43591 dfinal.one_operand_p = true;
43592 for (i = 0, j = 0; i < nelt; ++i)
43593 {
43594 if (i == nelt2)
43595 j = 2;
43596 dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0);
43597 if ((d->perm[i] / nelt4) == dremap.perm[j])
43598 ;
43599 else if ((d->perm[i] / nelt4) == dremap.perm[j + 1])
43600 dfinal.perm[i] |= nelt4;
43601 else
43602 gcc_unreachable ();
43603 }
43604
43605 ok = expand_vec_perm_1 (&dremap);
43606 gcc_assert (ok);
43607
43608 ok = expand_vec_perm_1 (&dfinal);
43609 gcc_assert (ok);
43610
43611 return true;
43612 }
43613
43614 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to expand
43615 a vector permutation using two instructions, vperm2f128 resp.
43616 vperm2i128 followed by any single in-lane permutation. */
43617
43618 static bool
43619 expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d)
43620 {
43621 struct expand_vec_perm_d dfirst, dsecond;
43622 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, perm;
43623 bool ok;
43624
43625 if (!TARGET_AVX
43626 || GET_MODE_SIZE (d->vmode) != 32
43627 || (d->vmode != V8SFmode && d->vmode != V4DFmode && !TARGET_AVX2))
43628 return false;
43629
43630 dsecond = *d;
43631 dsecond.one_operand_p = false;
43632 dsecond.testing_p = true;
43633
43634 /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
43635 immediate. For perm < 16 the second permutation uses
43636 d->op0 as first operand, for perm >= 16 it uses d->op1
43637 as first operand. The second operand is the result of
43638 vperm2[fi]128. */
43639 for (perm = 0; perm < 32; perm++)
43640 {
43641 /* Ignore permutations which do not move anything cross-lane. */
43642 if (perm < 16)
43643 {
43644 /* The second shuffle for e.g. V4DFmode has
43645 0123 and ABCD operands.
43646 Ignore AB23, as 23 is already in the second lane
43647 of the first operand. */
43648 if ((perm & 0xc) == (1 << 2)) continue;
43649 /* And 01CD, as 01 is in the first lane of the first
43650 operand. */
43651 if ((perm & 3) == 0) continue;
43652 /* And 4567, as then the vperm2[fi]128 doesn't change
43653 anything on the original 4567 second operand. */
43654 if ((perm & 0xf) == ((3 << 2) | 2)) continue;
43655 }
43656 else
43657 {
43658 /* The second shuffle for e.g. V4DFmode has
43659 4567 and ABCD operands.
43660 Ignore AB67, as 67 is already in the second lane
43661 of the first operand. */
43662 if ((perm & 0xc) == (3 << 2)) continue;
43663 /* And 45CD, as 45 is in the first lane of the first
43664 operand. */
43665 if ((perm & 3) == 2) continue;
43666 /* And 0123, as then the vperm2[fi]128 doesn't change
43667 anything on the original 0123 first operand. */
43668 if ((perm & 0xf) == (1 << 2)) continue;
43669 }
43670
43671 for (i = 0; i < nelt; i++)
43672 {
43673 j = d->perm[i] / nelt2;
43674 if (j == ((perm >> (2 * (i >= nelt2))) & 3))
43675 dsecond.perm[i] = nelt + (i & nelt2) + (d->perm[i] & (nelt2 - 1));
43676 else if (j == (unsigned) (i >= nelt2) + 2 * (perm >= 16))
43677 dsecond.perm[i] = d->perm[i] & (nelt - 1);
43678 else
43679 break;
43680 }
43681
43682 if (i == nelt)
43683 {
43684 start_sequence ();
43685 ok = expand_vec_perm_1 (&dsecond);
43686 end_sequence ();
43687 }
43688 else
43689 ok = false;
43690
43691 if (ok)
43692 {
43693 if (d->testing_p)
43694 return true;
43695
43696 /* Found a usable second shuffle. dfirst will be
43697 vperm2f128 on d->op0 and d->op1. */
43698 dsecond.testing_p = false;
43699 dfirst = *d;
43700 dfirst.target = gen_reg_rtx (d->vmode);
43701 for (i = 0; i < nelt; i++)
43702 dfirst.perm[i] = (i & (nelt2 - 1))
43703 + ((perm >> (2 * (i >= nelt2))) & 3) * nelt2;
43704
43705 ok = expand_vec_perm_1 (&dfirst);
43706 gcc_assert (ok);
43707
43708 /* And dsecond is some single insn shuffle, taking
43709 d->op0 and result of vperm2f128 (if perm < 16) or
43710 d->op1 and result of vperm2f128 (otherwise). */
43711 dsecond.op1 = dfirst.target;
43712 if (perm >= 16)
43713 dsecond.op0 = dfirst.op1;
43714
43715 ok = expand_vec_perm_1 (&dsecond);
43716 gcc_assert (ok);
43717
43718 return true;
43719 }
43720
43721 /* For one operand, the only useful vperm2f128 permutation is 0x10. */
43722 if (d->one_operand_p)
43723 return false;
43724 }
43725
43726 return false;
43727 }
43728
43729 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
43730 a two vector permutation using 2 intra-lane interleave insns
43731 and cross-lane shuffle for 32-byte vectors. */
43732
43733 static bool
43734 expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
43735 {
43736 unsigned i, nelt;
43737 rtx (*gen) (rtx, rtx, rtx);
43738
43739 if (d->one_operand_p)
43740 return false;
43741 if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
43742 ;
43743 else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
43744 ;
43745 else
43746 return false;
43747
43748 nelt = d->nelt;
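  /* The permutation must interleave corresponding elements of the two
     operands, starting either at element 0 (a low interleave) or at
     element nelt / 2 (a high interleave).  */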
43749 if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
43750 return false;
43751 for (i = 0; i < nelt; i += 2)
43752 if (d->perm[i] != d->perm[0] + i / 2
43753 || d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
43754 return false;
43755
43756 if (d->testing_p)
43757 return true;
43758
43759 switch (d->vmode)
43760 {
43761 case V32QImode:
43762 if (d->perm[0])
43763 gen = gen_vec_interleave_highv32qi;
43764 else
43765 gen = gen_vec_interleave_lowv32qi;
43766 break;
43767 case V16HImode:
43768 if (d->perm[0])
43769 gen = gen_vec_interleave_highv16hi;
43770 else
43771 gen = gen_vec_interleave_lowv16hi;
43772 break;
43773 case V8SImode:
43774 if (d->perm[0])
43775 gen = gen_vec_interleave_highv8si;
43776 else
43777 gen = gen_vec_interleave_lowv8si;
43778 break;
43779 case V4DImode:
43780 if (d->perm[0])
43781 gen = gen_vec_interleave_highv4di;
43782 else
43783 gen = gen_vec_interleave_lowv4di;
43784 break;
43785 case V8SFmode:
43786 if (d->perm[0])
43787 gen = gen_vec_interleave_highv8sf;
43788 else
43789 gen = gen_vec_interleave_lowv8sf;
43790 break;
43791 case V4DFmode:
43792 if (d->perm[0])
43793 gen = gen_vec_interleave_highv4df;
43794 else
43795 gen = gen_vec_interleave_lowv4df;
43796 break;
43797 default:
43798 gcc_unreachable ();
43799 }
43800
43801 emit_insn (gen (d->target, d->op0, d->op1));
43802 return true;
43803 }
43804
43805 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement
43806 a single vector permutation using a single intra-lane vector
43807 permutation, vperm2f128 swapping the lanes and vblend* insn blending
43808 the non-swapped and swapped vectors together. */
43809
43810 static bool
43811 expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)
43812 {
43813 struct expand_vec_perm_d dfirst, dsecond;
43814 unsigned i, j, msk, nelt = d->nelt, nelt2 = nelt / 2;
43815 rtx seq;
43816 bool ok;
43817 rtx (*blend) (rtx, rtx, rtx, rtx) = NULL;
43818
43819 if (!TARGET_AVX
43820 || TARGET_AVX2
43821 || (d->vmode != V8SFmode && d->vmode != V4DFmode)
43822 || !d->one_operand_p)
43823 return false;
43824
43825 dfirst = *d;
43826 for (i = 0; i < nelt; i++)
43827 dfirst.perm[i] = 0xff;
43828 for (i = 0, msk = 0; i < nelt; i++)
43829 {
43830 j = (d->perm[i] & nelt2) ? i | nelt2 : i & ~nelt2;
43831 if (dfirst.perm[j] != 0xff && dfirst.perm[j] != d->perm[i])
43832 return false;
43833 dfirst.perm[j] = d->perm[i];
43834 if (j != i)
43835 msk |= (1 << i);
43836 }
43837 for (i = 0; i < nelt; i++)
43838 if (dfirst.perm[i] == 0xff)
43839 dfirst.perm[i] = i;
43840
43841 if (!d->testing_p)
43842 dfirst.target = gen_reg_rtx (dfirst.vmode);
43843
43844 start_sequence ();
43845 ok = expand_vec_perm_1 (&dfirst);
43846 seq = get_insns ();
43847 end_sequence ();
43848
43849 if (!ok)
43850 return false;
43851
43852 if (d->testing_p)
43853 return true;
43854
43855 emit_insn (seq);
43856
43857 dsecond = *d;
43858 dsecond.op0 = dfirst.target;
43859 dsecond.op1 = dfirst.target;
43860 dsecond.one_operand_p = true;
43861 dsecond.target = gen_reg_rtx (dsecond.vmode);
43862 for (i = 0; i < nelt; i++)
43863 dsecond.perm[i] = i ^ nelt2;
43864
43865 ok = expand_vec_perm_1 (&dsecond);
43866 gcc_assert (ok);
43867
43868 blend = d->vmode == V8SFmode ? gen_avx_blendps256 : gen_avx_blendpd256;
43869 emit_insn (blend (d->target, dfirst.target, dsecond.target, GEN_INT (msk)));
43870 return true;
43871 }
43872
43873 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement a V4DF
43874 permutation using two vperm2f128, followed by a vshufpd insn blending
43875 the two vectors together. */
43876
43877 static bool
43878 expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d *d)
43879 {
43880 struct expand_vec_perm_d dfirst, dsecond, dthird;
43881 bool ok;
43882
43883 if (!TARGET_AVX || (d->vmode != V4DFmode))
43884 return false;
43885
43886 if (d->testing_p)
43887 return true;
43888
43889 dfirst = *d;
43890 dsecond = *d;
43891 dthird = *d;
43892
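  /* dfirst selects the 128-bit lanes holding d->perm[0] and d->perm[2]
     (one vperm2f128); dsecond selects the lanes holding d->perm[1] and
     d->perm[3].  dthird then picks the required even or odd double from
     each lane of those two intermediate results, which is a vshufpd.  */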
43893 dfirst.perm[0] = (d->perm[0] & ~1);
43894 dfirst.perm[1] = (d->perm[0] & ~1) + 1;
43895 dfirst.perm[2] = (d->perm[2] & ~1);
43896 dfirst.perm[3] = (d->perm[2] & ~1) + 1;
43897 dsecond.perm[0] = (d->perm[1] & ~1);
43898 dsecond.perm[1] = (d->perm[1] & ~1) + 1;
43899 dsecond.perm[2] = (d->perm[3] & ~1);
43900 dsecond.perm[3] = (d->perm[3] & ~1) + 1;
43901 dthird.perm[0] = (d->perm[0] % 2);
43902 dthird.perm[1] = (d->perm[1] % 2) + 4;
43903 dthird.perm[2] = (d->perm[2] % 2) + 2;
43904 dthird.perm[3] = (d->perm[3] % 2) + 6;
43905
43906 dfirst.target = gen_reg_rtx (dfirst.vmode);
43907 dsecond.target = gen_reg_rtx (dsecond.vmode);
43908 dthird.op0 = dfirst.target;
43909 dthird.op1 = dsecond.target;
43910 dthird.one_operand_p = false;
43911
43912 canonicalize_perm (&dfirst);
43913 canonicalize_perm (&dsecond);
43914
43915 ok = expand_vec_perm_1 (&dfirst)
43916 && expand_vec_perm_1 (&dsecond)
43917 && expand_vec_perm_1 (&dthird);
43918
43919 gcc_assert (ok);
43920
43921 return true;
43922 }
43923
43924 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
43925 permutation with two pshufb insns and an ior. We should have already
43926 failed all two instruction sequences. */
43927
43928 static bool
43929 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
43930 {
43931 rtx rperm[2][16], vperm, l, h, op, m128;
43932 unsigned int i, nelt, eltsz;
43933
43934 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
43935 return false;
43936 gcc_assert (!d->one_operand_p);
43937
43938 if (d->testing_p)
43939 return true;
43940
43941 nelt = d->nelt;
43942 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
43943
43944 /* Generate two permutation masks. If the required element is within
43945 the given vector it is shuffled into the proper lane. If the required
43946 element is in the other vector, force a zero into the lane by setting
43947 bit 7 in the permutation mask. */
43948 m128 = GEN_INT (-128);
43949 for (i = 0; i < nelt; ++i)
43950 {
43951 unsigned j, e = d->perm[i];
43952 unsigned which = (e >= nelt);
43953 if (e >= nelt)
43954 e -= nelt;
43955
43956 for (j = 0; j < eltsz; ++j)
43957 {
43958 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
43959 rperm[1-which][i*eltsz + j] = m128;
43960 }
43961 }
43962
43963 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
43964 vperm = force_reg (V16QImode, vperm);
43965
43966 l = gen_reg_rtx (V16QImode);
43967 op = gen_lowpart (V16QImode, d->op0);
43968 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
43969
43970 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
43971 vperm = force_reg (V16QImode, vperm);
43972
43973 h = gen_reg_rtx (V16QImode);
43974 op = gen_lowpart (V16QImode, d->op1);
43975 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
43976
43977 op = d->target;
43978 if (d->vmode != V16QImode)
43979 op = gen_reg_rtx (V16QImode);
43980 emit_insn (gen_iorv16qi3 (op, l, h));
43981 if (op != d->target)
43982 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
43983
43984 return true;
43985 }
43986
43987 /* Implement arbitrary permutation of one V32QImode or V16HImode operand
43988 with two vpshufb insns, vpermq and vpor. We should have already failed
43989 all two or three instruction sequences. */
43990
43991 static bool
43992 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
43993 {
43994 rtx rperm[2][32], vperm, l, h, hp, op, m128;
43995 unsigned int i, nelt, eltsz;
43996
43997 if (!TARGET_AVX2
43998 || !d->one_operand_p
43999 || (d->vmode != V32QImode && d->vmode != V16HImode))
44000 return false;
44001
44002 if (d->testing_p)
44003 return true;
44004
44005 nelt = d->nelt;
44006 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
44007
44008 /* Generate two permutation masks. If the required element is within
44009 the same lane, it is shuffled in. If the required element is from the
44010 other lane, force a zero by setting bit 7 in the permutation mask.
44011 The other mask has non-negative elements only where the element is
44012 requested from the other lane; those elements are also moved to the
44013 other lane, so that the two V2TImode halves of the vpshufb result
44014 end up swapped. */
44015 m128 = GEN_INT (-128);
44016 for (i = 0; i < nelt; ++i)
44017 {
44018 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
44019 unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
44020
44021 for (j = 0; j < eltsz; ++j)
44022 {
44023 rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j);
44024 rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128;
44025 }
44026 }
44027
44028 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
44029 vperm = force_reg (V32QImode, vperm);
44030
44031 h = gen_reg_rtx (V32QImode);
44032 op = gen_lowpart (V32QImode, d->op0);
44033 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
44034
44035 /* Swap the 128-bit lanes of h into hp. */
44036 hp = gen_reg_rtx (V4DImode);
44037 op = gen_lowpart (V4DImode, h);
44038 emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx,
44039 const1_rtx));
44040
44041 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
44042 vperm = force_reg (V32QImode, vperm);
44043
44044 l = gen_reg_rtx (V32QImode);
44045 op = gen_lowpart (V32QImode, d->op0);
44046 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
44047
44048 op = d->target;
44049 if (d->vmode != V32QImode)
44050 op = gen_reg_rtx (V32QImode);
44051 emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));
44052 if (op != d->target)
44053 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
44054
44055 return true;
44056 }
44057
44058 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
44059 and extract-odd permutations of two V32QImode or V16HImode operands
44060 with two vpshufb insns, vpor and vpermq. We should have already
44061 failed all two or three instruction sequences. */
44062
44063 static bool
44064 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
44065 {
44066 rtx rperm[2][32], vperm, l, h, ior, op, m128;
44067 unsigned int i, nelt, eltsz;
44068
44069 if (!TARGET_AVX2
44070 || d->one_operand_p
44071 || (d->vmode != V32QImode && d->vmode != V16HImode))
44072 return false;
44073
44074 for (i = 0; i < d->nelt; ++i)
44075 if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2))
44076 return false;
44077
44078 if (d->testing_p)
44079 return true;
44080
44081 nelt = d->nelt;
44082 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
44083
44084 /* Generate two permutation masks. In the first permutation mask
44085 the first quarter will contain indexes for the first half
44086 of the op0, the second quarter will contain bit 7 set, third quarter
44087 will contain indexes for the second half of the op0 and the
44088 last quarter bit 7 set. In the second permutation mask
44089 the first quarter will contain bit 7 set, the second quarter
44090 indexes for the first half of the op1, the third quarter bit 7 set
44091 and last quarter indexes for the second half of the op1.
44092 I.e. the first mask e.g. for V32QImode extract even will be:
44093 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
44094 (all values masked with 0xf except for -128) and second mask
44095 for extract even will be
44096 -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */
44097 m128 = GEN_INT (-128);
44098 for (i = 0; i < nelt; ++i)
44099 {
44100 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
44101 unsigned which = d->perm[i] >= nelt;
44102 unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0;
44103
44104 for (j = 0; j < eltsz; ++j)
44105 {
44106 rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j);
44107 rperm[1 - which][(i * eltsz + j) ^ xorv] = m128;
44108 }
44109 }
44110
44111 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
44112 vperm = force_reg (V32QImode, vperm);
44113
44114 l = gen_reg_rtx (V32QImode);
44115 op = gen_lowpart (V32QImode, d->op0);
44116 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
44117
44118 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
44119 vperm = force_reg (V32QImode, vperm);
44120
44121 h = gen_reg_rtx (V32QImode);
44122 op = gen_lowpart (V32QImode, d->op1);
44123 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
44124
44125 ior = gen_reg_rtx (V32QImode);
44126 emit_insn (gen_iorv32qi3 (ior, l, h));
44127
44128 /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation. */
44129 op = gen_reg_rtx (V4DImode);
44130 ior = gen_lowpart (V4DImode, ior);
44131 emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
44132 const1_rtx, GEN_INT (3)));
44133 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
44134
44135 return true;
44136 }
44137
44138 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
44139 and extract-odd permutations. */
44140
44141 static bool
44142 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
44143 {
44144 rtx t1, t2, t3, t4, t5;
44145
44146 switch (d->vmode)
44147 {
44148 case V4DFmode:
44149 if (d->testing_p)
44150 break;
44151 t1 = gen_reg_rtx (V4DFmode);
44152 t2 = gen_reg_rtx (V4DFmode);
44153
44154 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
44155 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
44156 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
44157
44158 /* Now an unpck[lh]pd will produce the result required. */
44159 if (odd)
44160 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
44161 else
44162 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
44163 emit_insn (t3);
44164 break;
44165
44166 case V8SFmode:
44167 {
44168 int mask = odd ? 0xdd : 0x88;
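	/* A shufps immediate of 0x88 selects elements 0 and 2 of each source
	   within every 128-bit lane (the even elements); 0xdd selects
	   elements 1 and 3 (the odd elements).  */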
44169
44170 if (d->testing_p)
44171 break;
44172 t1 = gen_reg_rtx (V8SFmode);
44173 t2 = gen_reg_rtx (V8SFmode);
44174 t3 = gen_reg_rtx (V8SFmode);
44175
44176 /* Shuffle within the 128-bit lanes to produce:
44177 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
44178 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
44179 GEN_INT (mask)));
44180
44181 /* Shuffle the lanes around to produce:
44182 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
44183 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
44184 GEN_INT (0x3)));
44185
44186 /* Shuffle within the 128-bit lanes to produce:
44187 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
44188 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
44189
44190 /* Shuffle within the 128-bit lanes to produce:
44191 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
44192 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
44193
44194 /* Shuffle the lanes around to produce:
44195 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
44196 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
44197 GEN_INT (0x20)));
44198 }
44199 break;
44200
44201 case V2DFmode:
44202 case V4SFmode:
44203 case V2DImode:
44204 case V4SImode:
44205 /* These are always directly implementable by expand_vec_perm_1. */
44206 gcc_unreachable ();
44207
44208 case V8HImode:
44209 if (TARGET_SSSE3 && !TARGET_SLOW_PSHUFB)
44210 return expand_vec_perm_pshufb2 (d);
44211 else
44212 {
44213 if (d->testing_p)
44214 break;
44215 /* We need 2*log2(N)-1 operations to achieve odd/even
44216 with interleave. */
44217 t1 = gen_reg_rtx (V8HImode);
44218 t2 = gen_reg_rtx (V8HImode);
44219 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
44220 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
44221 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
44222 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
44223 if (odd)
44224 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
44225 else
44226 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
44227 emit_insn (t3);
44228 }
44229 break;
44230
44231 case V16QImode:
44232 if (TARGET_SSSE3 && !TARGET_SLOW_PSHUFB)
44233 return expand_vec_perm_pshufb2 (d);
44234 else
44235 {
44236 if (d->testing_p)
44237 break;
44238 t1 = gen_reg_rtx (V16QImode);
44239 t2 = gen_reg_rtx (V16QImode);
44240 t3 = gen_reg_rtx (V16QImode);
44241 emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
44242 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
44243 emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
44244 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
44245 emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
44246 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
44247 if (odd)
44248 t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
44249 else
44250 t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
44251 emit_insn (t3);
44252 }
44253 break;
44254
44255 case V16HImode:
44256 case V32QImode:
44257 return expand_vec_perm_vpshufb2_vpermq_even_odd (d);
44258
44259 case V4DImode:
44260 if (!TARGET_AVX2)
44261 {
44262 struct expand_vec_perm_d d_copy = *d;
44263 d_copy.vmode = V4DFmode;
44264 if (d->testing_p)
44265 d_copy.target = gen_lowpart (V4DFmode, d->target);
44266 else
44267 d_copy.target = gen_reg_rtx (V4DFmode);
44268 d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
44269 d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
44270 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
44271 {
44272 if (!d->testing_p)
44273 emit_move_insn (d->target,
44274 gen_lowpart (V4DImode, d_copy.target));
44275 return true;
44276 }
44277 return false;
44278 }
44279
44280 if (d->testing_p)
44281 break;
44282
44283 t1 = gen_reg_rtx (V4DImode);
44284 t2 = gen_reg_rtx (V4DImode);
44285
44286 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
44287 emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20)));
44288 emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31)));
44289
44290 /* Now a vpunpck[lh]qdq will produce the result required. */
44291 if (odd)
44292 t3 = gen_avx2_interleave_highv4di (d->target, t1, t2);
44293 else
44294 t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2);
44295 emit_insn (t3);
44296 break;
44297
44298 case V8SImode:
44299 if (!TARGET_AVX2)
44300 {
44301 struct expand_vec_perm_d d_copy = *d;
44302 d_copy.vmode = V8SFmode;
44303 if (d->testing_p)
44304 d_copy.target = gen_lowpart (V8SFmode, d->target);
44305 else
44306 d_copy.target = gen_reg_rtx (V8SFmode);
44307 d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
44308 d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
44309 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
44310 {
44311 if (!d->testing_p)
44312 emit_move_insn (d->target,
44313 gen_lowpart (V8SImode, d_copy.target));
44314 return true;
44315 }
44316 return false;
44317 }
44318
44319 if (d->testing_p)
44320 break;
44321
44322 t1 = gen_reg_rtx (V8SImode);
44323 t2 = gen_reg_rtx (V8SImode);
44324 t3 = gen_reg_rtx (V4DImode);
44325 t4 = gen_reg_rtx (V4DImode);
44326 t5 = gen_reg_rtx (V4DImode);
44327
44328 /* Shuffle the lanes around into
44329 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. */
44330 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, d->op0),
44331 gen_lowpart (V4DImode, d->op1),
44332 GEN_INT (0x20)));
44333 emit_insn (gen_avx2_permv2ti (t4, gen_lowpart (V4DImode, d->op0),
44334 gen_lowpart (V4DImode, d->op1),
44335 GEN_INT (0x31)));
44336
44337 /* Swap the 2nd and 3rd position in each lane into
44338 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
44339 emit_insn (gen_avx2_pshufdv3 (t1, gen_lowpart (V8SImode, t3),
44340 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
44341 emit_insn (gen_avx2_pshufdv3 (t2, gen_lowpart (V8SImode, t4),
44342 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
44343
44344 /* Now a vpunpck[lh]qdq will produce
44345 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }. */
44346 if (odd)
44347 t3 = gen_avx2_interleave_highv4di (t5, gen_lowpart (V4DImode, t1),
44348 gen_lowpart (V4DImode, t2));
44349 else
44350 t3 = gen_avx2_interleave_lowv4di (t5, gen_lowpart (V4DImode, t1),
44351 gen_lowpart (V4DImode, t2));
44352 emit_insn (t3);
44353 emit_move_insn (d->target, gen_lowpart (V8SImode, t5));
44354 break;
44355
44356 default:
44357 gcc_unreachable ();
44358 }
44359
44360 return true;
44361 }
44362
44363 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
44364 extract-even and extract-odd permutations. */
44365
44366 static bool
44367 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
44368 {
44369 unsigned i, odd, nelt = d->nelt;
44370
44371 odd = d->perm[0];
44372 if (odd != 0 && odd != 1)
44373 return false;
44374
44375 for (i = 1; i < nelt; ++i)
44376 if (d->perm[i] != 2 * i + odd)
44377 return false;
44378
44379 return expand_vec_perm_even_odd_1 (d, odd);
44380 }
44381
44382 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
44383 permutations. We assume that expand_vec_perm_1 has already failed. */
44384
44385 static bool
44386 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
44387 {
44388 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
44389 enum machine_mode vmode = d->vmode;
44390 unsigned char perm2[4];
44391 rtx op0 = d->op0, dest;
44392 bool ok;
44393
44394 switch (vmode)
44395 {
44396 case V4DFmode:
44397 case V8SFmode:
44398 /* These are special-cased in sse.md so that we can optionally
44399 use the vbroadcast instruction. They expand to two insns
44400 if the input happens to be in a register. */
44401 gcc_unreachable ();
44402
44403 case V2DFmode:
44404 case V2DImode:
44405 case V4SFmode:
44406 case V4SImode:
44407 /* These are always implementable using standard shuffle patterns. */
44408 gcc_unreachable ();
44409
44410 case V8HImode:
44411 case V16QImode:
44412 /* These can be implemented via interleave. We save one insn by
44413 stopping once we have promoted to V4SImode and then using pshufd. */
44414 if (d->testing_p)
44415 return true;
44416 do
44417 {
44418 rtx dest;
44419 rtx (*gen) (rtx, rtx, rtx)
44420 = vmode == V16QImode ? gen_vec_interleave_lowv16qi
44421 : gen_vec_interleave_lowv8hi;
44422
44423 if (elt >= nelt2)
44424 {
44425 gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
44426 : gen_vec_interleave_highv8hi;
44427 elt -= nelt2;
44428 }
44429 nelt2 /= 2;
44430
44431 dest = gen_reg_rtx (vmode);
44432 emit_insn (gen (dest, op0, op0));
44433 vmode = get_mode_wider_vector (vmode);
44434 op0 = gen_lowpart (vmode, dest);
44435 }
44436 while (vmode != V4SImode);
44437
44438 memset (perm2, elt, 4);
44439 dest = gen_reg_rtx (V4SImode);
44440 ok = expand_vselect (dest, op0, perm2, 4, d->testing_p);
44441 gcc_assert (ok);
44442 if (!d->testing_p)
44443 emit_move_insn (d->target, gen_lowpart (d->vmode, dest));
44444 return true;
44445
44446 case V32QImode:
44447 case V16HImode:
44448 case V8SImode:
44449 case V4DImode:
44450 /* For AVX2, broadcasts of the first element should already have been
44451 handled by expand_vec_perm_1 via vpbroadcast* or vpermq. */
44452 gcc_assert (!TARGET_AVX2 || d->perm[0]);
44453 return false;
44454
44455 default:
44456 gcc_unreachable ();
44457 }
44458 }
44459
44460 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
44461 broadcast permutations. */
44462
44463 static bool
44464 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
44465 {
44466 unsigned i, elt, nelt = d->nelt;
44467
44468 if (!d->one_operand_p)
44469 return false;
44470
44471 elt = d->perm[0];
44472 for (i = 1; i < nelt; ++i)
44473 if (d->perm[i] != elt)
44474 return false;
44475
44476 return expand_vec_perm_broadcast_1 (d);
44477 }
44478
44479 /* Implement arbitrary permutation of two V32QImode or V16HImode operands
44480 with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed
44481 all the shorter instruction sequences. */
44482
44483 static bool
44484 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
44485 {
44486 rtx rperm[4][32], vperm, l[2], h[2], op, m128;
44487 unsigned int i, nelt, eltsz;
44488 bool used[4];
44489
44490 if (!TARGET_AVX2
44491 || d->one_operand_p
44492 || (d->vmode != V32QImode && d->vmode != V16HImode))
44493 return false;
44494
44495 if (d->testing_p)
44496 return true;
44497
44498 nelt = d->nelt;
44499 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
44500
44501 /* Generate 4 permutation masks. If the required element is within
44502 the same lane, it is shuffled in. If the required element is from the
44503 other lane, force a zero by setting bit 7 in the permutation mask.
44504 The other mask has non-negative elements only where the element is
44505 requested from the other lane; those elements are also moved to the
44506 other lane, so that the two V2TImode halves of the vpshufb result
44507 end up swapped. */
44508 m128 = GEN_INT (-128);
44509 for (i = 0; i < 32; ++i)
44510 {
44511 rperm[0][i] = m128;
44512 rperm[1][i] = m128;
44513 rperm[2][i] = m128;
44514 rperm[3][i] = m128;
44515 }
44516 used[0] = false;
44517 used[1] = false;
44518 used[2] = false;
44519 used[3] = false;
44520 for (i = 0; i < nelt; ++i)
44521 {
44522 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
44523 unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
44524 unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0);
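      /* Masks 0 and 1 are applied to op0, masks 2 and 3 to op1; the
	 even-numbered masks keep elements within their own lane, the
	 odd-numbered ones feed the lane-swapped copies built below.  */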
44525
44526 for (j = 0; j < eltsz; ++j)
44527 rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j);
44528 used[which] = true;
44529 }
44530
44531 for (i = 0; i < 2; ++i)
44532 {
44533 if (!used[2 * i + 1])
44534 {
44535 h[i] = NULL_RTX;
44536 continue;
44537 }
44538 vperm = gen_rtx_CONST_VECTOR (V32QImode,
44539 gen_rtvec_v (32, rperm[2 * i + 1]));
44540 vperm = force_reg (V32QImode, vperm);
44541 h[i] = gen_reg_rtx (V32QImode);
44542 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
44543 emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm));
44544 }
44545
44546 /* Swap the 128-bit lanes of h[X]. */
44547 for (i = 0; i < 2; ++i)
44548 {
44549 if (h[i] == NULL_RTX)
44550 continue;
44551 op = gen_reg_rtx (V4DImode);
44552 emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]),
44553 const2_rtx, GEN_INT (3), const0_rtx,
44554 const1_rtx));
44555 h[i] = gen_lowpart (V32QImode, op);
44556 }
44557
44558 for (i = 0; i < 2; ++i)
44559 {
44560 if (!used[2 * i])
44561 {
44562 l[i] = NULL_RTX;
44563 continue;
44564 }
44565 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i]));
44566 vperm = force_reg (V32QImode, vperm);
44567 l[i] = gen_reg_rtx (V32QImode);
44568 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
44569 emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm));
44570 }
44571
44572 for (i = 0; i < 2; ++i)
44573 {
44574 if (h[i] && l[i])
44575 {
44576 op = gen_reg_rtx (V32QImode);
44577 emit_insn (gen_iorv32qi3 (op, l[i], h[i]));
44578 l[i] = op;
44579 }
44580 else if (h[i])
44581 l[i] = h[i];
44582 }
44583
44584 gcc_assert (l[0] && l[1]);
44585 op = d->target;
44586 if (d->vmode != V32QImode)
44587 op = gen_reg_rtx (V32QImode);
44588 emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
44589 if (op != d->target)
44590 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
44591 return true;
44592 }
44593
44594 /* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
44595 With all of the interface bits taken care of, perform the expansion
44596 in D and return true on success. */
44597
44598 static bool
44599 ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
44600 {
44601 /* Try a single instruction expansion. */
44602 if (expand_vec_perm_1 (d))
44603 return true;
44604
44605 /* Try sequences of two instructions. */
44606
44607 if (expand_vec_perm_pshuflw_pshufhw (d))
44608 return true;
44609
44610 if (expand_vec_perm_palignr (d))
44611 return true;
44612
44613 if (expand_vec_perm_interleave2 (d))
44614 return true;
44615
44616 if (expand_vec_perm_broadcast (d))
44617 return true;
44618
44619 if (expand_vec_perm_vpermq_perm_1 (d))
44620 return true;
44621
44622 if (expand_vec_perm_vperm2f128 (d))
44623 return true;
44624
44625 if (expand_vec_perm_pblendv (d))
44626 return true;
44627
44628 /* Try sequences of three instructions. */
44629
44630 if (expand_vec_perm_2vperm2f128_vshuf (d))
44631 return true;
44632
44633 if (expand_vec_perm_pshufb2 (d))
44634 return true;
44635
44636 if (expand_vec_perm_interleave3 (d))
44637 return true;
44638
44639 if (expand_vec_perm_vperm2f128_vblend (d))
44640 return true;
44641
44642 /* Try sequences of four instructions. */
44643
44644 if (expand_vec_perm_vpshufb2_vpermq (d))
44645 return true;
44646
44647 if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
44648 return true;
44649
44650 /* ??? Look for narrow permutations whose element orderings would
44651 allow the promotion to a wider mode. */
44652
44653 /* ??? Look for sequences of interleave or a wider permute that place
44654 the data into the correct lanes for a half-vector shuffle like
44655 pshuf[lh]w or vpermilps. */
44656
44657 /* ??? Look for sequences of interleave that produce the desired results.
44658 The combinatorics of punpck[lh] get pretty ugly... */
44659
44660 if (expand_vec_perm_even_odd (d))
44661 return true;
44662
44663 /* Even longer sequences. */
44664 if (expand_vec_perm_vpshufb4_vpermq2 (d))
44665 return true;
44666
44667 return false;
44668 }
44669
44670 /* If a permutation only uses one operand, make it clear. Returns true
44671 if the permutation references both operands. */
44672
44673 static bool
44674 canonicalize_perm (struct expand_vec_perm_d *d)
44675 {
44676 int i, which, nelt = d->nelt;
44677
44678 for (i = which = 0; i < nelt; ++i)
44679 which |= (d->perm[i] < nelt ? 1 : 2);
44680
44681 d->one_operand_p = true;
44682 switch (which)
44683 {
44684 default:
44685 gcc_unreachable();
44686
44687 case 3:
44688 if (!rtx_equal_p (d->op0, d->op1))
44689 {
44690 d->one_operand_p = false;
44691 break;
44692 }
44693 /* The elements of PERM do not suggest that only the first operand
44694 is used, but both operands are identical. Allow easier matching
44695 of the permutation by folding the permutation into the single
44696 input vector. */
44697 /* FALLTHRU */
44698
44699 case 2:
44700 for (i = 0; i < nelt; ++i)
44701 d->perm[i] &= nelt - 1;
44702 d->op0 = d->op1;
44703 break;
44704
44705 case 1:
44706 d->op1 = d->op0;
44707 break;
44708 }
44709
44710 return (which == 3);
44711 }
44712
44713 bool
44714 ix86_expand_vec_perm_const (rtx operands[4])
44715 {
44716 struct expand_vec_perm_d d;
44717 unsigned char perm[MAX_VECT_LEN];
44718 int i, nelt;
44719 bool two_args;
44720 rtx sel;
44721
44722 d.target = operands[0];
44723 d.op0 = operands[1];
44724 d.op1 = operands[2];
44725 sel = operands[3];
44726
44727 d.vmode = GET_MODE (d.target);
44728 gcc_assert (VECTOR_MODE_P (d.vmode));
44729 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
44730 d.testing_p = false;
44731
44732 gcc_assert (GET_CODE (sel) == CONST_VECTOR);
44733 gcc_assert (XVECLEN (sel, 0) == nelt);
44734 gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
44735
44736 for (i = 0; i < nelt; ++i)
44737 {
44738 rtx e = XVECEXP (sel, 0, i);
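      /* Reduce each selector value modulo 2 * nelt so it cannot index past
	 the two input vectors.  */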
44739 int ei = INTVAL (e) & (2 * nelt - 1);
44740 d.perm[i] = ei;
44741 perm[i] = ei;
44742 }
44743
44744 two_args = canonicalize_perm (&d);
44745
44746 if (ix86_expand_vec_perm_const_1 (&d))
44747 return true;
44748
44749 /* If the selector says both arguments are needed, but the operands are the
44750 same, the above tried to expand with one_operand_p and flattened selector.
44751 If that didn't work, retry without one_operand_p; we succeeded with that
44752 during testing. */
44753 if (two_args && d.one_operand_p)
44754 {
44755 d.one_operand_p = false;
44756 memcpy (d.perm, perm, sizeof (perm));
44757 return ix86_expand_vec_perm_const_1 (&d);
44758 }
44759
44760 return false;
44761 }
44762
44763 /* Implement targetm.vectorize.vec_perm_const_ok. */
44764
44765 static bool
44766 ix86_vectorize_vec_perm_const_ok (enum machine_mode vmode,
44767 const unsigned char *sel)
44768 {
44769 struct expand_vec_perm_d d;
44770 unsigned int i, nelt, which;
44771 bool ret;
44772
44773 d.vmode = vmode;
44774 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
44775 d.testing_p = true;
44776
44777 /* Given sufficient ISA support we can just return true here
44778 for selected vector modes. */
44779 if (d.vmode == V16SImode || d.vmode == V16SFmode
44780 || d.vmode == V8DFmode || d.vmode == V8DImode)
44781 /* All implementable with a single vpermi2 insn. */
44782 return true;
44783 if (GET_MODE_SIZE (d.vmode) == 16)
44784 {
44785 /* All implementable with a single vpperm insn. */
44786 if (TARGET_XOP)
44787 return true;
44788 /* All implementable with 2 pshufb + 1 ior. */
44789 if (TARGET_SSSE3)
44790 return true;
44791 /* All implementable with shufpd or unpck[lh]pd. */
44792 if (d.nelt == 2)
44793 return true;
44794 }
44795
44796 /* Extract the values from the vector CST into the permutation
44797 array in D. */
44798 memcpy (d.perm, sel, nelt);
44799 for (i = which = 0; i < nelt; ++i)
44800 {
44801 unsigned char e = d.perm[i];
44802 gcc_assert (e < 2 * nelt);
44803 which |= (e < nelt ? 1 : 2);
44804 }
44805
44806 /* For all elements from second vector, fold the elements to first. */
44807 if (which == 2)
44808 for (i = 0; i < nelt; ++i)
44809 d.perm[i] -= nelt;
44810
44811 /* Check whether the mask can be applied to the vector type. */
44812 d.one_operand_p = (which != 3);
44813
44814 /* Implementable with shufps or pshufd. */
44815 if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode))
44816 return true;
44817
44818 /* Otherwise we have to go through the motions and see if we can
44819 figure out how to generate the requested permutation. */
44820 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
44821 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
44822 if (!d.one_operand_p)
44823 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
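  /* These raw registers are only placeholders: with d.testing_p set we
     merely check whether a sequence can be generated, and the insns
     produced inside the sequence below are discarded.  */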
44824
44825 start_sequence ();
44826 ret = ix86_expand_vec_perm_const_1 (&d);
44827 end_sequence ();
44828
44829 return ret;
44830 }
44831
44832 void
44833 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
44834 {
44835 struct expand_vec_perm_d d;
44836 unsigned i, nelt;
44837
44838 d.target = targ;
44839 d.op0 = op0;
44840 d.op1 = op1;
44841 d.vmode = GET_MODE (targ);
44842 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
44843 d.one_operand_p = false;
44844 d.testing_p = false;
44845
44846 for (i = 0; i < nelt; ++i)
44847 d.perm[i] = i * 2 + odd;
44848
44849 /* We'll either be able to implement the permutation directly... */
44850 if (expand_vec_perm_1 (&d))
44851 return;
44852
44853 /* ... or we use the special-case patterns. */
44854 expand_vec_perm_even_odd_1 (&d, odd);
44855 }
44856
44857 static void
44858 ix86_expand_vec_interleave (rtx targ, rtx op0, rtx op1, bool high_p)
44859 {
44860 struct expand_vec_perm_d d;
44861 unsigned i, nelt, base;
44862 bool ok;
44863
44864 d.target = targ;
44865 d.op0 = op0;
44866 d.op1 = op1;
44867 d.vmode = GET_MODE (targ);
44868 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
44869 d.one_operand_p = false;
44870 d.testing_p = false;
44871
44872 base = high_p ? nelt / 2 : 0;
44873 for (i = 0; i < nelt / 2; ++i)
44874 {
44875 d.perm[i * 2] = i + base;
44876 d.perm[i * 2 + 1] = i + base + nelt;
44877 }
44878
44879 /* Note that for AVX this isn't one instruction. */
44880 ok = ix86_expand_vec_perm_const_1 (&d);
44881 gcc_assert (ok);
44882 }
44883
44884
44885 /* Expand a vector operation CODE for a V*QImode in terms of the
44886 same operation on V*HImode. */
44887
44888 void
44889 ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2)
44890 {
44891 enum machine_mode qimode = GET_MODE (dest);
44892 enum machine_mode himode;
44893 rtx (*gen_il) (rtx, rtx, rtx);
44894 rtx (*gen_ih) (rtx, rtx, rtx);
44895 rtx op1_l, op1_h, op2_l, op2_h, res_l, res_h;
44896 struct expand_vec_perm_d d;
44897 bool ok, full_interleave;
44898 bool uns_p = false;
44899 int i;
44900
44901 switch (qimode)
44902 {
44903 case V16QImode:
44904 himode = V8HImode;
44905 gen_il = gen_vec_interleave_lowv16qi;
44906 gen_ih = gen_vec_interleave_highv16qi;
44907 break;
44908 case V32QImode:
44909 himode = V16HImode;
44910 gen_il = gen_avx2_interleave_lowv32qi;
44911 gen_ih = gen_avx2_interleave_highv32qi;
44912 break;
44913 default:
44914 gcc_unreachable ();
44915 }
44916
44917 op2_l = op2_h = op2;
44918 switch (code)
44919 {
44920 case MULT:
44921 /* Unpack data such that we've got a source byte in each low byte of
44922 each word. We don't care what goes into the high byte of each word.
44923 Rather than trying to get zero in there, it is most convenient to let
44924 it be a copy of the low byte. */
44925 op2_l = gen_reg_rtx (qimode);
44926 op2_h = gen_reg_rtx (qimode);
44927 emit_insn (gen_il (op2_l, op2, op2));
44928 emit_insn (gen_ih (op2_h, op2, op2));
44929 /* FALLTHRU */
44930
44931 op1_l = gen_reg_rtx (qimode);
44932 op1_h = gen_reg_rtx (qimode);
44933 emit_insn (gen_il (op1_l, op1, op1));
44934 emit_insn (gen_ih (op1_h, op1, op1));
44935 full_interleave = qimode == V16QImode;
44936 break;
44937
44938 case ASHIFT:
44939 case LSHIFTRT:
44940 uns_p = true;
44941 /* FALLTHRU */
44942 case ASHIFTRT:
44943 op1_l = gen_reg_rtx (himode);
44944 op1_h = gen_reg_rtx (himode);
44945 ix86_expand_sse_unpack (op1_l, op1, uns_p, false);
44946 ix86_expand_sse_unpack (op1_h, op1, uns_p, true);
44947 full_interleave = true;
44948 break;
44949 default:
44950 gcc_unreachable ();
44951 }
44952
44953 /* Perform the operation. */
44954 res_l = expand_simple_binop (himode, code, op1_l, op2_l, NULL_RTX,
44955 1, OPTAB_DIRECT);
44956 res_h = expand_simple_binop (himode, code, op1_h, op2_h, NULL_RTX,
44957 1, OPTAB_DIRECT);
44958 gcc_assert (res_l && res_h);
44959
44960 /* Merge the data back into the right place. */
44961 d.target = dest;
44962 d.op0 = gen_lowpart (qimode, res_l);
44963 d.op1 = gen_lowpart (qimode, res_h);
44964 d.vmode = qimode;
44965 d.nelt = GET_MODE_NUNITS (qimode);
44966 d.one_operand_p = false;
44967 d.testing_p = false;
44968
44969 if (full_interleave)
44970 {
44971 /* For SSE2, we used a full interleave, so the desired
44972 results are in the even elements. */
44973 for (i = 0; i < 32; ++i)
44974 d.perm[i] = i * 2;
44975 }
44976 else
44977 {
44978 /* For AVX, the interleave used above was not cross-lane. So we want
44979 the even elements, but with the second and third quarters swapped.
44980 Happily, that permutation is one insn shorter than plain even extraction. */
44981 for (i = 0; i < 32; ++i)
44982 d.perm[i] = i * 2 + ((i & 24) == 8 ? 16 : (i & 24) == 16 ? -16 : 0);
44983 }
44984
44985 ok = ix86_expand_vec_perm_const_1 (&d);
44986 gcc_assert (ok);
44987
44988 set_unique_reg_note (get_last_insn (), REG_EQUAL,
44989 gen_rtx_fmt_ee (code, qimode, op1, op2));
44990 }
44991
44992 /* Helper function of ix86_expand_mul_widen_evenodd. Return true
44993 if op is CONST_VECTOR with all odd elements equal to their
44994 preceding element. */
44995
44996 static bool
44997 const_vector_equal_evenodd_p (rtx op)
44998 {
44999 enum machine_mode mode = GET_MODE (op);
45000 int i, nunits = GET_MODE_NUNITS (mode);
45001 if (GET_CODE (op) != CONST_VECTOR
45002 || nunits != CONST_VECTOR_NUNITS (op))
45003 return false;
45004 for (i = 0; i < nunits; i += 2)
45005 if (CONST_VECTOR_ELT (op, i) != CONST_VECTOR_ELT (op, i + 1))
45006 return false;
45007 return true;
45008 }
45009
45010 void
45011 ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
45012 bool uns_p, bool odd_p)
45013 {
45014 enum machine_mode mode = GET_MODE (op1);
45015 enum machine_mode wmode = GET_MODE (dest);
45016 rtx x;
45017 rtx orig_op1 = op1, orig_op2 = op2;
45018
45019 if (!nonimmediate_operand (op1, mode))
45020 op1 = force_reg (mode, op1);
45021 if (!nonimmediate_operand (op2, mode))
45022 op2 = force_reg (mode, op2);
45023
45024 /* We only play even/odd games with vectors of SImode. */
45025 gcc_assert (mode == V4SImode || mode == V8SImode || mode == V16SImode);
45026
45027 /* If we're looking for the odd results, shift those members down to
45028 the even slots. For some CPUs this is faster than a PSHUFD. */
45029 if (odd_p)
45030 {
45031 /* For XOP use vpmacsdqh, but only for smult, as it is only
45032 signed. */
45033 if (TARGET_XOP && mode == V4SImode && !uns_p)
45034 {
45035 x = force_reg (wmode, CONST0_RTX (wmode));
45036 emit_insn (gen_xop_pmacsdqh (dest, op1, op2, x));
45037 return;
45038 }
45039
45040 x = GEN_INT (GET_MODE_UNIT_BITSIZE (mode));
45041 if (!const_vector_equal_evenodd_p (orig_op1))
45042 op1 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op1),
45043 x, NULL, 1, OPTAB_DIRECT);
45044 if (!const_vector_equal_evenodd_p (orig_op2))
45045 op2 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op2),
45046 x, NULL, 1, OPTAB_DIRECT);
45047 op1 = gen_lowpart (mode, op1);
45048 op2 = gen_lowpart (mode, op2);
45049 }
45050
45051 if (mode == V16SImode)
45052 {
45053 if (uns_p)
45054 x = gen_vec_widen_umult_even_v16si (dest, op1, op2);
45055 else
45056 x = gen_vec_widen_smult_even_v16si (dest, op1, op2);
45057 }
45058 else if (mode == V8SImode)
45059 {
45060 if (uns_p)
45061 x = gen_vec_widen_umult_even_v8si (dest, op1, op2);
45062 else
45063 x = gen_vec_widen_smult_even_v8si (dest, op1, op2);
45064 }
45065 else if (uns_p)
45066 x = gen_vec_widen_umult_even_v4si (dest, op1, op2);
45067 else if (TARGET_SSE4_1)
45068 x = gen_sse4_1_mulv2siv2di3 (dest, op1, op2);
45069 else
45070 {
45071 rtx s1, s2, t0, t1, t2;
45072
45073 /* The easiest way to implement this without PMULDQ is to go through
45074 the motions as if we are performing a full 64-bit multiply, except
45075 that we need to do less shuffling of the elements. */
45076
45077 /* Compute the sign-extension, aka highparts, of the two operands. */
45078 s1 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
45079 op1, pc_rtx, pc_rtx);
45080 s2 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
45081 op2, pc_rtx, pc_rtx);
45082
45083 /* Multiply LO(A) * HI(B), and vice-versa. */
45084 t1 = gen_reg_rtx (wmode);
45085 t2 = gen_reg_rtx (wmode);
45086 emit_insn (gen_vec_widen_umult_even_v4si (t1, s1, op2));
45087 emit_insn (gen_vec_widen_umult_even_v4si (t2, s2, op1));
45088
45089 /* Multiply LO(A) * LO(B). */
45090 t0 = gen_reg_rtx (wmode);
45091 emit_insn (gen_vec_widen_umult_even_v4si (t0, op1, op2));
45092
45093 /* Combine and shift the highparts into place. */
45094 t1 = expand_binop (wmode, add_optab, t1, t2, t1, 1, OPTAB_DIRECT);
45095 t1 = expand_binop (wmode, ashl_optab, t1, GEN_INT (32), t1,
45096 1, OPTAB_DIRECT);
45097
45098 /* Combine high and low parts. */
45099 force_expand_binop (wmode, add_optab, t0, t1, dest, 1, OPTAB_DIRECT);
45100 return;
45101 }
45102 emit_insn (x);
45103 }
45104
45105 void
45106 ix86_expand_mul_widen_hilo (rtx dest, rtx op1, rtx op2,
45107 bool uns_p, bool high_p)
45108 {
45109 enum machine_mode wmode = GET_MODE (dest);
45110 enum machine_mode mode = GET_MODE (op1);
45111 rtx t1, t2, t3, t4, mask;
45112
45113 switch (mode)
45114 {
45115 case V4SImode:
45116 t1 = gen_reg_rtx (mode);
45117 t2 = gen_reg_rtx (mode);
45118 if (TARGET_XOP && !uns_p)
45119 {
45120 /* With XOP, we have pmacsdqh, aka mul_widen_odd. In this case,
45121 shuffle the elements once so that all elements are in the right
45122 place for immediate use: { A C B D }. */
45123 emit_insn (gen_sse2_pshufd_1 (t1, op1, const0_rtx, const2_rtx,
45124 const1_rtx, GEN_INT (3)));
45125 emit_insn (gen_sse2_pshufd_1 (t2, op2, const0_rtx, const2_rtx,
45126 const1_rtx, GEN_INT (3)));
45127 }
45128 else
45129 {
45130 /* Put the elements into place for the multiply. */
45131 ix86_expand_vec_interleave (t1, op1, op1, high_p);
45132 ix86_expand_vec_interleave (t2, op2, op2, high_p);
45133 high_p = false;
45134 }
45135 ix86_expand_mul_widen_evenodd (dest, t1, t2, uns_p, high_p);
45136 break;
45137
45138 case V8SImode:
45139 /* Shuffle the elements between the lanes. After this we
45140 have { A B E F | C D G H } for each operand. */
45141 t1 = gen_reg_rtx (V4DImode);
45142 t2 = gen_reg_rtx (V4DImode);
45143 emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, op1),
45144 const0_rtx, const2_rtx,
45145 const1_rtx, GEN_INT (3)));
45146 emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, op2),
45147 const0_rtx, const2_rtx,
45148 const1_rtx, GEN_INT (3)));
45149
45150 /* Shuffle the elements within the lanes. After this we
45151 have { A A B B | C C D D } or { E E F F | G G H H }. */
45152 t3 = gen_reg_rtx (V8SImode);
45153 t4 = gen_reg_rtx (V8SImode);
45154 mask = GEN_INT (high_p
45155 ? 2 + (2 << 2) + (3 << 4) + (3 << 6)
45156 : 0 + (0 << 2) + (1 << 4) + (1 << 6));
45157 emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1), mask));
45158 emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2), mask));
45159
45160 ix86_expand_mul_widen_evenodd (dest, t3, t4, uns_p, false);
45161 break;
45162
45163 case V8HImode:
45164 case V16HImode:
45165 t1 = expand_binop (mode, smul_optab, op1, op2, NULL_RTX,
45166 uns_p, OPTAB_DIRECT);
45167 t2 = expand_binop (mode,
45168 uns_p ? umul_highpart_optab : smul_highpart_optab,
45169 op1, op2, NULL_RTX, uns_p, OPTAB_DIRECT);
45170 gcc_assert (t1 && t2);
45171
45172 t3 = gen_reg_rtx (mode);
45173 ix86_expand_vec_interleave (t3, t1, t2, high_p);
45174 emit_move_insn (dest, gen_lowpart (wmode, t3));
45175 break;
45176
45177 case V16QImode:
45178 case V32QImode:
45179 t1 = gen_reg_rtx (wmode);
45180 t2 = gen_reg_rtx (wmode);
45181 ix86_expand_sse_unpack (t1, op1, uns_p, high_p);
45182 ix86_expand_sse_unpack (t2, op2, uns_p, high_p);
45183
45184 emit_insn (gen_rtx_SET (VOIDmode, dest, gen_rtx_MULT (wmode, t1, t2)));
45185 break;
45186
45187 default:
45188 gcc_unreachable ();
45189 }
45190 }
45191
45192 void
45193 ix86_expand_sse2_mulv4si3 (rtx op0, rtx op1, rtx op2)
45194 {
45195 rtx res_1, res_2, res_3, res_4;
45196
45197 res_1 = gen_reg_rtx (V4SImode);
45198 res_2 = gen_reg_rtx (V4SImode);
45199 res_3 = gen_reg_rtx (V2DImode);
45200 res_4 = gen_reg_rtx (V2DImode);
45201 ix86_expand_mul_widen_evenodd (res_3, op1, op2, true, false);
45202 ix86_expand_mul_widen_evenodd (res_4, op1, op2, true, true);
45203
45204 /* Move the results in element 2 down to element 1; we don't care
45205 what goes in elements 2 and 3. Then we can merge the parts
45206 back together with an interleave.
45207
45208 Note that two other sequences were tried:
45209 (1) Use interleaves at the start instead of psrldq, which allows
45210 us to use a single shufps to merge things back at the end.
45211 (2) Use shufps here to combine the two vectors, then pshufd to
45212 put the elements in the correct order.
45213 In both cases the cost of the reformatting stall was too high
45214 and the overall sequence slower. */
45215
45216 emit_insn (gen_sse2_pshufd_1 (res_1, gen_lowpart (V4SImode, res_3),
45217 const0_rtx, const2_rtx,
45218 const0_rtx, const0_rtx));
45219 emit_insn (gen_sse2_pshufd_1 (res_2, gen_lowpart (V4SImode, res_4),
45220 const0_rtx, const2_rtx,
45221 const0_rtx, const0_rtx));
45222 res_1 = emit_insn (gen_vec_interleave_lowv4si (op0, res_1, res_2));
45223
45224 set_unique_reg_note (res_1, REG_EQUAL, gen_rtx_MULT (V4SImode, op1, op2));
45225 }
45226
45227 void
45228 ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
45229 {
45230 enum machine_mode mode = GET_MODE (op0);
45231 rtx t1, t2, t3, t4, t5, t6;
45232
45233 if (TARGET_XOP && mode == V2DImode)
45234 {
45235 /* op1: A,B,C,D, op2: E,F,G,H */
45236 op1 = gen_lowpart (V4SImode, op1);
45237 op2 = gen_lowpart (V4SImode, op2);
45238
45239 t1 = gen_reg_rtx (V4SImode);
45240 t2 = gen_reg_rtx (V4SImode);
45241 t3 = gen_reg_rtx (V2DImode);
45242 t4 = gen_reg_rtx (V2DImode);
45243
45244 /* t1: B,A,D,C */
45245 emit_insn (gen_sse2_pshufd_1 (t1, op1,
45246 GEN_INT (1),
45247 GEN_INT (0),
45248 GEN_INT (3),
45249 GEN_INT (2)));
45250
45251 /* t2: (B*E),(A*F),(D*G),(C*H) */
45252 emit_insn (gen_mulv4si3 (t2, t1, op2));
45253
45254 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
45255 emit_insn (gen_xop_phadddq (t3, t2));
45256
45257 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
45258 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
45259
45260 /* Multiply the lower parts and add all the partial products. */
45261 t5 = gen_reg_rtx (V2DImode);
45262 emit_insn (gen_vec_widen_umult_even_v4si (t5,
45263 gen_lowpart (V4SImode, op1),
45264 gen_lowpart (V4SImode, op2)));
45265 op0 = expand_binop (mode, add_optab, t5, t4, op0, 1, OPTAB_DIRECT);
45266
45267 }
45268 else
45269 {
45270 enum machine_mode nmode;
45271 rtx (*umul) (rtx, rtx, rtx);
45272
45273 if (mode == V2DImode)
45274 {
45275 umul = gen_vec_widen_umult_even_v4si;
45276 nmode = V4SImode;
45277 }
45278 else if (mode == V4DImode)
45279 {
45280 umul = gen_vec_widen_umult_even_v8si;
45281 nmode = V8SImode;
45282 }
45283 else if (mode == V8DImode)
45284 {
45285 umul = gen_vec_widen_umult_even_v16si;
45286 nmode = V16SImode;
45287 }
45288 else
45289 gcc_unreachable ();
45290
45291
45292 /* Multiply low parts. */
45293 t1 = gen_reg_rtx (mode);
45294 emit_insn (umul (t1, gen_lowpart (nmode, op1), gen_lowpart (nmode, op2)));
45295
45296 /* Shift input vectors right 32 bits so we can multiply high parts. */
45297 t6 = GEN_INT (32);
45298 t2 = expand_binop (mode, lshr_optab, op1, t6, NULL, 1, OPTAB_DIRECT);
45299 t3 = expand_binop (mode, lshr_optab, op2, t6, NULL, 1, OPTAB_DIRECT);
45300
45301 /* Multiply high parts by low parts. */
45302 t4 = gen_reg_rtx (mode);
45303 t5 = gen_reg_rtx (mode);
45304 emit_insn (umul (t4, gen_lowpart (nmode, t2), gen_lowpart (nmode, op2)));
45305 emit_insn (umul (t5, gen_lowpart (nmode, t3), gen_lowpart (nmode, op1)));
45306
45307 /* Combine and shift the highparts back. */
45308 t4 = expand_binop (mode, add_optab, t4, t5, t4, 1, OPTAB_DIRECT);
45309 t4 = expand_binop (mode, ashl_optab, t4, t6, t4, 1, OPTAB_DIRECT);
45310
45311 /* Combine high and low parts. */
45312 force_expand_binop (mode, add_optab, t1, t4, op0, 1, OPTAB_DIRECT);
45313 }
45314
45315 set_unique_reg_note (get_last_insn (), REG_EQUAL,
45316 gen_rtx_MULT (mode, op1, op2));
45317 }
45318
45319 /* Calculate integer abs() using only SSE2 instructions. */
45320
45321 void
45322 ix86_expand_sse2_abs (rtx target, rtx input)
45323 {
45324 enum machine_mode mode = GET_MODE (target);
45325 rtx tmp0, tmp1, x;
45326
45327 switch (mode)
45328 {
45329 /* For 32-bit signed integer X, the best way to calculate the absolute
45330 value of X is (((signed) X >> (W-1)) ^ X) - ((signed) X >> (W-1)). */
45331 case V4SImode:
45332 tmp0 = expand_simple_binop (mode, ASHIFTRT, input,
45333 GEN_INT (GET_MODE_BITSIZE
45334 (GET_MODE_INNER (mode)) - 1),
45335 NULL, 0, OPTAB_DIRECT);
45336 tmp1 = expand_simple_binop (mode, XOR, tmp0, input,
45337 NULL, 0, OPTAB_DIRECT);
45338 x = expand_simple_binop (mode, MINUS, tmp1, tmp0,
45339 target, 0, OPTAB_DIRECT);
45340 break;
45341
45342 /* For 16-bit signed integer X, the best way to calculate the absolute
45343 value of X is max (X, -X), as SSE2 provides the PMAXSW insn. */
45344 case V8HImode:
45345 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
45346
45347 x = expand_simple_binop (mode, SMAX, tmp0, input,
45348 target, 0, OPTAB_DIRECT);
45349 break;
45350
45351 /* For 8-bit signed integer X, the best way to calculate the absolute
45352 value of X is min ((unsigned char) X, (unsigned char) (-X)),
45353 as SSE2 provides the PMINUB insn. */
45354 case V16QImode:
45355 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
45356
45357 x = expand_simple_binop (V16QImode, UMIN, tmp0, input,
45358 target, 0, OPTAB_DIRECT);
45359 break;
45360
45361 default:
45362 gcc_unreachable ();
45363 }
45364
45365 if (x != target)
45366 emit_move_insn (target, x);
45367 }
45368
45369 /* Expand an insert into a vector register through a pinsr insn.
45370 Return true if successful. */
45371
45372 bool
45373 ix86_expand_pinsr (rtx *operands)
45374 {
45375 rtx dst = operands[0];
45376 rtx src = operands[3];
45377
45378 unsigned int size = INTVAL (operands[1]);
45379 unsigned int pos = INTVAL (operands[2]);
45380
45381 if (GET_CODE (dst) == SUBREG)
45382 {
45383 pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
45384 dst = SUBREG_REG (dst);
45385 }
45386
45387 if (GET_CODE (src) == SUBREG)
45388 src = SUBREG_REG (src);
45389
45390 switch (GET_MODE (dst))
45391 {
45392 case V16QImode:
45393 case V8HImode:
45394 case V4SImode:
45395 case V2DImode:
45396 {
45397 enum machine_mode srcmode, dstmode;
45398 rtx (*pinsr)(rtx, rtx, rtx, rtx);
45399
45400 srcmode = mode_for_size (size, MODE_INT, 0);
45401
45402 switch (srcmode)
45403 {
45404 case QImode:
45405 if (!TARGET_SSE4_1)
45406 return false;
45407 dstmode = V16QImode;
45408 pinsr = gen_sse4_1_pinsrb;
45409 break;
45410
45411 case HImode:
45412 if (!TARGET_SSE2)
45413 return false;
45414 dstmode = V8HImode;
45415 pinsr = gen_sse2_pinsrw;
45416 break;
45417
45418 case SImode:
45419 if (!TARGET_SSE4_1)
45420 return false;
45421 dstmode = V4SImode;
45422 pinsr = gen_sse4_1_pinsrd;
45423 break;
45424
45425 case DImode:
45426 gcc_assert (TARGET_64BIT);
45427 if (!TARGET_SSE4_1)
45428 return false;
45429 dstmode = V2DImode;
45430 pinsr = gen_sse4_1_pinsrq;
45431 break;
45432
45433 default:
45434 return false;
45435 }
45436
45437 rtx d = dst;
45438 if (GET_MODE (dst) != dstmode)
45439 d = gen_reg_rtx (dstmode);
45440 src = gen_lowpart (srcmode, src);
45441
45442 pos /= size;
45443
45444 emit_insn (pinsr (d, gen_lowpart (dstmode, dst), src,
45445 GEN_INT (1 << pos)));
45446 if (d != dst)
45447 emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
45448 return true;
45449 }
45450
45451 default:
45452 return false;
45453 }
45454 }
45455 \f
45456 /* This function returns the calling ABI specific va_list type node.
45457 It returns the FNDECL specific va_list type. */
45458
45459 static tree
45460 ix86_fn_abi_va_list (tree fndecl)
45461 {
45462 if (!TARGET_64BIT)
45463 return va_list_type_node;
45464 gcc_assert (fndecl != NULL_TREE);
45465
45466 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
45467 return ms_va_list_type_node;
45468 else
45469 return sysv_va_list_type_node;
45470 }
45471
45472 /* Returns the canonical va_list type specified by TYPE. If there
45473 is no valid TYPE provided, it returns NULL_TREE. */
45474
45475 static tree
45476 ix86_canonical_va_list_type (tree type)
45477 {
45478 tree wtype, htype;
45479
45480 /* Resolve references and pointers to va_list type. */
45481 if (TREE_CODE (type) == MEM_REF)
45482 type = TREE_TYPE (type);
45483 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
45484 type = TREE_TYPE (type);
45485 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
45486 type = TREE_TYPE (type);
45487
45488 if (TARGET_64BIT && va_list_type_node != NULL_TREE)
45489 {
45490 wtype = va_list_type_node;
45491 gcc_assert (wtype != NULL_TREE);
45492 htype = type;
45493 if (TREE_CODE (wtype) == ARRAY_TYPE)
45494 {
45495 /* If va_list is an array type, the argument may have decayed
45496 to a pointer type, e.g. by being passed to another function.
45497 In that case, unwrap both types so that we can compare the
45498 underlying records. */
45499 if (TREE_CODE (htype) == ARRAY_TYPE
45500 || POINTER_TYPE_P (htype))
45501 {
45502 wtype = TREE_TYPE (wtype);
45503 htype = TREE_TYPE (htype);
45504 }
45505 }
45506 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
45507 return va_list_type_node;
45508 wtype = sysv_va_list_type_node;
45509 gcc_assert (wtype != NULL_TREE);
45510 htype = type;
45511 if (TREE_CODE (wtype) == ARRAY_TYPE)
45512 {
45513 /* If va_list is an array type, the argument may have decayed
45514 to a pointer type, e.g. by being passed to another function.
45515 In that case, unwrap both types so that we can compare the
45516 underlying records. */
45517 if (TREE_CODE (htype) == ARRAY_TYPE
45518 || POINTER_TYPE_P (htype))
45519 {
45520 wtype = TREE_TYPE (wtype);
45521 htype = TREE_TYPE (htype);
45522 }
45523 }
45524 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
45525 return sysv_va_list_type_node;
45526 wtype = ms_va_list_type_node;
45527 gcc_assert (wtype != NULL_TREE);
45528 htype = type;
45529 if (TREE_CODE (wtype) == ARRAY_TYPE)
45530 {
45531 /* If va_list is an array type, the argument may have decayed
45532 to a pointer type, e.g. by being passed to another function.
45533 In that case, unwrap both types so that we can compare the
45534 underlying records. */
45535 if (TREE_CODE (htype) == ARRAY_TYPE
45536 || POINTER_TYPE_P (htype))
45537 {
45538 wtype = TREE_TYPE (wtype);
45539 htype = TREE_TYPE (htype);
45540 }
45541 }
45542 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
45543 return ms_va_list_type_node;
45544 return NULL_TREE;
45545 }
45546 return std_canonical_va_list_type (type);
45547 }
45548
45549 /* Iterate through the target-specific builtin types for va_list.
45550 IDX denotes the iterator, *PTREE is set to the result type of
45551 the va_list builtin, and *PNAME to its internal type.
45552 Returns zero if there is no element for this index, otherwise
45553 IDX should be increased upon the next call.
45554 Note, do not iterate a base builtin's name like __builtin_va_list.
45555 Used from c_common_nodes_and_builtins. */
45556
45557 static int
45558 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
45559 {
45560 if (TARGET_64BIT)
45561 {
45562 switch (idx)
45563 {
45564 default:
45565 break;
45566
45567 case 0:
45568 *ptree = ms_va_list_type_node;
45569 *pname = "__builtin_ms_va_list";
45570 return 1;
45571
45572 case 1:
45573 *ptree = sysv_va_list_type_node;
45574 *pname = "__builtin_sysv_va_list";
45575 return 1;
45576 }
45577 }
45578
45579 return 0;
45580 }
45581
45582 #undef TARGET_SCHED_DISPATCH
45583 #define TARGET_SCHED_DISPATCH has_dispatch
45584 #undef TARGET_SCHED_DISPATCH_DO
45585 #define TARGET_SCHED_DISPATCH_DO do_dispatch
45586 #undef TARGET_SCHED_REASSOCIATION_WIDTH
45587 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
45588 #undef TARGET_SCHED_REORDER
45589 #define TARGET_SCHED_REORDER ix86_sched_reorder
45590 #undef TARGET_SCHED_ADJUST_PRIORITY
45591 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
45592 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
45593 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
45594 ix86_dependencies_evaluation_hook
45595
45596 /* The size of the dispatch window is the total number of bytes of
45597 object code allowed in a window. */
45598 #define DISPATCH_WINDOW_SIZE 16
45599
45600 /* Number of dispatch windows considered for scheduling. */
45601 #define MAX_DISPATCH_WINDOWS 3
45602
45603 /* Maximum number of instructions in a window. */
45604 #define MAX_INSN 4
45605
45606 /* Maximum number of immediate operands in a window. */
45607 #define MAX_IMM 4
45608
45609 /* Maximum number of immediate bits allowed in a window. */
45610 #define MAX_IMM_SIZE 128
45611
45612 /* Maximum number of 32 bit immediates allowed in a window. */
45613 #define MAX_IMM_32 4
45614
45615 /* Maximum number of 64 bit immediates allowed in a window. */
45616 #define MAX_IMM_64 2
45617
45618 /* Maximum total of loads or prefetches allowed in a window. */
45619 #define MAX_LOAD 2
45620
45621 /* Maximum total of stores allowed in a window. */
45622 #define MAX_STORE 1
45623
45624 #undef BIG
45625 #define BIG 100
45626
45627
45628 /* Dispatch groups. Instructions that affect the mix in a dispatch window. */
45629 enum dispatch_group {
45630 disp_no_group = 0,
45631 disp_load,
45632 disp_store,
45633 disp_load_store,
45634 disp_prefetch,
45635 disp_imm,
45636 disp_imm_32,
45637 disp_imm_64,
45638 disp_branch,
45639 disp_cmp,
45640 disp_jcc,
45641 disp_last
45642 };
45643
45644 /* Number of allowable groups in a dispatch window. It is an array
45645 indexed by dispatch_group enum. 100 is used as a big number,
45646 because the number of these kinds of operations does not have any
45647 effect in a dispatch window, but we need them for other reasons in
45648 the table. */
45649 static unsigned int num_allowable_groups[disp_last] = {
45650 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
45651 };
45652
45653 char group_name[disp_last + 1][16] = {
45654 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
45655 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
45656 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
45657 };
45658
45659 /* Instruction path. */
45660 enum insn_path {
45661 no_path = 0,
45662 path_single, /* Single micro op. */
45663 path_double, /* Double micro op. */
45664 path_multi, /* Instructions with more than 2 micro ops. */
45665 last_path
45666 };
45667
45668 /* sched_insn_info describes one instruction scheduled into a dispatch
45669 window: the insn itself, its dispatch group, its decode path, and its
45670 encoded length and immediate bytes.
45671
45672 Windows are allocated for each basic block and are linked
45673 together. */
45674 typedef struct sched_insn_info_s {
45675 rtx insn;
45676 enum dispatch_group group;
45677 enum insn_path path;
45678 int byte_len;
45679 int imm_bytes;
45680 } sched_insn_info;
45681
45682 /* Linked list of dispatch windows. This is a two-way list of
45683 dispatch windows of a basic block. It contains information about
45684 the number of uops in the window and the total number of
45685 instructions and of bytes in the object code for this dispatch
45686 window. */
45687 typedef struct dispatch_windows_s {
45688 int num_insn; /* Number of insn in the window. */
45689 int num_uops; /* Number of uops in the window. */
45690 int window_size; /* Number of bytes in the window. */
45691 int window_num; /* Window number, either 0 or 1. */
45692 int num_imm; /* Number of immediates in the window. */
45693 int num_imm_32; /* Number of 32 bit immediates in the window. */
45694 int num_imm_64; /* Number of 64 bit immediates in the window. */
45695 int imm_size; /* Total immediate bytes in the window. */
45696 int num_loads; /* Total memory loads in the window. */
45697 int num_stores; /* Total memory stores in the window. */
45698 int violation; /* Violation exists in window. */
45699 sched_insn_info *window; /* Pointer to the window. */
45700 struct dispatch_windows_s *next;
45701 struct dispatch_windows_s *prev;
45702 } dispatch_windows;
45703
45704 /* Immediate values used in an insn. */
45705 typedef struct imm_info_s
45706 {
45707 int imm;
45708 int imm32;
45709 int imm64;
45710 } imm_info;
45711
45712 static dispatch_windows *dispatch_window_list;
45713 static dispatch_windows *dispatch_window_list1;
45714
45715 /* Get dispatch group of insn. */
45716
45717 static enum dispatch_group
45718 get_mem_group (rtx insn)
45719 {
45720 enum attr_memory memory;
45721
45722 if (INSN_CODE (insn) < 0)
45723 return disp_no_group;
45724 memory = get_attr_memory (insn);
45725 if (memory == MEMORY_STORE)
45726 return disp_store;
45727
45728 if (memory == MEMORY_LOAD)
45729 return disp_load;
45730
45731 if (memory == MEMORY_BOTH)
45732 return disp_load_store;
45733
45734 return disp_no_group;
45735 }
45736
45737 /* Return true if insn is a compare instruction. */
45738
45739 static bool
45740 is_cmp (rtx insn)
45741 {
45742 enum attr_type type;
45743
45744 type = get_attr_type (insn);
45745 return (type == TYPE_TEST
45746 || type == TYPE_ICMP
45747 || type == TYPE_FCMP
45748 || GET_CODE (PATTERN (insn)) == COMPARE);
45749 }
45750
45751 /* Return true if a dispatch violation was encountered. */
45752
45753 static bool
45754 dispatch_violation (void)
45755 {
45756 if (dispatch_window_list->next)
45757 return dispatch_window_list->next->violation;
45758 return dispatch_window_list->violation;
45759 }
45760
45761 /* Return true if insn is a branch instruction. */
45762
45763 static bool
45764 is_branch (rtx insn)
45765 {
45766 return (CALL_P (insn) || JUMP_P (insn));
45767 }
45768
45769 /* Return true if insn is a prefetch instruction. */
45770
45771 static bool
45772 is_prefetch (rtx insn)
45773 {
45774 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
45775 }
45776
45777 /* This function initializes a dispatch window and the list container holding a
45778 pointer to the window. */
45779
45780 static void
45781 init_window (int window_num)
45782 {
45783 int i;
45784 dispatch_windows *new_list;
45785
45786 if (window_num == 0)
45787 new_list = dispatch_window_list;
45788 else
45789 new_list = dispatch_window_list1;
45790
45791 new_list->num_insn = 0;
45792 new_list->num_uops = 0;
45793 new_list->window_size = 0;
45794 new_list->next = NULL;
45795 new_list->prev = NULL;
45796 new_list->window_num = window_num;
45797 new_list->num_imm = 0;
45798 new_list->num_imm_32 = 0;
45799 new_list->num_imm_64 = 0;
45800 new_list->imm_size = 0;
45801 new_list->num_loads = 0;
45802 new_list->num_stores = 0;
45803 new_list->violation = false;
45804
45805 for (i = 0; i < MAX_INSN; i++)
45806 {
45807 new_list->window[i].insn = NULL;
45808 new_list->window[i].group = disp_no_group;
45809 new_list->window[i].path = no_path;
45810 new_list->window[i].byte_len = 0;
45811 new_list->window[i].imm_bytes = 0;
45812 }
45813 return;
45814 }
45815
45816 /* This function allocates and initializes a dispatch window and the
45817 list container holding a pointer to the window. */
45818
45819 static dispatch_windows *
45820 allocate_window (void)
45821 {
45822 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
45823 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
45824
45825 return new_list;
45826 }
45827
45828 /* This routine initializes the dispatch scheduling information. It
45829 initiates building dispatch scheduler tables and constructs the
45830 first dispatch window. */
45831
45832 static void
45833 init_dispatch_sched (void)
45834 {
45835 /* Allocate a dispatch list and a window. */
45836 dispatch_window_list = allocate_window ();
45837 dispatch_window_list1 = allocate_window ();
45838 init_window (0);
45839 init_window (1);
45840 }
45841
45842 /* This function returns true if a branch is detected. End of a basic block
45843 does not have to be a branch, but here we assume only branches end a
45844 window. */
45845
45846 static bool
45847 is_end_basic_block (enum dispatch_group group)
45848 {
45849 return group == disp_branch;
45850 }
45851
45852 /* This function is called when the end of a window processing is reached. */
45853
45854 static void
45855 process_end_window (void)
45856 {
45857 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
45858 if (dispatch_window_list->next)
45859 {
45860 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
45861 gcc_assert (dispatch_window_list->window_size
45862 + dispatch_window_list1->window_size <= 48);
45863 init_window (1);
45864 }
45865 init_window (0);
45866 }
45867
45868 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
45869 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
45870 for 48 bytes of instructions. Note that these windows are not dispatch
45871 windows whose sizes are DISPATCH_WINDOW_SIZE. */
45872
45873 static dispatch_windows *
45874 allocate_next_window (int window_num)
45875 {
45876 if (window_num == 0)
45877 {
45878 if (dispatch_window_list->next)
45879 init_window (1);
45880 init_window (0);
45881 return dispatch_window_list;
45882 }
45883
45884 dispatch_window_list->next = dispatch_window_list1;
45885 dispatch_window_list1->prev = dispatch_window_list;
45886
45887 return dispatch_window_list1;
45888 }
45889
45890 /* Increment the number of immediate operands of an instruction. */
45891
45892 static int
45893 find_constant_1 (rtx *in_rtx, imm_info *imm_values)
45894 {
45895 if (*in_rtx == 0)
45896 return 0;
45897
45898 switch (GET_CODE (*in_rtx))
45899 {
45900 case CONST:
45901 case SYMBOL_REF:
45902 case CONST_INT:
45903 (imm_values->imm)++;
45904 if (x86_64_immediate_operand (*in_rtx, SImode))
45905 (imm_values->imm32)++;
45906 else
45907 (imm_values->imm64)++;
45908 break;
45909
45910 case CONST_DOUBLE:
45911 (imm_values->imm)++;
45912 (imm_values->imm64)++;
45913 break;
45914
45915 case CODE_LABEL:
45916 if (LABEL_KIND (*in_rtx) == LABEL_NORMAL)
45917 {
45918 (imm_values->imm)++;
45919 (imm_values->imm32)++;
45920 }
45921 break;
45922
45923 default:
45924 break;
45925 }
45926
45927 return 0;
45928 }
45929
45930 /* Compute number of immediate operands of an instruction. */
45931
45932 static void
45933 find_constant (rtx in_rtx, imm_info *imm_values)
45934 {
45935 for_each_rtx (INSN_P (in_rtx) ? &PATTERN (in_rtx) : &in_rtx,
45936 (rtx_function) find_constant_1, (void *) imm_values);
45937 }
45938
45939 /* Return total size of immediate operands of an instruction along with number
45940 of corresponding immediate-operands. It initializes its parameters to zero
45941 before calling FIND_CONSTANT.
45942 INSN is the input instruction. IMM is the total of immediates.
45943 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
45944 bit immediates. */
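/* For example (illustrative): an insn carrying one 32-bit and one 64-bit
   immediate yields *IMM = 2, *IMM32 = 1, *IMM64 = 1 and a return value
   of 4 + 8 = 12 bytes.  */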
45945
45946 static int
45947 get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
45948 {
45949 imm_info imm_values = {0, 0, 0};
45950
45951 find_constant (insn, &imm_values);
45952 *imm = imm_values.imm;
45953 *imm32 = imm_values.imm32;
45954 *imm64 = imm_values.imm64;
45955 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
45956 }
45957
45958 /* This function returns true if the instruction INSN has at least one
45959 immediate operand. */
45960
45961 static bool
45962 has_immediate (rtx insn)
45963 {
45964 int num_imm_operand;
45965 int num_imm32_operand;
45966 int num_imm64_operand;
45967
45968 if (insn)
45969 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
45970 &num_imm64_operand);
45971 return false;
45972 }
45973
45974 /* Return the decode path (single, double or multi uop) of an insn. */
45975
45976 static enum insn_path
45977 get_insn_path (rtx insn)
45978 {
45979 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
45980
45981 if ((int)path == 0)
45982 return path_single;
45983
45984 if ((int)path == 1)
45985 return path_double;
45986
45987 return path_multi;
45988 }
45989
45990 /* Return insn dispatch group. */
45991
45992 static enum dispatch_group
45993 get_insn_group (rtx insn)
45994 {
45995 enum dispatch_group group = get_mem_group (insn);
45996 if (group)
45997 return group;
45998
45999 if (is_branch (insn))
46000 return disp_branch;
46001
46002 if (is_cmp (insn))
46003 return disp_cmp;
46004
46005 if (has_immediate (insn))
46006 return disp_imm;
46007
46008 if (is_prefetch (insn))
46009 return disp_prefetch;
46010
46011 return disp_no_group;
46012 }
46013
46014 /* Count number of GROUP restricted instructions in a dispatch
46015 window WINDOW_LIST. */
46016
46017 static int
46018 count_num_restricted (rtx insn, dispatch_windows *window_list)
46019 {
46020 enum dispatch_group group = get_insn_group (insn);
46021 int imm_size;
46022 int num_imm_operand;
46023 int num_imm32_operand;
46024 int num_imm64_operand;
46025
46026 if (group == disp_no_group)
46027 return 0;
46028
46029 if (group == disp_imm)
46030 {
46031 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
46032 &num_imm64_operand);
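      /* Return BIG if the insn's immediates would overflow the window's
	 immediate byte budget (MAX_IMM_SIZE), its immediate operand count
	 (MAX_IMM), or the 32/64-bit immediate slots, where a 64-bit
	 immediate counts as two 32-bit slots; as a special case, also
	 reject a 64-bit immediate that would exactly fill the byte budget
	 of an already well-filled window.  */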
46033 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
46034 || num_imm_operand + window_list->num_imm > MAX_IMM
46035 || (num_imm32_operand > 0
46036 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
46037 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
46038 || (num_imm64_operand > 0
46039 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
46040 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
46041 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
46042 && num_imm64_operand > 0
46043 && ((window_list->num_imm_64 > 0
46044 && window_list->num_insn >= 2)
46045 || window_list->num_insn >= 3)))
46046 return BIG;
46047
46048 return 1;
46049 }
46050
46051 if ((group == disp_load_store
46052 && (window_list->num_loads >= MAX_LOAD
46053 || window_list->num_stores >= MAX_STORE))
46054 || ((group == disp_load
46055 || group == disp_prefetch)
46056 && window_list->num_loads >= MAX_LOAD)
46057 || (group == disp_store
46058 && window_list->num_stores >= MAX_STORE))
46059 return BIG;
46060
46061 return 1;
46062 }
46063
46064 /* This function returns true if insn satisfies dispatch rules on the
46065 last window scheduled. */
46066
46067 static bool
46068 fits_dispatch_window (rtx insn)
46069 {
46070 dispatch_windows *window_list = dispatch_window_list;
46071 dispatch_windows *window_list_next = dispatch_window_list->next;
46072 unsigned int num_restrict;
46073 enum dispatch_group group = get_insn_group (insn);
46074 enum insn_path path = get_insn_path (insn);
46075 int sum;
46076
46077 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
46078 instructions should be given the lowest priority in the
46079 scheduling process in Haifa scheduler to make sure they will be
46080 scheduled in the same dispatch window as the reference to them. */
46081 if (group == disp_jcc || group == disp_cmp)
46082 return false;
46083
46084 /* Check nonrestricted. */
46085 if (group == disp_no_group || group == disp_branch)
46086 return true;
46087
46088 /* Get last dispatch window. */
46089 if (window_list_next)
46090 window_list = window_list_next;
46091
46092 if (window_list->window_num == 1)
46093 {
46094 sum = window_list->prev->window_size + window_list->window_size;
46095
46096 if (sum == 32
46097 || (min_insn_size (insn) + sum) >= 48)
46098 /* Window 1 is full. Go for next window. */
46099 return true;
46100 }
46101
46102 num_restrict = count_num_restricted (insn, window_list);
46103
46104 if (num_restrict > num_allowable_groups[group])
46105 return false;
46106
46107 /* See if it fits in the first window. */
46108 if (window_list->window_num == 0)
46109 {
46110 /* The first window should have only single and double path
46111 uops. */
46112 if (path == path_double
46113 && (window_list->num_uops + 2) > MAX_INSN)
46114 return false;
46115 else if (path != path_single)
46116 return false;
46117 }
46118 return true;
46119 }
46120
46121 /* Add an instruction INSN with NUM_UOPS micro-operations to the
46122 dispatch window WINDOW_LIST. */
46123
46124 static void
46125 add_insn_window (rtx insn, dispatch_windows *window_list, int num_uops)
46126 {
46127 int byte_len = min_insn_size (insn);
46128 int num_insn = window_list->num_insn;
46129 int imm_size;
46130 sched_insn_info *window = window_list->window;
46131 enum dispatch_group group = get_insn_group (insn);
46132 enum insn_path path = get_insn_path (insn);
46133 int num_imm_operand;
46134 int num_imm32_operand;
46135 int num_imm64_operand;
46136
46137 if (!window_list->violation && group != disp_cmp
46138 && !fits_dispatch_window (insn))
46139 window_list->violation = true;
46140
46141 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
46142 &num_imm64_operand);
46143
46144 /* Initialize window with new instruction. */
46145 window[num_insn].insn = insn;
46146 window[num_insn].byte_len = byte_len;
46147 window[num_insn].group = group;
46148 window[num_insn].path = path;
46149 window[num_insn].imm_bytes = imm_size;
46150
46151 window_list->window_size += byte_len;
46152 window_list->num_insn = num_insn + 1;
46153 window_list->num_uops = window_list->num_uops + num_uops;
46154 window_list->imm_size += imm_size;
46155 window_list->num_imm += num_imm_operand;
46156 window_list->num_imm_32 += num_imm32_operand;
46157 window_list->num_imm_64 += num_imm64_operand;
46158
46159 if (group == disp_store)
46160 window_list->num_stores += 1;
46161 else if (group == disp_load
46162 || group == disp_prefetch)
46163 window_list->num_loads += 1;
46164 else if (group == disp_load_store)
46165 {
46166 window_list->num_stores += 1;
46167 window_list->num_loads += 1;
46168 }
46169 }
46170
46171 /* Adds a scheduled instruction, INSN, to the current dispatch window.
46172 If the total bytes of instructions or the number of instructions in
46173 the window exceeds the allowable limit, it allocates a new window. */
46174
46175 static void
46176 add_to_dispatch_window (rtx insn)
46177 {
46178 int byte_len;
46179 dispatch_windows *window_list;
46180 dispatch_windows *next_list;
46181 dispatch_windows *window0_list;
46182 enum insn_path path;
46183 enum dispatch_group insn_group;
46184 bool insn_fits;
46185 int num_insn;
46186 int num_uops;
46187 int window_num;
46188 int insn_num_uops;
46189 int sum;
46190
46191 if (INSN_CODE (insn) < 0)
46192 return;
46193
46194 byte_len = min_insn_size (insn);
46195 window_list = dispatch_window_list;
46196 next_list = window_list->next;
46197 path = get_insn_path (insn);
46198 insn_group = get_insn_group (insn);
46199
46200 /* Get the last dispatch window. */
46201 if (next_list)
46202 window_list = dispatch_window_list->next;
46203
46204 if (path == path_single)
46205 insn_num_uops = 1;
46206 else if (path == path_double)
46207 insn_num_uops = 2;
46208 else
46209 insn_num_uops = (int) path;
46210
46211 /* If the current window is full, get a new window.
46212 Window number zero is full if MAX_INSN uops are scheduled in it.
46213 Window number one is full if window zero's bytes plus window
46214 one's bytes reach 32, if the bytes of the new instruction added
46215 to the total would make it greater than 48, or if it already has
46216 MAX_INSN instructions in it. */
46217 num_insn = window_list->num_insn;
46218 num_uops = window_list->num_uops;
46219 window_num = window_list->window_num;
46220 insn_fits = fits_dispatch_window (insn);
46221
46222 if (num_insn >= MAX_INSN
46223 || num_uops + insn_num_uops > MAX_INSN
46224 || !(insn_fits))
46225 {
46226 window_num = ~window_num & 1;
46227 window_list = allocate_next_window (window_num);
46228 }
46229
46230 if (window_num == 0)
46231 {
46232 add_insn_window (insn, window_list, insn_num_uops);
46233 if (window_list->num_insn >= MAX_INSN
46234 && insn_group == disp_branch)
46235 {
46236 process_end_window ();
46237 return;
46238 }
46239 }
46240 else if (window_num == 1)
46241 {
46242 window0_list = window_list->prev;
46243 sum = window0_list->window_size + window_list->window_size;
46244 if (sum == 32
46245 || (byte_len + sum) >= 48)
46246 {
46247 process_end_window ();
46248 window_list = dispatch_window_list;
46249 }
46250
46251 add_insn_window (insn, window_list, insn_num_uops);
46252 }
46253 else
46254 gcc_unreachable ();
46255
46256 if (is_end_basic_block (insn_group))
46257 {
46258 /* End of basic block is reached; do end-basic-block processing. */
46259 process_end_window ();
46260 return;
46261 }
46262 }
46263
46264 /* Print the dispatch window, WINDOW_NUM, to FILE. */
46265
46266 DEBUG_FUNCTION static void
46267 debug_dispatch_window_file (FILE *file, int window_num)
46268 {
46269 dispatch_windows *list;
46270 int i;
46271
46272 if (window_num == 0)
46273 list = dispatch_window_list;
46274 else
46275 list = dispatch_window_list1;
46276
46277 fprintf (file, "Window #%d:\n", list->window_num);
46278 fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
46279 list->num_insn, list->num_uops, list->window_size);
46280 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
46281 list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
46282
46283 fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
46284 list->num_stores);
46285 fprintf (file, " insn info:\n");
46286
46287 for (i = 0; i < MAX_INSN; i++)
46288 {
46289 if (!list->window[i].insn)
46290 break;
46291 fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
46292 i, group_name[list->window[i].group],
46293 i, (void *)list->window[i].insn,
46294 i, list->window[i].path,
46295 i, list->window[i].byte_len,
46296 i, list->window[i].imm_bytes);
46297 }
46298 }
46299
46300 /* Print to stdout a dispatch window. */
46301
46302 DEBUG_FUNCTION void
46303 debug_dispatch_window (int window_num)
46304 {
46305 debug_dispatch_window_file (stdout, window_num);
46306 }
46307
46308 /* Print INSN dispatch information to FILE. */
46309
46310 DEBUG_FUNCTION static void
46311 debug_insn_dispatch_info_file (FILE *file, rtx insn)
46312 {
46313 int byte_len;
46314 enum insn_path path;
46315 enum dispatch_group group;
46316 int imm_size;
46317 int num_imm_operand;
46318 int num_imm32_operand;
46319 int num_imm64_operand;
46320
46321 if (INSN_CODE (insn) < 0)
46322 return;
46323
46324 byte_len = min_insn_size (insn);
46325 path = get_insn_path (insn);
46326 group = get_insn_group (insn);
46327 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
46328 &num_imm64_operand);
46329
46330 fprintf (file, " insn info:\n");
46331 fprintf (file, " group = %s, path = %d, byte_len = %d\n",
46332 group_name[group], path, byte_len);
46333 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
46334 num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
46335 }
46336
46337 /* Print to STDERR the status of the ready list with respect to
46338 dispatch windows. */
46339
46340 DEBUG_FUNCTION void
46341 debug_ready_dispatch (void)
46342 {
46343 int i;
46344 int no_ready = number_in_ready ();
46345
46346 fprintf (stdout, "Number of ready: %d\n", no_ready);
46347
46348 for (i = 0; i < no_ready; i++)
46349 debug_insn_dispatch_info_file (stdout, get_ready_element (i));
46350 }
46351
46352 /* This routine is the driver of the dispatch scheduler. */
46353
46354 static void
46355 do_dispatch (rtx insn, int mode)
46356 {
46357 if (mode == DISPATCH_INIT)
46358 init_dispatch_sched ();
46359 else if (mode == ADD_TO_DISPATCH_WINDOW)
46360 add_to_dispatch_window (insn);
46361 }
46362
46363 /* Return TRUE if Dispatch Scheduling is supported. */
46364
46365 static bool
46366 has_dispatch (rtx insn, int action)
46367 {
46368 if ((TARGET_BDVER1 || TARGET_BDVER2 || TARGET_BDVER3 || TARGET_BDVER4)
46369 && flag_dispatch_scheduler)
46370 switch (action)
46371 {
46372 default:
46373 return false;
46374
46375 case IS_DISPATCH_ON:
46376 return true;
46377 break;
46378
46379 case IS_CMP:
46380 return is_cmp (insn);
46381
46382 case DISPATCH_VIOLATION:
46383 return dispatch_violation ();
46384
46385 case FITS_DISPATCH_WINDOW:
46386 return fits_dispatch_window (insn);
46387 }
46388
46389 return false;
46390 }
46391
46392 /* Implementation of reassociation_width target hook used by
46393 reassoc phase to identify parallelism level in reassociated
46394 tree. The statement's tree_code is passed in OPC. The argument
46395 type is passed in MODE.
46396
46397 Currently parallel reassociation is enabled for Atom
46398 processors only and we set reassociation width to be 2
46399 because Atom may issue up to 2 instructions per cycle.
46400
46401 Return value should be fixed if parallel reassociation is
46402 enabled for other processors. */
46403
46404 static int
46405 ix86_reassociation_width (unsigned int, enum machine_mode mode)
46406 {
46407 int res = 1;
46408
46409 /* Vector part. */
46410 if (VECTOR_MODE_P (mode))
46411 {
46412 if (TARGET_VECTOR_PARALLEL_EXECUTION)
46413 return 2;
46414 else
46415 return 1;
46416 }
46417
46418 /* Scalar part. */
46419 if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL)
46420 res = 2;
46421 else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL)
46422 res = 2;
46423
46424 return res;
46425 }
46426
46427 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
46428 place emms and femms instructions. */
46429
46430 static enum machine_mode
46431 ix86_preferred_simd_mode (enum machine_mode mode)
46432 {
46433 if (!TARGET_SSE)
46434 return word_mode;
46435
46436 switch (mode)
46437 {
46438 case QImode:
46439 return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V32QImode : V16QImode;
46440 case HImode:
46441 return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V16HImode : V8HImode;
46442 case SImode:
46443 return TARGET_AVX512F ? V16SImode :
46444 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode;
46445 case DImode:
46446 return TARGET_AVX512F ? V8DImode :
46447 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V4DImode : V2DImode;
46448
46449 case SFmode:
46450 if (TARGET_AVX512F)
46451 return V16SFmode;
46452 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
46453 return V8SFmode;
46454 else
46455 return V4SFmode;
46456
46457 case DFmode:
46458 if (!TARGET_VECTORIZE_DOUBLE)
46459 return word_mode;
46460 else if (TARGET_AVX512F)
46461 return V8DFmode;
46462 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
46463 return V4DFmode;
46464 else if (TARGET_SSE2)
46465 return V2DFmode;
46466 /* FALLTHRU */
46467
46468 default:
46469 return word_mode;
46470 }
46471 }
46472
46473 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
46474 vectors. If AVX512F is enabled then try vectorizing with 512bit,
46475 256bit and 128bit vectors. */
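/* The returned value is a bit mask of the vector byte sizes to try,
   e.g. 64 | 32 | 16 when AVX512F is enabled (an explanatory note, not
   part of the original comment).  */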
46476
46477 static unsigned int
46478 ix86_autovectorize_vector_sizes (void)
46479 {
46480 return TARGET_AVX512F ? 64 | 32 | 16 :
46481 (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
46482 }
46483
46484 \f
46485
46486 /* Return class of registers which could be used for pseudo of MODE
46487 and of class RCLASS for spilling instead of memory. Return NO_REGS
46488 if it is not possible or not profitable. */
46489 static reg_class_t
46490 ix86_spill_class (reg_class_t rclass, enum machine_mode mode)
46491 {
46492 if (TARGET_SSE && TARGET_GENERAL_REGS_SSE_SPILL && ! TARGET_MMX
46493 && (mode == SImode || (TARGET_64BIT && mode == DImode))
46494 && rclass != NO_REGS && INTEGER_CLASS_P (rclass))
46495 return ALL_SSE_REGS;
46496 return NO_REGS;
46497 }
46498
46499 /* Implement targetm.vectorize.init_cost. */
46500
46501 static void *
46502 ix86_init_cost (struct loop *)
46503 {
46504 unsigned *cost = XNEWVEC (unsigned, 3);
46505 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
46506 return cost;
46507 }
46508
46509 /* Implement targetm.vectorize.add_stmt_cost. */
46510
46511 static unsigned
46512 ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
46513 struct _stmt_vec_info *stmt_info, int misalign,
46514 enum vect_cost_model_location where)
46515 {
46516 unsigned *cost = (unsigned *) data;
46517 unsigned retval = 0;
46518
46519 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
46520 int stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
46521
46522 /* Statements in an inner loop relative to the loop being
46523 vectorized are weighted more heavily. The value here is
46524 arbitrary and could potentially be improved with analysis. */
46525 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
46526 count *= 50; /* FIXME. */
46527
46528 retval = (unsigned) (count * stmt_cost);
46529
46530 /* We need to multiply all vector stmt cost by 1.7 (estimated cost)
46531 for Silvermont, as it has an out-of-order integer pipeline and can execute
46532 2 scalar instructions per tick, but has an in-order SIMD pipeline. */
46533 if (TARGET_SILVERMONT || TARGET_INTEL)
46534 if (stmt_info && stmt_info->stmt)
46535 {
46536 tree lhs_op = gimple_get_lhs (stmt_info->stmt);
46537 if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE)
46538 retval = (retval * 17) / 10;
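	  /* I.e. retval * 1.7, computed in integer arithmetic.  */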
46539 }
46540
46541 cost[where] += retval;
46542
46543 return retval;
46544 }
46545
46546 /* Implement targetm.vectorize.finish_cost. */
46547
46548 static void
46549 ix86_finish_cost (void *data, unsigned *prologue_cost,
46550 unsigned *body_cost, unsigned *epilogue_cost)
46551 {
46552 unsigned *cost = (unsigned *) data;
46553 *prologue_cost = cost[vect_prologue];
46554 *body_cost = cost[vect_body];
46555 *epilogue_cost = cost[vect_epilogue];
46556 }
46557
46558 /* Implement targetm.vectorize.destroy_cost_data. */
46559
46560 static void
46561 ix86_destroy_cost_data (void *data)
46562 {
46563 free (data);
46564 }
46565
46566 /* Validate target specific memory model bits in VAL. */
46567
46568 static unsigned HOST_WIDE_INT
46569 ix86_memmodel_check (unsigned HOST_WIDE_INT val)
46570 {
46571 unsigned HOST_WIDE_INT model = val & MEMMODEL_MASK;
46572 bool strong;
46573
46574 if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
46575 |MEMMODEL_MASK)
46576 || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
46577 {
46578 warning (OPT_Winvalid_memory_model,
46579 "Unknown architecture specific memory model");
46580 return MEMMODEL_SEQ_CST;
46581 }
46582 strong = (model == MEMMODEL_ACQ_REL || model == MEMMODEL_SEQ_CST);
46583 if (val & IX86_HLE_ACQUIRE && !(model == MEMMODEL_ACQUIRE || strong))
46584 {
46585 warning (OPT_Winvalid_memory_model,
46586 "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
46587 return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
46588 }
46589 if (val & IX86_HLE_RELEASE && !(model == MEMMODEL_RELEASE || strong))
46590 {
46591 warning (OPT_Winvalid_memory_model,
46592 "HLE_RELEASE not used with RELEASE or stronger memory model");
46593 return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
46594 }
46595 return val;
46596 }
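/* Illustrative usage (not taken from this file): a caller may combine the
   HLE bits with a base model, e.g.
   __atomic_exchange_n (&lock, 1, __ATOMIC_ACQUIRE | __ATOMIC_HLE_ACQUIRE);
   the HLE bit is accepted here only together with a compatible model.  */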
46597
46598 /* Set CLONEI->vecsize_mangle, CLONEI->vecsize_int,
46599 CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
46600 CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted,
46601 or number of vecsize_mangle variants that should be emitted. */
46602
46603 static int
46604 ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
46605 struct cgraph_simd_clone *clonei,
46606 tree base_type, int num)
46607 {
46608 int ret = 1;
46609
46610 if (clonei->simdlen
46611 && (clonei->simdlen < 2
46612 || clonei->simdlen > 16
46613 || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
46614 {
46615 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
46616 "unsupported simdlen %d", clonei->simdlen);
46617 return 0;
46618 }
46619
46620 tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
46621 if (TREE_CODE (ret_type) != VOID_TYPE)
46622 switch (TYPE_MODE (ret_type))
46623 {
46624 case QImode:
46625 case HImode:
46626 case SImode:
46627 case DImode:
46628 case SFmode:
46629 case DFmode:
46630 /* case SCmode: */
46631 /* case DCmode: */
46632 break;
46633 default:
46634 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
46635 "unsupported return type %qT for simd\n", ret_type);
46636 return 0;
46637 }
46638
46639 tree t;
46640 int i;
46641
46642 for (t = DECL_ARGUMENTS (node->decl), i = 0; t; t = DECL_CHAIN (t), i++)
46643 /* FIXME: Shouldn't we allow such arguments if they are uniform? */
46644 switch (TYPE_MODE (TREE_TYPE (t)))
46645 {
46646 case QImode:
46647 case HImode:
46648 case SImode:
46649 case DImode:
46650 case SFmode:
46651 case DFmode:
46652 /* case SCmode: */
46653 /* case DCmode: */
46654 break;
46655 default:
46656 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
46657 "unsupported argument type %qT for simd\n", TREE_TYPE (t));
46658 return 0;
46659 }
46660
46661 if (clonei->cilk_elemental)
46662 {
46663 /* Parse the processor clause here. If not present, default to 'b'. */
46664 clonei->vecsize_mangle = 'b';
46665 }
46666 else if (!TREE_PUBLIC (node->decl))
46667 {
46668 /* If the function isn't exported, we can pick up just one ISA
46669 for the clones. */
46670 if (TARGET_AVX2)
46671 clonei->vecsize_mangle = 'd';
46672 else if (TARGET_AVX)
46673 clonei->vecsize_mangle = 'c';
46674 else
46675 clonei->vecsize_mangle = 'b';
46676 ret = 1;
46677 }
46678 else
46679 {
46680 clonei->vecsize_mangle = "bcd"[num];
46681 ret = 3;
46682 }
46683 switch (clonei->vecsize_mangle)
46684 {
46685 case 'b':
46686 clonei->vecsize_int = 128;
46687 clonei->vecsize_float = 128;
46688 break;
46689 case 'c':
46690 clonei->vecsize_int = 128;
46691 clonei->vecsize_float = 256;
46692 break;
46693 case 'd':
46694 clonei->vecsize_int = 256;
46695 clonei->vecsize_float = 256;
46696 break;
46697 }
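  /* When no simdlen was requested, derive it from the vector size;
     e.g. (illustrative) mangle 'c' with a double base type gives
     256 / 64 = 4 lanes, and the result is capped at 16 below.  */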
46698 if (clonei->simdlen == 0)
46699 {
46700 if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
46701 clonei->simdlen = clonei->vecsize_int;
46702 else
46703 clonei->simdlen = clonei->vecsize_float;
46704 clonei->simdlen /= GET_MODE_BITSIZE (TYPE_MODE (base_type));
46705 if (clonei->simdlen > 16)
46706 clonei->simdlen = 16;
46707 }
46708 return ret;
46709 }
46710
46711 /* Add target attribute to SIMD clone NODE if needed. */
46712
46713 static void
46714 ix86_simd_clone_adjust (struct cgraph_node *node)
46715 {
46716 const char *str = NULL;
46717 gcc_assert (node->decl == cfun->decl);
46718 switch (node->simdclone->vecsize_mangle)
46719 {
46720 case 'b':
46721 if (!TARGET_SSE2)
46722 str = "sse2";
46723 break;
46724 case 'c':
46725 if (!TARGET_AVX)
46726 str = "avx";
46727 break;
46728 case 'd':
46729 if (!TARGET_AVX2)
46730 str = "avx2";
46731 break;
46732 default:
46733 gcc_unreachable ();
46734 }
46735 if (str == NULL)
46736 return;
46737 push_cfun (NULL);
46738 tree args = build_tree_list (NULL_TREE, build_string (strlen (str), str));
46739 bool ok = ix86_valid_target_attribute_p (node->decl, NULL, args, 0);
46740 gcc_assert (ok);
46741 pop_cfun ();
46742 ix86_previous_fndecl = NULL_TREE;
46743 ix86_set_current_function (node->decl);
46744 }
46745
46746 /* If SIMD clone NODE can't be used in a vectorized loop
46747 in the current function, return -1; otherwise return the badness of using it
46748 (0 if it is most desirable from vecsize_mangle point of view, 1
46749 slightly less desirable, etc.). */
46750
46751 static int
46752 ix86_simd_clone_usable (struct cgraph_node *node)
46753 {
46754 switch (node->simdclone->vecsize_mangle)
46755 {
46756 case 'b':
46757 if (!TARGET_SSE2)
46758 return -1;
46759 if (!TARGET_AVX)
46760 return 0;
46761 return TARGET_AVX2 ? 2 : 1;
46762 case 'c':
46763 if (!TARGET_AVX)
46764 return -1;
46765 return TARGET_AVX2 ? 1 : 0;
46766 break;
46767 case 'd':
46768 if (!TARGET_AVX2)
46769 return -1;
46770 return 0;
46771 default:
46772 gcc_unreachable ();
46773 }
46774 }
46775
46776 /* This function counts the number of memory references.
46777 This value determines the unrolling factor for
46778 bdver3 and bdver4 architectures. */
46779
46780 static int
46781 ix86_loop_memcount (rtx *x, unsigned *mem_count)
46782 {
46783 if (*x != NULL_RTX && MEM_P (*x))
46784 {
46785 enum machine_mode mode;
46786 unsigned int n_words;
46787
46788 mode = GET_MODE (*x);
46789 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
46790
46791 if (n_words > 4)
46792 (*mem_count)+=2;
46793 else
46794 (*mem_count)+=1;
46795 }
46796 return 0;
46797 }
46798
46799 /* This function adjusts the unroll factor based on
46800 the hardware capabilities. For example, bdver3 has
46801 a loop buffer which makes unrolling of smaller
46802 loops less important. This function decides the
46803 unroll factor using the number of memory references
46804 (a threshold of 32 is used) as a heuristic. */
46805
46806 static unsigned
46807 ix86_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
46808 {
46809 basic_block *bbs;
46810 rtx insn;
46811 unsigned i;
46812 unsigned mem_count = 0;
46813
46814 if (!TARGET_ADJUST_UNROLL)
46815 return nunroll;
46816
46817 /* Count the number of memory references within the loop body. */
46818 bbs = get_loop_body (loop);
46819 for (i = 0; i < loop->num_nodes; i++)
46820 {
46821 for (insn = BB_HEAD (bbs[i]); insn != BB_END (bbs[i]); insn = NEXT_INSN (insn))
46822 if (NONDEBUG_INSN_P (insn))
46823 for_each_rtx (&insn, (rtx_function) ix86_loop_memcount, &mem_count);
46824 }
46825 free (bbs);
46826
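  /* Cap the unroll factor so that the unrolled body has at most about 32
     counted memory references; e.g. (illustrative) a loop with 8 counted
     references is unrolled at most 32 / 8 = 4 times.  */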
46827 if (mem_count && mem_count <= 32)
46828 return 32 / mem_count;
46829
46830 return nunroll;
46831 }
46832
46833
46834 /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
46835
46836 static bool
46837 ix86_float_exceptions_rounding_supported_p (void)
46838 {
46839 /* For x87 floating point with standard excess precision handling,
46840 there is no adddf3 pattern (since x87 floating point only has
46841 XFmode operations) so the default hook implementation gets this
46842 wrong. */
46843 return TARGET_80387 || TARGET_SSE_MATH;
46844 }
46845
46846 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
46847
46848 static void
46849 ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
46850 {
46851 if (!TARGET_80387 && !TARGET_SSE_MATH)
46852 return;
46853 tree exceptions_var = create_tmp_var (integer_type_node, NULL);
46854 if (TARGET_80387)
46855 {
46856 tree fenv_index_type = build_index_type (size_int (6));
46857 tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
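      /* I.e. an array of 7 unsigned ints (28 bytes), matching the size of
	 the x87 environment image stored by fnstenv.  */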
46858 tree fenv_var = create_tmp_var (fenv_type, NULL);
46859 mark_addressable (fenv_var);
46860 tree fenv_ptr = build_pointer_type (fenv_type);
46861 tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
46862 fenv_addr = fold_convert (ptr_type_node, fenv_addr);
46863 tree fnstenv = ix86_builtins[IX86_BUILTIN_FNSTENV];
46864 tree fldenv = ix86_builtins[IX86_BUILTIN_FLDENV];
46865 tree fnstsw = ix86_builtins[IX86_BUILTIN_FNSTSW];
46866 tree fnclex = ix86_builtins[IX86_BUILTIN_FNCLEX];
46867 tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
46868 tree hold_fnclex = build_call_expr (fnclex, 0);
46869 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_fnstenv,
46870 hold_fnclex);
46871 *clear = build_call_expr (fnclex, 0);
46872 tree sw_var = create_tmp_var (short_unsigned_type_node, NULL);
46873 tree fnstsw_call = build_call_expr (fnstsw, 0);
46874 tree sw_mod = build2 (MODIFY_EXPR, short_unsigned_type_node,
46875 sw_var, fnstsw_call);
46876 tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
46877 tree update_mod = build2 (MODIFY_EXPR, integer_type_node,
46878 exceptions_var, exceptions_x87);
46879 *update = build2 (COMPOUND_EXPR, integer_type_node,
46880 sw_mod, update_mod);
46881 tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
46882 *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
46883 }
46884 if (TARGET_SSE_MATH)
46885 {
46886 tree mxcsr_orig_var = create_tmp_var (unsigned_type_node, NULL);
46887 tree mxcsr_mod_var = create_tmp_var (unsigned_type_node, NULL);
46888 tree stmxcsr = ix86_builtins[IX86_BUILTIN_STMXCSR];
46889 tree ldmxcsr = ix86_builtins[IX86_BUILTIN_LDMXCSR];
46890 tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
46891 tree hold_assign_orig = build2 (MODIFY_EXPR, unsigned_type_node,
46892 mxcsr_orig_var, stmxcsr_hold_call);
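      /* Build the MXCSR value to load while the update region runs: OR in
	 0x1f80 to set all exception mask bits and AND with 0xffffffc0 to
	 clear the pending exception flag bits.  */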
46893 tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
46894 mxcsr_orig_var,
46895 build_int_cst (unsigned_type_node, 0x1f80));
46896 hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
46897 build_int_cst (unsigned_type_node, 0xffffffc0));
46898 tree hold_assign_mod = build2 (MODIFY_EXPR, unsigned_type_node,
46899 mxcsr_mod_var, hold_mod_val);
46900 tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
46901 tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
46902 hold_assign_orig, hold_assign_mod);
46903 hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
46904 ldmxcsr_hold_call);
46905 if (*hold)
46906 *hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
46907 else
46908 *hold = hold_all;
46909 tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
46910 if (*clear)
46911 *clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
46912 ldmxcsr_clear_call);
46913 else
46914 *clear = ldmxcsr_clear_call;
46915 tree stxmcsr_update_call = build_call_expr (stmxcsr, 0);
46916 tree exceptions_sse = fold_convert (integer_type_node,
46917 stxmcsr_update_call);
46918 if (*update)
46919 {
46920 tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
46921 exceptions_var, exceptions_sse);
46922 tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
46923 exceptions_var, exceptions_mod);
46924 *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
46925 exceptions_assign);
46926 }
46927 else
46928 *update = build2 (MODIFY_EXPR, integer_type_node,
46929 exceptions_var, exceptions_sse);
46930 tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
46931 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
46932 ldmxcsr_update_call);
46933 }
46934 tree atomic_feraiseexcept
46935 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
46936 tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
46937 1, exceptions_var);
46938 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
46939 atomic_feraiseexcept_call);
46940 }
46941
46942 /* Initialize the GCC target structure. */
46943 #undef TARGET_RETURN_IN_MEMORY
46944 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
46945
46946 #undef TARGET_LEGITIMIZE_ADDRESS
46947 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
46948
46949 #undef TARGET_ATTRIBUTE_TABLE
46950 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
46951 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
46952 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
46953 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
46954 # undef TARGET_MERGE_DECL_ATTRIBUTES
46955 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
46956 #endif
46957
46958 #undef TARGET_COMP_TYPE_ATTRIBUTES
46959 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
46960
46961 #undef TARGET_INIT_BUILTINS
46962 #define TARGET_INIT_BUILTINS ix86_init_builtins
46963 #undef TARGET_BUILTIN_DECL
46964 #define TARGET_BUILTIN_DECL ix86_builtin_decl
46965 #undef TARGET_EXPAND_BUILTIN
46966 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
46967
46968 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
46969 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
46970 ix86_builtin_vectorized_function
46971
46972 #undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
46973 #define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load
46974
46975 #undef TARGET_VECTORIZE_BUILTIN_TM_STORE
46976 #define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store
46977
46978 #undef TARGET_VECTORIZE_BUILTIN_GATHER
46979 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
46980
46981 #undef TARGET_BUILTIN_RECIPROCAL
46982 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
46983
46984 #undef TARGET_ASM_FUNCTION_EPILOGUE
46985 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
46986
46987 #undef TARGET_ENCODE_SECTION_INFO
46988 #ifndef SUBTARGET_ENCODE_SECTION_INFO
46989 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
46990 #else
46991 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
46992 #endif
46993
46994 #undef TARGET_ASM_OPEN_PAREN
46995 #define TARGET_ASM_OPEN_PAREN ""
46996 #undef TARGET_ASM_CLOSE_PAREN
46997 #define TARGET_ASM_CLOSE_PAREN ""
46998
46999 #undef TARGET_ASM_BYTE_OP
47000 #define TARGET_ASM_BYTE_OP ASM_BYTE
47001
47002 #undef TARGET_ASM_ALIGNED_HI_OP
47003 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
47004 #undef TARGET_ASM_ALIGNED_SI_OP
47005 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
47006 #ifdef ASM_QUAD
47007 #undef TARGET_ASM_ALIGNED_DI_OP
47008 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
47009 #endif
47010
47011 #undef TARGET_PROFILE_BEFORE_PROLOGUE
47012 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
47013
47014 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
47015 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
47016
47017 #undef TARGET_ASM_UNALIGNED_HI_OP
47018 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
47019 #undef TARGET_ASM_UNALIGNED_SI_OP
47020 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
47021 #undef TARGET_ASM_UNALIGNED_DI_OP
47022 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
47023
47024 #undef TARGET_PRINT_OPERAND
47025 #define TARGET_PRINT_OPERAND ix86_print_operand
47026 #undef TARGET_PRINT_OPERAND_ADDRESS
47027 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
47028 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
47029 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
47030 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
47031 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
47032
47033 #undef TARGET_SCHED_INIT_GLOBAL
47034 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
47035 #undef TARGET_SCHED_ADJUST_COST
47036 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
47037 #undef TARGET_SCHED_ISSUE_RATE
47038 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
47039 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
47040 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
47041 ia32_multipass_dfa_lookahead
47042 #undef TARGET_SCHED_MACRO_FUSION_P
47043 #define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
47044 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
47045 #define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
47046
47047 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
47048 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
47049
47050 #undef TARGET_MEMMODEL_CHECK
47051 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
47052
47053 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
47054 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv
47055
47056 #ifdef HAVE_AS_TLS
47057 #undef TARGET_HAVE_TLS
47058 #define TARGET_HAVE_TLS true
47059 #endif
47060 #undef TARGET_CANNOT_FORCE_CONST_MEM
47061 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
47062 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
47063 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
47064
47065 #undef TARGET_DELEGITIMIZE_ADDRESS
47066 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
47067
47068 #undef TARGET_MS_BITFIELD_LAYOUT_P
47069 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
47070
47071 #if TARGET_MACHO
47072 #undef TARGET_BINDS_LOCAL_P
47073 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
47074 #endif
47075 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
47076 #undef TARGET_BINDS_LOCAL_P
47077 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
47078 #endif
47079
47080 #undef TARGET_ASM_OUTPUT_MI_THUNK
47081 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
47082 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
47083 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
47084
47085 #undef TARGET_ASM_FILE_START
47086 #define TARGET_ASM_FILE_START x86_file_start
47087
47088 #undef TARGET_OPTION_OVERRIDE
47089 #define TARGET_OPTION_OVERRIDE ix86_option_override
47090
47091 #undef TARGET_REGISTER_MOVE_COST
47092 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
47093 #undef TARGET_MEMORY_MOVE_COST
47094 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
47095 #undef TARGET_RTX_COSTS
47096 #define TARGET_RTX_COSTS ix86_rtx_costs
47097 #undef TARGET_ADDRESS_COST
47098 #define TARGET_ADDRESS_COST ix86_address_cost
47099
47100 #undef TARGET_FIXED_CONDITION_CODE_REGS
47101 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
47102 #undef TARGET_CC_MODES_COMPATIBLE
47103 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
47104
47105 #undef TARGET_MACHINE_DEPENDENT_REORG
47106 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
47107
47108 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
47109 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
47110
47111 #undef TARGET_BUILD_BUILTIN_VA_LIST
47112 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
47113
47114 #undef TARGET_FOLD_BUILTIN
47115 #define TARGET_FOLD_BUILTIN ix86_fold_builtin
47116
47117 #undef TARGET_COMPARE_VERSION_PRIORITY
47118 #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
47119
47120 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
47121 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
47122 ix86_generate_version_dispatcher_body
47123
47124 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
47125 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
47126 ix86_get_function_versions_dispatcher
47127
47128 #undef TARGET_ENUM_VA_LIST_P
47129 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
47130
47131 #undef TARGET_FN_ABI_VA_LIST
47132 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
47133
47134 #undef TARGET_CANONICAL_VA_LIST_TYPE
47135 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
47136
47137 #undef TARGET_EXPAND_BUILTIN_VA_START
47138 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
47139
47140 #undef TARGET_MD_ASM_CLOBBERS
47141 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
47142
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG ix86_function_arg
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
#undef TARGET_UPDATE_STACK_BOUNDARY
#define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
#undef TARGET_GET_DRAP_RTX
#define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_STATIC_CHAIN
#define TARGET_STATIC_CHAIN ix86_static_chain
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
#undef TARGET_RETURN_POPS_ARGS
#define TARGET_RETURN_POPS_ARGS ix86_return_pops_args

#undef TARGET_LEGITIMATE_COMBINED_INSN
#define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn

#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
#endif

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ix86_mangle_type

#if !TARGET_MACHO
#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
#endif

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ix86_function_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode

#undef TARGET_MEMBER_TYPE_FORCES_BLK
#define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk

#undef TARGET_INSTANTIATE_DECLS
#define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls

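/* Register-class queries used by reload/LRA and the register allocator:
   when a secondary (intermediate) register is needed, how many hard
   registers a value occupies in a given class, and which classes to
   prefer or avoid when reloading.  */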
#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD ix86_secondary_reload

#undef TARGET_CLASS_MAX_NREGS
#define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
#undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
#define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p

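/* Vectorizer hooks: per-statement cost estimates, which constant
   permutations can be expanded, the preferred SIMD mode for a scalar
   mode, and the vector sizes to try when auto-vectorizing.  */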
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  ix86_builtin_vectorization_cost
#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
  ix86_vectorize_vec_perm_const_ok
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  ix86_preferred_simd_mode
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  ix86_autovectorize_vector_sizes
#undef TARGET_VECTORIZE_INIT_COST
#define TARGET_VECTORIZE_INIT_COST ix86_init_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
#undef TARGET_VECTORIZE_FINISH_COST
#define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
#undef TARGET_VECTORIZE_DESTROY_COST_DATA
#define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function

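/* Per-function target-option support: validating
   __attribute__ ((target (...))), saving, restoring and printing option
   state, deciding whether two declarations are distinct function
   versions, and whether one function may be inlined into another that
   was compiled with different target options.  */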
#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE ix86_function_specific_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE ix86_function_specific_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT ix86_function_specific_print

#undef TARGET_OPTION_FUNCTION_VERSIONS
#define TARGET_OPTION_FUNCTION_VERSIONS ix86_function_versions

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P ix86_can_inline_p

#undef TARGET_EXPAND_TO_RTL_HOOK
#define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p

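/* Returning true unconditionally selects the LRA pass in place of the
   old reload pass.  */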
#undef TARGET_LRA_P
#define TARGET_LRA_P hook_bool_void_true

#undef TARGET_REGISTER_PRIORITY
#define TARGET_REGISTER_PRIORITY ix86_register_priority

#undef TARGET_REGISTER_USAGE_LEVELING_P
#define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE ix86_can_eliminate

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry

#undef TARGET_ASM_CODE_END
#define TARGET_ASM_CODE_END ix86_code_end

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage

#if TARGET_MACHO
#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS darwin_rename_builtins
#endif

#undef TARGET_LOOP_UNROLL_ADJUST
#define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust

#undef TARGET_SPILL_CLASS
#define TARGET_SPILL_CLASS ix86_spill_class

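/* SIMD clone hooks, used when creating vector variants of functions for
   "#pragma omp declare simd": pick the vector size and simdlen for each
   clone, adjust the cloned declaration, and decide whether a clone is
   usable at a particular call site.  */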
#undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
#define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
  ix86_simd_clone_compute_vecsize_and_simdlen

#undef TARGET_SIMD_CLONE_ADJUST
#define TARGET_SIMD_CLONE_ADJUST \
  ix86_simd_clone_adjust

#undef TARGET_SIMD_CLONE_USABLE
#define TARGET_SIMD_CLONE_USABLE \
  ix86_simd_clone_usable

#undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
#define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
  ix86_float_exceptions_rounding_supported_p

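/* Mode-switching hooks for the optimize_mode_switching pass; on x86
   these track, e.g., the AVX upper-register state (for vzeroupper
   insertion) and x87 control-word changes.  */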
#undef TARGET_MODE_EMIT
#define TARGET_MODE_EMIT ix86_emit_mode_set

#undef TARGET_MODE_NEEDED
#define TARGET_MODE_NEEDED ix86_mode_needed

#undef TARGET_MODE_AFTER
#define TARGET_MODE_AFTER ix86_mode_after

#undef TARGET_MODE_ENTRY
#define TARGET_MODE_ENTRY ix86_mode_entry

#undef TARGET_MODE_EXIT
#define TARGET_MODE_EXIT ix86_mode_exit

#undef TARGET_MODE_PRIORITY
#define TARGET_MODE_PRIORITY ix86_mode_priority

#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true

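/* TARGET_INITIALIZER (built up in target-def.h) expands to an aggregate
   initializer that fills each field of this structure from the hook
   macros defined above, falling back to the documented default for any
   hook the port does not override.  Machine-independent code then calls
   the i386 implementations indirectly through targetm, e.g. a call such
   as

     targetm.legitimate_address_p (mode, addr, strict)

   ends up in ix86_legitimate_address_p.  (Illustrative sketch only; the
   actual call sites live in target-independent code.)  */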
struct gcc_target targetm = TARGET_INITIALIZER;
\f
#include "gt-i386.h"