1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988-2014 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
10
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "tm.h"
24 #include "rtl.h"
25 #include "tree.h"
26 #include "stringpool.h"
27 #include "attribs.h"
28 #include "calls.h"
29 #include "stor-layout.h"
30 #include "varasm.h"
31 #include "tm_p.h"
32 #include "regs.h"
33 #include "hard-reg-set.h"
34 #include "insn-config.h"
35 #include "conditions.h"
36 #include "output.h"
37 #include "insn-codes.h"
38 #include "insn-attr.h"
39 #include "flags.h"
40 #include "except.h"
41 #include "function.h"
42 #include "recog.h"
43 #include "expr.h"
44 #include "optabs.h"
45 #include "diagnostic-core.h"
46 #include "toplev.h"
47 #include "basic-block.h"
48 #include "ggc.h"
49 #include "target.h"
50 #include "target-def.h"
51 #include "common/common-target.h"
52 #include "langhooks.h"
53 #include "reload.h"
54 #include "cgraph.h"
55 #include "hash-table.h"
56 #include "vec.h"
58 #include "tree-ssa-alias.h"
59 #include "internal-fn.h"
60 #include "gimple-fold.h"
61 #include "tree-eh.h"
62 #include "gimple-expr.h"
63 #include "is-a.h"
64 #include "gimple.h"
65 #include "gimplify.h"
66 #include "cfgloop.h"
67 #include "dwarf2.h"
68 #include "df.h"
69 #include "tm-constrs.h"
70 #include "params.h"
71 #include "cselib.h"
72 #include "debug.h"
73 #include "sched-int.h"
74 #include "sbitmap.h"
75 #include "fibheap.h"
76 #include "opts.h"
77 #include "diagnostic.h"
78 #include "dumpfile.h"
79 #include "tree-pass.h"
80 #include "wide-int.h"
81 #include "context.h"
82 #include "pass_manager.h"
83 #include "target-globals.h"
84 #include "tree-vectorizer.h"
85 #include "shrink-wrap.h"
86 #include "builtins.h"
87
88 static rtx legitimize_dllimport_symbol (rtx, bool);
89 static rtx legitimize_pe_coff_extern_decl (rtx, bool);
90 static rtx legitimize_pe_coff_symbol (rtx, bool);
91
92 #ifndef CHECK_STACK_LIMIT
93 #define CHECK_STACK_LIMIT (-1)
94 #endif
95
96 /* Return the index of the given mode in the mult and division cost tables.  */
97 #define MODE_INDEX(mode) \
98 ((mode) == QImode ? 0 \
99 : (mode) == HImode ? 1 \
100 : (mode) == SImode ? 2 \
101 : (mode) == DImode ? 3 \
102 : 4)
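/* Below is a minimal illustrative sketch (kept out of the build with #if 0)
   of how MODE_INDEX is meant to be used: it selects the per-mode slot in
   cost arrays such as the multiply and divide tables declared later in this
   file.  The "mult_init" field name and the "ix86_cost" pointer are
   assumptions based on the cost-table comments, not definitions made here.  */
#if 0
static int
example_mult_start_cost (enum machine_mode mode)
{
  /* QImode -> 0, HImode -> 1, SImode -> 2, DImode -> 3, anything else -> 4
     (the "other" slot).  */
  return ix86_cost->mult_init[MODE_INDEX (mode)];
}
#endif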
103
104 /* Processor costs (relative to an add) */
105 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
106 #define COSTS_N_BYTES(N) ((N) * 2)
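/* Concretely, assuming COSTS_N_INSNS (N) == (N) * 4 as stated above: a
   2-byte add costs COSTS_N_BYTES (2) == 4 == COSTS_N_INSNS (1), so the
   size-tuning table below uses the same scale as the speed tables, and a
   3-byte lea at COSTS_N_BYTES (3) == 6 counts as one and a half adds.  */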
107
108 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall, false}}}
109
110 static stringop_algs ix86_size_memcpy[2] = {
111 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
112 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
113 static stringop_algs ix86_size_memset[2] = {
114 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
115 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
116
117 const
118 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
119 COSTS_N_BYTES (2), /* cost of an add instruction */
120 COSTS_N_BYTES (3), /* cost of a lea instruction */
121 COSTS_N_BYTES (2), /* variable shift costs */
122 COSTS_N_BYTES (3), /* constant shift costs */
123 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
124 COSTS_N_BYTES (3), /* HI */
125 COSTS_N_BYTES (3), /* SI */
126 COSTS_N_BYTES (3), /* DI */
127 COSTS_N_BYTES (5)}, /* other */
128 0, /* cost of multiply per each bit set */
129 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
130 COSTS_N_BYTES (3), /* HI */
131 COSTS_N_BYTES (3), /* SI */
132 COSTS_N_BYTES (3), /* DI */
133 COSTS_N_BYTES (5)}, /* other */
134 COSTS_N_BYTES (3), /* cost of movsx */
135 COSTS_N_BYTES (3), /* cost of movzx */
136 0, /* "large" insn */
137 2, /* MOVE_RATIO */
138 2, /* cost for loading QImode using movzbl */
139 {2, 2, 2}, /* cost of loading integer registers
140 in QImode, HImode and SImode.
141 Relative to reg-reg move (2). */
142 {2, 2, 2}, /* cost of storing integer registers */
143 2, /* cost of reg,reg fld/fst */
144 {2, 2, 2}, /* cost of loading fp registers
145 in SFmode, DFmode and XFmode */
146 {2, 2, 2}, /* cost of storing fp registers
147 in SFmode, DFmode and XFmode */
148 3, /* cost of moving MMX register */
149 {3, 3}, /* cost of loading MMX registers
150 in SImode and DImode */
151 {3, 3}, /* cost of storing MMX registers
152 in SImode and DImode */
153 3, /* cost of moving SSE register */
154 {3, 3, 3}, /* cost of loading SSE registers
155 in SImode, DImode and TImode */
156 {3, 3, 3}, /* cost of storing SSE registers
157 in SImode, DImode and TImode */
158 3, /* MMX or SSE register to integer */
159 0, /* size of l1 cache */
160 0, /* size of l2 cache */
161 0, /* size of prefetch block */
162 0, /* number of parallel prefetches */
163 2, /* Branch cost */
164 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
165 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
166 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
167 COSTS_N_BYTES (2), /* cost of FABS instruction. */
168 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
169 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
170 ix86_size_memcpy,
171 ix86_size_memset,
172 1, /* scalar_stmt_cost. */
173 1, /* scalar load_cost. */
174 1, /* scalar_store_cost. */
175 1, /* vec_stmt_cost. */
176 1, /* vec_to_scalar_cost. */
177 1, /* scalar_to_vec_cost. */
178 1, /* vec_align_load_cost. */
179 1, /* vec_unalign_load_cost. */
180 1, /* vec_store_cost. */
181 1, /* cond_taken_branch_cost. */
182 1, /* cond_not_taken_branch_cost. */
183 };
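/* A hypothetical sketch (not GCC code, excluded from the build) of how a
   caller might choose between the size-tuning table above and one of the
   per-processor speed tables that follow; the helper name and its "speed"
   parameter are invented for illustration only.  */
#if 0
static const struct processor_costs *
example_pick_cost_table (const struct processor_costs *speed_cost, bool speed)
{
  return speed ? speed_cost : &ix86_size_cost;
}
#endif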
184
185 /* Processor costs (relative to an add) */
186 static stringop_algs i386_memcpy[2] = {
187 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
188 DUMMY_STRINGOP_ALGS};
189 static stringop_algs i386_memset[2] = {
190 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
191 DUMMY_STRINGOP_ALGS};
192
193 static const
194 struct processor_costs i386_cost = { /* 386 specific costs */
195 COSTS_N_INSNS (1), /* cost of an add instruction */
196 COSTS_N_INSNS (1), /* cost of a lea instruction */
197 COSTS_N_INSNS (3), /* variable shift costs */
198 COSTS_N_INSNS (2), /* constant shift costs */
199 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
200 COSTS_N_INSNS (6), /* HI */
201 COSTS_N_INSNS (6), /* SI */
202 COSTS_N_INSNS (6), /* DI */
203 COSTS_N_INSNS (6)}, /* other */
204 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
205 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
206 COSTS_N_INSNS (23), /* HI */
207 COSTS_N_INSNS (23), /* SI */
208 COSTS_N_INSNS (23), /* DI */
209 COSTS_N_INSNS (23)}, /* other */
210 COSTS_N_INSNS (3), /* cost of movsx */
211 COSTS_N_INSNS (2), /* cost of movzx */
212 15, /* "large" insn */
213 3, /* MOVE_RATIO */
214 4, /* cost for loading QImode using movzbl */
215 {2, 4, 2}, /* cost of loading integer registers
216 in QImode, HImode and SImode.
217 Relative to reg-reg move (2). */
218 {2, 4, 2}, /* cost of storing integer registers */
219 2, /* cost of reg,reg fld/fst */
220 {8, 8, 8}, /* cost of loading fp registers
221 in SFmode, DFmode and XFmode */
222 {8, 8, 8}, /* cost of storing fp registers
223 in SFmode, DFmode and XFmode */
224 2, /* cost of moving MMX register */
225 {4, 8}, /* cost of loading MMX registers
226 in SImode and DImode */
227 {4, 8}, /* cost of storing MMX registers
228 in SImode and DImode */
229 2, /* cost of moving SSE register */
230 {4, 8, 16}, /* cost of loading SSE registers
231 in SImode, DImode and TImode */
232 {4, 8, 16}, /* cost of storing SSE registers
233 in SImode, DImode and TImode */
234 3, /* MMX or SSE register to integer */
235 0, /* size of l1 cache */
236 0, /* size of l2 cache */
237 0, /* size of prefetch block */
238 0, /* number of parallel prefetches */
239 1, /* Branch cost */
240 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
241 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
242 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
243 COSTS_N_INSNS (22), /* cost of FABS instruction. */
244 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
245 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
246 i386_memcpy,
247 i386_memset,
248 1, /* scalar_stmt_cost. */
249 1, /* scalar load_cost. */
250 1, /* scalar_store_cost. */
251 1, /* vec_stmt_cost. */
252 1, /* vec_to_scalar_cost. */
253 1, /* scalar_to_vec_cost. */
254 1, /* vec_align_load_cost. */
255 2, /* vec_unalign_load_cost. */
256 1, /* vec_store_cost. */
257 3, /* cond_taken_branch_cost. */
258 1, /* cond_not_taken_branch_cost. */
259 };
260
261 static stringop_algs i486_memcpy[2] = {
262 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
263 DUMMY_STRINGOP_ALGS};
264 static stringop_algs i486_memset[2] = {
265 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
266 DUMMY_STRINGOP_ALGS};
267
268 static const
269 struct processor_costs i486_cost = { /* 486 specific costs */
270 COSTS_N_INSNS (1), /* cost of an add instruction */
271 COSTS_N_INSNS (1), /* cost of a lea instruction */
272 COSTS_N_INSNS (3), /* variable shift costs */
273 COSTS_N_INSNS (2), /* constant shift costs */
274 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
275 COSTS_N_INSNS (12), /* HI */
276 COSTS_N_INSNS (12), /* SI */
277 COSTS_N_INSNS (12), /* DI */
278 COSTS_N_INSNS (12)}, /* other */
279 1, /* cost of multiply per each bit set */
280 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
281 COSTS_N_INSNS (40), /* HI */
282 COSTS_N_INSNS (40), /* SI */
283 COSTS_N_INSNS (40), /* DI */
284 COSTS_N_INSNS (40)}, /* other */
285 COSTS_N_INSNS (3), /* cost of movsx */
286 COSTS_N_INSNS (2), /* cost of movzx */
287 15, /* "large" insn */
288 3, /* MOVE_RATIO */
289 4, /* cost for loading QImode using movzbl */
290 {2, 4, 2}, /* cost of loading integer registers
291 in QImode, HImode and SImode.
292 Relative to reg-reg move (2). */
293 {2, 4, 2}, /* cost of storing integer registers */
294 2, /* cost of reg,reg fld/fst */
295 {8, 8, 8}, /* cost of loading fp registers
296 in SFmode, DFmode and XFmode */
297 {8, 8, 8}, /* cost of storing fp registers
298 in SFmode, DFmode and XFmode */
299 2, /* cost of moving MMX register */
300 {4, 8}, /* cost of loading MMX registers
301 in SImode and DImode */
302 {4, 8}, /* cost of storing MMX registers
303 in SImode and DImode */
304 2, /* cost of moving SSE register */
305 {4, 8, 16}, /* cost of loading SSE registers
306 in SImode, DImode and TImode */
307 {4, 8, 16}, /* cost of storing SSE registers
308 in SImode, DImode and TImode */
309 3, /* MMX or SSE register to integer */
310 4, /* size of l1 cache. 486 has 8kB cache
311 shared for code and data, so 4kB is
312 not really precise. */
313 4, /* size of l2 cache */
314 0, /* size of prefetch block */
315 0, /* number of parallel prefetches */
316 1, /* Branch cost */
317 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
318 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
319 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
320 COSTS_N_INSNS (3), /* cost of FABS instruction. */
321 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
322 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
323 i486_memcpy,
324 i486_memset,
325 1, /* scalar_stmt_cost. */
326 1, /* scalar load_cost. */
327 1, /* scalar_store_cost. */
328 1, /* vec_stmt_cost. */
329 1, /* vec_to_scalar_cost. */
330 1, /* scalar_to_vec_cost. */
331 1, /* vec_align_load_cost. */
332 2, /* vec_unalign_load_cost. */
333 1, /* vec_store_cost. */
334 3, /* cond_taken_branch_cost. */
335 1, /* cond_not_taken_branch_cost. */
336 };
337
338 static stringop_algs pentium_memcpy[2] = {
339 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
340 DUMMY_STRINGOP_ALGS};
341 static stringop_algs pentium_memset[2] = {
342 {libcall, {{-1, rep_prefix_4_byte, false}}},
343 DUMMY_STRINGOP_ALGS};
344
345 static const
346 struct processor_costs pentium_cost = {
347 COSTS_N_INSNS (1), /* cost of an add instruction */
348 COSTS_N_INSNS (1), /* cost of a lea instruction */
349 COSTS_N_INSNS (4), /* variable shift costs */
350 COSTS_N_INSNS (1), /* constant shift costs */
351 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
352 COSTS_N_INSNS (11), /* HI */
353 COSTS_N_INSNS (11), /* SI */
354 COSTS_N_INSNS (11), /* DI */
355 COSTS_N_INSNS (11)}, /* other */
356 0, /* cost of multiply per each bit set */
357 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
358 COSTS_N_INSNS (25), /* HI */
359 COSTS_N_INSNS (25), /* SI */
360 COSTS_N_INSNS (25), /* DI */
361 COSTS_N_INSNS (25)}, /* other */
362 COSTS_N_INSNS (3), /* cost of movsx */
363 COSTS_N_INSNS (2), /* cost of movzx */
364 8, /* "large" insn */
365 6, /* MOVE_RATIO */
366 6, /* cost for loading QImode using movzbl */
367 {2, 4, 2}, /* cost of loading integer registers
368 in QImode, HImode and SImode.
369 Relative to reg-reg move (2). */
370 {2, 4, 2}, /* cost of storing integer registers */
371 2, /* cost of reg,reg fld/fst */
372 {2, 2, 6}, /* cost of loading fp registers
373 in SFmode, DFmode and XFmode */
374 {4, 4, 6}, /* cost of storing fp registers
375 in SFmode, DFmode and XFmode */
376 8, /* cost of moving MMX register */
377 {8, 8}, /* cost of loading MMX registers
378 in SImode and DImode */
379 {8, 8}, /* cost of storing MMX registers
380 in SImode and DImode */
381 2, /* cost of moving SSE register */
382 {4, 8, 16}, /* cost of loading SSE registers
383 in SImode, DImode and TImode */
384 {4, 8, 16}, /* cost of storing SSE registers
385 in SImode, DImode and TImode */
386 3, /* MMX or SSE register to integer */
387 8, /* size of l1 cache. */
388 8, /* size of l2 cache */
389 0, /* size of prefetch block */
390 0, /* number of parallel prefetches */
391 2, /* Branch cost */
392 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
393 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
394 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
395 COSTS_N_INSNS (1), /* cost of FABS instruction. */
396 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
397 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
398 pentium_memcpy,
399 pentium_memset,
400 1, /* scalar_stmt_cost. */
401 1, /* scalar load_cost. */
402 1, /* scalar_store_cost. */
403 1, /* vec_stmt_cost. */
404 1, /* vec_to_scalar_cost. */
405 1, /* scalar_to_vec_cost. */
406 1, /* vec_align_load_cost. */
407 2, /* vec_unalign_load_cost. */
408 1, /* vec_store_cost. */
409 3, /* cond_taken_branch_cost. */
410 1, /* cond_not_taken_branch_cost. */
411 };
412
413 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
414    (we ensure the alignment).  For small blocks an inline loop is still a
415    noticeable win; for bigger blocks either rep movsl or rep movsb is the
416    way to go.  Rep movsb apparently has a more expensive startup time in the
417    CPU, but after 4K the difference is down in the noise.  */
418 static stringop_algs pentiumpro_memcpy[2] = {
419 {rep_prefix_4_byte, {{128, loop, false}, {1024, unrolled_loop, false},
420 {8192, rep_prefix_4_byte, false},
421 {-1, rep_prefix_1_byte, false}}},
422 DUMMY_STRINGOP_ALGS};
423 static stringop_algs pentiumpro_memset[2] = {
424 {rep_prefix_4_byte, {{1024, unrolled_loop, false},
425 {8192, rep_prefix_4_byte, false},
426 {-1, libcall, false}}},
427 DUMMY_STRINGOP_ALGS};
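/* An illustrative reading of the stringop_algs entries above (a sketch under
   the assumption, suggested by the initializers in this file, that each
   {max, alg, noalign} triple covers block sizes up to "max" bytes and that
   max == -1 means "no upper bound"):

     pentiumpro_memcpy[0]:
       size <= 128    -> loop
       size <= 1024   -> unrolled_loop
       size <= 8192   -> rep_prefix_4_byte
       larger         -> rep_prefix_1_byte

   The leading algorithm (rep_prefix_4_byte here) appears to be the fallback
   used when the block size is not known at compile time.  */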
428 static const
429 struct processor_costs pentiumpro_cost = {
430 COSTS_N_INSNS (1), /* cost of an add instruction */
431 COSTS_N_INSNS (1), /* cost of a lea instruction */
432 COSTS_N_INSNS (1), /* variable shift costs */
433 COSTS_N_INSNS (1), /* constant shift costs */
434 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
435 COSTS_N_INSNS (4), /* HI */
436 COSTS_N_INSNS (4), /* SI */
437 COSTS_N_INSNS (4), /* DI */
438 COSTS_N_INSNS (4)}, /* other */
439 0, /* cost of multiply per each bit set */
440 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
441 COSTS_N_INSNS (17), /* HI */
442 COSTS_N_INSNS (17), /* SI */
443 COSTS_N_INSNS (17), /* DI */
444 COSTS_N_INSNS (17)}, /* other */
445 COSTS_N_INSNS (1), /* cost of movsx */
446 COSTS_N_INSNS (1), /* cost of movzx */
447 8, /* "large" insn */
448 6, /* MOVE_RATIO */
449 2, /* cost for loading QImode using movzbl */
450 {4, 4, 4}, /* cost of loading integer registers
451 in QImode, HImode and SImode.
452 Relative to reg-reg move (2). */
453 {2, 2, 2}, /* cost of storing integer registers */
454 2, /* cost of reg,reg fld/fst */
455 {2, 2, 6}, /* cost of loading fp registers
456 in SFmode, DFmode and XFmode */
457 {4, 4, 6}, /* cost of storing fp registers
458 in SFmode, DFmode and XFmode */
459 2, /* cost of moving MMX register */
460 {2, 2}, /* cost of loading MMX registers
461 in SImode and DImode */
462 {2, 2}, /* cost of storing MMX registers
463 in SImode and DImode */
464 2, /* cost of moving SSE register */
465 {2, 2, 8}, /* cost of loading SSE registers
466 in SImode, DImode and TImode */
467 {2, 2, 8}, /* cost of storing SSE registers
468 in SImode, DImode and TImode */
469 3, /* MMX or SSE register to integer */
470 8, /* size of l1 cache. */
471 256, /* size of l2 cache */
472 32, /* size of prefetch block */
473 6, /* number of parallel prefetches */
474 2, /* Branch cost */
475 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
476 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
477 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
478 COSTS_N_INSNS (2), /* cost of FABS instruction. */
479 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
480 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
481 pentiumpro_memcpy,
482 pentiumpro_memset,
483 1, /* scalar_stmt_cost. */
484 1, /* scalar load_cost. */
485 1, /* scalar_store_cost. */
486 1, /* vec_stmt_cost. */
487 1, /* vec_to_scalar_cost. */
488 1, /* scalar_to_vec_cost. */
489 1, /* vec_align_load_cost. */
490 2, /* vec_unalign_load_cost. */
491 1, /* vec_store_cost. */
492 3, /* cond_taken_branch_cost. */
493 1, /* cond_not_taken_branch_cost. */
494 };
495
496 static stringop_algs geode_memcpy[2] = {
497 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
498 DUMMY_STRINGOP_ALGS};
499 static stringop_algs geode_memset[2] = {
500 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
501 DUMMY_STRINGOP_ALGS};
502 static const
503 struct processor_costs geode_cost = {
504 COSTS_N_INSNS (1), /* cost of an add instruction */
505 COSTS_N_INSNS (1), /* cost of a lea instruction */
506 COSTS_N_INSNS (2), /* variable shift costs */
507 COSTS_N_INSNS (1), /* constant shift costs */
508 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
509 COSTS_N_INSNS (4), /* HI */
510 COSTS_N_INSNS (7), /* SI */
511 COSTS_N_INSNS (7), /* DI */
512 COSTS_N_INSNS (7)}, /* other */
513 0, /* cost of multiply per each bit set */
514 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
515 COSTS_N_INSNS (23), /* HI */
516 COSTS_N_INSNS (39), /* SI */
517 COSTS_N_INSNS (39), /* DI */
518 COSTS_N_INSNS (39)}, /* other */
519 COSTS_N_INSNS (1), /* cost of movsx */
520 COSTS_N_INSNS (1), /* cost of movzx */
521 8, /* "large" insn */
522 4, /* MOVE_RATIO */
523 1, /* cost for loading QImode using movzbl */
524 {1, 1, 1}, /* cost of loading integer registers
525 in QImode, HImode and SImode.
526 Relative to reg-reg move (2). */
527 {1, 1, 1}, /* cost of storing integer registers */
528 1, /* cost of reg,reg fld/fst */
529 {1, 1, 1}, /* cost of loading fp registers
530 in SFmode, DFmode and XFmode */
531 {4, 6, 6}, /* cost of storing fp registers
532 in SFmode, DFmode and XFmode */
533
534 1, /* cost of moving MMX register */
535 {1, 1}, /* cost of loading MMX registers
536 in SImode and DImode */
537 {1, 1}, /* cost of storing MMX registers
538 in SImode and DImode */
539 1, /* cost of moving SSE register */
540 {1, 1, 1}, /* cost of loading SSE registers
541 in SImode, DImode and TImode */
542 {1, 1, 1}, /* cost of storing SSE registers
543 in SImode, DImode and TImode */
544 1, /* MMX or SSE register to integer */
545 64, /* size of l1 cache. */
546 128, /* size of l2 cache. */
547 32, /* size of prefetch block */
548 1, /* number of parallel prefetches */
549 1, /* Branch cost */
550 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
551 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
552 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
553 COSTS_N_INSNS (1), /* cost of FABS instruction. */
554 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
555 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
556 geode_memcpy,
557 geode_memset,
558 1, /* scalar_stmt_cost. */
559 1, /* scalar load_cost. */
560 1, /* scalar_store_cost. */
561 1, /* vec_stmt_cost. */
562 1, /* vec_to_scalar_cost. */
563 1, /* scalar_to_vec_cost. */
564 1, /* vec_align_load_cost. */
565 2, /* vec_unalign_load_cost. */
566 1, /* vec_store_cost. */
567 3, /* cond_taken_branch_cost. */
568 1, /* cond_not_taken_branch_cost. */
569 };
570
571 static stringop_algs k6_memcpy[2] = {
572 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
573 DUMMY_STRINGOP_ALGS};
574 static stringop_algs k6_memset[2] = {
575 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
576 DUMMY_STRINGOP_ALGS};
577 static const
578 struct processor_costs k6_cost = {
579 COSTS_N_INSNS (1), /* cost of an add instruction */
580 COSTS_N_INSNS (2), /* cost of a lea instruction */
581 COSTS_N_INSNS (1), /* variable shift costs */
582 COSTS_N_INSNS (1), /* constant shift costs */
583 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
584 COSTS_N_INSNS (3), /* HI */
585 COSTS_N_INSNS (3), /* SI */
586 COSTS_N_INSNS (3), /* DI */
587 COSTS_N_INSNS (3)}, /* other */
588 0, /* cost of multiply per each bit set */
589 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
590 COSTS_N_INSNS (18), /* HI */
591 COSTS_N_INSNS (18), /* SI */
592 COSTS_N_INSNS (18), /* DI */
593 COSTS_N_INSNS (18)}, /* other */
594 COSTS_N_INSNS (2), /* cost of movsx */
595 COSTS_N_INSNS (2), /* cost of movzx */
596 8, /* "large" insn */
597 4, /* MOVE_RATIO */
598 3, /* cost for loading QImode using movzbl */
599 {4, 5, 4}, /* cost of loading integer registers
600 in QImode, HImode and SImode.
601 Relative to reg-reg move (2). */
602 {2, 3, 2}, /* cost of storing integer registers */
603 4, /* cost of reg,reg fld/fst */
604 {6, 6, 6}, /* cost of loading fp registers
605 in SFmode, DFmode and XFmode */
606 {4, 4, 4}, /* cost of storing fp registers
607 in SFmode, DFmode and XFmode */
608 2, /* cost of moving MMX register */
609 {2, 2}, /* cost of loading MMX registers
610 in SImode and DImode */
611 {2, 2}, /* cost of storing MMX registers
612 in SImode and DImode */
613 2, /* cost of moving SSE register */
614 {2, 2, 8}, /* cost of loading SSE registers
615 in SImode, DImode and TImode */
616 {2, 2, 8}, /* cost of storing SSE registers
617 in SImode, DImode and TImode */
618 6, /* MMX or SSE register to integer */
619 32, /* size of l1 cache. */
620 32, /* size of l2 cache. Some models
621 have integrated l2 cache, but
622 optimizing for k6 is not important
623 enough to worry about that. */
624 32, /* size of prefetch block */
625 1, /* number of parallel prefetches */
626 1, /* Branch cost */
627 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
628 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
629 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
630 COSTS_N_INSNS (2), /* cost of FABS instruction. */
631 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
632 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
633 k6_memcpy,
634 k6_memset,
635 1, /* scalar_stmt_cost. */
636 1, /* scalar load_cost. */
637 1, /* scalar_store_cost. */
638 1, /* vec_stmt_cost. */
639 1, /* vec_to_scalar_cost. */
640 1, /* scalar_to_vec_cost. */
641 1, /* vec_align_load_cost. */
642 2, /* vec_unalign_load_cost. */
643 1, /* vec_store_cost. */
644 3, /* cond_taken_branch_cost. */
645 1, /* cond_not_taken_branch_cost. */
646 };
647
648 /* For some reason, Athlon deals better with the REP prefix (relative to
649    loops) than K8 does.  Alignment becomes important after 8 bytes for memcpy
650    and 128 bytes for memset.  */
651 static stringop_algs athlon_memcpy[2] = {
652 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
653 DUMMY_STRINGOP_ALGS};
654 static stringop_algs athlon_memset[2] = {
655 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
656 DUMMY_STRINGOP_ALGS};
657 static const
658 struct processor_costs athlon_cost = {
659 COSTS_N_INSNS (1), /* cost of an add instruction */
660 COSTS_N_INSNS (2), /* cost of a lea instruction */
661 COSTS_N_INSNS (1), /* variable shift costs */
662 COSTS_N_INSNS (1), /* constant shift costs */
663 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
664 COSTS_N_INSNS (5), /* HI */
665 COSTS_N_INSNS (5), /* SI */
666 COSTS_N_INSNS (5), /* DI */
667 COSTS_N_INSNS (5)}, /* other */
668 0, /* cost of multiply per each bit set */
669 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
670 COSTS_N_INSNS (26), /* HI */
671 COSTS_N_INSNS (42), /* SI */
672 COSTS_N_INSNS (74), /* DI */
673 COSTS_N_INSNS (74)}, /* other */
674 COSTS_N_INSNS (1), /* cost of movsx */
675 COSTS_N_INSNS (1), /* cost of movzx */
676 8, /* "large" insn */
677 9, /* MOVE_RATIO */
678 4, /* cost for loading QImode using movzbl */
679 {3, 4, 3}, /* cost of loading integer registers
680 in QImode, HImode and SImode.
681 Relative to reg-reg move (2). */
682 {3, 4, 3}, /* cost of storing integer registers */
683 4, /* cost of reg,reg fld/fst */
684 {4, 4, 12}, /* cost of loading fp registers
685 in SFmode, DFmode and XFmode */
686 {6, 6, 8}, /* cost of storing fp registers
687 in SFmode, DFmode and XFmode */
688 2, /* cost of moving MMX register */
689 {4, 4}, /* cost of loading MMX registers
690 in SImode and DImode */
691 {4, 4}, /* cost of storing MMX registers
692 in SImode and DImode */
693 2, /* cost of moving SSE register */
694 {4, 4, 6}, /* cost of loading SSE registers
695 in SImode, DImode and TImode */
696 {4, 4, 5}, /* cost of storing SSE registers
697 in SImode, DImode and TImode */
698 5, /* MMX or SSE register to integer */
699 64, /* size of l1 cache. */
700 256, /* size of l2 cache. */
701 64, /* size of prefetch block */
702 6, /* number of parallel prefetches */
703 5, /* Branch cost */
704 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
705 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
706 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
707 COSTS_N_INSNS (2), /* cost of FABS instruction. */
708 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
709 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
710 athlon_memcpy,
711 athlon_memset,
712 1, /* scalar_stmt_cost. */
713 1, /* scalar load_cost. */
714 1, /* scalar_store_cost. */
715 1, /* vec_stmt_cost. */
716 1, /* vec_to_scalar_cost. */
717 1, /* scalar_to_vec_cost. */
718 1, /* vec_align_load_cost. */
719 2, /* vec_unalign_load_cost. */
720 1, /* vec_store_cost. */
721 3, /* cond_taken_branch_cost. */
722 1, /* cond_not_taken_branch_cost. */
723 };
724
725 /* K8 has an optimized REP instruction for medium-sized blocks, but for very
726    small blocks it is better to use a loop.  For large blocks, a libcall can
727    do non-temporal accesses and beat inlining considerably.  */
728 static stringop_algs k8_memcpy[2] = {
729 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
730 {-1, rep_prefix_4_byte, false}}},
731 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
732 {-1, libcall, false}}}};
733 static stringop_algs k8_memset[2] = {
734 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
735 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
736 {libcall, {{48, unrolled_loop, false},
737 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
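/* A sketch of how the two elements of these arrays appear to be used, based
   on the DUMMY_STRINGOP_ALGS placeholders earlier in this file: element [0]
   describes the strategy for 32-bit code and element [1] the strategy for
   64-bit code, which is why 32-bit-only processors leave the second slot as
   DUMMY_STRINGOP_ALGS.  Illustration only; the actual selection logic lives
   elsewhere.  */
#if 0
static const stringop_algs *
example_memcpy_algs (void)
{
  /* Hypothetical: assumes a TARGET_64BIT-style flag picks the slot.  */
  return &k8_memcpy[TARGET_64BIT ? 1 : 0];
}
#endif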
738 static const
739 struct processor_costs k8_cost = {
740 COSTS_N_INSNS (1), /* cost of an add instruction */
741 COSTS_N_INSNS (2), /* cost of a lea instruction */
742 COSTS_N_INSNS (1), /* variable shift costs */
743 COSTS_N_INSNS (1), /* constant shift costs */
744 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
745 COSTS_N_INSNS (4), /* HI */
746 COSTS_N_INSNS (3), /* SI */
747 COSTS_N_INSNS (4), /* DI */
748 COSTS_N_INSNS (5)}, /* other */
749 0, /* cost of multiply per each bit set */
750 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
751 COSTS_N_INSNS (26), /* HI */
752 COSTS_N_INSNS (42), /* SI */
753 COSTS_N_INSNS (74), /* DI */
754 COSTS_N_INSNS (74)}, /* other */
755 COSTS_N_INSNS (1), /* cost of movsx */
756 COSTS_N_INSNS (1), /* cost of movzx */
757 8, /* "large" insn */
758 9, /* MOVE_RATIO */
759 4, /* cost for loading QImode using movzbl */
760 {3, 4, 3}, /* cost of loading integer registers
761 in QImode, HImode and SImode.
762 Relative to reg-reg move (2). */
763 {3, 4, 3}, /* cost of storing integer registers */
764 4, /* cost of reg,reg fld/fst */
765 {4, 4, 12}, /* cost of loading fp registers
766 in SFmode, DFmode and XFmode */
767 {6, 6, 8}, /* cost of storing fp registers
768 in SFmode, DFmode and XFmode */
769 2, /* cost of moving MMX register */
770 {3, 3}, /* cost of loading MMX registers
771 in SImode and DImode */
772 {4, 4}, /* cost of storing MMX registers
773 in SImode and DImode */
774 2, /* cost of moving SSE register */
775 {4, 3, 6}, /* cost of loading SSE registers
776 in SImode, DImode and TImode */
777 {4, 4, 5}, /* cost of storing SSE registers
778 in SImode, DImode and TImode */
779 5, /* MMX or SSE register to integer */
780 64, /* size of l1 cache. */
781 512, /* size of l2 cache. */
782 64, /* size of prefetch block */
783   /* New AMD processors never drop prefetches; if they cannot be performed
784      immediately, they are queued.  We set the number of simultaneous
785      prefetches to a large constant to reflect this (it is probably not a
786      good idea to leave the number of prefetches completely unlimited, as
787      their execution also takes some time).  */
788 100, /* number of parallel prefetches */
789 3, /* Branch cost */
790 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
791 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
792 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
793 COSTS_N_INSNS (2), /* cost of FABS instruction. */
794 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
795 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
796
797 k8_memcpy,
798 k8_memset,
799 4, /* scalar_stmt_cost. */
800 2, /* scalar load_cost. */
801 2, /* scalar_store_cost. */
802 5, /* vec_stmt_cost. */
803 0, /* vec_to_scalar_cost. */
804 2, /* scalar_to_vec_cost. */
805 2, /* vec_align_load_cost. */
806 3, /* vec_unalign_load_cost. */
807 3, /* vec_store_cost. */
808 3, /* cond_taken_branch_cost. */
809 2, /* cond_not_taken_branch_cost. */
810 };
811
812 /* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but for
813    very small blocks it is better to use a loop.  For large blocks, a libcall
814    can do non-temporal accesses and beat inlining considerably.  */
815 static stringop_algs amdfam10_memcpy[2] = {
816 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
817 {-1, rep_prefix_4_byte, false}}},
818 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
819 {-1, libcall, false}}}};
820 static stringop_algs amdfam10_memset[2] = {
821 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
822 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
823 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
824 {-1, libcall, false}}}};
825 struct processor_costs amdfam10_cost = {
826 COSTS_N_INSNS (1), /* cost of an add instruction */
827 COSTS_N_INSNS (2), /* cost of a lea instruction */
828 COSTS_N_INSNS (1), /* variable shift costs */
829 COSTS_N_INSNS (1), /* constant shift costs */
830 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
831 COSTS_N_INSNS (4), /* HI */
832 COSTS_N_INSNS (3), /* SI */
833 COSTS_N_INSNS (4), /* DI */
834 COSTS_N_INSNS (5)}, /* other */
835 0, /* cost of multiply per each bit set */
836 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
837 COSTS_N_INSNS (35), /* HI */
838 COSTS_N_INSNS (51), /* SI */
839 COSTS_N_INSNS (83), /* DI */
840 COSTS_N_INSNS (83)}, /* other */
841 COSTS_N_INSNS (1), /* cost of movsx */
842 COSTS_N_INSNS (1), /* cost of movzx */
843 8, /* "large" insn */
844 9, /* MOVE_RATIO */
845 4, /* cost for loading QImode using movzbl */
846 {3, 4, 3}, /* cost of loading integer registers
847 in QImode, HImode and SImode.
848 Relative to reg-reg move (2). */
849 {3, 4, 3}, /* cost of storing integer registers */
850 4, /* cost of reg,reg fld/fst */
851 {4, 4, 12}, /* cost of loading fp registers
852 in SFmode, DFmode and XFmode */
853 {6, 6, 8}, /* cost of storing fp registers
854 in SFmode, DFmode and XFmode */
855 2, /* cost of moving MMX register */
856 {3, 3}, /* cost of loading MMX registers
857 in SImode and DImode */
858 {4, 4}, /* cost of storing MMX registers
859 in SImode and DImode */
860 2, /* cost of moving SSE register */
861 {4, 4, 3}, /* cost of loading SSE registers
862 in SImode, DImode and TImode */
863 {4, 4, 5}, /* cost of storing SSE registers
864 in SImode, DImode and TImode */
865 3, /* MMX or SSE register to integer */
866 /* On K8:
867 MOVD reg64, xmmreg Double FSTORE 4
868 MOVD reg32, xmmreg Double FSTORE 4
869 On AMDFAM10:
870 MOVD reg64, xmmreg Double FADD 3
871 1/1 1/1
872 MOVD reg32, xmmreg Double FADD 3
873 1/1 1/1 */
874 64, /* size of l1 cache. */
875 512, /* size of l2 cache. */
876 64, /* size of prefetch block */
877   /* New AMD processors never drop prefetches; if they cannot be performed
878      immediately, they are queued.  We set the number of simultaneous
879      prefetches to a large constant to reflect this (it is probably not a
880      good idea to leave the number of prefetches completely unlimited, as
881      their execution also takes some time).  */
882 100, /* number of parallel prefetches */
883 2, /* Branch cost */
884 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
885 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
886 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
887 COSTS_N_INSNS (2), /* cost of FABS instruction. */
888 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
889 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
890
891 amdfam10_memcpy,
892 amdfam10_memset,
893 4, /* scalar_stmt_cost. */
894 2, /* scalar load_cost. */
895 2, /* scalar_store_cost. */
896 6, /* vec_stmt_cost. */
897 0, /* vec_to_scalar_cost. */
898 2, /* scalar_to_vec_cost. */
899 2, /* vec_align_load_cost. */
900 2, /* vec_unalign_load_cost. */
901 2, /* vec_store_cost. */
902 2, /* cond_taken_branch_cost. */
903 1, /* cond_not_taken_branch_cost. */
904 };
905
906 /* BDVER1 has an optimized REP instruction for medium-sized blocks, but for
907    very small blocks it is better to use a loop.  For large blocks, a libcall
908    can do non-temporal accesses and beat inlining considerably.  */
909 static stringop_algs bdver1_memcpy[2] = {
910 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
911 {-1, rep_prefix_4_byte, false}}},
912 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
913 {-1, libcall, false}}}};
914 static stringop_algs bdver1_memset[2] = {
915 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
916 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
917 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
918 {-1, libcall, false}}}};
919
920 const struct processor_costs bdver1_cost = {
921 COSTS_N_INSNS (1), /* cost of an add instruction */
922 COSTS_N_INSNS (1), /* cost of a lea instruction */
923 COSTS_N_INSNS (1), /* variable shift costs */
924 COSTS_N_INSNS (1), /* constant shift costs */
925 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
926 COSTS_N_INSNS (4), /* HI */
927 COSTS_N_INSNS (4), /* SI */
928 COSTS_N_INSNS (6), /* DI */
929 COSTS_N_INSNS (6)}, /* other */
930 0, /* cost of multiply per each bit set */
931 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
932 COSTS_N_INSNS (35), /* HI */
933 COSTS_N_INSNS (51), /* SI */
934 COSTS_N_INSNS (83), /* DI */
935 COSTS_N_INSNS (83)}, /* other */
936 COSTS_N_INSNS (1), /* cost of movsx */
937 COSTS_N_INSNS (1), /* cost of movzx */
938 8, /* "large" insn */
939 9, /* MOVE_RATIO */
940 4, /* cost for loading QImode using movzbl */
941 {5, 5, 4}, /* cost of loading integer registers
942 in QImode, HImode and SImode.
943 Relative to reg-reg move (2). */
944 {4, 4, 4}, /* cost of storing integer registers */
945 2, /* cost of reg,reg fld/fst */
946 {5, 5, 12}, /* cost of loading fp registers
947 in SFmode, DFmode and XFmode */
948 {4, 4, 8}, /* cost of storing fp registers
949 in SFmode, DFmode and XFmode */
950 2, /* cost of moving MMX register */
951 {4, 4}, /* cost of loading MMX registers
952 in SImode and DImode */
953 {4, 4}, /* cost of storing MMX registers
954 in SImode and DImode */
955 2, /* cost of moving SSE register */
956 {4, 4, 4}, /* cost of loading SSE registers
957 in SImode, DImode and TImode */
958 {4, 4, 4}, /* cost of storing SSE registers
959 in SImode, DImode and TImode */
960 2, /* MMX or SSE register to integer */
961 /* On K8:
962 MOVD reg64, xmmreg Double FSTORE 4
963 MOVD reg32, xmmreg Double FSTORE 4
964 On AMDFAM10:
965 MOVD reg64, xmmreg Double FADD 3
966 1/1 1/1
967 MOVD reg32, xmmreg Double FADD 3
968 1/1 1/1 */
969 16, /* size of l1 cache. */
970 2048, /* size of l2 cache. */
971 64, /* size of prefetch block */
972   /* New AMD processors never drop prefetches; if they cannot be performed
973      immediately, they are queued.  We set the number of simultaneous
974      prefetches to a large constant to reflect this (it is probably not a
975      good idea to leave the number of prefetches completely unlimited, as
976      their execution also takes some time).  */
977 100, /* number of parallel prefetches */
978 2, /* Branch cost */
979 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
980 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
981 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
982 COSTS_N_INSNS (2), /* cost of FABS instruction. */
983 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
984 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
985
986 bdver1_memcpy,
987 bdver1_memset,
988 6, /* scalar_stmt_cost. */
989 4, /* scalar load_cost. */
990 4, /* scalar_store_cost. */
991 6, /* vec_stmt_cost. */
992 0, /* vec_to_scalar_cost. */
993 2, /* scalar_to_vec_cost. */
994 4, /* vec_align_load_cost. */
995 4, /* vec_unalign_load_cost. */
996 4, /* vec_store_cost. */
997 2, /* cond_taken_branch_cost. */
998 1, /* cond_not_taken_branch_cost. */
999 };
1000
1001 /* BDVER2 has an optimized REP instruction for medium-sized blocks, but for
1002    very small blocks it is better to use a loop.  For large blocks, a libcall
1003    can do non-temporal accesses and beat inlining considerably.  */
1004
1005 static stringop_algs bdver2_memcpy[2] = {
1006 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1007 {-1, rep_prefix_4_byte, false}}},
1008 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1009 {-1, libcall, false}}}};
1010 static stringop_algs bdver2_memset[2] = {
1011 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1012 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1013 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1014 {-1, libcall, false}}}};
1015
1016 const struct processor_costs bdver2_cost = {
1017 COSTS_N_INSNS (1), /* cost of an add instruction */
1018 COSTS_N_INSNS (1), /* cost of a lea instruction */
1019 COSTS_N_INSNS (1), /* variable shift costs */
1020 COSTS_N_INSNS (1), /* constant shift costs */
1021 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1022 COSTS_N_INSNS (4), /* HI */
1023 COSTS_N_INSNS (4), /* SI */
1024 COSTS_N_INSNS (6), /* DI */
1025 COSTS_N_INSNS (6)}, /* other */
1026 0, /* cost of multiply per each bit set */
1027 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1028 COSTS_N_INSNS (35), /* HI */
1029 COSTS_N_INSNS (51), /* SI */
1030 COSTS_N_INSNS (83), /* DI */
1031 COSTS_N_INSNS (83)}, /* other */
1032 COSTS_N_INSNS (1), /* cost of movsx */
1033 COSTS_N_INSNS (1), /* cost of movzx */
1034 8, /* "large" insn */
1035 9, /* MOVE_RATIO */
1036 4, /* cost for loading QImode using movzbl */
1037 {5, 5, 4}, /* cost of loading integer registers
1038 in QImode, HImode and SImode.
1039 Relative to reg-reg move (2). */
1040 {4, 4, 4}, /* cost of storing integer registers */
1041 2, /* cost of reg,reg fld/fst */
1042 {5, 5, 12}, /* cost of loading fp registers
1043 in SFmode, DFmode and XFmode */
1044 {4, 4, 8}, /* cost of storing fp registers
1045 in SFmode, DFmode and XFmode */
1046 2, /* cost of moving MMX register */
1047 {4, 4}, /* cost of loading MMX registers
1048 in SImode and DImode */
1049 {4, 4}, /* cost of storing MMX registers
1050 in SImode and DImode */
1051 2, /* cost of moving SSE register */
1052 {4, 4, 4}, /* cost of loading SSE registers
1053 in SImode, DImode and TImode */
1054 {4, 4, 4}, /* cost of storing SSE registers
1055 in SImode, DImode and TImode */
1056 2, /* MMX or SSE register to integer */
1057 /* On K8:
1058 MOVD reg64, xmmreg Double FSTORE 4
1059 MOVD reg32, xmmreg Double FSTORE 4
1060 On AMDFAM10:
1061 MOVD reg64, xmmreg Double FADD 3
1062 1/1 1/1
1063 MOVD reg32, xmmreg Double FADD 3
1064 1/1 1/1 */
1065 16, /* size of l1 cache. */
1066 2048, /* size of l2 cache. */
1067 64, /* size of prefetch block */
1068   /* New AMD processors never drop prefetches; if they cannot be performed
1069      immediately, they are queued.  We set the number of simultaneous
1070      prefetches to a large constant to reflect this (it is probably not a
1071      good idea to leave the number of prefetches completely unlimited, as
1072      their execution also takes some time).  */
1073 100, /* number of parallel prefetches */
1074 2, /* Branch cost */
1075 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1076 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1077 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1078 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1079 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1080 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1081
1082 bdver2_memcpy,
1083 bdver2_memset,
1084 6, /* scalar_stmt_cost. */
1085 4, /* scalar load_cost. */
1086 4, /* scalar_store_cost. */
1087 6, /* vec_stmt_cost. */
1088 0, /* vec_to_scalar_cost. */
1089 2, /* scalar_to_vec_cost. */
1090 4, /* vec_align_load_cost. */
1091 4, /* vec_unalign_load_cost. */
1092 4, /* vec_store_cost. */
1093 2, /* cond_taken_branch_cost. */
1094 1, /* cond_not_taken_branch_cost. */
1095 };
1096
1097
1098 /* BDVER3 has an optimized REP instruction for medium-sized blocks, but for
1099    very small blocks it is better to use a loop.  For large blocks, a libcall
1100    can do non-temporal accesses and beat inlining considerably.  */
1101 static stringop_algs bdver3_memcpy[2] = {
1102 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1103 {-1, rep_prefix_4_byte, false}}},
1104 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1105 {-1, libcall, false}}}};
1106 static stringop_algs bdver3_memset[2] = {
1107 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1108 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1109 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1110 {-1, libcall, false}}}};
1111 struct processor_costs bdver3_cost = {
1112 COSTS_N_INSNS (1), /* cost of an add instruction */
1113 COSTS_N_INSNS (1), /* cost of a lea instruction */
1114 COSTS_N_INSNS (1), /* variable shift costs */
1115 COSTS_N_INSNS (1), /* constant shift costs */
1116 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1117 COSTS_N_INSNS (4), /* HI */
1118 COSTS_N_INSNS (4), /* SI */
1119 COSTS_N_INSNS (6), /* DI */
1120 COSTS_N_INSNS (6)}, /* other */
1121 0, /* cost of multiply per each bit set */
1122 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1123 COSTS_N_INSNS (35), /* HI */
1124 COSTS_N_INSNS (51), /* SI */
1125 COSTS_N_INSNS (83), /* DI */
1126 COSTS_N_INSNS (83)}, /* other */
1127 COSTS_N_INSNS (1), /* cost of movsx */
1128 COSTS_N_INSNS (1), /* cost of movzx */
1129 8, /* "large" insn */
1130 9, /* MOVE_RATIO */
1131 4, /* cost for loading QImode using movzbl */
1132 {5, 5, 4}, /* cost of loading integer registers
1133 in QImode, HImode and SImode.
1134 Relative to reg-reg move (2). */
1135 {4, 4, 4}, /* cost of storing integer registers */
1136 2, /* cost of reg,reg fld/fst */
1137 {5, 5, 12}, /* cost of loading fp registers
1138 in SFmode, DFmode and XFmode */
1139 {4, 4, 8}, /* cost of storing fp registers
1140 in SFmode, DFmode and XFmode */
1141 2, /* cost of moving MMX register */
1142 {4, 4}, /* cost of loading MMX registers
1143 in SImode and DImode */
1144 {4, 4}, /* cost of storing MMX registers
1145 in SImode and DImode */
1146 2, /* cost of moving SSE register */
1147 {4, 4, 4}, /* cost of loading SSE registers
1148 in SImode, DImode and TImode */
1149 {4, 4, 4}, /* cost of storing SSE registers
1150 in SImode, DImode and TImode */
1151 2, /* MMX or SSE register to integer */
1152 16, /* size of l1 cache. */
1153 2048, /* size of l2 cache. */
1154 64, /* size of prefetch block */
1155   /* New AMD processors never drop prefetches; if they cannot be performed
1156      immediately, they are queued.  We set the number of simultaneous
1157      prefetches to a large constant to reflect this (it is probably not a
1158      good idea to leave the number of prefetches completely unlimited, as
1159      their execution also takes some time).  */
1160 100, /* number of parallel prefetches */
1161 2, /* Branch cost */
1162 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1163 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1164 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1165 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1166 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1167 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1168
1169 bdver3_memcpy,
1170 bdver3_memset,
1171 6, /* scalar_stmt_cost. */
1172 4, /* scalar load_cost. */
1173 4, /* scalar_store_cost. */
1174 6, /* vec_stmt_cost. */
1175 0, /* vec_to_scalar_cost. */
1176 2, /* scalar_to_vec_cost. */
1177 4, /* vec_align_load_cost. */
1178 4, /* vec_unalign_load_cost. */
1179 4, /* vec_store_cost. */
1180 2, /* cond_taken_branch_cost. */
1181 1, /* cond_not_taken_branch_cost. */
1182 };
1183
1184 /* BDVER4 has an optimized REP instruction for medium-sized blocks, but for
1185    very small blocks it is better to use a loop.  For large blocks, a libcall
1186    can do non-temporal accesses and beat inlining considerably.  */
1187 static stringop_algs bdver4_memcpy[2] = {
1188 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1189 {-1, rep_prefix_4_byte, false}}},
1190 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1191 {-1, libcall, false}}}};
1192 static stringop_algs bdver4_memset[2] = {
1193 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1194 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1195 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1196 {-1, libcall, false}}}};
1197 struct processor_costs bdver4_cost = {
1198 COSTS_N_INSNS (1), /* cost of an add instruction */
1199 COSTS_N_INSNS (1), /* cost of a lea instruction */
1200 COSTS_N_INSNS (1), /* variable shift costs */
1201 COSTS_N_INSNS (1), /* constant shift costs */
1202 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1203 COSTS_N_INSNS (4), /* HI */
1204 COSTS_N_INSNS (4), /* SI */
1205 COSTS_N_INSNS (6), /* DI */
1206 COSTS_N_INSNS (6)}, /* other */
1207 0, /* cost of multiply per each bit set */
1208 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1209 COSTS_N_INSNS (35), /* HI */
1210 COSTS_N_INSNS (51), /* SI */
1211 COSTS_N_INSNS (83), /* DI */
1212 COSTS_N_INSNS (83)}, /* other */
1213 COSTS_N_INSNS (1), /* cost of movsx */
1214 COSTS_N_INSNS (1), /* cost of movzx */
1215 8, /* "large" insn */
1216 9, /* MOVE_RATIO */
1217 4, /* cost for loading QImode using movzbl */
1218 {5, 5, 4}, /* cost of loading integer registers
1219 in QImode, HImode and SImode.
1220 Relative to reg-reg move (2). */
1221 {4, 4, 4}, /* cost of storing integer registers */
1222 2, /* cost of reg,reg fld/fst */
1223 {5, 5, 12}, /* cost of loading fp registers
1224 in SFmode, DFmode and XFmode */
1225 {4, 4, 8}, /* cost of storing fp registers
1226 in SFmode, DFmode and XFmode */
1227 2, /* cost of moving MMX register */
1228 {4, 4}, /* cost of loading MMX registers
1229 in SImode and DImode */
1230 {4, 4}, /* cost of storing MMX registers
1231 in SImode and DImode */
1232 2, /* cost of moving SSE register */
1233 {4, 4, 4}, /* cost of loading SSE registers
1234 in SImode, DImode and TImode */
1235 {4, 4, 4}, /* cost of storing SSE registers
1236 in SImode, DImode and TImode */
1237 2, /* MMX or SSE register to integer */
1238 16, /* size of l1 cache. */
1239 2048, /* size of l2 cache. */
1240 64, /* size of prefetch block */
1241   /* New AMD processors never drop prefetches; if they cannot be performed
1242      immediately, they are queued.  We set the number of simultaneous
1243      prefetches to a large constant to reflect this (it is probably not a
1244      good idea to leave the number of prefetches completely unlimited, as
1245      their execution also takes some time).  */
1246 100, /* number of parallel prefetches */
1247 2, /* Branch cost */
1248 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1249 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1250 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1251 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1252 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1253 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1254
1255 bdver4_memcpy,
1256 bdver4_memset,
1257 6, /* scalar_stmt_cost. */
1258 4, /* scalar load_cost. */
1259 4, /* scalar_store_cost. */
1260 6, /* vec_stmt_cost. */
1261 0, /* vec_to_scalar_cost. */
1262 2, /* scalar_to_vec_cost. */
1263 4, /* vec_align_load_cost. */
1264 4, /* vec_unalign_load_cost. */
1265 4, /* vec_store_cost. */
1266 2, /* cond_taken_branch_cost. */
1267 1, /* cond_not_taken_branch_cost. */
1268 };
1269
1270 /* BTVER1 has an optimized REP instruction for medium-sized blocks, but for
1271    very small blocks it is better to use a loop.  For large blocks, a libcall
1272    can do non-temporal accesses and beat inlining considerably.  */
1273 static stringop_algs btver1_memcpy[2] = {
1274 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1275 {-1, rep_prefix_4_byte, false}}},
1276 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1277 {-1, libcall, false}}}};
1278 static stringop_algs btver1_memset[2] = {
1279 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1280 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1281 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1282 {-1, libcall, false}}}};
1283 const struct processor_costs btver1_cost = {
1284 COSTS_N_INSNS (1), /* cost of an add instruction */
1285 COSTS_N_INSNS (2), /* cost of a lea instruction */
1286 COSTS_N_INSNS (1), /* variable shift costs */
1287 COSTS_N_INSNS (1), /* constant shift costs */
1288 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1289 COSTS_N_INSNS (4), /* HI */
1290 COSTS_N_INSNS (3), /* SI */
1291 COSTS_N_INSNS (4), /* DI */
1292 COSTS_N_INSNS (5)}, /* other */
1293 0, /* cost of multiply per each bit set */
1294 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1295 COSTS_N_INSNS (35), /* HI */
1296 COSTS_N_INSNS (51), /* SI */
1297 COSTS_N_INSNS (83), /* DI */
1298 COSTS_N_INSNS (83)}, /* other */
1299 COSTS_N_INSNS (1), /* cost of movsx */
1300 COSTS_N_INSNS (1), /* cost of movzx */
1301 8, /* "large" insn */
1302 9, /* MOVE_RATIO */
1303 4, /* cost for loading QImode using movzbl */
1304 {3, 4, 3}, /* cost of loading integer registers
1305 in QImode, HImode and SImode.
1306 Relative to reg-reg move (2). */
1307 {3, 4, 3}, /* cost of storing integer registers */
1308 4, /* cost of reg,reg fld/fst */
1309 {4, 4, 12}, /* cost of loading fp registers
1310 in SFmode, DFmode and XFmode */
1311 {6, 6, 8}, /* cost of storing fp registers
1312 in SFmode, DFmode and XFmode */
1313 2, /* cost of moving MMX register */
1314 {3, 3}, /* cost of loading MMX registers
1315 in SImode and DImode */
1316 {4, 4}, /* cost of storing MMX registers
1317 in SImode and DImode */
1318 2, /* cost of moving SSE register */
1319 {4, 4, 3}, /* cost of loading SSE registers
1320 in SImode, DImode and TImode */
1321 {4, 4, 5}, /* cost of storing SSE registers
1322 in SImode, DImode and TImode */
1323 3, /* MMX or SSE register to integer */
1324 /* On K8:
1325 MOVD reg64, xmmreg Double FSTORE 4
1326 MOVD reg32, xmmreg Double FSTORE 4
1327 On AMDFAM10:
1328 MOVD reg64, xmmreg Double FADD 3
1329 1/1 1/1
1330 MOVD reg32, xmmreg Double FADD 3
1331 1/1 1/1 */
1332 32, /* size of l1 cache. */
1333 512, /* size of l2 cache. */
1334 64, /* size of prefetch block */
1335 100, /* number of parallel prefetches */
1336 2, /* Branch cost */
1337 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1338 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1339 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1340 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1341 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1342 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1343
1344 btver1_memcpy,
1345 btver1_memset,
1346 4, /* scalar_stmt_cost. */
1347 2, /* scalar load_cost. */
1348 2, /* scalar_store_cost. */
1349 6, /* vec_stmt_cost. */
1350 0, /* vec_to_scalar_cost. */
1351 2, /* scalar_to_vec_cost. */
1352 2, /* vec_align_load_cost. */
1353 2, /* vec_unalign_load_cost. */
1354 2, /* vec_store_cost. */
1355 2, /* cond_taken_branch_cost. */
1356 1, /* cond_not_taken_branch_cost. */
1357 };
1358
1359 static stringop_algs btver2_memcpy[2] = {
1360 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1361 {-1, rep_prefix_4_byte, false}}},
1362 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1363 {-1, libcall, false}}}};
1364 static stringop_algs btver2_memset[2] = {
1365 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1366 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1367 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1368 {-1, libcall, false}}}};
1369 const struct processor_costs btver2_cost = {
1370 COSTS_N_INSNS (1), /* cost of an add instruction */
1371 COSTS_N_INSNS (2), /* cost of a lea instruction */
1372 COSTS_N_INSNS (1), /* variable shift costs */
1373 COSTS_N_INSNS (1), /* constant shift costs */
1374 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1375 COSTS_N_INSNS (4), /* HI */
1376 COSTS_N_INSNS (3), /* SI */
1377 COSTS_N_INSNS (4), /* DI */
1378 COSTS_N_INSNS (5)}, /* other */
1379 0, /* cost of multiply per each bit set */
1380 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1381 COSTS_N_INSNS (35), /* HI */
1382 COSTS_N_INSNS (51), /* SI */
1383 COSTS_N_INSNS (83), /* DI */
1384 COSTS_N_INSNS (83)}, /* other */
1385 COSTS_N_INSNS (1), /* cost of movsx */
1386 COSTS_N_INSNS (1), /* cost of movzx */
1387 8, /* "large" insn */
1388 9, /* MOVE_RATIO */
1389 4, /* cost for loading QImode using movzbl */
1390 {3, 4, 3}, /* cost of loading integer registers
1391 in QImode, HImode and SImode.
1392 Relative to reg-reg move (2). */
1393 {3, 4, 3}, /* cost of storing integer registers */
1394 4, /* cost of reg,reg fld/fst */
1395 {4, 4, 12}, /* cost of loading fp registers
1396 in SFmode, DFmode and XFmode */
1397 {6, 6, 8}, /* cost of storing fp registers
1398 in SFmode, DFmode and XFmode */
1399 2, /* cost of moving MMX register */
1400 {3, 3}, /* cost of loading MMX registers
1401 in SImode and DImode */
1402 {4, 4}, /* cost of storing MMX registers
1403 in SImode and DImode */
1404 2, /* cost of moving SSE register */
1405 {4, 4, 3}, /* cost of loading SSE registers
1406 in SImode, DImode and TImode */
1407 {4, 4, 5}, /* cost of storing SSE registers
1408 in SImode, DImode and TImode */
1409 3, /* MMX or SSE register to integer */
1410 /* On K8:
1411 MOVD reg64, xmmreg Double FSTORE 4
1412 MOVD reg32, xmmreg Double FSTORE 4
1413 On AMDFAM10:
1414 MOVD reg64, xmmreg Double FADD 3
1415 1/1 1/1
1416 MOVD reg32, xmmreg Double FADD 3
1417 1/1 1/1 */
1418 32, /* size of l1 cache. */
1419 2048, /* size of l2 cache. */
1420 64, /* size of prefetch block */
1421 100, /* number of parallel prefetches */
1422 2, /* Branch cost */
1423 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1424 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1425 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1426 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1427 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1428 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1429 btver2_memcpy,
1430 btver2_memset,
1431 4, /* scalar_stmt_cost. */
1432 2, /* scalar load_cost. */
1433 2, /* scalar_store_cost. */
1434 6, /* vec_stmt_cost. */
1435 0, /* vec_to_scalar_cost. */
1436 2, /* scalar_to_vec_cost. */
1437 2, /* vec_align_load_cost. */
1438 2, /* vec_unalign_load_cost. */
1439 2, /* vec_store_cost. */
1440 2, /* cond_taken_branch_cost. */
1441 1, /* cond_not_taken_branch_cost. */
1442 };
1443
1444 static stringop_algs pentium4_memcpy[2] = {
1445 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1446 DUMMY_STRINGOP_ALGS};
1447 static stringop_algs pentium4_memset[2] = {
1448 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1449 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1450 DUMMY_STRINGOP_ALGS};
1451
1452 static const
1453 struct processor_costs pentium4_cost = {
1454 COSTS_N_INSNS (1), /* cost of an add instruction */
1455 COSTS_N_INSNS (3), /* cost of a lea instruction */
1456 COSTS_N_INSNS (4), /* variable shift costs */
1457 COSTS_N_INSNS (4), /* constant shift costs */
1458 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
1459 COSTS_N_INSNS (15), /* HI */
1460 COSTS_N_INSNS (15), /* SI */
1461 COSTS_N_INSNS (15), /* DI */
1462 COSTS_N_INSNS (15)}, /* other */
1463 0, /* cost of multiply per each bit set */
1464 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
1465 COSTS_N_INSNS (56), /* HI */
1466 COSTS_N_INSNS (56), /* SI */
1467 COSTS_N_INSNS (56), /* DI */
1468 COSTS_N_INSNS (56)}, /* other */
1469 COSTS_N_INSNS (1), /* cost of movsx */
1470 COSTS_N_INSNS (1), /* cost of movzx */
1471 16, /* "large" insn */
1472 6, /* MOVE_RATIO */
1473 2, /* cost for loading QImode using movzbl */
1474 {4, 5, 4}, /* cost of loading integer registers
1475 in QImode, HImode and SImode.
1476 Relative to reg-reg move (2). */
1477 {2, 3, 2}, /* cost of storing integer registers */
1478 2, /* cost of reg,reg fld/fst */
1479 {2, 2, 6}, /* cost of loading fp registers
1480 in SFmode, DFmode and XFmode */
1481 {4, 4, 6}, /* cost of storing fp registers
1482 in SFmode, DFmode and XFmode */
1483 2, /* cost of moving MMX register */
1484 {2, 2}, /* cost of loading MMX registers
1485 in SImode and DImode */
1486 {2, 2}, /* cost of storing MMX registers
1487 in SImode and DImode */
1488 12, /* cost of moving SSE register */
1489 {12, 12, 12}, /* cost of loading SSE registers
1490 in SImode, DImode and TImode */
1491 {2, 2, 8}, /* cost of storing SSE registers
1492 in SImode, DImode and TImode */
1493 10, /* MMX or SSE register to integer */
1494 8, /* size of l1 cache. */
1495 256, /* size of l2 cache. */
1496 64, /* size of prefetch block */
1497 6, /* number of parallel prefetches */
1498 2, /* Branch cost */
1499 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1500 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
1501 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
1502 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1503 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1504 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
1505 pentium4_memcpy,
1506 pentium4_memset,
1507 1, /* scalar_stmt_cost. */
1508 1, /* scalar load_cost. */
1509 1, /* scalar_store_cost. */
1510 1, /* vec_stmt_cost. */
1511 1, /* vec_to_scalar_cost. */
1512 1, /* scalar_to_vec_cost. */
1513 1, /* vec_align_load_cost. */
1514 2, /* vec_unalign_load_cost. */
1515 1, /* vec_store_cost. */
1516 3, /* cond_taken_branch_cost. */
1517 1, /* cond_not_taken_branch_cost. */
1518 };
1519
1520 static stringop_algs nocona_memcpy[2] = {
1521 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1522 {libcall, {{32, loop, false}, {20000, rep_prefix_8_byte, false},
1523 {100000, unrolled_loop, false}, {-1, libcall, false}}}};
1524
1525 static stringop_algs nocona_memset[2] = {
1526 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1527 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1528 {libcall, {{24, loop, false}, {64, unrolled_loop, false},
1529 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1530
1531 static const
1532 struct processor_costs nocona_cost = {
1533 COSTS_N_INSNS (1), /* cost of an add instruction */
1534 COSTS_N_INSNS (1), /* cost of a lea instruction */
1535 COSTS_N_INSNS (1), /* variable shift costs */
1536 COSTS_N_INSNS (1), /* constant shift costs */
1537 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
1538 COSTS_N_INSNS (10), /* HI */
1539 COSTS_N_INSNS (10), /* SI */
1540 COSTS_N_INSNS (10), /* DI */
1541 COSTS_N_INSNS (10)}, /* other */
1542 0, /* cost of multiply per each bit set */
1543 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
1544 COSTS_N_INSNS (66), /* HI */
1545 COSTS_N_INSNS (66), /* SI */
1546 COSTS_N_INSNS (66), /* DI */
1547 COSTS_N_INSNS (66)}, /* other */
1548 COSTS_N_INSNS (1), /* cost of movsx */
1549 COSTS_N_INSNS (1), /* cost of movzx */
1550 16, /* "large" insn */
1551 17, /* MOVE_RATIO */
1552 4, /* cost for loading QImode using movzbl */
1553 {4, 4, 4}, /* cost of loading integer registers
1554 in QImode, HImode and SImode.
1555 Relative to reg-reg move (2). */
1556 {4, 4, 4}, /* cost of storing integer registers */
1557 3, /* cost of reg,reg fld/fst */
1558 {12, 12, 12}, /* cost of loading fp registers
1559 in SFmode, DFmode and XFmode */
1560 {4, 4, 4}, /* cost of storing fp registers
1561 in SFmode, DFmode and XFmode */
1562 6, /* cost of moving MMX register */
1563 {12, 12}, /* cost of loading MMX registers
1564 in SImode and DImode */
1565 {12, 12}, /* cost of storing MMX registers
1566 in SImode and DImode */
1567 6, /* cost of moving SSE register */
1568 {12, 12, 12}, /* cost of loading SSE registers
1569 in SImode, DImode and TImode */
1570 {12, 12, 12}, /* cost of storing SSE registers
1571 in SImode, DImode and TImode */
1572 8, /* MMX or SSE register to integer */
1573 8, /* size of l1 cache. */
1574 1024, /* size of l2 cache. */
1575 64, /* size of prefetch block */
1576 8, /* number of parallel prefetches */
1577 1, /* Branch cost */
1578 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1579 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1580 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1581 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1582 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1583 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
1584 nocona_memcpy,
1585 nocona_memset,
1586 1, /* scalar_stmt_cost. */
1587 1, /* scalar load_cost. */
1588 1, /* scalar_store_cost. */
1589 1, /* vec_stmt_cost. */
1590 1, /* vec_to_scalar_cost. */
1591 1, /* scalar_to_vec_cost. */
1592 1, /* vec_align_load_cost. */
1593 2, /* vec_unalign_load_cost. */
1594 1, /* vec_store_cost. */
1595 3, /* cond_taken_branch_cost. */
1596 1, /* cond_not_taken_branch_cost. */
1597 };
1598
1599 static stringop_algs atom_memcpy[2] = {
1600 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1601 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1602 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1603 static stringop_algs atom_memset[2] = {
1604 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1605 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1606 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1607 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1608 static const
1609 struct processor_costs atom_cost = {
1610 COSTS_N_INSNS (1), /* cost of an add instruction */
1611 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1612 COSTS_N_INSNS (1), /* variable shift costs */
1613 COSTS_N_INSNS (1), /* constant shift costs */
1614 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1615 COSTS_N_INSNS (4), /* HI */
1616 COSTS_N_INSNS (3), /* SI */
1617 COSTS_N_INSNS (4), /* DI */
1618 COSTS_N_INSNS (2)}, /* other */
1619 0, /* cost of multiply per each bit set */
1620 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1621 COSTS_N_INSNS (26), /* HI */
1622 COSTS_N_INSNS (42), /* SI */
1623 COSTS_N_INSNS (74), /* DI */
1624 COSTS_N_INSNS (74)}, /* other */
1625 COSTS_N_INSNS (1), /* cost of movsx */
1626 COSTS_N_INSNS (1), /* cost of movzx */
1627 8, /* "large" insn */
1628 17, /* MOVE_RATIO */
1629 4, /* cost for loading QImode using movzbl */
1630 {4, 4, 4}, /* cost of loading integer registers
1631 in QImode, HImode and SImode.
1632 Relative to reg-reg move (2). */
1633 {4, 4, 4}, /* cost of storing integer registers */
1634 4, /* cost of reg,reg fld/fst */
1635 {12, 12, 12}, /* cost of loading fp registers
1636 in SFmode, DFmode and XFmode */
1637 {6, 6, 8}, /* cost of storing fp registers
1638 in SFmode, DFmode and XFmode */
1639 2, /* cost of moving MMX register */
1640 {8, 8}, /* cost of loading MMX registers
1641 in SImode and DImode */
1642 {8, 8}, /* cost of storing MMX registers
1643 in SImode and DImode */
1644 2, /* cost of moving SSE register */
1645 {8, 8, 8}, /* cost of loading SSE registers
1646 in SImode, DImode and TImode */
1647 {8, 8, 8}, /* cost of storing SSE registers
1648 in SImode, DImode and TImode */
1649 5, /* MMX or SSE register to integer */
1650 32, /* size of l1 cache. */
1651 256, /* size of l2 cache. */
1652 64, /* size of prefetch block */
1653 6, /* number of parallel prefetches */
1654 3, /* Branch cost */
1655 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1656 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1657 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1658 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1659 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1660 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1661 atom_memcpy,
1662 atom_memset,
1663 1, /* scalar_stmt_cost. */
1664 1, /* scalar load_cost. */
1665 1, /* scalar_store_cost. */
1666 1, /* vec_stmt_cost. */
1667 1, /* vec_to_scalar_cost. */
1668 1, /* scalar_to_vec_cost. */
1669 1, /* vec_align_load_cost. */
1670 2, /* vec_unalign_load_cost. */
1671 1, /* vec_store_cost. */
1672 3, /* cond_taken_branch_cost. */
1673 1, /* cond_not_taken_branch_cost. */
1674 };
1675
1676 static stringop_algs slm_memcpy[2] = {
1677 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1678 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1679 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1680 static stringop_algs slm_memset[2] = {
1681 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1682 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1683 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1684 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1685 static const
1686 struct processor_costs slm_cost = {
1687 COSTS_N_INSNS (1), /* cost of an add instruction */
1688 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1689 COSTS_N_INSNS (1), /* variable shift costs */
1690 COSTS_N_INSNS (1), /* constant shift costs */
1691 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1692 COSTS_N_INSNS (3), /* HI */
1693 COSTS_N_INSNS (3), /* SI */
1694 COSTS_N_INSNS (4), /* DI */
1695 COSTS_N_INSNS (2)}, /* other */
1696 0, /* cost of multiply per each bit set */
1697 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1698 COSTS_N_INSNS (26), /* HI */
1699 COSTS_N_INSNS (42), /* SI */
1700 COSTS_N_INSNS (74), /* DI */
1701 COSTS_N_INSNS (74)}, /* other */
1702 COSTS_N_INSNS (1), /* cost of movsx */
1703 COSTS_N_INSNS (1), /* cost of movzx */
1704 8, /* "large" insn */
1705 17, /* MOVE_RATIO */
1706 4, /* cost for loading QImode using movzbl */
1707 {4, 4, 4}, /* cost of loading integer registers
1708 in QImode, HImode and SImode.
1709 Relative to reg-reg move (2). */
1710 {4, 4, 4}, /* cost of storing integer registers */
1711 4, /* cost of reg,reg fld/fst */
1712 {12, 12, 12}, /* cost of loading fp registers
1713 in SFmode, DFmode and XFmode */
1714 {6, 6, 8}, /* cost of storing fp registers
1715 in SFmode, DFmode and XFmode */
1716 2, /* cost of moving MMX register */
1717 {8, 8}, /* cost of loading MMX registers
1718 in SImode and DImode */
1719 {8, 8}, /* cost of storing MMX registers
1720 in SImode and DImode */
1721 2, /* cost of moving SSE register */
1722 {8, 8, 8}, /* cost of loading SSE registers
1723 in SImode, DImode and TImode */
1724 {8, 8, 8}, /* cost of storing SSE registers
1725 in SImode, DImode and TImode */
1726 5, /* MMX or SSE register to integer */
1727 32, /* size of l1 cache. */
1728 256, /* size of l2 cache. */
1729 64, /* size of prefetch block */
1730 6, /* number of parallel prefetches */
1731 3, /* Branch cost */
1732 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1733 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1734 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1735 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1736 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1737 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1738 slm_memcpy,
1739 slm_memset,
1740 1, /* scalar_stmt_cost. */
1741 1, /* scalar load_cost. */
1742 1, /* scalar_store_cost. */
1743 1, /* vec_stmt_cost. */
1744 4, /* vec_to_scalar_cost. */
1745 1, /* scalar_to_vec_cost. */
1746 1, /* vec_align_load_cost. */
1747 2, /* vec_unalign_load_cost. */
1748 1, /* vec_store_cost. */
1749 3, /* cond_taken_branch_cost. */
1750 1, /* cond_not_taken_branch_cost. */
1751 };
1752
1753 static stringop_algs intel_memcpy[2] = {
1754 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1755 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1756 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1757 static stringop_algs intel_memset[2] = {
1758 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1759 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1760 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1761 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1762 static const
1763 struct processor_costs intel_cost = {
1764 COSTS_N_INSNS (1), /* cost of an add instruction */
1765 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1766 COSTS_N_INSNS (1), /* variable shift costs */
1767 COSTS_N_INSNS (1), /* constant shift costs */
1768 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1769 COSTS_N_INSNS (3), /* HI */
1770 COSTS_N_INSNS (3), /* SI */
1771 COSTS_N_INSNS (4), /* DI */
1772 COSTS_N_INSNS (2)}, /* other */
1773 0, /* cost of multiply per each bit set */
1774 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1775 COSTS_N_INSNS (26), /* HI */
1776 COSTS_N_INSNS (42), /* SI */
1777 COSTS_N_INSNS (74), /* DI */
1778 COSTS_N_INSNS (74)}, /* other */
1779 COSTS_N_INSNS (1), /* cost of movsx */
1780 COSTS_N_INSNS (1), /* cost of movzx */
1781 8, /* "large" insn */
1782 17, /* MOVE_RATIO */
1783 4, /* cost for loading QImode using movzbl */
1784 {4, 4, 4}, /* cost of loading integer registers
1785 in QImode, HImode and SImode.
1786 Relative to reg-reg move (2). */
1787 {4, 4, 4}, /* cost of storing integer registers */
1788 4, /* cost of reg,reg fld/fst */
1789 {12, 12, 12}, /* cost of loading fp registers
1790 in SFmode, DFmode and XFmode */
1791 {6, 6, 8}, /* cost of storing fp registers
1792 in SFmode, DFmode and XFmode */
1793 2, /* cost of moving MMX register */
1794 {8, 8}, /* cost of loading MMX registers
1795 in SImode and DImode */
1796 {8, 8}, /* cost of storing MMX registers
1797 in SImode and DImode */
1798 2, /* cost of moving SSE register */
1799 {8, 8, 8}, /* cost of loading SSE registers
1800 in SImode, DImode and TImode */
1801 {8, 8, 8}, /* cost of storing SSE registers
1802 in SImode, DImode and TImode */
1803 5, /* MMX or SSE register to integer */
1804 32, /* size of l1 cache. */
1805 256, /* size of l2 cache. */
1806 64, /* size of prefetch block */
1807 6, /* number of parallel prefetches */
1808 3, /* Branch cost */
1809 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1810 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1811 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1812 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1813 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1814 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1815 intel_memcpy,
1816 intel_memset,
1817 1, /* scalar_stmt_cost. */
1818 1, /* scalar load_cost. */
1819 1, /* scalar_store_cost. */
1820 1, /* vec_stmt_cost. */
1821 4, /* vec_to_scalar_cost. */
1822 1, /* scalar_to_vec_cost. */
1823 1, /* vec_align_load_cost. */
1824 2, /* vec_unalign_load_cost. */
1825 1, /* vec_store_cost. */
1826 3, /* cond_taken_branch_cost. */
1827 1, /* cond_not_taken_branch_cost. */
1828 };
1829
1830 /* Generic should produce code tuned for Core-i7 (and newer chips)
1831 and btver1 (and newer chips). */
1832
1833 static stringop_algs generic_memcpy[2] = {
1834 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1835 {-1, libcall, false}}},
1836 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1837 {-1, libcall, false}}}};
1838 static stringop_algs generic_memset[2] = {
1839 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1840 {-1, libcall, false}}},
1841 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1842 {-1, libcall, false}}}};
1843 static const
1844 struct processor_costs generic_cost = {
1845 COSTS_N_INSNS (1), /* cost of an add instruction */
1846 /* On all chips taken into consideration, lea is 2 cycles or more. With
1847 this cost, however, our current implementation of synth_mult results in
1848 the use of unnecessary temporary registers, causing regressions on several
1849 SPECfp benchmarks. */
1850 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1851 COSTS_N_INSNS (1), /* variable shift costs */
1852 COSTS_N_INSNS (1), /* constant shift costs */
1853 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1854 COSTS_N_INSNS (4), /* HI */
1855 COSTS_N_INSNS (3), /* SI */
1856 COSTS_N_INSNS (4), /* DI */
1857 COSTS_N_INSNS (2)}, /* other */
1858 0, /* cost of multiply per each bit set */
1859 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1860 COSTS_N_INSNS (26), /* HI */
1861 COSTS_N_INSNS (42), /* SI */
1862 COSTS_N_INSNS (74), /* DI */
1863 COSTS_N_INSNS (74)}, /* other */
1864 COSTS_N_INSNS (1), /* cost of movsx */
1865 COSTS_N_INSNS (1), /* cost of movzx */
1866 8, /* "large" insn */
1867 17, /* MOVE_RATIO */
1868 4, /* cost for loading QImode using movzbl */
1869 {4, 4, 4}, /* cost of loading integer registers
1870 in QImode, HImode and SImode.
1871 Relative to reg-reg move (2). */
1872 {4, 4, 4}, /* cost of storing integer registers */
1873 4, /* cost of reg,reg fld/fst */
1874 {12, 12, 12}, /* cost of loading fp registers
1875 in SFmode, DFmode and XFmode */
1876 {6, 6, 8}, /* cost of storing fp registers
1877 in SFmode, DFmode and XFmode */
1878 2, /* cost of moving MMX register */
1879 {8, 8}, /* cost of loading MMX registers
1880 in SImode and DImode */
1881 {8, 8}, /* cost of storing MMX registers
1882 in SImode and DImode */
1883 2, /* cost of moving SSE register */
1884 {8, 8, 8}, /* cost of loading SSE registers
1885 in SImode, DImode and TImode */
1886 {8, 8, 8}, /* cost of storing SSE registers
1887 in SImode, DImode and TImode */
1888 5, /* MMX or SSE register to integer */
1889 32, /* size of l1 cache. */
1890 512, /* size of l2 cache. */
1891 64, /* size of prefetch block */
1892 6, /* number of parallel prefetches */
1893 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
1894 value is increased to the perhaps more appropriate value of 5. */
1895 3, /* Branch cost */
1896 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1897 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1898 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1899 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1900 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1901 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1902 generic_memcpy,
1903 generic_memset,
1904 1, /* scalar_stmt_cost. */
1905 1, /* scalar load_cost. */
1906 1, /* scalar_store_cost. */
1907 1, /* vec_stmt_cost. */
1908 1, /* vec_to_scalar_cost. */
1909 1, /* scalar_to_vec_cost. */
1910 1, /* vec_align_load_cost. */
1911 2, /* vec_unalign_load_cost. */
1912 1, /* vec_store_cost. */
1913 3, /* cond_taken_branch_cost. */
1914 1, /* cond_not_taken_branch_cost. */
1915 };
1916
1917 /* core_cost should produce code tuned for the Core family of CPUs. */
1918 static stringop_algs core_memcpy[2] = {
1919 {libcall, {{1024, rep_prefix_4_byte, true}, {-1, libcall, false}}},
1920 {libcall, {{24, loop, true}, {128, rep_prefix_8_byte, true},
1921 {-1, libcall, false}}}};
1922 static stringop_algs core_memset[2] = {
1923 {libcall, {{6, loop_1_byte, true},
1924 {24, loop, true},
1925 {8192, rep_prefix_4_byte, true},
1926 {-1, libcall, false}}},
1927 {libcall, {{24, loop, true}, {512, rep_prefix_8_byte, true},
1928 {-1, libcall, false}}}};
1929
1930 static const
1931 struct processor_costs core_cost = {
1932 COSTS_N_INSNS (1), /* cost of an add instruction */
1933 /* On all chips taken into consideration, lea is 2 cycles or more. With
1934 this cost, however, our current implementation of synth_mult results in
1935 the use of unnecessary temporary registers, causing regressions on several
1936 SPECfp benchmarks. */
1937 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1938 COSTS_N_INSNS (1), /* variable shift costs */
1939 COSTS_N_INSNS (1), /* constant shift costs */
1940 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1941 COSTS_N_INSNS (4), /* HI */
1942 COSTS_N_INSNS (3), /* SI */
1943 COSTS_N_INSNS (4), /* DI */
1944 COSTS_N_INSNS (2)}, /* other */
1945 0, /* cost of multiply per each bit set */
1946 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1947 COSTS_N_INSNS (26), /* HI */
1948 COSTS_N_INSNS (42), /* SI */
1949 COSTS_N_INSNS (74), /* DI */
1950 COSTS_N_INSNS (74)}, /* other */
1951 COSTS_N_INSNS (1), /* cost of movsx */
1952 COSTS_N_INSNS (1), /* cost of movzx */
1953 8, /* "large" insn */
1954 17, /* MOVE_RATIO */
1955 4, /* cost for loading QImode using movzbl */
1956 {4, 4, 4}, /* cost of loading integer registers
1957 in QImode, HImode and SImode.
1958 Relative to reg-reg move (2). */
1959 {4, 4, 4}, /* cost of storing integer registers */
1960 4, /* cost of reg,reg fld/fst */
1961 {12, 12, 12}, /* cost of loading fp registers
1962 in SFmode, DFmode and XFmode */
1963 {6, 6, 8}, /* cost of storing fp registers
1964 in SFmode, DFmode and XFmode */
1965 2, /* cost of moving MMX register */
1966 {8, 8}, /* cost of loading MMX registers
1967 in SImode and DImode */
1968 {8, 8}, /* cost of storing MMX registers
1969 in SImode and DImode */
1970 2, /* cost of moving SSE register */
1971 {8, 8, 8}, /* cost of loading SSE registers
1972 in SImode, DImode and TImode */
1973 {8, 8, 8}, /* cost of storing SSE registers
1974 in SImode, DImode and TImode */
1975 5, /* MMX or SSE register to integer */
1976 64, /* size of l1 cache. */
1977 512, /* size of l2 cache. */
1978 64, /* size of prefetch block */
1979 6, /* number of parallel prefetches */
1980 /* FIXME perhaps more appropriate value is 5. */
1981 3, /* Branch cost */
1982 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1983 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1984 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1985 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1986 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1987 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1988 core_memcpy,
1989 core_memset,
1990 1, /* scalar_stmt_cost. */
1991 1, /* scalar load_cost. */
1992 1, /* scalar_store_cost. */
1993 1, /* vec_stmt_cost. */
1994 1, /* vec_to_scalar_cost. */
1995 1, /* scalar_to_vec_cost. */
1996 1, /* vec_align_load_cost. */
1997 2, /* vec_unalign_load_cost. */
1998 1, /* vec_store_cost. */
1999 3, /* cond_taken_branch_cost. */
2000 1, /* cond_not_taken_branch_cost. */
2001 };
2002
2003
2004 /* Set by -mtune. */
2005 const struct processor_costs *ix86_tune_cost = &pentium_cost;
2006
2007 /* Set by -mtune or -Os. */
2008 const struct processor_costs *ix86_cost = &pentium_cost;
2009
2010 /* Processor feature/optimization bitmasks. */
2011 #define m_386 (1<<PROCESSOR_I386)
2012 #define m_486 (1<<PROCESSOR_I486)
2013 #define m_PENT (1<<PROCESSOR_PENTIUM)
2014 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
2015 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
2016 #define m_NOCONA (1<<PROCESSOR_NOCONA)
2017 #define m_P4_NOCONA (m_PENT4 | m_NOCONA)
2018 #define m_CORE2 (1<<PROCESSOR_CORE2)
2019 #define m_NEHALEM (1<<PROCESSOR_NEHALEM)
2020 #define m_SANDYBRIDGE (1<<PROCESSOR_SANDYBRIDGE)
2021 #define m_HASWELL (1<<PROCESSOR_HASWELL)
2022 #define m_CORE_ALL (m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_HASWELL)
2023 #define m_BONNELL (1<<PROCESSOR_BONNELL)
2024 #define m_SILVERMONT (1<<PROCESSOR_SILVERMONT)
2025 #define m_INTEL (1<<PROCESSOR_INTEL)
2026
2027 #define m_GEODE (1<<PROCESSOR_GEODE)
2028 #define m_K6 (1<<PROCESSOR_K6)
2029 #define m_K6_GEODE (m_K6 | m_GEODE)
2030 #define m_K8 (1<<PROCESSOR_K8)
2031 #define m_ATHLON (1<<PROCESSOR_ATHLON)
2032 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
2033 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
2034 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
2035 #define m_BDVER2 (1<<PROCESSOR_BDVER2)
2036 #define m_BDVER3 (1<<PROCESSOR_BDVER3)
2037 #define m_BDVER4 (1<<PROCESSOR_BDVER4)
2038 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
2039 #define m_BTVER2 (1<<PROCESSOR_BTVER2)
2040 #define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3 | m_BDVER4)
2041 #define m_BTVER (m_BTVER1 | m_BTVER2)
2042 #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER)
2043
2044 #define m_GENERIC (1<<PROCESSOR_GENERIC)
2045
2046 const char* ix86_tune_feature_names[X86_TUNE_LAST] = {
2047 #undef DEF_TUNE
2048 #define DEF_TUNE(tune, name, selector) name,
2049 #include "x86-tune.def"
2050 #undef DEF_TUNE
2051 };
2052
2053 /* Feature tests against the various tunings. */
2054 unsigned char ix86_tune_features[X86_TUNE_LAST];
2055
2056 /* Feature tests against the various tunings used to create ix86_tune_features
2057 based on the processor mask. */
2058 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
2059 #undef DEF_TUNE
2060 #define DEF_TUNE(tune, name, selector) selector,
2061 #include "x86-tune.def"
2062 #undef DEF_TUNE
2063 };
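/* As an illustration only (the real entries live in x86-tune.def and their
   selector masks may differ), a DEF_TUNE line has the shape
     DEF_TUNE (X86_TUNE_SCHEDULE, "schedule", m_PENT | m_PPRO | m_CORE_ALL | m_GENERIC)
   and expands both to the feature name in ix86_tune_feature_names above and
   to the processor-mask selector in initial_ix86_tune_features here.  */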
2064
2065 /* Feature tests against the various architecture variations. */
2066 unsigned char ix86_arch_features[X86_ARCH_LAST];
2067
2068 /* Feature tests against the various architecture variations, used to create
2069 ix86_arch_features based on the processor mask. */
2070 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
2071 /* X86_ARCH_CMOV: Conditional move was added for pentiumpro. */
2072 ~(m_386 | m_486 | m_PENT | m_K6),
2073
2074 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2075 ~m_386,
2076
2077 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2078 ~(m_386 | m_486),
2079
2080 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2081 ~m_386,
2082
2083 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
2084 ~m_386,
2085 };
2086
2087 /* In case the average insn count for a single function invocation is
2088 lower than this constant, emit fast (but longer) prologue and
2089 epilogue code. */
2090 #define FAST_PROLOGUE_INSN_COUNT 20
2091
2092 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
2093 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
2094 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
2095 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
2096
2097 /* Array of the smallest class containing reg number REGNO, indexed by
2098 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2099
2100 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
2101 {
2102 /* ax, dx, cx, bx */
2103 AREG, DREG, CREG, BREG,
2104 /* si, di, bp, sp */
2105 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
2106 /* FP registers */
2107 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
2108 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
2109 /* arg pointer */
2110 NON_Q_REGS,
2111 /* flags, fpsr, fpcr, frame */
2112 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
2113 /* SSE registers */
2114 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2115 SSE_REGS, SSE_REGS,
2116 /* MMX registers */
2117 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
2118 MMX_REGS, MMX_REGS,
2119 /* REX registers */
2120 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2121 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2122 /* SSE REX registers */
2123 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2124 SSE_REGS, SSE_REGS,
2125 /* AVX-512 SSE registers */
2126 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2127 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2128 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2129 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2130 /* Mask registers. */
2131 MASK_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2132 MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2133 };
2134
2135 /* The "default" register map used in 32bit mode. */
2136
2137 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2138 {
2139 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2140 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2141 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2142 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2143 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2144 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2145 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2146 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2147 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2148 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2149 };
2150
2151 /* The "default" register map used in 64bit mode. */
2152
2153 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2154 {
2155 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2156 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2157 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2158 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2159 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2160 8,9,10,11,12,13,14,15, /* extended integer registers */
2161 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2162 67, 68, 69, 70, 71, 72, 73, 74, /* AVX-512 registers 16-23 */
2163 75, 76, 77, 78, 79, 80, 81, 82, /* AVX-512 registers 24-31 */
2164 118, 119, 120, 121, 122, 123, 124, 125, /* Mask registers */
2165 };
2166
2167 /* Define the register numbers to be used in Dwarf debugging information.
2168 The SVR4 reference port C compiler uses the following register numbers
2169 in its Dwarf output code:
2170 0 for %eax (gcc regno = 0)
2171 1 for %ecx (gcc regno = 2)
2172 2 for %edx (gcc regno = 1)
2173 3 for %ebx (gcc regno = 3)
2174 4 for %esp (gcc regno = 7)
2175 5 for %ebp (gcc regno = 6)
2176 6 for %esi (gcc regno = 4)
2177 7 for %edi (gcc regno = 5)
2178 The following three DWARF register numbers are never generated by
2179 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2180 believes these numbers have these meanings.
2181 8 for %eip (no gcc equivalent)
2182 9 for %eflags (gcc regno = 17)
2183 10 for %trapno (no gcc equivalent)
2184 It is not at all clear how we should number the FP stack registers
2185 for the x86 architecture. If the version of SDB on x86/svr4 were
2186 a bit less brain dead with respect to floating-point then we would
2187 have a precedent to follow with respect to DWARF register numbers
2188 for x86 FP registers, but the SDB on x86/svr4 is so completely
2189 broken with respect to FP registers that it is hardly worth thinking
2190 of it as something to strive for compatibility with.
2191 The version of x86/svr4 SDB I have at the moment does (partially)
2192 seem to believe that DWARF register number 11 is associated with
2193 the x86 register %st(0), but that's about all. Higher DWARF
2194 register numbers don't seem to be associated with anything in
2195 particular, and even for DWARF regno 11, SDB only seems to under-
2196 stand that it should say that a variable lives in %st(0) (when
2197 asked via an `=' command) if we said it was in DWARF regno 11,
2198 but SDB still prints garbage when asked for the value of the
2199 variable in question (via a `/' command).
2200 (Also note that the labels SDB prints for various FP stack regs
2201 when doing an `x' command are all wrong.)
2202 Note that these problems generally don't affect the native SVR4
2203 C compiler because it doesn't allow the use of -O with -g and
2204 because when it is *not* optimizing, it allocates a memory
2205 location for each floating-point variable, and the memory
2206 location is what gets described in the DWARF AT_location
2207 attribute for the variable in question.
2208 Regardless of the severe mental illness of the x86/svr4 SDB, we
2209 do something sensible here and we use the following DWARF
2210 register numbers. Note that these are all stack-top-relative
2211 numbers.
2212 11 for %st(0) (gcc regno = 8)
2213 12 for %st(1) (gcc regno = 9)
2214 13 for %st(2) (gcc regno = 10)
2215 14 for %st(3) (gcc regno = 11)
2216 15 for %st(4) (gcc regno = 12)
2217 16 for %st(5) (gcc regno = 13)
2218 17 for %st(6) (gcc regno = 14)
2219 18 for %st(7) (gcc regno = 15)
2220 */
2221 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2222 {
2223 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2224 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2225 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2226 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2227 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2228 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2229 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2230 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2231 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2232 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2233 };
2234
2235 /* Define parameter passing and return registers. */
2236
2237 static int const x86_64_int_parameter_registers[6] =
2238 {
2239 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2240 };
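/* In the SysV x86-64 ABI the first six integer arguments are passed in these
   registers in this order (%rdi, %rsi, %rdx, %rcx, %r8, %r9); the MS ABI
   variant below uses only four.  */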
2241
2242 static int const x86_64_ms_abi_int_parameter_registers[4] =
2243 {
2244 CX_REG, DX_REG, R8_REG, R9_REG
2245 };
2246
2247 static int const x86_64_int_return_registers[4] =
2248 {
2249 AX_REG, DX_REG, DI_REG, SI_REG
2250 };
2251
2252 /* Additional registers that are clobbered by SYSV calls. */
2253
2254 int const x86_64_ms_sysv_extra_clobbered_registers[12] =
2255 {
2256 SI_REG, DI_REG,
2257 XMM6_REG, XMM7_REG,
2258 XMM8_REG, XMM9_REG, XMM10_REG, XMM11_REG,
2259 XMM12_REG, XMM13_REG, XMM14_REG, XMM15_REG
2260 };
2261
2262 /* Define the structure for the machine field in struct function. */
2263
2264 struct GTY(()) stack_local_entry {
2265 unsigned short mode; /* Machine mode of the slot. */
2266 unsigned short n; /* Slot number. */
2267 rtx rtl; /* The stack slot itself. */
2268 struct stack_local_entry *next; /* Next entry in the chain. */
2269 };
2270
2271 /* Structure describing stack frame layout.
2272 Stack grows downward:
2273
2274 [arguments]
2275 <- ARG_POINTER
2276 saved pc
2277
2278 saved static chain if ix86_static_chain_on_stack
2279
2280 saved frame pointer if frame_pointer_needed
2281 <- HARD_FRAME_POINTER
2282 [saved regs]
2283 <- regs_save_offset
2284 [padding0]
2285
2286 [saved SSE regs]
2287 <- sse_regs_save_offset
2288 [padding1] |
2289 | <- FRAME_POINTER
2290 [va_arg registers] |
2291 |
2292 [frame] |
2293 |
2294 [padding2] | = to_allocate
2295 <- STACK_POINTER
2296 */
2297 struct ix86_frame
2298 {
2299 int nsseregs;
2300 int nregs;
2301 int va_arg_size;
2302 int red_zone_size;
2303 int outgoing_arguments_size;
2304
2305 /* The offsets relative to ARG_POINTER. */
2306 HOST_WIDE_INT frame_pointer_offset;
2307 HOST_WIDE_INT hard_frame_pointer_offset;
2308 HOST_WIDE_INT stack_pointer_offset;
2309 HOST_WIDE_INT hfp_save_offset;
2310 HOST_WIDE_INT reg_save_offset;
2311 HOST_WIDE_INT sse_reg_save_offset;
2312
2313 /* When save_regs_using_mov is set, emit prologue using
2314 move instead of push instructions. */
2315 bool save_regs_using_mov;
2316 };
2317
2318 /* Which cpu are we scheduling for. */
2319 enum attr_cpu ix86_schedule;
2320
2321 /* Which cpu are we optimizing for. */
2322 enum processor_type ix86_tune;
2323
2324 /* Which instruction set architecture to use. */
2325 enum processor_type ix86_arch;
2326
2327 /* True if processor has SSE prefetch instruction. */
2328 unsigned char x86_prefetch_sse;
2329
2330 /* -mstackrealign option */
2331 static const char ix86_force_align_arg_pointer_string[]
2332 = "force_align_arg_pointer";
2333
2334 static rtx (*ix86_gen_leave) (void);
2335 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2336 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2337 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2338 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2339 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2340 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2341 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2342 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2343 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
2344 static rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx);
2345 static rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx);
2346
2347 /* Preferred alignment for stack boundary in bits. */
2348 unsigned int ix86_preferred_stack_boundary;
2349
2350 /* Alignment for incoming stack boundary in bits specified at
2351 command line. */
2352 static unsigned int ix86_user_incoming_stack_boundary;
2353
2354 /* Default alignment for incoming stack boundary in bits. */
2355 static unsigned int ix86_default_incoming_stack_boundary;
2356
2357 /* Alignment for incoming stack boundary in bits. */
2358 unsigned int ix86_incoming_stack_boundary;
2359
2360 /* Calling abi specific va_list type nodes. */
2361 static GTY(()) tree sysv_va_list_type_node;
2362 static GTY(()) tree ms_va_list_type_node;
2363
2364 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2365 char internal_label_prefix[16];
2366 int internal_label_prefix_len;
2367
2368 /* Fence to use after loop using movnt. */
2369 tree x86_mfence;
2370
2371 /* Register class used for passing a given 64-bit part of the argument.
2372 These represent the classes as documented by the psABI, with the exception
2373 of the SSESF and SSEDF classes, which are basically the SSE class; gcc just
2374 uses an SFmode or DFmode move instead of DImode to avoid reformatting penalties.
2375 
2376 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
2377 whenever possible (the upper half does contain padding). */
2378 enum x86_64_reg_class
2379 {
2380 X86_64_NO_CLASS,
2381 X86_64_INTEGER_CLASS,
2382 X86_64_INTEGERSI_CLASS,
2383 X86_64_SSE_CLASS,
2384 X86_64_SSESF_CLASS,
2385 X86_64_SSEDF_CLASS,
2386 X86_64_SSEUP_CLASS,
2387 X86_64_X87_CLASS,
2388 X86_64_X87UP_CLASS,
2389 X86_64_COMPLEX_X87_CLASS,
2390 X86_64_MEMORY_CLASS
2391 };
2392
2393 #define MAX_CLASSES 8
2394
2395 /* Table of constants used by fldpi, fldln2, etc.... */
2396 static REAL_VALUE_TYPE ext_80387_constants_table [5];
2397 static bool ext_80387_constants_init = 0;
2398
2399 \f
2400 static struct machine_function * ix86_init_machine_status (void);
2401 static rtx ix86_function_value (const_tree, const_tree, bool);
2402 static bool ix86_function_value_regno_p (const unsigned int);
2403 static unsigned int ix86_function_arg_boundary (enum machine_mode,
2404 const_tree);
2405 static rtx ix86_static_chain (const_tree, bool);
2406 static int ix86_function_regparm (const_tree, const_tree);
2407 static void ix86_compute_frame_layout (struct ix86_frame *);
2408 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
2409 rtx, rtx, int);
2410 static void ix86_add_new_builtins (HOST_WIDE_INT);
2411 static tree ix86_canonical_va_list_type (tree);
2412 static void predict_jump (int);
2413 static unsigned int split_stack_prologue_scratch_regno (void);
2414 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2415
2416 enum ix86_function_specific_strings
2417 {
2418 IX86_FUNCTION_SPECIFIC_ARCH,
2419 IX86_FUNCTION_SPECIFIC_TUNE,
2420 IX86_FUNCTION_SPECIFIC_MAX
2421 };
2422
2423 static char *ix86_target_string (HOST_WIDE_INT, int, const char *,
2424 const char *, enum fpmath_unit, bool);
2425 static void ix86_function_specific_save (struct cl_target_option *,
2426 struct gcc_options *opts);
2427 static void ix86_function_specific_restore (struct gcc_options *opts,
2428 struct cl_target_option *);
2429 static void ix86_function_specific_print (FILE *, int,
2430 struct cl_target_option *);
2431 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2432 static bool ix86_valid_target_attribute_inner_p (tree, char *[],
2433 struct gcc_options *,
2434 struct gcc_options *,
2435 struct gcc_options *);
2436 static bool ix86_can_inline_p (tree, tree);
2437 static void ix86_set_current_function (tree);
2438 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2439
2440 static enum calling_abi ix86_function_abi (const_tree);
2441
2442 \f
2443 #ifndef SUBTARGET32_DEFAULT_CPU
2444 #define SUBTARGET32_DEFAULT_CPU "i386"
2445 #endif
2446
2447 /* Whether -mtune= or -march= were specified */
2448 static int ix86_tune_defaulted;
2449 static int ix86_arch_specified;
2450
2451 /* Vectorization library interface and handlers. */
2452 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2453
2454 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2455 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2456
2457 /* Processor target table, indexed by processor number */
2458 struct ptt
2459 {
2460 const char *const name; /* processor name */
2461 const struct processor_costs *cost; /* Processor costs */
2462 const int align_loop; /* Default alignments. */
2463 const int align_loop_max_skip;
2464 const int align_jump;
2465 const int align_jump_max_skip;
2466 const int align_func;
2467 };
2468
2469 /* This table must be in sync with enum processor_type in i386.h. */
2470 static const struct ptt processor_target_table[PROCESSOR_max] =
2471 {
2472 {"generic", &generic_cost, 16, 10, 16, 10, 16},
2473 {"i386", &i386_cost, 4, 3, 4, 3, 4},
2474 {"i486", &i486_cost, 16, 15, 16, 15, 16},
2475 {"pentium", &pentium_cost, 16, 7, 16, 7, 16},
2476 {"pentiumpro", &pentiumpro_cost, 16, 15, 16, 10, 16},
2477 {"pentium4", &pentium4_cost, 0, 0, 0, 0, 0},
2478 {"nocona", &nocona_cost, 0, 0, 0, 0, 0},
2479 {"core2", &core_cost, 16, 10, 16, 10, 16},
2480 {"nehalem", &core_cost, 16, 10, 16, 10, 16},
2481 {"sandybridge", &core_cost, 16, 10, 16, 10, 16},
2482 {"haswell", &core_cost, 16, 10, 16, 10, 16},
2483 {"bonnell", &atom_cost, 16, 15, 16, 7, 16},
2484 {"silvermont", &slm_cost, 16, 15, 16, 7, 16},
2485 {"intel", &intel_cost, 16, 15, 16, 7, 16},
2486 {"geode", &geode_cost, 0, 0, 0, 0, 0},
2487 {"k6", &k6_cost, 32, 7, 32, 7, 32},
2488 {"athlon", &athlon_cost, 16, 7, 16, 7, 16},
2489 {"k8", &k8_cost, 16, 7, 16, 7, 16},
2490 {"amdfam10", &amdfam10_cost, 32, 24, 32, 7, 32},
2491 {"bdver1", &bdver1_cost, 16, 10, 16, 7, 11},
2492 {"bdver2", &bdver2_cost, 16, 10, 16, 7, 11},
2493 {"bdver3", &bdver3_cost, 16, 10, 16, 7, 11},
2494 {"bdver4", &bdver4_cost, 16, 10, 16, 7, 11},
2495 {"btver1", &btver1_cost, 16, 10, 16, 7, 11},
2496 {"btver2", &btver2_cost, 16, 10, 16, 7, 11}
2497 };
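/* Entries are indexed by processor_type; as a sketch (the actual assignments
   happen in the option-override code), tuning picks the cost table via
   something like processor_target_table[ix86_tune].cost, and the alignment
   columns feed the default loop/jump/function alignment values.  */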
2498 \f
2499 static unsigned int
2500 rest_of_handle_insert_vzeroupper (void)
2501 {
2502 int i;
2503
2504 /* vzeroupper instructions are inserted immediately after reload to
2505 account for possible spills from 256bit registers. The pass
2506 reuses the mode switching infrastructure by re-running the mode
2507 insertion pass, so disable entities that have already been processed. */
2508 for (i = 0; i < MAX_386_ENTITIES; i++)
2509 ix86_optimize_mode_switching[i] = 0;
2510
2511 ix86_optimize_mode_switching[AVX_U128] = 1;
2512
2513 /* Call optimize_mode_switching. */
2514 g->get_passes ()->execute_pass_mode_switching ();
2515 return 0;
2516 }
2517
2518 namespace {
2519
2520 const pass_data pass_data_insert_vzeroupper =
2521 {
2522 RTL_PASS, /* type */
2523 "vzeroupper", /* name */
2524 OPTGROUP_NONE, /* optinfo_flags */
2525 TV_NONE, /* tv_id */
2526 0, /* properties_required */
2527 0, /* properties_provided */
2528 0, /* properties_destroyed */
2529 0, /* todo_flags_start */
2530 TODO_df_finish, /* todo_flags_finish */
2531 };
2532
2533 class pass_insert_vzeroupper : public rtl_opt_pass
2534 {
2535 public:
2536 pass_insert_vzeroupper(gcc::context *ctxt)
2537 : rtl_opt_pass(pass_data_insert_vzeroupper, ctxt)
2538 {}
2539
2540 /* opt_pass methods: */
2541 virtual bool gate (function *)
2542 {
2543 return TARGET_AVX && !TARGET_AVX512F && TARGET_VZEROUPPER;
2544 }
2545
2546 virtual unsigned int execute (function *)
2547 {
2548 return rest_of_handle_insert_vzeroupper ();
2549 }
2550
2551 }; // class pass_insert_vzeroupper
2552
2553 } // anon namespace
2554
2555 rtl_opt_pass *
2556 make_pass_insert_vzeroupper (gcc::context *ctxt)
2557 {
2558 return new pass_insert_vzeroupper (ctxt);
2559 }
2560
2561 /* Return true if a red-zone is in use. */
2562
2563 static inline bool
2564 ix86_using_red_zone (void)
2565 {
2566 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
2567 }
2568 \f
2569 /* Return a string that documents the current -m options. The caller is
2570 responsible for freeing the string. */
2571
2572 static char *
2573 ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
2574 const char *tune, enum fpmath_unit fpmath,
2575 bool add_nl_p)
2576 {
2577 struct ix86_target_opts
2578 {
2579 const char *option; /* option string */
2580 HOST_WIDE_INT mask; /* isa mask options */
2581 };
2582
2583 /* This table is ordered so that options like -msse4.2, which imply
2584 preceding options, are matched first. */
2585 static struct ix86_target_opts isa_opts[] =
2586 {
2587 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2588 { "-mfma", OPTION_MASK_ISA_FMA },
2589 { "-mxop", OPTION_MASK_ISA_XOP },
2590 { "-mlwp", OPTION_MASK_ISA_LWP },
2591 { "-mavx512f", OPTION_MASK_ISA_AVX512F },
2592 { "-mavx512er", OPTION_MASK_ISA_AVX512ER },
2593 { "-mavx512cd", OPTION_MASK_ISA_AVX512CD },
2594 { "-mavx512pf", OPTION_MASK_ISA_AVX512PF },
2595 { "-mavx512dq", OPTION_MASK_ISA_AVX512DQ },
2596 { "-mavx512bw", OPTION_MASK_ISA_AVX512BW },
2597 { "-mavx512vl", OPTION_MASK_ISA_AVX512VL },
2598 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2599 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2600 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2601 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2602 { "-msse3", OPTION_MASK_ISA_SSE3 },
2603 { "-msse2", OPTION_MASK_ISA_SSE2 },
2604 { "-msse", OPTION_MASK_ISA_SSE },
2605 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2606 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2607 { "-mmmx", OPTION_MASK_ISA_MMX },
2608 { "-mabm", OPTION_MASK_ISA_ABM },
2609 { "-mbmi", OPTION_MASK_ISA_BMI },
2610 { "-mbmi2", OPTION_MASK_ISA_BMI2 },
2611 { "-mlzcnt", OPTION_MASK_ISA_LZCNT },
2612 { "-mhle", OPTION_MASK_ISA_HLE },
2613 { "-mfxsr", OPTION_MASK_ISA_FXSR },
2614 { "-mrdseed", OPTION_MASK_ISA_RDSEED },
2615 { "-mprfchw", OPTION_MASK_ISA_PRFCHW },
2616 { "-madx", OPTION_MASK_ISA_ADX },
2617 { "-mtbm", OPTION_MASK_ISA_TBM },
2618 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2619 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2620 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2621 { "-maes", OPTION_MASK_ISA_AES },
2622 { "-msha", OPTION_MASK_ISA_SHA },
2623 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2624 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2625 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2626 { "-mf16c", OPTION_MASK_ISA_F16C },
2627 { "-mrtm", OPTION_MASK_ISA_RTM },
2628 { "-mxsave", OPTION_MASK_ISA_XSAVE },
2629 { "-mxsaveopt", OPTION_MASK_ISA_XSAVEOPT },
2630 { "-mprefetchwt1", OPTION_MASK_ISA_PREFETCHWT1 },
2631 { "-mclflushopt", OPTION_MASK_ISA_CLFLUSHOPT },
2632 { "-mxsavec", OPTION_MASK_ISA_XSAVEC },
2633 { "-mxsaves", OPTION_MASK_ISA_XSAVES },
2634 };
2635
2636 /* Flag options. */
2637 static struct ix86_target_opts flag_opts[] =
2638 {
2639 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2640 { "-mlong-double-128", MASK_LONG_DOUBLE_128 },
2641 { "-mlong-double-64", MASK_LONG_DOUBLE_64 },
2642 { "-m80387", MASK_80387 },
2643 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2644 { "-malign-double", MASK_ALIGN_DOUBLE },
2645 { "-mcld", MASK_CLD },
2646 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2647 { "-mieee-fp", MASK_IEEE_FP },
2648 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2649 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2650 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2651 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2652 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2653 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2654 { "-mno-red-zone", MASK_NO_RED_ZONE },
2655 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2656 { "-mrecip", MASK_RECIP },
2657 { "-mrtd", MASK_RTD },
2658 { "-msseregparm", MASK_SSEREGPARM },
2659 { "-mstack-arg-probe", MASK_STACK_PROBE },
2660 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2661 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
2662 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2663 { "-mvzeroupper", MASK_VZEROUPPER },
2664 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
2665 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE},
2666 { "-mprefer-avx128", MASK_PREFER_AVX128},
2667 };
2668
2669 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2670
2671 char isa_other[40];
2672 char target_other[40];
2673 unsigned num = 0;
2674 unsigned i, j;
2675 char *ret;
2676 char *ptr;
2677 size_t len;
2678 size_t line_len;
2679 size_t sep_len;
2680 const char *abi;
2681
2682 memset (opts, '\0', sizeof (opts));
2683
2684 /* Add -march= option. */
2685 if (arch)
2686 {
2687 opts[num][0] = "-march=";
2688 opts[num++][1] = arch;
2689 }
2690
2691 /* Add -mtune= option. */
2692 if (tune)
2693 {
2694 opts[num][0] = "-mtune=";
2695 opts[num++][1] = tune;
2696 }
2697
2698 /* Add -m32/-m64/-mx32. */
2699 if ((isa & OPTION_MASK_ISA_64BIT) != 0)
2700 {
2701 if ((isa & OPTION_MASK_ABI_64) != 0)
2702 abi = "-m64";
2703 else
2704 abi = "-mx32";
2705 isa &= ~ (OPTION_MASK_ISA_64BIT
2706 | OPTION_MASK_ABI_64
2707 | OPTION_MASK_ABI_X32);
2708 }
2709 else
2710 abi = "-m32";
2711 opts[num++][0] = abi;
2712
2713 /* Pick out the options in isa options. */
2714 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2715 {
2716 if ((isa & isa_opts[i].mask) != 0)
2717 {
2718 opts[num++][0] = isa_opts[i].option;
2719 isa &= ~ isa_opts[i].mask;
2720 }
2721 }
2722
2723 if (isa && add_nl_p)
2724 {
2725 opts[num++][0] = isa_other;
2726 sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)",
2727 isa);
2728 }
2729
2730 /* Add flag options. */
2731 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2732 {
2733 if ((flags & flag_opts[i].mask) != 0)
2734 {
2735 opts[num++][0] = flag_opts[i].option;
2736 flags &= ~ flag_opts[i].mask;
2737 }
2738 }
2739
2740 if (flags && add_nl_p)
2741 {
2742 opts[num++][0] = target_other;
2743 sprintf (target_other, "(other flags: %#x)", flags);
2744 }
2745
2746 /* Add -fpmath= option. */
2747 if (fpmath)
2748 {
2749 opts[num][0] = "-mfpmath=";
2750 switch ((int) fpmath)
2751 {
2752 case FPMATH_387:
2753 opts[num++][1] = "387";
2754 break;
2755
2756 case FPMATH_SSE:
2757 opts[num++][1] = "sse";
2758 break;
2759
2760 case FPMATH_387 | FPMATH_SSE:
2761 opts[num++][1] = "sse+387";
2762 break;
2763
2764 default:
2765 gcc_unreachable ();
2766 }
2767 }
2768
2769 /* Any options? */
2770 if (num == 0)
2771 return NULL;
2772
2773 gcc_assert (num < ARRAY_SIZE (opts));
2774
2775 /* Size the string. */
2776 len = 0;
2777 sep_len = (add_nl_p) ? 3 : 1;
2778 for (i = 0; i < num; i++)
2779 {
2780 len += sep_len;
2781 for (j = 0; j < 2; j++)
2782 if (opts[i][j])
2783 len += strlen (opts[i][j]);
2784 }
2785
2786 /* Build the string. */
2787 ret = ptr = (char *) xmalloc (len);
2788 line_len = 0;
2789
2790 for (i = 0; i < num; i++)
2791 {
2792 size_t len2[2];
2793
2794 for (j = 0; j < 2; j++)
2795 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2796
2797 if (i != 0)
2798 {
2799 *ptr++ = ' ';
2800 line_len++;
2801
2802 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2803 {
2804 *ptr++ = '\\';
2805 *ptr++ = '\n';
2806 line_len = 0;
2807 }
2808 }
2809
2810 for (j = 0; j < 2; j++)
2811 if (opts[i][j])
2812 {
2813 memcpy (ptr, opts[i][j], len2[j]);
2814 ptr += len2[j];
2815 line_len += len2[j];
2816 }
2817 }
2818
2819 *ptr = '\0';
2820 gcc_assert (ret + len >= ptr);
2821
2822 return ret;
2823 }
2824
2825 /* Return true if profiling code should be emitted before the
2826 prologue, and false otherwise.
2827 Note: on x86 this is the case only when -mfentry is in effect. */
2828 static bool
2829 ix86_profile_before_prologue (void)
2830 {
2831 return flag_fentry != 0;
2832 }
2833
2834 /* Function that is callable from the debugger to print the current
2835 options. */
2836 void ATTRIBUTE_UNUSED
2837 ix86_debug_options (void)
2838 {
2839 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2840 ix86_arch_string, ix86_tune_string,
2841 ix86_fpmath, true);
2842
2843 if (opts)
2844 {
2845 fprintf (stderr, "%s\n\n", opts);
2846 free (opts);
2847 }
2848 else
2849 fputs ("<no options>\n\n", stderr);
2850
2851 return;
2852 }
2853
2854 static const char *stringop_alg_names[] = {
2855 #define DEF_ENUM
2856 #define DEF_ALG(alg, name) #name,
2857 #include "stringop.def"
2858 #undef DEF_ENUM
2859 #undef DEF_ALG
2860 };
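/* The names above follow the order of the stringop_alg enum (both are kept
   in sync via stringop.def), so stringop_alg_names[alg] is the spelling
   accepted by -mmemcpy-strategy= / -mmemset-strategy= and matched in
   ix86_parse_stringop_strategy_string below.  */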
2861
2862 /* Parse parameter string passed to -mmemcpy-strategy= or -mmemset-strategy=.
2863 The string is of the following form (or a comma-separated list of such entries):
2864
2865 strategy_alg:max_size:[align|noalign]
2866
2867 where the full size range for the strategy is either [0, max_size] or
2868 [min_size, max_size], in which min_size is the max_size + 1 of the
2869 preceding range. The last size range must have max_size == -1.
2870
2871 Examples:
2872
2873 1.
2874 -mmemcpy-strategy=libcall:-1:noalign
2875
2876 this is equivalent to (for known size memcpy) -mstringop-strategy=libcall
2877
2878
2879 2.
2880 -mmemset-strategy=rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign
2881
2882 This is to tell the compiler to use the following strategy for memset
2883 1) when the expected size is between [1, 16], use rep_8byte strategy;
2884 2) when the size is between [17, 2048], use vector_loop;
2885 3) when the size is > 2048, use libcall. */
2886
2887 struct stringop_size_range
2888 {
2889 int max;
2890 stringop_alg alg;
2891 bool noalign;
2892 };
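/* As an illustration (hypothetical values, not taken from any default table),
   parsing "rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign"
   would fill input_ranges in the parser below with
     { 16, rep_8byte, true }, { 2048, vector_loop, false }, { -1, libcall, true }.  */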
2893
2894 static void
2895 ix86_parse_stringop_strategy_string (char *strategy_str, bool is_memset)
2896 {
2897 const struct stringop_algs *default_algs;
2898 stringop_size_range input_ranges[MAX_STRINGOP_ALGS];
2899 char *curr_range_str, *next_range_str;
2900 int i = 0, n = 0;
2901
2902 if (is_memset)
2903 default_algs = &ix86_cost->memset[TARGET_64BIT != 0];
2904 else
2905 default_algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
2906
2907 curr_range_str = strategy_str;
2908
2909 do
2910 {
2911 int maxs;
2912 char alg_name[128];
2913 char align[16];
2914 next_range_str = strchr (curr_range_str, ',');
2915 if (next_range_str)
2916 *next_range_str++ = '\0';
2917
2918 if (3 != sscanf (curr_range_str, "%20[^:]:%d:%10s",
2919 alg_name, &maxs, align))
2920 {
2921 error ("wrong arg %s to option %s", curr_range_str,
2922 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2923 return;
2924 }
2925
2926 if (n > 0 && (maxs < (input_ranges[n - 1].max + 1) && maxs != -1))
2927 {
2928 error ("size ranges of option %s should be increasing",
2929 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2930 return;
2931 }
2932
2933 for (i = 0; i < last_alg; i++)
2934 if (!strcmp (alg_name, stringop_alg_names[i]))
2935 break;
2936
2937 if (i == last_alg)
2938 {
2939 error ("wrong stringop strategy name %s specified for option %s",
2940 alg_name,
2941 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2942 return;
2943 }
2944
2945 input_ranges[n].max = maxs;
2946 input_ranges[n].alg = (stringop_alg) i;
2947 if (!strcmp (align, "align"))
2948 input_ranges[n].noalign = false;
2949 else if (!strcmp (align, "noalign"))
2950 input_ranges[n].noalign = true;
2951 else
2952 {
2953 error ("unknown alignment %s specified for option %s",
2954 align, is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2955 return;
2956 }
2957 n++;
2958 curr_range_str = next_range_str;
2959 }
2960 while (curr_range_str);
2961
2962 if (input_ranges[n - 1].max != -1)
2963 {
2964 error ("the max value for the last size range should be -1"
2965 " for option %s",
2966 is_memset ? "-mmemset-strategy=" : "-mmemcpy-strategy=");
2967 return;
2968 }
2969
2970 if (n > MAX_STRINGOP_ALGS)
2971 {
2972 error ("too many size ranges specified in option %s",
2973 is_memset ? "-mmemset-strategy=" : "-mmemcpy-strategy=");
2974 return;
2975 }
2976
2977 /* Now override the default algs array. */
2978 for (i = 0; i < n; i++)
2979 {
2980 *const_cast<int *>(&default_algs->size[i].max) = input_ranges[i].max;
2981 *const_cast<stringop_alg *>(&default_algs->size[i].alg)
2982 = input_ranges[i].alg;
2983 *const_cast<int *>(&default_algs->size[i].noalign)
2984 = input_ranges[i].noalign;
2985 }
2986 }
2987
2988 \f
2989 /* Parse the -mtune-ctrl= option. When DUMP is true,
2990 print the features that are explicitly set. */
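/* For example, assuming feature names as spelled in x86-tune.def,
   -mtune-ctrl=use_leave,^partial_reg_stall would set X86_TUNE_USE_LEAVE
   and clear X86_TUNE_PARTIAL_REG_STALL; a leading '^' negates a feature,
   as handled in the loop below.  */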
2991
2992 static void
2993 parse_mtune_ctrl_str (bool dump)
2994 {
2995 if (!ix86_tune_ctrl_string)
2996 return;
2997
2998 char *next_feature_string = NULL;
2999 char *curr_feature_string = xstrdup (ix86_tune_ctrl_string);
3000 char *orig = curr_feature_string;
3001 int i;
3002 do
3003 {
3004 bool clear = false;
3005
3006 next_feature_string = strchr (curr_feature_string, ',');
3007 if (next_feature_string)
3008 *next_feature_string++ = '\0';
3009 if (*curr_feature_string == '^')
3010 {
3011 curr_feature_string++;
3012 clear = true;
3013 }
3014 for (i = 0; i < X86_TUNE_LAST; i++)
3015 {
3016 if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
3017 {
3018 ix86_tune_features[i] = !clear;
3019 if (dump)
3020 fprintf (stderr, "Explicitly %s feature %s\n",
3021 clear ? "clear" : "set", ix86_tune_feature_names[i]);
3022 break;
3023 }
3024 }
3025 if (i == X86_TUNE_LAST)
3026 error ("Unknown parameter to option -mtune-ctrl: %s",
3027 clear ? curr_feature_string - 1 : curr_feature_string);
3028 curr_feature_string = next_feature_string;
3029 }
3030 while (curr_feature_string);
3031 free (orig);
3032 }
3033
3034 /* Helper function to set ix86_tune_features. IX86_TUNE is the
3035 processor type. */
3036
3037 static void
3038 set_ix86_tune_features (enum processor_type ix86_tune, bool dump)
3039 {
3040 unsigned int ix86_tune_mask = 1u << ix86_tune;
3041 int i;
3042
3043 for (i = 0; i < X86_TUNE_LAST; ++i)
3044 {
3045 if (ix86_tune_no_default)
3046 ix86_tune_features[i] = 0;
3047 else
3048 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3049 }
3050
3051 if (dump)
3052 {
3053 fprintf (stderr, "List of x86 specific tuning parameter names:\n");
3054 for (i = 0; i < X86_TUNE_LAST; i++)
3055 fprintf (stderr, "%s : %s\n", ix86_tune_feature_names[i],
3056 ix86_tune_features[i] ? "on" : "off");
3057 }
3058
3059 parse_mtune_ctrl_str (dump);
3060 }
3061
3062
3063 /* Override various settings based on options. If MAIN_ARGS_P, the
3064 options are from the command line, otherwise they are from
3065 attributes. */
3066
3067 static void
3068 ix86_option_override_internal (bool main_args_p,
3069 struct gcc_options *opts,
3070 struct gcc_options *opts_set)
3071 {
3072 int i;
3073 unsigned int ix86_arch_mask;
3074 const bool ix86_tune_specified = (opts->x_ix86_tune_string != NULL);
3075 const char *prefix;
3076 const char *suffix;
3077 const char *sw;
3078
3079 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
3080 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
3081 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
3082 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
3083 #define PTA_AES (HOST_WIDE_INT_1 << 4)
3084 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
3085 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
3086 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
3087 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
3088 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
3089 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
3090 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
3091 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
3092 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
3093 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
3094 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
3095 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
3096 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
3097 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
3098 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
3099 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
3100 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
3101 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
3102 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
3103 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
3104 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
3105 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
3106 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
3107 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
3108 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
3109 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
3110 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
3111 #define PTA_RTM (HOST_WIDE_INT_1 << 32)
3112 #define PTA_HLE (HOST_WIDE_INT_1 << 33)
3113 #define PTA_PRFCHW (HOST_WIDE_INT_1 << 34)
3114 #define PTA_RDSEED (HOST_WIDE_INT_1 << 35)
3115 #define PTA_ADX (HOST_WIDE_INT_1 << 36)
3116 #define PTA_FXSR (HOST_WIDE_INT_1 << 37)
3117 #define PTA_XSAVE (HOST_WIDE_INT_1 << 38)
3118 #define PTA_XSAVEOPT (HOST_WIDE_INT_1 << 39)
3119 #define PTA_AVX512F (HOST_WIDE_INT_1 << 40)
3120 #define PTA_AVX512ER (HOST_WIDE_INT_1 << 41)
3121 #define PTA_AVX512PF (HOST_WIDE_INT_1 << 42)
3122 #define PTA_AVX512CD (HOST_WIDE_INT_1 << 43)
3123 #define PTA_SHA (HOST_WIDE_INT_1 << 45)
3124 #define PTA_PREFETCHWT1 (HOST_WIDE_INT_1 << 46)
3125 #define PTA_CLFLUSHOPT (HOST_WIDE_INT_1 << 47)
3126 #define PTA_XSAVEC (HOST_WIDE_INT_1 << 48)
3127 #define PTA_XSAVES (HOST_WIDE_INT_1 << 49)
3128 #define PTA_AVX512DQ (HOST_WIDE_INT_1 << 50)
3129 #define PTA_AVX512BW (HOST_WIDE_INT_1 << 51)
3130 #define PTA_AVX512VL (HOST_WIDE_INT_1 << 52)
3131
3132 #define PTA_CORE2 \
3133 (PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 \
3134 | PTA_CX16 | PTA_FXSR)
3135 #define PTA_NEHALEM \
3136 (PTA_CORE2 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_POPCNT)
3137 #define PTA_WESTMERE \
3138 (PTA_NEHALEM | PTA_AES | PTA_PCLMUL)
3139 #define PTA_SANDYBRIDGE \
3140 (PTA_WESTMERE | PTA_AVX | PTA_XSAVE | PTA_XSAVEOPT)
3141 #define PTA_IVYBRIDGE \
3142 (PTA_SANDYBRIDGE | PTA_FSGSBASE | PTA_RDRND | PTA_F16C)
3143 #define PTA_HASWELL \
3144 (PTA_IVYBRIDGE | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_LZCNT \
3145 | PTA_FMA | PTA_MOVBE | PTA_HLE)
3146 #define PTA_BROADWELL \
3147 (PTA_HASWELL | PTA_ADX | PTA_PRFCHW | PTA_RDSEED)
3148 #define PTA_BONNELL \
3149 (PTA_CORE2 | PTA_MOVBE)
3150 #define PTA_SILVERMONT \
3151 (PTA_WESTMERE | PTA_MOVBE)
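/* The Intel Core family macros above each build on the previous
   generation (PTA_CORE2 through PTA_BROADWELL), while PTA_BONNELL and
   PTA_SILVERMONT branch off PTA_CORE2 and PTA_WESTMERE respectively.  */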
3152
3153 /* If this reaches 64, the flags field of struct pta below needs to be widened. */
3154
3155 static struct pta
3156 {
3157 const char *const name; /* processor name or nickname. */
3158 const enum processor_type processor;
3159 const enum attr_cpu schedule;
3160 const unsigned HOST_WIDE_INT flags;
3161 }
3162 const processor_alias_table[] =
3163 {
3164 {"i386", PROCESSOR_I386, CPU_NONE, 0},
3165 {"i486", PROCESSOR_I486, CPU_NONE, 0},
3166 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3167 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3168 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
3169 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
3170 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3171 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3172 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3173 PTA_MMX | PTA_SSE | PTA_FXSR},
3174 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3175 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3176 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_FXSR},
3177 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3178 PTA_MMX | PTA_SSE | PTA_FXSR},
3179 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3180 PTA_MMX | PTA_SSE | PTA_FXSR},
3181 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3182 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3183 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
3184 PTA_MMX |PTA_SSE | PTA_SSE2 | PTA_FXSR},
3185 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
3186 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3187 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
3188 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
3189 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
3190 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3191 | PTA_CX16 | PTA_NO_SAHF | PTA_FXSR},
3192 {"core2", PROCESSOR_CORE2, CPU_CORE2, PTA_CORE2},
3193 {"nehalem", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3194 {"corei7", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3195 {"westmere", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_WESTMERE},
3196 {"sandybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3197 PTA_SANDYBRIDGE},
3198 {"corei7-avx", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3199 PTA_SANDYBRIDGE},
3200 {"ivybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3201 PTA_IVYBRIDGE},
3202 {"core-avx-i", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3203 PTA_IVYBRIDGE},
3204 {"haswell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3205 {"core-avx2", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3206 {"broadwell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_BROADWELL},
3207 {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3208 {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3209 {"silvermont", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3210 {"slm", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3211 {"intel", PROCESSOR_INTEL, CPU_SLM, PTA_NEHALEM},
3212 {"geode", PROCESSOR_GEODE, CPU_GEODE,
3213 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3214 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
3215 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3216 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3217 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
3218 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3219 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
3220 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3221 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
3222 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3223 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
3224 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3225 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
3226 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3227 {"x86-64", PROCESSOR_K8, CPU_K8,
3228 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
3229 {"k8", PROCESSOR_K8, CPU_K8,
3230 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3231 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3232 {"k8-sse3", PROCESSOR_K8, CPU_K8,
3233 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3234 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3235 {"opteron", PROCESSOR_K8, CPU_K8,
3236 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3237 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3238 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3239 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3240 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3241 {"athlon64", PROCESSOR_K8, CPU_K8,
3242 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3243 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3244 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3245 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3246 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3247 {"athlon-fx", PROCESSOR_K8, CPU_K8,
3248 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3249 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3250 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3251 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3252 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3253 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3254 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3255 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3256 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3257 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3258 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3259 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3260 | PTA_XOP | PTA_LWP | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3261 {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
3262 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3263 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3264 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3265 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3266 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3267 {"bdver3", PROCESSOR_BDVER3, CPU_BDVER3,
3268 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3269 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3270 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3271 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3272 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE
3273 | PTA_XSAVEOPT | PTA_FSGSBASE},
3274 {"bdver4", PROCESSOR_BDVER4, CPU_BDVER4,
3275 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3276 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3277 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
3278 | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2
3279 | PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR
3280 | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE | PTA_RDRND
3281 | PTA_MOVBE},
3282 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC,
3283 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3284 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_PRFCHW
3285 | PTA_FXSR | PTA_XSAVE},
3286 {"btver2", PROCESSOR_BTVER2, CPU_BTVER2,
3287 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3288 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_SSE4_1
3289 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
3290 | PTA_BMI | PTA_F16C | PTA_MOVBE | PTA_PRFCHW
3291 | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT},
3292
3293 {"generic", PROCESSOR_GENERIC, CPU_GENERIC,
3294 PTA_64BIT
3295 | PTA_HLE /* flags are only used for -march switch. */ },
3296 };
3297
3298 /* -mrecip options. */
3299 static struct
3300 {
3301 const char *string; /* option name */
3302 unsigned int mask; /* mask bits to set */
3303 }
3304 const recip_options[] =
3305 {
3306 { "all", RECIP_MASK_ALL },
3307 { "none", RECIP_MASK_NONE },
3308 { "div", RECIP_MASK_DIV },
3309 { "sqrt", RECIP_MASK_SQRT },
3310 { "vec-div", RECIP_MASK_VEC_DIV },
3311 { "vec-sqrt", RECIP_MASK_VEC_SQRT },
3312 };
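/* The table above maps -mrecip= sub-option names to RECIP_MASK_* bits;
   the special token "default" and the '!' negation prefix are handled
   separately in the parsing loop further below.  */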
3313
3314 int const pta_size = ARRAY_SIZE (processor_alias_table);
3315
3316 /* Set up prefix/suffix so the error messages refer to either the command
3317 line argument, or the attribute(target). */
3318 if (main_args_p)
3319 {
3320 prefix = "-m";
3321 suffix = "";
3322 sw = "switch";
3323 }
3324 else
3325 {
3326 prefix = "option(\"";
3327 suffix = "\")";
3328 sw = "attribute";
3329 }
3330
3331 /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
3332 TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */
3333 if (TARGET_64BIT_DEFAULT && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3334 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ABI_64 | OPTION_MASK_ABI_X32);
3335 #ifdef TARGET_BI_ARCH
3336 else
3337 {
3338 #if TARGET_BI_ARCH == 1
3339 /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
3340 is on and OPTION_MASK_ABI_X32 is off. We turn off
3341 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
3342 -mx32. */
3343 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3344 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3345 #else
3346 /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
3347 on and OPTION_MASK_ABI_64 is off. We turn off
3348 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
3349 -m64 or OPTION_MASK_CODE16 is turned on by -m16. */
3350 if (TARGET_LP64_P (opts->x_ix86_isa_flags)
3351 || TARGET_16BIT_P (opts->x_ix86_isa_flags))
3352 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3353 #endif
3354 }
3355 #endif
3356
3357 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3358 {
3359 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3360 OPTION_MASK_ABI_64 for TARGET_X32. */
3361 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3362 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3363 }
3364 else if (TARGET_16BIT_P (opts->x_ix86_isa_flags))
3365 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ISA_64BIT
3366 | OPTION_MASK_ABI_X32
3367 | OPTION_MASK_ABI_64);
3368 else if (TARGET_LP64_P (opts->x_ix86_isa_flags))
3369 {
3370 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3371 OPTION_MASK_ABI_X32 for TARGET_LP64. */
3372 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3373 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3374 }
3375
3376 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3377 SUBTARGET_OVERRIDE_OPTIONS;
3378 #endif
3379
3380 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3381 SUBSUBTARGET_OVERRIDE_OPTIONS;
3382 #endif
3383
3384 /* On Darwin (Mach-O), -fPIC is the default for x86_64. */
3385 if (TARGET_MACHO && TARGET_64BIT_P (opts->x_ix86_isa_flags))
3386 opts->x_flag_pic = 2;
3387
3388 /* Need to check -mtune=generic first. */
3389 if (opts->x_ix86_tune_string)
3390 {
3391 /* As special support for cross compilers we read -mtune=native
3392 as -mtune=generic. With native compilers we won't see the
3393 -mtune=native, as it was changed by the driver. */
3394 if (!strcmp (opts->x_ix86_tune_string, "native"))
3395 {
3396 opts->x_ix86_tune_string = "generic";
3397 }
3398 else if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3399 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
3400 "%stune=k8%s or %stune=generic%s instead as appropriate",
3401 prefix, suffix, prefix, suffix, prefix, suffix);
3402 }
3403 else
3404 {
3405 if (opts->x_ix86_arch_string)
3406 opts->x_ix86_tune_string = opts->x_ix86_arch_string;
3407 if (!opts->x_ix86_tune_string)
3408 {
3409 opts->x_ix86_tune_string
3410 = processor_target_table[TARGET_CPU_DEFAULT].name;
3411 ix86_tune_defaulted = 1;
3412 }
3413
3414 /* opts->x_ix86_tune_string is set to opts->x_ix86_arch_string
3415 or defaulted. We need to use a sensible tune option. */
3416 if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3417 {
3418 opts->x_ix86_tune_string = "generic";
3419 }
3420 }
3421
3422 if (opts->x_ix86_stringop_alg == rep_prefix_8_byte
3423 && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3424 {
3425 /* rep; movq isn't available in 32-bit code. */
3426 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3427 opts->x_ix86_stringop_alg = no_stringop;
3428 }
3429
3430 if (!opts->x_ix86_arch_string)
3431 opts->x_ix86_arch_string
3432 = TARGET_64BIT_P (opts->x_ix86_isa_flags)
3433 ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3434 else
3435 ix86_arch_specified = 1;
3436
3437 if (opts_set->x_ix86_pmode)
3438 {
3439 if ((TARGET_LP64_P (opts->x_ix86_isa_flags)
3440 && opts->x_ix86_pmode == PMODE_SI)
3441 || (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
3442 && opts->x_ix86_pmode == PMODE_DI))
3443 error ("address mode %qs not supported in the %s bit mode",
3444 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "short" : "long",
3445 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "64" : "32");
3446 }
3447 else
3448 opts->x_ix86_pmode = TARGET_LP64_P (opts->x_ix86_isa_flags)
3449 ? PMODE_DI : PMODE_SI;
3450
3451 if (!opts_set->x_ix86_abi)
3452 opts->x_ix86_abi = DEFAULT_ABI;
3453
3454 /* For targets using the MS ABI, enable ms-extensions unless
3455 explicitly turned off. For non-MS ABI targets we turn this
3456 option off. */
3457 if (!opts_set->x_flag_ms_extensions)
3458 opts->x_flag_ms_extensions = (MS_ABI == DEFAULT_ABI);
3459
3460 if (opts_set->x_ix86_cmodel)
3461 {
3462 switch (opts->x_ix86_cmodel)
3463 {
3464 case CM_SMALL:
3465 case CM_SMALL_PIC:
3466 if (opts->x_flag_pic)
3467 opts->x_ix86_cmodel = CM_SMALL_PIC;
3468 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3469 error ("code model %qs not supported in the %s bit mode",
3470 "small", "32");
3471 break;
3472
3473 case CM_MEDIUM:
3474 case CM_MEDIUM_PIC:
3475 if (opts->x_flag_pic)
3476 opts->x_ix86_cmodel = CM_MEDIUM_PIC;
3477 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3478 error ("code model %qs not supported in the %s bit mode",
3479 "medium", "32");
3480 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3481 error ("code model %qs not supported in x32 mode",
3482 "medium");
3483 break;
3484
3485 case CM_LARGE:
3486 case CM_LARGE_PIC:
3487 if (opts->x_flag_pic)
3488 opts->x_ix86_cmodel = CM_LARGE_PIC;
3489 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3490 error ("code model %qs not supported in the %s bit mode",
3491 "large", "32");
3492 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3493 error ("code model %qs not supported in x32 mode",
3494 "large");
3495 break;
3496
3497 case CM_32:
3498 if (opts->x_flag_pic)
3499 error ("code model %s does not support PIC mode", "32");
3500 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3501 error ("code model %qs not supported in the %s bit mode",
3502 "32", "64");
3503 break;
3504
3505 case CM_KERNEL:
3506 if (opts->x_flag_pic)
3507 {
3508 error ("code model %s does not support PIC mode", "kernel");
3509 opts->x_ix86_cmodel = CM_32;
3510 }
3511 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3512 error ("code model %qs not supported in the %s bit mode",
3513 "kernel", "32");
3514 break;
3515
3516 default:
3517 gcc_unreachable ();
3518 }
3519 }
3520 else
3521 {
3522 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3523 use of rip-relative addressing. This eliminates fixups that
3524 would otherwise be needed if this object is to be placed in a
3525 DLL, and is essentially just as efficient as direct addressing. */
3526 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3527 && (TARGET_RDOS || TARGET_PECOFF))
3528 opts->x_ix86_cmodel = CM_MEDIUM_PIC, opts->x_flag_pic = 1;
3529 else if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3530 opts->x_ix86_cmodel = opts->x_flag_pic ? CM_SMALL_PIC : CM_SMALL;
3531 else
3532 opts->x_ix86_cmodel = CM_32;
3533 }
3534 if (TARGET_MACHO && opts->x_ix86_asm_dialect == ASM_INTEL)
3535 {
3536 error ("-masm=intel not supported in this configuration");
3537 opts->x_ix86_asm_dialect = ASM_ATT;
3538 }
3539 if ((TARGET_64BIT_P (opts->x_ix86_isa_flags) != 0)
3540 != ((opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3541 sorry ("%i-bit mode not compiled in",
3542 (opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3543
3544 for (i = 0; i < pta_size; i++)
3545 if (! strcmp (opts->x_ix86_arch_string, processor_alias_table[i].name))
3546 {
3547 ix86_schedule = processor_alias_table[i].schedule;
3548 ix86_arch = processor_alias_table[i].processor;
3549 /* Default cpu tuning to the architecture. */
3550 ix86_tune = ix86_arch;
3551
3552 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3553 && !(processor_alias_table[i].flags & PTA_64BIT))
3554 error ("CPU you selected does not support x86-64 "
3555 "instruction set");
3556
3557 if (processor_alias_table[i].flags & PTA_MMX
3558 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3559 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3560 if (processor_alias_table[i].flags & PTA_3DNOW
3561 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3562 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3563 if (processor_alias_table[i].flags & PTA_3DNOW_A
3564 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3565 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3566 if (processor_alias_table[i].flags & PTA_SSE
3567 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3568 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3569 if (processor_alias_table[i].flags & PTA_SSE2
3570 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3571 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3572 if (processor_alias_table[i].flags & PTA_SSE3
3573 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3574 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3575 if (processor_alias_table[i].flags & PTA_SSSE3
3576 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3577 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3578 if (processor_alias_table[i].flags & PTA_SSE4_1
3579 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3580 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3581 if (processor_alias_table[i].flags & PTA_SSE4_2
3582 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3583 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3584 if (processor_alias_table[i].flags & PTA_AVX
3585 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3586 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3587 if (processor_alias_table[i].flags & PTA_AVX2
3588 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
3589 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
3590 if (processor_alias_table[i].flags & PTA_FMA
3591 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3592 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3593 if (processor_alias_table[i].flags & PTA_SSE4A
3594 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3595 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3596 if (processor_alias_table[i].flags & PTA_FMA4
3597 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3598 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3599 if (processor_alias_table[i].flags & PTA_XOP
3600 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3601 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3602 if (processor_alias_table[i].flags & PTA_LWP
3603 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3604 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3605 if (processor_alias_table[i].flags & PTA_ABM
3606 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3607 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3608 if (processor_alias_table[i].flags & PTA_BMI
3609 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
3610 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI;
3611 if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
3612 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
3613 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
3614 if (processor_alias_table[i].flags & PTA_TBM
3615 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
3616 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_TBM;
3617 if (processor_alias_table[i].flags & PTA_BMI2
3618 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
3619 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
3620 if (processor_alias_table[i].flags & PTA_CX16
3621 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3622 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3623 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3624 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3625 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3626 if (!(TARGET_64BIT_P (opts->x_ix86_isa_flags)
3627 && (processor_alias_table[i].flags & PTA_NO_SAHF))
3628 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3629 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3630 if (processor_alias_table[i].flags & PTA_MOVBE
3631 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3632 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3633 if (processor_alias_table[i].flags & PTA_AES
3634 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3635 ix86_isa_flags |= OPTION_MASK_ISA_AES;
3636 if (processor_alias_table[i].flags & PTA_SHA
3637 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SHA))
3638 ix86_isa_flags |= OPTION_MASK_ISA_SHA;
3639 if (processor_alias_table[i].flags & PTA_PCLMUL
3640 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3641 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3642 if (processor_alias_table[i].flags & PTA_FSGSBASE
3643 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3644 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3645 if (processor_alias_table[i].flags & PTA_RDRND
3646 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3647 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3648 if (processor_alias_table[i].flags & PTA_F16C
3649 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3650 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3651 if (processor_alias_table[i].flags & PTA_RTM
3652 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RTM))
3653 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RTM;
3654 if (processor_alias_table[i].flags & PTA_HLE
3655 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_HLE))
3656 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_HLE;
3657 if (processor_alias_table[i].flags & PTA_PRFCHW
3658 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PRFCHW))
3659 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PRFCHW;
3660 if (processor_alias_table[i].flags & PTA_RDSEED
3661 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDSEED))
3662 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDSEED;
3663 if (processor_alias_table[i].flags & PTA_ADX
3664 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ADX))
3665 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ADX;
3666 if (processor_alias_table[i].flags & PTA_FXSR
3667 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FXSR))
3668 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FXSR;
3669 if (processor_alias_table[i].flags & PTA_XSAVE
3670 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVE))
3671 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVE;
3672 if (processor_alias_table[i].flags & PTA_XSAVEOPT
3673 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEOPT))
3674 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEOPT;
3675 if (processor_alias_table[i].flags & PTA_AVX512F
3676 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F))
3677 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512F;
3678 if (processor_alias_table[i].flags & PTA_AVX512ER
3679 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512ER))
3680 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512ER;
3681 if (processor_alias_table[i].flags & PTA_AVX512PF
3682 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512PF))
3683 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512PF;
3684 if (processor_alias_table[i].flags & PTA_AVX512CD
3685 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512CD))
3686 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512CD;
3687 if (processor_alias_table[i].flags & PTA_PREFETCHWT1
3688 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PREFETCHWT1))
3689 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PREFETCHWT1;
3690 if (processor_alias_table[i].flags & PTA_CLFLUSHOPT
3691 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLFLUSHOPT))
3692 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLFLUSHOPT;
3693 if (processor_alias_table[i].flags & PTA_XSAVEC
3694 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEC))
3695 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEC;
3696 if (processor_alias_table[i].flags & PTA_XSAVES
3697 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVES))
3698 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVES;
3699 if (processor_alias_table[i].flags & PTA_AVX512DQ
3700 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512DQ))
3701 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512DQ;
3702 if (processor_alias_table[i].flags & PTA_AVX512BW
3703 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512BW))
3704 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512BW;
3705 if (processor_alias_table[i].flags & PTA_AVX512VL
3706 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VL))
3707 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VL;
3708 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3709 x86_prefetch_sse = true;
3710
3711 break;
3712 }
3713
3714 if (!strcmp (opts->x_ix86_arch_string, "generic"))
3715 error ("generic CPU can be used only for %stune=%s %s",
3716 prefix, suffix, sw);
3717 else if (!strcmp (opts->x_ix86_arch_string, "intel"))
3718 error ("intel CPU can be used only for %stune=%s %s",
3719 prefix, suffix, sw);
3720 else if (i == pta_size)
3721 error ("bad value (%s) for %sarch=%s %s",
3722 opts->x_ix86_arch_string, prefix, suffix, sw);
3723
3724 ix86_arch_mask = 1u << ix86_arch;
3725 for (i = 0; i < X86_ARCH_LAST; ++i)
3726 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3727
3728 for (i = 0; i < pta_size; i++)
3729 if (! strcmp (opts->x_ix86_tune_string, processor_alias_table[i].name))
3730 {
3731 ix86_schedule = processor_alias_table[i].schedule;
3732 ix86_tune = processor_alias_table[i].processor;
3733 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3734 {
3735 if (!(processor_alias_table[i].flags & PTA_64BIT))
3736 {
3737 if (ix86_tune_defaulted)
3738 {
3739 opts->x_ix86_tune_string = "x86-64";
3740 for (i = 0; i < pta_size; i++)
3741 if (! strcmp (opts->x_ix86_tune_string,
3742 processor_alias_table[i].name))
3743 break;
3744 ix86_schedule = processor_alias_table[i].schedule;
3745 ix86_tune = processor_alias_table[i].processor;
3746 }
3747 else
3748 error ("CPU you selected does not support x86-64 "
3749 "instruction set");
3750 }
3751 }
3752 /* Intel CPUs have always interpreted SSE prefetch instructions as
3753 NOPs; so, we can enable SSE prefetch instructions even when
3754 -mtune (rather than -march) points us to a processor that has them.
3755 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3756 higher processors. */
3757 if (TARGET_CMOV
3758 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3759 x86_prefetch_sse = true;
3760 break;
3761 }
3762
3763 if (ix86_tune_specified && i == pta_size)
3764 error ("bad value (%s) for %stune=%s %s",
3765 opts->x_ix86_tune_string, prefix, suffix, sw);
3766
3767 set_ix86_tune_features (ix86_tune, opts->x_ix86_dump_tunes);
3768
3769 #ifndef USE_IX86_FRAME_POINTER
3770 #define USE_IX86_FRAME_POINTER 0
3771 #endif
3772
3773 #ifndef USE_X86_64_FRAME_POINTER
3774 #define USE_X86_64_FRAME_POINTER 0
3775 #endif
3776
3777 /* Set the default values for switches whose default depends on TARGET_64BIT
3778 in case they weren't overwritten by command line options. */
3779 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3780 {
3781 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3782 opts->x_flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
3783 if (opts->x_flag_asynchronous_unwind_tables
3784 && !opts_set->x_flag_unwind_tables
3785 && TARGET_64BIT_MS_ABI)
3786 opts->x_flag_unwind_tables = 1;
3787 if (opts->x_flag_asynchronous_unwind_tables == 2)
3788 opts->x_flag_unwind_tables
3789 = opts->x_flag_asynchronous_unwind_tables = 1;
3790 if (opts->x_flag_pcc_struct_return == 2)
3791 opts->x_flag_pcc_struct_return = 0;
3792 }
3793 else
3794 {
3795 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3796 opts->x_flag_omit_frame_pointer
3797 = !(USE_IX86_FRAME_POINTER || opts->x_optimize_size);
3798 if (opts->x_flag_asynchronous_unwind_tables == 2)
3799 opts->x_flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3800 if (opts->x_flag_pcc_struct_return == 2)
3801 opts->x_flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3802 }
3803
3804 ix86_tune_cost = processor_target_table[ix86_tune].cost;
3805 if (opts->x_optimize_size)
3806 ix86_cost = &ix86_size_cost;
3807 else
3808 ix86_cost = ix86_tune_cost;
3809
3810 /* Arrange to set up i386_stack_locals for all functions. */
3811 init_machine_status = ix86_init_machine_status;
3812
3813 /* Validate -mregparm= value. */
3814 if (opts_set->x_ix86_regparm)
3815 {
3816 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3817 warning (0, "-mregparm is ignored in 64-bit mode");
3818 if (opts->x_ix86_regparm > REGPARM_MAX)
3819 {
3820 error ("-mregparm=%d is not between 0 and %d",
3821 opts->x_ix86_regparm, REGPARM_MAX);
3822 opts->x_ix86_regparm = 0;
3823 }
3824 }
3825 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3826 opts->x_ix86_regparm = REGPARM_MAX;
3827
3828 /* Default align_* from the processor table. */
3829 if (opts->x_align_loops == 0)
3830 {
3831 opts->x_align_loops = processor_target_table[ix86_tune].align_loop;
3832 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3833 }
3834 if (opts->x_align_jumps == 0)
3835 {
3836 opts->x_align_jumps = processor_target_table[ix86_tune].align_jump;
3837 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3838 }
3839 if (opts->x_align_functions == 0)
3840 {
3841 opts->x_align_functions = processor_target_table[ix86_tune].align_func;
3842 }
3843
3844 /* Provide default for -mbranch-cost= value. */
3845 if (!opts_set->x_ix86_branch_cost)
3846 opts->x_ix86_branch_cost = ix86_cost->branch_cost;
3847
3848 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3849 {
3850 opts->x_target_flags
3851 |= TARGET_SUBTARGET64_DEFAULT & ~opts_set->x_target_flags;
3852
3853 /* Enable by default the SSE and MMX builtins. Do allow the user to
3854 explicitly disable any of these. In particular, disabling SSE and
3855 MMX for kernel code is extremely useful. */
3856 if (!ix86_arch_specified)
3857 opts->x_ix86_isa_flags
3858 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3859 | TARGET_SUBTARGET64_ISA_DEFAULT)
3860 & ~opts->x_ix86_isa_flags_explicit);
3861
3862 if (TARGET_RTD_P (opts->x_target_flags))
3863 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
3864 }
3865 else
3866 {
3867 opts->x_target_flags
3868 |= TARGET_SUBTARGET32_DEFAULT & ~opts_set->x_target_flags;
3869
3870 if (!ix86_arch_specified)
3871 opts->x_ix86_isa_flags
3872 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit;
3873
3874 /* The i386 ABI does not specify a red zone. It still makes sense to use one
3875 when the programmer takes care to keep the stack from being clobbered. */
3876 if (!(opts_set->x_target_flags & MASK_NO_RED_ZONE))
3877 opts->x_target_flags |= MASK_NO_RED_ZONE;
3878 }
3879
3880 /* Keep nonleaf frame pointers. */
3881 if (opts->x_flag_omit_frame_pointer)
3882 opts->x_target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3883 else if (TARGET_OMIT_LEAF_FRAME_POINTER_P (opts->x_target_flags))
3884 opts->x_flag_omit_frame_pointer = 1;
3885
3886 /* If we're doing fast math, we don't care about comparison order
3887 wrt NaNs. This lets us use a shorter comparison sequence. */
3888 if (opts->x_flag_finite_math_only)
3889 opts->x_target_flags &= ~MASK_IEEE_FP;
3890
3891 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3892 since the insns won't need emulation. */
3893 if (ix86_tune_features [X86_TUNE_ALWAYS_FANCY_MATH_387])
3894 opts->x_target_flags &= ~MASK_NO_FANCY_MATH_387;
3895
3896 /* Likewise, if the target doesn't have a 387, or we've specified
3897 software floating point, don't use 387 inline intrinsics. */
3898 if (!TARGET_80387_P (opts->x_target_flags))
3899 opts->x_target_flags |= MASK_NO_FANCY_MATH_387;
3900
3901 /* Turn on MMX builtins for -msse. */
3902 if (TARGET_SSE_P (opts->x_ix86_isa_flags))
3903 opts->x_ix86_isa_flags
3904 |= OPTION_MASK_ISA_MMX & ~opts->x_ix86_isa_flags_explicit;
3905
3906 /* Enable SSE prefetch. */
3907 if (TARGET_SSE_P (opts->x_ix86_isa_flags)
3908 || (TARGET_PRFCHW && !TARGET_3DNOW_P (opts->x_ix86_isa_flags)))
3909 x86_prefetch_sse = true;
3910
3911 /* Enable prefetch{,w} instructions for -m3dnow and -mprefetchwt1. */
3912 if (TARGET_3DNOW_P (opts->x_ix86_isa_flags)
3913 || TARGET_PREFETCHWT1_P (opts->x_ix86_isa_flags))
3914 opts->x_ix86_isa_flags
3915 |= OPTION_MASK_ISA_PRFCHW & ~opts->x_ix86_isa_flags_explicit;
3916
3917 /* Enable popcnt instruction for -msse4.2 or -mabm. */
3918 if (TARGET_SSE4_2_P (opts->x_ix86_isa_flags)
3919 || TARGET_ABM_P (opts->x_ix86_isa_flags))
3920 opts->x_ix86_isa_flags
3921 |= OPTION_MASK_ISA_POPCNT & ~opts->x_ix86_isa_flags_explicit;
3922
3923 /* Enable lzcnt instruction for -mabm. */
3924 if (TARGET_ABM_P(opts->x_ix86_isa_flags))
3925 opts->x_ix86_isa_flags
3926 |= OPTION_MASK_ISA_LZCNT & ~opts->x_ix86_isa_flags_explicit;
3927
3928 /* Validate -mpreferred-stack-boundary= value or default it to
3929 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3930 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3931 if (opts_set->x_ix86_preferred_stack_boundary_arg)
3932 {
3933 int min = (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3934 ? (TARGET_SSE_P (opts->x_ix86_isa_flags) ? 4 : 3) : 2);
3935 int max = (TARGET_SEH ? 4 : 12);
3936
3937 if (opts->x_ix86_preferred_stack_boundary_arg < min
3938 || opts->x_ix86_preferred_stack_boundary_arg > max)
3939 {
3940 if (min == max)
3941 error ("-mpreferred-stack-boundary is not supported "
3942 "for this target");
3943 else
3944 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
3945 opts->x_ix86_preferred_stack_boundary_arg, min, max);
3946 }
3947 else
3948 ix86_preferred_stack_boundary
3949 = (1 << opts->x_ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
3950 }
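/* Worked example: -mpreferred-stack-boundary=4 yields
   (1 << 4) * BITS_PER_UNIT = 128 bits, i.e. a 16-byte stack alignment.  */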
3951
3952 /* Set the default value for -mstackrealign. */
3953 if (opts->x_ix86_force_align_arg_pointer == -1)
3954 opts->x_ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3955
3956 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3957
3958 /* Validate -mincoming-stack-boundary= value or default it to
3959 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3960 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3961 if (opts_set->x_ix86_incoming_stack_boundary_arg)
3962 {
3963 if (opts->x_ix86_incoming_stack_boundary_arg
3964 < (TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2)
3965 || opts->x_ix86_incoming_stack_boundary_arg > 12)
3966 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3967 opts->x_ix86_incoming_stack_boundary_arg,
3968 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2);
3969 else
3970 {
3971 ix86_user_incoming_stack_boundary
3972 = (1 << opts->x_ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
3973 ix86_incoming_stack_boundary
3974 = ix86_user_incoming_stack_boundary;
3975 }
3976 }
3977
3978 #ifndef NO_PROFILE_COUNTERS
3979 if (flag_nop_mcount)
3980 error ("-mnop-mcount is not compatible with this target");
3981 #endif
3982 if (flag_nop_mcount && flag_pic)
3983 error ("-mnop-mcount is not implemented for -fPIC");
3984
3985 /* Accept -msseregparm only if at least SSE support is enabled. */
3986 if (TARGET_SSEREGPARM_P (opts->x_target_flags)
3987 && ! TARGET_SSE_P (opts->x_ix86_isa_flags))
3988 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3989
3990 if (opts_set->x_ix86_fpmath)
3991 {
3992 if (opts->x_ix86_fpmath & FPMATH_SSE)
3993 {
3994 if (!TARGET_SSE_P (opts->x_ix86_isa_flags))
3995 {
3996 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3997 opts->x_ix86_fpmath = FPMATH_387;
3998 }
3999 else if ((opts->x_ix86_fpmath & FPMATH_387)
4000 && !TARGET_80387_P (opts->x_target_flags))
4001 {
4002 warning (0, "387 instruction set disabled, using SSE arithmetics");
4003 opts->x_ix86_fpmath = FPMATH_SSE;
4004 }
4005 }
4006 }
4007 /* For all chips supporting SSE2, -mfpmath=sse performs better than
4008 -mfpmath=387. The latter is nevertheless the default on many targets, since
4009 the extra 80-bit precision of temporaries is considered to be part of the ABI.
4010 Override the default at least for -ffast-math.
4011 TODO: -mfpmath=both seems to produce code of the same performance with
4012 slightly smaller binaries. It is however not clear whether register
4013 allocation is ready for this setting.
4014 Also, -mfpmath=387 is overall considerably more compact (about 4-5%) than
4015 SSE codegen. We may switch to 387 with -ffast-math for size-optimized
4016 functions. */
4017 else if (fast_math_flags_set_p (&global_options)
4018 && TARGET_SSE2_P (opts->x_ix86_isa_flags))
4019 opts->x_ix86_fpmath = FPMATH_SSE;
4020 else
4021 opts->x_ix86_fpmath = TARGET_FPMATH_DEFAULT_P (opts->x_ix86_isa_flags);
4022
4023 /* If the i387 is disabled, then do not return values in it. */
4024 if (!TARGET_80387_P (opts->x_target_flags))
4025 opts->x_target_flags &= ~MASK_FLOAT_RETURNS;
4026
4027 /* Use an external vectorized library when vectorizing intrinsics. */
4028 if (opts_set->x_ix86_veclibabi_type)
4029 switch (opts->x_ix86_veclibabi_type)
4030 {
4031 case ix86_veclibabi_type_svml:
4032 ix86_veclib_handler = ix86_veclibabi_svml;
4033 break;
4034
4035 case ix86_veclibabi_type_acml:
4036 ix86_veclib_handler = ix86_veclibabi_acml;
4037 break;
4038
4039 default:
4040 gcc_unreachable ();
4041 }
4042
4043 if (ix86_tune_features [X86_TUNE_ACCUMULATE_OUTGOING_ARGS]
4044 && !(opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4045 && !opts->x_optimize_size)
4046 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4047
4048 /* If stack probes are required, the space used for large function
4049 arguments on the stack must also be probed, so enable
4050 -maccumulate-outgoing-args so this happens in the prologue. */
4051 if (TARGET_STACK_PROBE_P (opts->x_target_flags)
4052 && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4053 {
4054 if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4055 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
4056 "for correctness", prefix, suffix);
4057 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4058 }
4059
4060 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
4061 {
4062 char *p;
4063 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
4064 p = strchr (internal_label_prefix, 'X');
4065 internal_label_prefix_len = p - internal_label_prefix;
4066 *p = '\0';
4067 }
4068
4069 /* When scheduling description is not available, disable scheduler pass
4070 so it won't slow down the compilation and make x87 code slower. */
4071 if (!TARGET_SCHEDULE)
4072 opts->x_flag_schedule_insns_after_reload = opts->x_flag_schedule_insns = 0;
4073
4074 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4075 ix86_tune_cost->simultaneous_prefetches,
4076 opts->x_param_values,
4077 opts_set->x_param_values);
4078 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
4079 ix86_tune_cost->prefetch_block,
4080 opts->x_param_values,
4081 opts_set->x_param_values);
4082 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
4083 ix86_tune_cost->l1_cache_size,
4084 opts->x_param_values,
4085 opts_set->x_param_values);
4086 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
4087 ix86_tune_cost->l2_cache_size,
4088 opts->x_param_values,
4089 opts_set->x_param_values);
4090
4091 /* Enable software prefetching at -O3 for CPUs where prefetching is helpful. */
4092 if (opts->x_flag_prefetch_loop_arrays < 0
4093 && HAVE_prefetch
4094 && (opts->x_optimize >= 3 || opts->x_flag_profile_use)
4095 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
4096 opts->x_flag_prefetch_loop_arrays = 1;
4097
4098 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
4099 can be optimized to ap = __builtin_next_arg (0). */
4100 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && !opts->x_flag_split_stack)
4101 targetm.expand_builtin_va_start = NULL;
4102
4103 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4104 {
4105 ix86_gen_leave = gen_leave_rex64;
4106 if (Pmode == DImode)
4107 {
4108 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_di;
4109 ix86_gen_tls_local_dynamic_base_64
4110 = gen_tls_local_dynamic_base_64_di;
4111 }
4112 else
4113 {
4114 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_si;
4115 ix86_gen_tls_local_dynamic_base_64
4116 = gen_tls_local_dynamic_base_64_si;
4117 }
4118 }
4119 else
4120 ix86_gen_leave = gen_leave;
4121
4122 if (Pmode == DImode)
4123 {
4124 ix86_gen_add3 = gen_adddi3;
4125 ix86_gen_sub3 = gen_subdi3;
4126 ix86_gen_sub3_carry = gen_subdi3_carry;
4127 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
4128 ix86_gen_andsp = gen_anddi3;
4129 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
4130 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
4131 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
4132 ix86_gen_monitor = gen_sse3_monitor_di;
4133 }
4134 else
4135 {
4136 ix86_gen_add3 = gen_addsi3;
4137 ix86_gen_sub3 = gen_subsi3;
4138 ix86_gen_sub3_carry = gen_subsi3_carry;
4139 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
4140 ix86_gen_andsp = gen_andsi3;
4141 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
4142 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
4143 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
4144 ix86_gen_monitor = gen_sse3_monitor_si;
4145 }
4146
4147 #ifdef USE_IX86_CLD
4148 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
4149 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
4150 opts->x_target_flags |= MASK_CLD & ~opts_set->x_target_flags;
4151 #endif
4152
4153 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && opts->x_flag_pic)
4154 {
4155 if (opts->x_flag_fentry > 0)
4156 sorry ("-mfentry isn%'t supported for 32-bit in combination "
4157 "with -fpic");
4158 opts->x_flag_fentry = 0;
4159 }
4160 else if (TARGET_SEH)
4161 {
4162 if (opts->x_flag_fentry == 0)
4163 sorry ("-mno-fentry isn%'t compatible with SEH");
4164 opts->x_flag_fentry = 1;
4165 }
4166 else if (opts->x_flag_fentry < 0)
4167 {
4168 #if defined(PROFILE_BEFORE_PROLOGUE)
4169 opts->x_flag_fentry = 1;
4170 #else
4171 opts->x_flag_fentry = 0;
4172 #endif
4173 }
4174
4175 /* When not optimizing for size, enable the vzeroupper optimization for
4176 TARGET_AVX with -fexpensive-optimizations and split 32-byte
4177 AVX unaligned loads/stores. */
4178 if (!opts->x_optimize_size)
4179 {
4180 if (flag_expensive_optimizations
4181 && !(opts_set->x_target_flags & MASK_VZEROUPPER))
4182 opts->x_target_flags |= MASK_VZEROUPPER;
4183 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL]
4184 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
4185 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
4186 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL]
4187 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_STORE))
4188 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
4189 /* Enable 128-bit AVX instruction generation
4190 for the auto-vectorizer. */
4191 if (TARGET_AVX128_OPTIMAL
4192 && !(opts_set->x_target_flags & MASK_PREFER_AVX128))
4193 opts->x_target_flags |= MASK_PREFER_AVX128;
4194 }
4195
4196 if (opts->x_ix86_recip_name)
4197 {
4198 char *p = ASTRDUP (opts->x_ix86_recip_name);
4199 char *q;
4200 unsigned int mask, i;
4201 bool invert;
4202
4203 while ((q = strtok (p, ",")) != NULL)
4204 {
4205 p = NULL;
4206 if (*q == '!')
4207 {
4208 invert = true;
4209 q++;
4210 }
4211 else
4212 invert = false;
4213
4214 if (!strcmp (q, "default"))
4215 mask = RECIP_MASK_ALL;
4216 else
4217 {
4218 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4219 if (!strcmp (q, recip_options[i].string))
4220 {
4221 mask = recip_options[i].mask;
4222 break;
4223 }
4224
4225 if (i == ARRAY_SIZE (recip_options))
4226 {
4227 error ("unknown option for -mrecip=%s", q);
4228 invert = false;
4229 mask = RECIP_MASK_NONE;
4230 }
4231 }
4232
4233 opts->x_recip_mask_explicit |= mask;
4234 if (invert)
4235 opts->x_recip_mask &= ~mask;
4236 else
4237 opts->x_recip_mask |= mask;
4238 }
4239 }
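/* Illustration of the loop above: -mrecip=all,!div first sets every
   RECIP_MASK_* bit and then clears RECIP_MASK_DIV, since the
   comma-separated tokens are processed left to right.  */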
4240
4241 if (TARGET_RECIP_P (opts->x_target_flags))
4242 opts->x_recip_mask |= RECIP_MASK_ALL & ~opts->x_recip_mask_explicit;
4243 else if (opts_set->x_target_flags & MASK_RECIP)
4244 opts->x_recip_mask &= ~(RECIP_MASK_ALL & ~opts->x_recip_mask_explicit);
4245
4246 /* Default long double to 64-bit for 32-bit Bionic and to __float128
4247 for 64-bit Bionic. */
4248 if (TARGET_HAS_BIONIC
4249 && !(opts_set->x_target_flags
4250 & (MASK_LONG_DOUBLE_64 | MASK_LONG_DOUBLE_128)))
4251 opts->x_target_flags |= (TARGET_64BIT
4252 ? MASK_LONG_DOUBLE_128
4253 : MASK_LONG_DOUBLE_64);
4254
4255 /* Only one of them can be active. */
4256 gcc_assert ((opts->x_target_flags & MASK_LONG_DOUBLE_64) == 0
4257 || (opts->x_target_flags & MASK_LONG_DOUBLE_128) == 0);
4258
4259 /* Save the initial options in case the user does function specific
4260 options. */
4261 if (main_args_p)
4262 target_option_default_node = target_option_current_node
4263 = build_target_option_node (opts);
4264
4265 /* Handle stack protector */
4266 if (!opts_set->x_ix86_stack_protector_guard)
4267 opts->x_ix86_stack_protector_guard
4268 = TARGET_HAS_BIONIC ? SSP_GLOBAL : SSP_TLS;
4269
4270 /* Handle -mmemcpy-strategy= and -mmemset-strategy= */
4271 if (opts->x_ix86_tune_memcpy_strategy)
4272 {
4273 char *str = xstrdup (opts->x_ix86_tune_memcpy_strategy);
4274 ix86_parse_stringop_strategy_string (str, false);
4275 free (str);
4276 }
4277
4278 if (opts->x_ix86_tune_memset_strategy)
4279 {
4280 char *str = xstrdup (opts->x_ix86_tune_memset_strategy);
4281 ix86_parse_stringop_strategy_string (str, true);
4282 free (str);
4283 }
4284 }
4285
4286 /* Implement the TARGET_OPTION_OVERRIDE hook. */
4287
4288 static void
4289 ix86_option_override (void)
4290 {
4291 opt_pass *pass_insert_vzeroupper = make_pass_insert_vzeroupper (g);
4292 static struct register_pass_info insert_vzeroupper_info
4293 = { pass_insert_vzeroupper, "reload",
4294 1, PASS_POS_INSERT_AFTER
4295 };
4296
4297 ix86_option_override_internal (true, &global_options, &global_options_set);
4298
4299
4300 /* This needs to be done at start up. It's convenient to do it here. */
4301 register_pass (&insert_vzeroupper_info);
4302 }
4303
4304 /* Update register usage after having seen the compiler flags. */
4305
4306 static void
4307 ix86_conditional_register_usage (void)
4308 {
4309 int i, c_mask;
4310 unsigned int j;
4311
4312 /* The PIC register, if it exists, is fixed. */
4313 j = PIC_OFFSET_TABLE_REGNUM;
4314 if (j != INVALID_REGNUM)
4315 fixed_regs[j] = call_used_regs[j] = 1;
4316
4317 /* For 32-bit targets, squash the REX registers. */
4318 if (! TARGET_64BIT)
4319 {
4320 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
4321 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4322 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
4323 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4324 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4325 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4326 }
4327
4328 /* See the definition of CALL_USED_REGISTERS in i386.h. */
4329 c_mask = (TARGET_64BIT_MS_ABI ? (1 << 3)
4330 : TARGET_64BIT ? (1 << 2)
4331 : (1 << 1));
4332
4333 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
4334
4335 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4336 {
4337 /* Set/reset conditionally defined registers from
4338 CALL_USED_REGISTERS initializer. */
4339 if (call_used_regs[i] > 1)
4340 call_used_regs[i] = !!(call_used_regs[i] & c_mask);
4341
4342 /* Calculate registers of CLOBBERED_REGS register set
4343 as call used registers from GENERAL_REGS register set. */
4344 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
4345 && call_used_regs[i])
4346 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
4347 }
4348
4349 /* If MMX is disabled, squash the registers. */
4350 if (! TARGET_MMX)
4351 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4352 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
4353 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4354
4355 /* If SSE is disabled, squash the registers. */
4356 if (! TARGET_SSE)
4357 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4358 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
4359 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4360
4361 /* If the FPU is disabled, squash the registers. */
4362 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
4363 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4364 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
4365 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4366
4367 /* If AVX512F is disabled, squash the registers. */
4368 if (! TARGET_AVX512F)
4369 {
4370 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4371 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4372
4373 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
4374 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4375 }
4376 }
4377
4378 \f
4379 /* Save the current options */
4380
4381 static void
4382 ix86_function_specific_save (struct cl_target_option *ptr,
4383 struct gcc_options *opts)
4384 {
4385 ptr->arch = ix86_arch;
4386 ptr->schedule = ix86_schedule;
4387 ptr->tune = ix86_tune;
4388 ptr->branch_cost = ix86_branch_cost;
4389 ptr->tune_defaulted = ix86_tune_defaulted;
4390 ptr->arch_specified = ix86_arch_specified;
4391 ptr->x_ix86_isa_flags_explicit = opts->x_ix86_isa_flags_explicit;
4392 ptr->x_ix86_target_flags_explicit = opts->x_ix86_target_flags_explicit;
4393 ptr->x_recip_mask_explicit = opts->x_recip_mask_explicit;
4394 ptr->x_ix86_arch_string = opts->x_ix86_arch_string;
4395 ptr->x_ix86_tune_string = opts->x_ix86_tune_string;
4396 ptr->x_ix86_cmodel = opts->x_ix86_cmodel;
4397 ptr->x_ix86_abi = opts->x_ix86_abi;
4398 ptr->x_ix86_asm_dialect = opts->x_ix86_asm_dialect;
4399 ptr->x_ix86_branch_cost = opts->x_ix86_branch_cost;
4400 ptr->x_ix86_dump_tunes = opts->x_ix86_dump_tunes;
4401 ptr->x_ix86_force_align_arg_pointer = opts->x_ix86_force_align_arg_pointer;
4402 ptr->x_ix86_force_drap = opts->x_ix86_force_drap;
4403 ptr->x_ix86_incoming_stack_boundary_arg = opts->x_ix86_incoming_stack_boundary_arg;
4404 ptr->x_ix86_pmode = opts->x_ix86_pmode;
4405 ptr->x_ix86_preferred_stack_boundary_arg = opts->x_ix86_preferred_stack_boundary_arg;
4406 ptr->x_ix86_recip_name = opts->x_ix86_recip_name;
4407 ptr->x_ix86_regparm = opts->x_ix86_regparm;
4408 ptr->x_ix86_section_threshold = opts->x_ix86_section_threshold;
4409 ptr->x_ix86_sse2avx = opts->x_ix86_sse2avx;
4410 ptr->x_ix86_stack_protector_guard = opts->x_ix86_stack_protector_guard;
4411 ptr->x_ix86_stringop_alg = opts->x_ix86_stringop_alg;
4412 ptr->x_ix86_tls_dialect = opts->x_ix86_tls_dialect;
4413 ptr->x_ix86_tune_ctrl_string = opts->x_ix86_tune_ctrl_string;
4414 ptr->x_ix86_tune_memcpy_strategy = opts->x_ix86_tune_memcpy_strategy;
4415 ptr->x_ix86_tune_memset_strategy = opts->x_ix86_tune_memset_strategy;
4416 ptr->x_ix86_tune_no_default = opts->x_ix86_tune_no_default;
4417 ptr->x_ix86_veclibabi_type = opts->x_ix86_veclibabi_type;
4418
4419 /* The fields are char but the variables are not; make sure the
4420 values fit in the fields. */
4421 gcc_assert (ptr->arch == ix86_arch);
4422 gcc_assert (ptr->schedule == ix86_schedule);
4423 gcc_assert (ptr->tune == ix86_tune);
4424 gcc_assert (ptr->branch_cost == ix86_branch_cost);
4425 }
4426
4427 /* Restore the current options */
4428
4429 static void
4430 ix86_function_specific_restore (struct gcc_options *opts,
4431 struct cl_target_option *ptr)
4432 {
4433 enum processor_type old_tune = ix86_tune;
4434 enum processor_type old_arch = ix86_arch;
4435 unsigned int ix86_arch_mask;
4436 int i;
4437
4438 /* We don't change -fPIC. */
4439 opts->x_flag_pic = flag_pic;
4440
4441 ix86_arch = (enum processor_type) ptr->arch;
4442 ix86_schedule = (enum attr_cpu) ptr->schedule;
4443 ix86_tune = (enum processor_type) ptr->tune;
4444 opts->x_ix86_branch_cost = ptr->branch_cost;
4445 ix86_tune_defaulted = ptr->tune_defaulted;
4446 ix86_arch_specified = ptr->arch_specified;
4447 opts->x_ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
4448 opts->x_ix86_target_flags_explicit = ptr->x_ix86_target_flags_explicit;
4449 opts->x_recip_mask_explicit = ptr->x_recip_mask_explicit;
4450 opts->x_ix86_arch_string = ptr->x_ix86_arch_string;
4451 opts->x_ix86_tune_string = ptr->x_ix86_tune_string;
4452 opts->x_ix86_cmodel = ptr->x_ix86_cmodel;
4453 opts->x_ix86_abi = ptr->x_ix86_abi;
4454 opts->x_ix86_asm_dialect = ptr->x_ix86_asm_dialect;
4455 opts->x_ix86_branch_cost = ptr->x_ix86_branch_cost;
4456 opts->x_ix86_dump_tunes = ptr->x_ix86_dump_tunes;
4457 opts->x_ix86_force_align_arg_pointer = ptr->x_ix86_force_align_arg_pointer;
4458 opts->x_ix86_force_drap = ptr->x_ix86_force_drap;
4459 opts->x_ix86_incoming_stack_boundary_arg = ptr->x_ix86_incoming_stack_boundary_arg;
4460 opts->x_ix86_pmode = ptr->x_ix86_pmode;
4461 opts->x_ix86_preferred_stack_boundary_arg = ptr->x_ix86_preferred_stack_boundary_arg;
4462 opts->x_ix86_recip_name = ptr->x_ix86_recip_name;
4463 opts->x_ix86_regparm = ptr->x_ix86_regparm;
4464 opts->x_ix86_section_threshold = ptr->x_ix86_section_threshold;
4465 opts->x_ix86_sse2avx = ptr->x_ix86_sse2avx;
4466 opts->x_ix86_stack_protector_guard = ptr->x_ix86_stack_protector_guard;
4467 opts->x_ix86_stringop_alg = ptr->x_ix86_stringop_alg;
4468 opts->x_ix86_tls_dialect = ptr->x_ix86_tls_dialect;
4469 opts->x_ix86_tune_ctrl_string = ptr->x_ix86_tune_ctrl_string;
4470 opts->x_ix86_tune_memcpy_strategy = ptr->x_ix86_tune_memcpy_strategy;
4471 opts->x_ix86_tune_memset_strategy = ptr->x_ix86_tune_memset_strategy;
4472 opts->x_ix86_tune_no_default = ptr->x_ix86_tune_no_default;
4473 opts->x_ix86_veclibabi_type = ptr->x_ix86_veclibabi_type;
4474
4475 /* Recreate the arch feature tests if the arch changed */
4476 if (old_arch != ix86_arch)
4477 {
4478 ix86_arch_mask = 1u << ix86_arch;
4479 for (i = 0; i < X86_ARCH_LAST; ++i)
4480 ix86_arch_features[i]
4481 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
4482 }
4483
4484 /* Recreate the tune optimization tests */
4485 if (old_tune != ix86_tune)
4486 set_ix86_tune_features (ix86_tune, false);
4487 }
4488
4489 /* Print the current options */
4490
4491 static void
4492 ix86_function_specific_print (FILE *file, int indent,
4493 struct cl_target_option *ptr)
4494 {
4495 char *target_string
4496 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
4497 NULL, NULL, ptr->x_ix86_fpmath, false);
4498
4499 gcc_assert (ptr->arch < PROCESSOR_max);
4500 fprintf (file, "%*sarch = %d (%s)\n",
4501 indent, "",
4502 ptr->arch, processor_target_table[ptr->arch].name);
4503
4504 gcc_assert (ptr->tune < PROCESSOR_max);
4505 fprintf (file, "%*stune = %d (%s)\n",
4506 indent, "",
4507 ptr->tune, processor_target_table[ptr->tune].name);
4508
4509 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
4510
4511 if (target_string)
4512 {
4513 fprintf (file, "%*s%s\n", indent, "", target_string);
4514 free (target_string);
4515 }
4516 }
4517
4518 \f
4519 /* Inner function to process the attribute((target(...))): take an argument
4520 and set the current options from it. If we have a list, recursively go
4521 over the list. */
4522
4523 static bool
4524 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
4525 struct gcc_options *opts,
4526 struct gcc_options *opts_set,
4527 struct gcc_options *enum_opts_set)
4528 {
4529 char *next_optstr;
4530 bool ret = true;
4531
4532 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4533 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4534 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
4535 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4536 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
4537
4538 enum ix86_opt_type
4539 {
4540 ix86_opt_unknown,
4541 ix86_opt_yes,
4542 ix86_opt_no,
4543 ix86_opt_str,
4544 ix86_opt_enum,
4545 ix86_opt_isa
4546 };
4547
4548 static const struct
4549 {
4550 const char *string;
4551 size_t len;
4552 enum ix86_opt_type type;
4553 int opt;
4554 int mask;
4555 } attrs[] = {
4556 /* isa options */
4557 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
4558 IX86_ATTR_ISA ("abm", OPT_mabm),
4559 IX86_ATTR_ISA ("bmi", OPT_mbmi),
4560 IX86_ATTR_ISA ("bmi2", OPT_mbmi2),
4561 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt),
4562 IX86_ATTR_ISA ("tbm", OPT_mtbm),
4563 IX86_ATTR_ISA ("aes", OPT_maes),
4564 IX86_ATTR_ISA ("sha", OPT_msha),
4565 IX86_ATTR_ISA ("avx", OPT_mavx),
4566 IX86_ATTR_ISA ("avx2", OPT_mavx2),
4567 IX86_ATTR_ISA ("avx512f", OPT_mavx512f),
4568 IX86_ATTR_ISA ("avx512pf", OPT_mavx512pf),
4569 IX86_ATTR_ISA ("avx512er", OPT_mavx512er),
4570 IX86_ATTR_ISA ("avx512cd", OPT_mavx512cd),
4571 IX86_ATTR_ISA ("avx512dq", OPT_mavx512dq),
4572 IX86_ATTR_ISA ("avx512bw", OPT_mavx512bw),
4573 IX86_ATTR_ISA ("avx512vl", OPT_mavx512vl),
4574 IX86_ATTR_ISA ("mmx", OPT_mmmx),
4575 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
4576 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
4577 IX86_ATTR_ISA ("sse", OPT_msse),
4578 IX86_ATTR_ISA ("sse2", OPT_msse2),
4579 IX86_ATTR_ISA ("sse3", OPT_msse3),
4580 IX86_ATTR_ISA ("sse4", OPT_msse4),
4581 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
4582 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
4583 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
4584 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
4585 IX86_ATTR_ISA ("fma4", OPT_mfma4),
4586 IX86_ATTR_ISA ("fma", OPT_mfma),
4587 IX86_ATTR_ISA ("xop", OPT_mxop),
4588 IX86_ATTR_ISA ("lwp", OPT_mlwp),
4589 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
4590 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
4591 IX86_ATTR_ISA ("f16c", OPT_mf16c),
4592 IX86_ATTR_ISA ("rtm", OPT_mrtm),
4593 IX86_ATTR_ISA ("hle", OPT_mhle),
4594 IX86_ATTR_ISA ("prfchw", OPT_mprfchw),
4595 IX86_ATTR_ISA ("rdseed", OPT_mrdseed),
4596 IX86_ATTR_ISA ("adx", OPT_madx),
4597 IX86_ATTR_ISA ("fxsr", OPT_mfxsr),
4598 IX86_ATTR_ISA ("xsave", OPT_mxsave),
4599 IX86_ATTR_ISA ("xsaveopt", OPT_mxsaveopt),
4600 IX86_ATTR_ISA ("prefetchwt1", OPT_mprefetchwt1),
4601 IX86_ATTR_ISA ("clflushopt", OPT_mclflushopt),
4602 IX86_ATTR_ISA ("xsavec", OPT_mxsavec),
4603 IX86_ATTR_ISA ("xsaves", OPT_mxsaves),
4604
4605 /* enum options */
4606 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
4607
4608 /* string options */
4609 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
4610 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
4611
4612 /* flag options */
4613 IX86_ATTR_YES ("cld",
4614 OPT_mcld,
4615 MASK_CLD),
4616
4617 IX86_ATTR_NO ("fancy-math-387",
4618 OPT_mfancy_math_387,
4619 MASK_NO_FANCY_MATH_387),
4620
4621 IX86_ATTR_YES ("ieee-fp",
4622 OPT_mieee_fp,
4623 MASK_IEEE_FP),
4624
4625 IX86_ATTR_YES ("inline-all-stringops",
4626 OPT_minline_all_stringops,
4627 MASK_INLINE_ALL_STRINGOPS),
4628
4629 IX86_ATTR_YES ("inline-stringops-dynamically",
4630 OPT_minline_stringops_dynamically,
4631 MASK_INLINE_STRINGOPS_DYNAMICALLY),
4632
4633 IX86_ATTR_NO ("align-stringops",
4634 OPT_mno_align_stringops,
4635 MASK_NO_ALIGN_STRINGOPS),
4636
4637 IX86_ATTR_YES ("recip",
4638 OPT_mrecip,
4639 MASK_RECIP),
4640
4641 };
4642
4643 /* If this is a list, recurse to get the options. */
4644 if (TREE_CODE (args) == TREE_LIST)
4645 {
4646 bool ret = true;
4647
4648 for (; args; args = TREE_CHAIN (args))
4649 if (TREE_VALUE (args)
4650 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
4651 p_strings, opts, opts_set,
4652 enum_opts_set))
4653 ret = false;
4654
4655 return ret;
4656 }
4657
4658 else if (TREE_CODE (args) != STRING_CST)
4659 {
4660 error ("attribute %<target%> argument not a string");
4661 return false;
4662 }
4663
4664 /* Handle multiple arguments separated by commas. */
4665 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
4666
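/* E.g. for __attribute__((target ("avx2,no-avx512f,arch=haswell"))) this loop
   sees "avx2", "no-avx512f" and "arch=haswell" in turn: the first two are ISA
   options (the "no-" prefix clears the flag), the last is a string option
   stored in p_strings[].  */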
4667 while (next_optstr && *next_optstr != '\0')
4668 {
4669 char *p = next_optstr;
4670 char *orig_p = p;
4671 char *comma = strchr (next_optstr, ',');
4672 const char *opt_string;
4673 size_t len, opt_len;
4674 int opt;
4675 bool opt_set_p;
4676 char ch;
4677 unsigned i;
4678 enum ix86_opt_type type = ix86_opt_unknown;
4679 int mask = 0;
4680
4681 if (comma)
4682 {
4683 *comma = '\0';
4684 len = comma - next_optstr;
4685 next_optstr = comma + 1;
4686 }
4687 else
4688 {
4689 len = strlen (p);
4690 next_optstr = NULL;
4691 }
4692
4693 /* Recognize no-xxx. */
4694 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4695 {
4696 opt_set_p = false;
4697 p += 3;
4698 len -= 3;
4699 }
4700 else
4701 opt_set_p = true;
4702
4703 /* Find the option. */
4704 ch = *p;
4705 opt = N_OPTS;
4706 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4707 {
4708 type = attrs[i].type;
4709 opt_len = attrs[i].len;
4710 if (ch == attrs[i].string[0]
4711 && ((type != ix86_opt_str && type != ix86_opt_enum)
4712 ? len == opt_len
4713 : len > opt_len)
4714 && memcmp (p, attrs[i].string, opt_len) == 0)
4715 {
4716 opt = attrs[i].opt;
4717 mask = attrs[i].mask;
4718 opt_string = attrs[i].string;
4719 break;
4720 }
4721 }
4722
4723 /* Process the option. */
4724 if (opt == N_OPTS)
4725 {
4726 error ("attribute(target(\"%s\")) is unknown", orig_p);
4727 ret = false;
4728 }
4729
4730 else if (type == ix86_opt_isa)
4731 {
4732 struct cl_decoded_option decoded;
4733
4734 generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
4735 ix86_handle_option (opts, opts_set,
4736 &decoded, input_location);
4737 }
4738
4739 else if (type == ix86_opt_yes || type == ix86_opt_no)
4740 {
4741 if (type == ix86_opt_no)
4742 opt_set_p = !opt_set_p;
4743
4744 if (opt_set_p)
4745 opts->x_target_flags |= mask;
4746 else
4747 opts->x_target_flags &= ~mask;
4748 }
4749
4750 else if (type == ix86_opt_str)
4751 {
4752 if (p_strings[opt])
4753 {
4754 error ("option(\"%s\") was already specified", opt_string);
4755 ret = false;
4756 }
4757 else
4758 p_strings[opt] = xstrdup (p + opt_len);
4759 }
4760
4761 else if (type == ix86_opt_enum)
4762 {
4763 bool arg_ok;
4764 int value;
4765
4766 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
4767 if (arg_ok)
4768 set_option (opts, enum_opts_set, opt, value,
4769 p + opt_len, DK_UNSPECIFIED, input_location,
4770 global_dc);
4771 else
4772 {
4773 error ("attribute(target(\"%s\")) is unknown", orig_p);
4774 ret = false;
4775 }
4776 }
4777
4778 else
4779 gcc_unreachable ();
4780 }
4781
4782 return ret;
4783 }
4784
4785 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4786
4787 tree
4788 ix86_valid_target_attribute_tree (tree args,
4789 struct gcc_options *opts,
4790 struct gcc_options *opts_set)
4791 {
4792 const char *orig_arch_string = opts->x_ix86_arch_string;
4793 const char *orig_tune_string = opts->x_ix86_tune_string;
4794 enum fpmath_unit orig_fpmath_set = opts_set->x_ix86_fpmath;
4795 int orig_tune_defaulted = ix86_tune_defaulted;
4796 int orig_arch_specified = ix86_arch_specified;
4797 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
4798 tree t = NULL_TREE;
4799 int i;
4800 struct cl_target_option *def
4801 = TREE_TARGET_OPTION (target_option_default_node);
4802 struct gcc_options enum_opts_set;
4803
4804 memset (&enum_opts_set, 0, sizeof (enum_opts_set));
4805
4806 /* Process each of the options on the chain. */
4807 if (! ix86_valid_target_attribute_inner_p (args, option_strings, opts,
4808 opts_set, &enum_opts_set))
4809 return error_mark_node;
4810
4811 /* If the changed options are different from the default, rerun
4812 ix86_option_override_internal, and then save the options away.
4813 The string options are attribute options, and will be undone
4814 when we copy the save structure. */
4815 if (opts->x_ix86_isa_flags != def->x_ix86_isa_flags
4816 || opts->x_target_flags != def->x_target_flags
4817 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4818 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4819 || enum_opts_set.x_ix86_fpmath)
4820 {
4821 /* If we are using the default tune= or arch=, undo the string assigned,
4822 and use the default. */
4823 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4824 opts->x_ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4825 else if (!orig_arch_specified)
4826 opts->x_ix86_arch_string = NULL;
4827
4828 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4829 opts->x_ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4830 else if (orig_tune_defaulted)
4831 opts->x_ix86_tune_string = NULL;
4832
4833 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4834 if (enum_opts_set.x_ix86_fpmath)
4835 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
4836 else if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
4837 && TARGET_SSE_P (opts->x_ix86_isa_flags))
4838 {
4839 opts->x_ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
4840 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
4841 }
4842
4843 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4844 ix86_option_override_internal (false, opts, opts_set);
4845
4846 /* Add any builtin functions with the new isa if any. */
4847 ix86_add_new_builtins (opts->x_ix86_isa_flags);
4848
4849 /* Save the current options unless we are validating options for
4850 #pragma. */
4851 t = build_target_option_node (opts);
4852
4853 opts->x_ix86_arch_string = orig_arch_string;
4854 opts->x_ix86_tune_string = orig_tune_string;
4855 opts_set->x_ix86_fpmath = orig_fpmath_set;
4856
4857 /* Free up memory allocated to hold the strings */
4858 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
4859 free (option_strings[i]);
4860 }
4861
4862 return t;
4863 }
4864
4865 /* Hook to validate attribute((target("string"))). */
4866
4867 static bool
4868 ix86_valid_target_attribute_p (tree fndecl,
4869 tree ARG_UNUSED (name),
4870 tree args,
4871 int ARG_UNUSED (flags))
4872 {
4873 struct gcc_options func_options;
4874 tree new_target, new_optimize;
4875 bool ret = true;
4876
4877 /* attribute((target("default"))) does nothing, beyond
4878 affecting multi-versioning. */
4879 if (TREE_VALUE (args)
4880 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
4881 && TREE_CHAIN (args) == NULL_TREE
4882 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
4883 return true;
4884
4885 tree old_optimize = build_optimization_node (&global_options);
4886
4887 /* Get the optimization options of the current function. */
4888 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
4889
4890 if (!func_optimize)
4891 func_optimize = old_optimize;
4892
4893 /* Init func_options. */
4894 memset (&func_options, 0, sizeof (func_options));
4895 init_options_struct (&func_options, NULL);
4896 lang_hooks.init_options_struct (&func_options);
4897
4898 cl_optimization_restore (&func_options,
4899 TREE_OPTIMIZATION (func_optimize));
4900
4901 /* Initialize func_options to the default before its target options can
4902 be set. */
4903 cl_target_option_restore (&func_options,
4904 TREE_TARGET_OPTION (target_option_default_node));
4905
4906 new_target = ix86_valid_target_attribute_tree (args, &func_options,
4907 &global_options_set);
4908
4909 new_optimize = build_optimization_node (&func_options);
4910
4911 if (new_target == error_mark_node)
4912 ret = false;
4913
4914 else if (fndecl && new_target)
4915 {
4916 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
4917
4918 if (old_optimize != new_optimize)
4919 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
4920 }
4921
4922 return ret;
4923 }
4924
4925 \f
4926 /* Hook to determine if one function can safely inline another. */
4927
4928 static bool
4929 ix86_can_inline_p (tree caller, tree callee)
4930 {
4931 bool ret = false;
4932 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
4933 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
4934
4935 /* If callee has no option attributes, then it is ok to inline. */
4936 if (!callee_tree)
4937 ret = true;
4938
4939 /* If the caller has no option attributes, but the callee does, then it is
4940 not ok to inline. */
4941 else if (!caller_tree)
4942 ret = false;
4943
4944 else
4945 {
4946 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
4947 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
4948
4949 /* Callee's isa options should be a subset of the caller's, i.e. an SSE4
4950 function can inline an SSE2 function but an SSE2 function can't inline
4951 an SSE4 function. */
4952 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
4953 != callee_opts->x_ix86_isa_flags)
4954 ret = false;
4955
4956 /* See if we have the same non-isa options. */
4957 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
4958 ret = false;
4959
4960 /* See if arch, tune, etc. are the same. */
4961 else if (caller_opts->arch != callee_opts->arch)
4962 ret = false;
4963
4964 else if (caller_opts->tune != callee_opts->tune)
4965 ret = false;
4966
4967 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
4968 ret = false;
4969
4970 else if (caller_opts->branch_cost != callee_opts->branch_cost)
4971 ret = false;
4972
4973 else
4974 ret = true;
4975 }
4976
4977 return ret;
4978 }
4979
4980 \f
4981 /* Remember the last target of ix86_set_current_function. */
4982 static GTY(()) tree ix86_previous_fndecl;
4983
4984 /* Invalidate ix86_previous_fndecl cache. */
4985 void
4986 ix86_reset_previous_fndecl (void)
4987 {
4988 ix86_previous_fndecl = NULL_TREE;
4989 }
4990
4991 /* Establish appropriate back-end context for processing the function
4992 FNDECL. The argument might be NULL to indicate processing at top
4993 level, outside of any function scope. */
4994 static void
4995 ix86_set_current_function (tree fndecl)
4996 {
4997 /* Only change the context if the function changes. This hook is called
4998 several times in the course of compiling a function, and we don't want to
4999 slow things down too much or call target_reinit when it isn't safe. */
5000 if (fndecl && fndecl != ix86_previous_fndecl)
5001 {
5002 tree old_tree = (ix86_previous_fndecl
5003 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
5004 : NULL_TREE);
5005
5006 tree new_tree = (fndecl
5007 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
5008 : NULL_TREE);
5009
5010 ix86_previous_fndecl = fndecl;
5011 if (old_tree == new_tree)
5012 ;
5013
5014 else if (new_tree)
5015 {
5016 cl_target_option_restore (&global_options,
5017 TREE_TARGET_OPTION (new_tree));
5018 if (TREE_TARGET_GLOBALS (new_tree))
5019 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5020 else
5021 TREE_TARGET_GLOBALS (new_tree)
5022 = save_target_globals_default_opts ();
5023 }
5024
5025 else if (old_tree)
5026 {
5027 new_tree = target_option_current_node;
5028 cl_target_option_restore (&global_options,
5029 TREE_TARGET_OPTION (new_tree));
5030 if (TREE_TARGET_GLOBALS (new_tree))
5031 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5032 else if (new_tree == target_option_default_node)
5033 restore_target_globals (&default_target_globals);
5034 else
5035 TREE_TARGET_GLOBALS (new_tree)
5036 = save_target_globals_default_opts ();
5037 }
5038 }
5039 }
5040
5041 \f
5042 /* Return true if this goes in large data/bss. */
5043
5044 static bool
5045 ix86_in_large_data_p (tree exp)
5046 {
5047 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
5048 return false;
5049
5050 /* Functions are never large data. */
5051 if (TREE_CODE (exp) == FUNCTION_DECL)
5052 return false;
5053
5054 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
5055 {
5056 const char *section = DECL_SECTION_NAME (exp);
5057 if (strcmp (section, ".ldata") == 0
5058 || strcmp (section, ".lbss") == 0)
5059 return true;
5060 return false;
5061 }
5062 else
5063 {
5064 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
5065
5066 /* If this is an incomplete type with size 0, then we can't put it
5067 in data because it might be too big when completed. Also,
5068 int_size_in_bytes returns -1 if the size can vary or is larger than
5069 an integer, in which case it is also safer to assume that it goes in
5070 large data. */
5071 if (size <= 0 || size > ix86_section_threshold)
5072 return true;
5073 }
5074
5075 return false;
5076 }
5077
5078 /* Switch to the appropriate section for output of DECL.
5079 DECL is either a `VAR_DECL' node or a constant of some sort.
5080 RELOC indicates whether forming the initial value of DECL requires
5081 link-time relocations. */
5082
5083 ATTRIBUTE_UNUSED static section *
5084 x86_64_elf_select_section (tree decl, int reloc,
5085 unsigned HOST_WIDE_INT align)
5086 {
5087 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5088 && ix86_in_large_data_p (decl))
5089 {
5090 const char *sname = NULL;
5091 unsigned int flags = SECTION_WRITE;
5092 switch (categorize_decl_for_section (decl, reloc))
5093 {
5094 case SECCAT_DATA:
5095 sname = ".ldata";
5096 break;
5097 case SECCAT_DATA_REL:
5098 sname = ".ldata.rel";
5099 break;
5100 case SECCAT_DATA_REL_LOCAL:
5101 sname = ".ldata.rel.local";
5102 break;
5103 case SECCAT_DATA_REL_RO:
5104 sname = ".ldata.rel.ro";
5105 break;
5106 case SECCAT_DATA_REL_RO_LOCAL:
5107 sname = ".ldata.rel.ro.local";
5108 break;
5109 case SECCAT_BSS:
5110 sname = ".lbss";
5111 flags |= SECTION_BSS;
5112 break;
5113 case SECCAT_RODATA:
5114 case SECCAT_RODATA_MERGE_STR:
5115 case SECCAT_RODATA_MERGE_STR_INIT:
5116 case SECCAT_RODATA_MERGE_CONST:
5117 sname = ".lrodata";
5118 flags = 0;
5119 break;
5120 case SECCAT_SRODATA:
5121 case SECCAT_SDATA:
5122 case SECCAT_SBSS:
5123 gcc_unreachable ();
5124 case SECCAT_TEXT:
5125 case SECCAT_TDATA:
5126 case SECCAT_TBSS:
5127 /* We don't split these for the medium model. Place them into
5128 default sections and hope for the best. */
5129 break;
5130 }
5131 if (sname)
5132 {
5133 /* We might get called with string constants, but get_named_section
5134 doesn't like them as they are not DECLs. Also, we need to set
5135 flags in that case. */
5136 if (!DECL_P (decl))
5137 return get_section (sname, flags, NULL);
5138 return get_named_section (decl, sname, reloc);
5139 }
5140 }
5141 return default_elf_select_section (decl, reloc, align);
5142 }
5143
5144 /* Select a set of attributes for section NAME based on the properties
5145 of DECL and whether or not RELOC indicates that DECL's initializer
5146 might contain runtime relocations. */
5147
5148 static unsigned int ATTRIBUTE_UNUSED
5149 x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
5150 {
5151 unsigned int flags = default_section_type_flags (decl, name, reloc);
5152
5153 if (decl == NULL_TREE
5154 && (strcmp (name, ".ldata.rel.ro") == 0
5155 || strcmp (name, ".ldata.rel.ro.local") == 0))
5156 flags |= SECTION_RELRO;
5157
5158 if (strcmp (name, ".lbss") == 0
5159 || strncmp (name, ".lbss.", 6) == 0
5160 || strncmp (name, ".gnu.linkonce.lb.", 17) == 0)
5161 flags |= SECTION_BSS;
5162
5163 return flags;
5164 }
5165
5166 /* Build up a unique section name, expressed as a
5167 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
5168 RELOC indicates whether the initial value of DECL requires
5169 link-time relocations. */
5170
5171 static void ATTRIBUTE_UNUSED
5172 x86_64_elf_unique_section (tree decl, int reloc)
5173 {
5174 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5175 && ix86_in_large_data_p (decl))
5176 {
5177 const char *prefix = NULL;
5178 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
5179 bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP;
5180
5181 switch (categorize_decl_for_section (decl, reloc))
5182 {
5183 case SECCAT_DATA:
5184 case SECCAT_DATA_REL:
5185 case SECCAT_DATA_REL_LOCAL:
5186 case SECCAT_DATA_REL_RO:
5187 case SECCAT_DATA_REL_RO_LOCAL:
5188 prefix = one_only ? ".ld" : ".ldata";
5189 break;
5190 case SECCAT_BSS:
5191 prefix = one_only ? ".lb" : ".lbss";
5192 break;
5193 case SECCAT_RODATA:
5194 case SECCAT_RODATA_MERGE_STR:
5195 case SECCAT_RODATA_MERGE_STR_INIT:
5196 case SECCAT_RODATA_MERGE_CONST:
5197 prefix = one_only ? ".lr" : ".lrodata";
5198 break;
5199 case SECCAT_SRODATA:
5200 case SECCAT_SDATA:
5201 case SECCAT_SBSS:
5202 gcc_unreachable ();
5203 case SECCAT_TEXT:
5204 case SECCAT_TDATA:
5205 case SECCAT_TBSS:
5206 /* We don't split these for the medium model. Place them into
5207 default sections and hope for the best. */
5208 break;
5209 }
5210 if (prefix)
5211 {
5212 const char *name, *linkonce;
5213 char *string;
5214
5215 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
5216 name = targetm.strip_name_encoding (name);
5217
5218 /* If we're using one_only, then there needs to be a .gnu.linkonce
5219 prefix to the section name. */
5220 linkonce = one_only ? ".gnu.linkonce" : "";
5221
5222 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
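/* E.g. ".ldata.foo" for a variable foo in the normal case, or
   ".gnu.linkonce.ld.foo" when one-only sections are used without COMDAT
   group support.  */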
5223
5224 set_decl_section_name (decl, string);
5225 return;
5226 }
5227 }
5228 default_unique_section (decl, reloc);
5229 }
5230
5231 #ifdef COMMON_ASM_OP
5232 /* This says how to output assembler code to declare an
5233 uninitialized external linkage data object.
5234
5235 For medium model x86-64 we need to use the .largecomm directive for
5236 large objects. */
5237 void
5238 x86_elf_aligned_common (FILE *file,
5239 const char *name, unsigned HOST_WIDE_INT size,
5240 int align)
5241 {
5242 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5243 && size > (unsigned int)ix86_section_threshold)
5244 fputs (".largecomm\t", file);
5245 else
5246 fputs (COMMON_ASM_OP, file);
5247 assemble_name (file, name);
5248 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
5249 size, align / BITS_PER_UNIT);
5250 }
5251 #endif
5252
5253 /* Utility function for targets to use in implementing
5254 ASM_OUTPUT_ALIGNED_BSS. */
5255
5256 void
5257 x86_output_aligned_bss (FILE *file, tree decl, const char *name,
5258 unsigned HOST_WIDE_INT size, int align)
5259 {
5260 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5261 && size > (unsigned int)ix86_section_threshold)
5262 switch_to_section (get_named_section (decl, ".lbss", 0));
5263 else
5264 switch_to_section (bss_section);
5265 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
5266 #ifdef ASM_DECLARE_OBJECT_NAME
5267 last_assemble_variable_decl = decl;
5268 ASM_DECLARE_OBJECT_NAME (file, name, decl);
5269 #else
5270 /* Standard thing is just output label for the object. */
5271 ASM_OUTPUT_LABEL (file, name);
5272 #endif /* ASM_DECLARE_OBJECT_NAME */
5273 ASM_OUTPUT_SKIP (file, size ? size : 1);
5274 }
5275 \f
5276 /* Decide whether we must probe the stack before any space allocation
5277 on this target. It's essentially TARGET_STACK_PROBE except when
5278 -fstack-check causes the stack to be already probed differently. */
5279
5280 bool
5281 ix86_target_stack_probe (void)
5282 {
5283 /* Do not probe the stack twice if static stack checking is enabled. */
5284 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5285 return false;
5286
5287 return TARGET_STACK_PROBE;
5288 }
5289 \f
5290 /* Decide whether we can make a sibling call to a function. DECL is the
5291 declaration of the function being targeted by the call and EXP is the
5292 CALL_EXPR representing the call. */
5293
5294 static bool
5295 ix86_function_ok_for_sibcall (tree decl, tree exp)
5296 {
5297 tree type, decl_or_type;
5298 rtx a, b;
5299
5300 /* If we are generating position-independent code, we cannot sibcall
5301 optimize any indirect call, or a direct call to a global function,
5302 as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
5303 if (!TARGET_MACHO
5304 && !TARGET_64BIT
5305 && flag_pic
5306 && (!decl || !targetm.binds_local_p (decl)))
5307 return false;
5308
5309 /* If we need to align the outgoing stack, then sibcalling would
5310 unalign the stack, which may break the called function. */
5311 if (ix86_minimum_incoming_stack_boundary (true)
5312 < PREFERRED_STACK_BOUNDARY)
5313 return false;
5314
5315 if (decl)
5316 {
5317 decl_or_type = decl;
5318 type = TREE_TYPE (decl);
5319 }
5320 else
5321 {
5322 /* We're looking at the CALL_EXPR, we need the type of the function. */
5323 type = CALL_EXPR_FN (exp); /* pointer expression */
5324 type = TREE_TYPE (type); /* pointer type */
5325 type = TREE_TYPE (type); /* function type */
5326 decl_or_type = type;
5327 }
5328
5329 /* Check that the return value locations are the same. For example,
5330 if we are returning floats on the 80387 register stack, we cannot
5331 make a sibcall from a function that doesn't return a float to a
5332 function that does or, conversely, from a function that does return
5333 a float to a function that doesn't; the necessary stack adjustment
5334 would not be executed. This is also the place we notice
5335 differences in the return value ABI. Note that it is ok for one
5336 of the functions to have void return type as long as the return
5337 value of the other is passed in a register. */
5338 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
5339 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5340 cfun->decl, false);
5341 if (STACK_REG_P (a) || STACK_REG_P (b))
5342 {
5343 if (!rtx_equal_p (a, b))
5344 return false;
5345 }
5346 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5347 ;
5348 else if (!rtx_equal_p (a, b))
5349 return false;
5350
5351 if (TARGET_64BIT)
5352 {
5353 /* The SYSV ABI has more call-clobbered registers;
5354 disallow sibcalls from MS to SYSV. */
5355 if (cfun->machine->call_abi == MS_ABI
5356 && ix86_function_type_abi (type) == SYSV_ABI)
5357 return false;
5358 }
5359 else
5360 {
5361 /* If this call is indirect, we'll need to be able to use a
5362 call-clobbered register for the address of the target function.
5363 Make sure that all such registers are not used for passing
5364 parameters. Note that DLLIMPORT functions are indirect. */
5365 if (!decl
5366 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
5367 {
5368 if (ix86_function_regparm (type, NULL) >= 3)
5369 {
5370 /* ??? Need to count the actual number of registers to be used,
5371 not the possible number of registers. Fix later. */
5372 return false;
5373 }
5374 }
5375 }
5376
5377 /* Otherwise okay. That also includes certain types of indirect calls. */
5378 return true;
5379 }
5380
5381 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
5382 and "sseregparm" calling convention attributes;
5383 arguments as in struct attribute_spec.handler. */
5384
5385 static tree
5386 ix86_handle_cconv_attribute (tree *node, tree name,
5387 tree args,
5388 int,
5389 bool *no_add_attrs)
5390 {
5391 if (TREE_CODE (*node) != FUNCTION_TYPE
5392 && TREE_CODE (*node) != METHOD_TYPE
5393 && TREE_CODE (*node) != FIELD_DECL
5394 && TREE_CODE (*node) != TYPE_DECL)
5395 {
5396 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5397 name);
5398 *no_add_attrs = true;
5399 return NULL_TREE;
5400 }
5401
5402 /* Can combine regparm with all attributes but fastcall and thiscall. */
5403 if (is_attribute_p ("regparm", name))
5404 {
5405 tree cst;
5406
5407 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5408 {
5409 error ("fastcall and regparm attributes are not compatible");
5410 }
5411
5412 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5413 {
5414 error ("regparam and thiscall attributes are not compatible");
5415 }
5416
5417 cst = TREE_VALUE (args);
5418 if (TREE_CODE (cst) != INTEGER_CST)
5419 {
5420 warning (OPT_Wattributes,
5421 "%qE attribute requires an integer constant argument",
5422 name);
5423 *no_add_attrs = true;
5424 }
5425 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
5426 {
5427 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
5428 name, REGPARM_MAX);
5429 *no_add_attrs = true;
5430 }
5431
5432 return NULL_TREE;
5433 }
5434
5435 if (TARGET_64BIT)
5436 {
5437 /* Do not warn when emulating the MS ABI. */
5438 if ((TREE_CODE (*node) != FUNCTION_TYPE
5439 && TREE_CODE (*node) != METHOD_TYPE)
5440 || ix86_function_type_abi (*node) != MS_ABI)
5441 warning (OPT_Wattributes, "%qE attribute ignored",
5442 name);
5443 *no_add_attrs = true;
5444 return NULL_TREE;
5445 }
5446
5447 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
5448 if (is_attribute_p ("fastcall", name))
5449 {
5450 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5451 {
5452 error ("fastcall and cdecl attributes are not compatible");
5453 }
5454 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5455 {
5456 error ("fastcall and stdcall attributes are not compatible");
5457 }
5458 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
5459 {
5460 error ("fastcall and regparm attributes are not compatible");
5461 }
5462 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5463 {
5464 error ("fastcall and thiscall attributes are not compatible");
5465 }
5466 }
5467
5468 /* Can combine stdcall with fastcall (redundant), regparm and
5469 sseregparm. */
5470 else if (is_attribute_p ("stdcall", name))
5471 {
5472 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5473 {
5474 error ("stdcall and cdecl attributes are not compatible");
5475 }
5476 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5477 {
5478 error ("stdcall and fastcall attributes are not compatible");
5479 }
5480 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5481 {
5482 error ("stdcall and thiscall attributes are not compatible");
5483 }
5484 }
5485
5486 /* Can combine cdecl with regparm and sseregparm. */
5487 else if (is_attribute_p ("cdecl", name))
5488 {
5489 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5490 {
5491 error ("stdcall and cdecl attributes are not compatible");
5492 }
5493 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5494 {
5495 error ("fastcall and cdecl attributes are not compatible");
5496 }
5497 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5498 {
5499 error ("cdecl and thiscall attributes are not compatible");
5500 }
5501 }
5502 else if (is_attribute_p ("thiscall", name))
5503 {
5504 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
5505 warning (OPT_Wattributes, "%qE attribute is used for non-class method",
5506 name);
5507 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5508 {
5509 error ("stdcall and thiscall attributes are not compatible");
5510 }
5511 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5512 {
5513 error ("fastcall and thiscall attributes are not compatible");
5514 }
5515 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5516 {
5517 error ("cdecl and thiscall attributes are not compatible");
5518 }
5519 }
5520
5521 /* Can combine sseregparm with all attributes. */
5522
5523 return NULL_TREE;
5524 }
5525
5526 /* The transactional memory builtins are implicitly regparm or fastcall
5527 depending on the ABI. Override the generic do-nothing attribute that
5528 these builtins were declared with, and replace it with one of the two
5529 attributes that we expect elsewhere. */
5530
5531 static tree
5532 ix86_handle_tm_regparm_attribute (tree *node, tree, tree,
5533 int flags, bool *no_add_attrs)
5534 {
5535 tree alt;
5536
5537 /* In no case do we want to add the placeholder attribute. */
5538 *no_add_attrs = true;
5539
5540 /* The 64-bit ABI is unchanged for transactional memory. */
5541 if (TARGET_64BIT)
5542 return NULL_TREE;
5543
5544 /* ??? Is there a better way to validate 32-bit windows? We have
5545 cfun->machine->call_abi, but that seems to be set only for 64-bit. */
5546 if (CHECK_STACK_LIMIT > 0)
5547 alt = tree_cons (get_identifier ("fastcall"), NULL, NULL);
5548 else
5549 {
5550 alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL);
5551 alt = tree_cons (get_identifier ("regparm"), alt, NULL);
5552 }
5553 decl_attributes (node, alt, flags);
5554
5555 return NULL_TREE;
5556 }
5557
5558 /* This function determines the calling convention from TYPE. */
5559
5560 unsigned int
5561 ix86_get_callcvt (const_tree type)
5562 {
5563 unsigned int ret = 0;
5564 bool is_stdarg;
5565 tree attrs;
5566
5567 if (TARGET_64BIT)
5568 return IX86_CALLCVT_CDECL;
5569
5570 attrs = TYPE_ATTRIBUTES (type);
5571 if (attrs != NULL_TREE)
5572 {
5573 if (lookup_attribute ("cdecl", attrs))
5574 ret |= IX86_CALLCVT_CDECL;
5575 else if (lookup_attribute ("stdcall", attrs))
5576 ret |= IX86_CALLCVT_STDCALL;
5577 else if (lookup_attribute ("fastcall", attrs))
5578 ret |= IX86_CALLCVT_FASTCALL;
5579 else if (lookup_attribute ("thiscall", attrs))
5580 ret |= IX86_CALLCVT_THISCALL;
5581
5582 /* Regparm isn't allowed for thiscall and fastcall. */
5583 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
5584 {
5585 if (lookup_attribute ("regparm", attrs))
5586 ret |= IX86_CALLCVT_REGPARM;
5587 if (lookup_attribute ("sseregparm", attrs))
5588 ret |= IX86_CALLCVT_SSEREGPARM;
5589 }
5590
5591 if (IX86_BASE_CALLCVT(ret) != 0)
5592 return ret;
5593 }
5594
5595 is_stdarg = stdarg_p (type);
5596 if (TARGET_RTD && !is_stdarg)
5597 return IX86_CALLCVT_STDCALL | ret;
5598
5599 if (ret != 0
5600 || is_stdarg
5601 || TREE_CODE (type) != METHOD_TYPE
5602 || ix86_function_type_abi (type) != MS_ABI)
5603 return IX86_CALLCVT_CDECL | ret;
5604
5605 return IX86_CALLCVT_THISCALL;
5606 }
5607
5608 /* Return 0 if the attributes for two types are incompatible, 1 if they
5609 are compatible, and 2 if they are nearly compatible (which causes a
5610 warning to be generated). */
5611
5612 static int
5613 ix86_comp_type_attributes (const_tree type1, const_tree type2)
5614 {
5615 unsigned int ccvt1, ccvt2;
5616
5617 if (TREE_CODE (type1) != FUNCTION_TYPE
5618 && TREE_CODE (type1) != METHOD_TYPE)
5619 return 1;
5620
5621 ccvt1 = ix86_get_callcvt (type1);
5622 ccvt2 = ix86_get_callcvt (type2);
5623 if (ccvt1 != ccvt2)
5624 return 0;
5625 if (ix86_function_regparm (type1, NULL)
5626 != ix86_function_regparm (type2, NULL))
5627 return 0;
5628
5629 return 1;
5630 }
5631 \f
5632 /* Return the regparm value for a function with the indicated TYPE and DECL.
5633 DECL may be NULL when calling function indirectly
5634 or considering a libcall. */
5635
5636 static int
5637 ix86_function_regparm (const_tree type, const_tree decl)
5638 {
5639 tree attr;
5640 int regparm;
5641 unsigned int ccvt;
5642
5643 if (TARGET_64BIT)
5644 return (ix86_function_type_abi (type) == SYSV_ABI
5645 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
5646 ccvt = ix86_get_callcvt (type);
5647 regparm = ix86_regparm;
5648
5649 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
5650 {
5651 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
5652 if (attr)
5653 {
5654 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
5655 return regparm;
5656 }
5657 }
5658 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5659 return 2;
5660 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5661 return 1;
5662
5663 /* Use register calling convention for local functions when possible. */
5664 if (decl
5665 && TREE_CODE (decl) == FUNCTION_DECL
5666 /* Caller and callee must agree on the calling convention, so
5667 checking the global `optimize' here would mean that with
5668 __attribute__((optimize (...))) the caller could use the regparm
5669 convention and the callee not, or vice versa. Instead look at
5670 whether the callee itself is optimized or not. */
5671 && opt_for_fn (decl, optimize)
5672 && !(profile_flag && !flag_fentry))
5673 {
5674 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5675 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE (decl));
5676 if (i && i->local && i->can_change_signature)
5677 {
5678 int local_regparm, globals = 0, regno;
5679
5680 /* Make sure no regparm register is taken by a
5681 fixed register variable. */
5682 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
5683 if (fixed_regs[local_regparm])
5684 break;
5685
5686 /* We don't want to use regparm(3) for nested functions as
5687 these use a static chain pointer in the third argument. */
5688 if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
5689 local_regparm = 2;
5690
5691 /* In 32-bit mode save a register for the split stack. */
5692 if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
5693 local_regparm = 2;
5694
5695 /* Each fixed register usage increases register pressure,
5696 so fewer registers should be used for argument passing.
5697 This functionality can be overridden by an explicit
5698 regparm value. */
5699 for (regno = AX_REG; regno <= DI_REG; regno++)
5700 if (fixed_regs[regno])
5701 globals++;
5702
5703 local_regparm
5704 = globals < local_regparm ? local_regparm - globals : 0;
5705
5706 if (local_regparm > regparm)
5707 regparm = local_regparm;
5708 }
5709 }
5710
5711 return regparm;
5712 }
5713
5714 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5715 DFmode (2) arguments in SSE registers for a function with the
5716 indicated TYPE and DECL. DECL may be NULL when calling function
5717 indirectly or considering a libcall. Otherwise return 0. */
5718
5719 static int
5720 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
5721 {
5722 gcc_assert (!TARGET_64BIT);
5723
5724 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5725 by the sseregparm attribute. */
5726 if (TARGET_SSEREGPARM
5727 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
5728 {
5729 if (!TARGET_SSE)
5730 {
5731 if (warn)
5732 {
5733 if (decl)
5734 error ("calling %qD with attribute sseregparm without "
5735 "SSE/SSE2 enabled", decl);
5736 else
5737 error ("calling %qT with attribute sseregparm without "
5738 "SSE/SSE2 enabled", type);
5739 }
5740 return 0;
5741 }
5742
5743 return 2;
5744 }
5745
5746 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5747 (and DFmode for SSE2) arguments in SSE registers. */
5748 if (decl && TARGET_SSE_MATH && optimize
5749 && !(profile_flag && !flag_fentry))
5750 {
5751 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5752 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
5753 if (i && i->local && i->can_change_signature)
5754 return TARGET_SSE2 ? 2 : 1;
5755 }
5756
5757 return 0;
5758 }
5759
5760 /* Return true if EAX is live at the start of the function. Used by
5761 ix86_expand_prologue to determine if we need special help before
5762 calling allocate_stack_worker. */
5763
5764 static bool
5765 ix86_eax_live_at_start_p (void)
5766 {
5767 /* Cheat. Don't bother working forward from ix86_function_regparm
5768 to the function type to whether an actual argument is located in
5769 eax. Instead just look at cfg info, which is still close enough
5770 to correct at this point. This gives false positives for broken
5771 functions that might use uninitialized data that happens to be
5772 allocated in eax, but who cares? */
5773 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
5774 }
5775
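/* Return true if functions with type FNTYPE leave the hidden aggregate
   return pointer on the stack for the caller to remove, i.e. the callee
   does not pop it on return.  */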
5776 static bool
5777 ix86_keep_aggregate_return_pointer (tree fntype)
5778 {
5779 tree attr;
5780
5781 if (!TARGET_64BIT)
5782 {
5783 attr = lookup_attribute ("callee_pop_aggregate_return",
5784 TYPE_ATTRIBUTES (fntype));
5785 if (attr)
5786 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
5787
5788 /* For 32-bit MS-ABI the default is to keep aggregate
5789 return pointer. */
5790 if (ix86_function_type_abi (fntype) == MS_ABI)
5791 return true;
5792 }
5793 return KEEP_AGGREGATE_RETURN_POINTER != 0;
5794 }
5795
5796 /* Value is the number of bytes of arguments automatically
5797 popped when returning from a subroutine call.
5798 FUNDECL is the declaration node of the function (as a tree),
5799 FUNTYPE is the data type of the function (as a tree),
5800 or for a library call it is an identifier node for the subroutine name.
5801 SIZE is the number of bytes of arguments passed on the stack.
5802
5803 On the 80386, the RTD insn may be used to pop them if the number
5804 of args is fixed, but if the number is variable then the caller
5805 must pop them all. RTD can't be used for library calls now
5806 because the library is compiled with the Unix compiler.
5807 Use of RTD is a selectable option, since it is incompatible with
5808 standard Unix calling sequences. If the option is not selected,
5809 the caller must always pop the args.
5810
5811 The attribute stdcall is equivalent to RTD on a per module basis. */
5812
5813 static int
5814 ix86_return_pops_args (tree fundecl, tree funtype, int size)
5815 {
5816 unsigned int ccvt;
5817
5818 /* None of the 64-bit ABIs pop arguments. */
5819 if (TARGET_64BIT)
5820 return 0;
5821
5822 ccvt = ix86_get_callcvt (funtype);
5823
5824 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
5825 | IX86_CALLCVT_THISCALL)) != 0
5826 && ! stdarg_p (funtype))
5827 return size;
5828
5829 /* Lose any fake structure return argument if it is passed on the stack. */
5830 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
5831 && !ix86_keep_aggregate_return_pointer (funtype))
5832 {
5833 int nregs = ix86_function_regparm (funtype, fundecl);
5834 if (nregs == 0)
5835 return GET_MODE_SIZE (Pmode);
5836 }
5837
5838 return 0;
5839 }
5840
5841 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
5842
5843 static bool
5844 ix86_legitimate_combined_insn (rtx_insn *insn)
5845 {
5846 /* Check operand constraints in case hard registers were propagated
5847 into insn pattern. This check prevents combine pass from
5848 generating insn patterns with invalid hard register operands.
5849 These invalid insns can eventually confuse reload to error out
5850 with a spill failure. See also PRs 46829 and 46843. */
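/* Note that the assignment to INSN_CODE below is intentional; it caches
   the result of recog so it does not have to be recomputed later.  */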
5851 if ((INSN_CODE (insn) = recog (PATTERN (insn), insn, 0)) >= 0)
5852 {
5853 int i;
5854
5855 extract_insn (insn);
5856 preprocess_constraints (insn);
5857
5858 int n_operands = recog_data.n_operands;
5859 int n_alternatives = recog_data.n_alternatives;
5860 for (i = 0; i < n_operands; i++)
5861 {
5862 rtx op = recog_data.operand[i];
5863 enum machine_mode mode = GET_MODE (op);
5864 const operand_alternative *op_alt;
5865 int offset = 0;
5866 bool win;
5867 int j;
5868
5869 /* For pre-AVX disallow unaligned loads/stores where the
5870 instructions don't support it. */
5871 if (!TARGET_AVX
5872 && VECTOR_MODE_P (GET_MODE (op))
5873 && misaligned_operand (op, GET_MODE (op)))
5874 {
5875 int min_align = get_attr_ssememalign (insn);
5876 if (min_align == 0)
5877 return false;
5878 }
5879
5880 /* A unary operator may be accepted by the predicate, but it
5881 is irrelevant for matching constraints. */
5882 if (UNARY_P (op))
5883 op = XEXP (op, 0);
5884
5885 if (GET_CODE (op) == SUBREG)
5886 {
5887 if (REG_P (SUBREG_REG (op))
5888 && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
5889 offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
5890 GET_MODE (SUBREG_REG (op)),
5891 SUBREG_BYTE (op),
5892 GET_MODE (op));
5893 op = SUBREG_REG (op);
5894 }
5895
5896 if (!(REG_P (op) && HARD_REGISTER_P (op)))
5897 continue;
5898
5899 op_alt = recog_op_alt;
5900
5901 /* Operand has no constraints, anything is OK. */
5902 win = !n_alternatives;
5903
5904 alternative_mask enabled = recog_data.enabled_alternatives;
5905 for (j = 0; j < n_alternatives; j++, op_alt += n_operands)
5906 {
5907 if (!TEST_BIT (enabled, j))
5908 continue;
5909 if (op_alt[i].anything_ok
5910 || (op_alt[i].matches != -1
5911 && operands_match_p
5912 (recog_data.operand[i],
5913 recog_data.operand[op_alt[i].matches]))
5914 || reg_fits_class_p (op, op_alt[i].cl, offset, mode))
5915 {
5916 win = true;
5917 break;
5918 }
5919 }
5920
5921 if (!win)
5922 return false;
5923 }
5924 }
5925
5926 return true;
5927 }
5928 \f
5929 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
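/* AddressSanitizer maps every 8 bytes of application memory to one shadow
   byte located at (address >> 3) plus the offset returned here; the values
   below are the offsets used on the respective targets.  */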
5930
5931 static unsigned HOST_WIDE_INT
5932 ix86_asan_shadow_offset (void)
5933 {
5934 return TARGET_LP64 ? (TARGET_MACHO ? (HOST_WIDE_INT_1 << 44)
5935 : HOST_WIDE_INT_C (0x7fff8000))
5936 : (HOST_WIDE_INT_1 << 29);
5937 }
5938 \f
5939 /* Argument support functions. */
5940
5941 /* Return true when register may be used to pass function parameters. */
5942 bool
5943 ix86_function_arg_regno_p (int regno)
5944 {
5945 int i;
5946 const int *parm_regs;
5947
5948 if (!TARGET_64BIT)
5949 {
5950 if (TARGET_MACHO)
5951 return (regno < REGPARM_MAX
5952 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
5953 else
5954 return (regno < REGPARM_MAX
5955 || (TARGET_MMX && MMX_REGNO_P (regno)
5956 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
5957 || (TARGET_SSE && SSE_REGNO_P (regno)
5958 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
5959 }
5960
5961 if (TARGET_SSE && SSE_REGNO_P (regno)
5962 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
5963 return true;
5964
5965 /* TODO: The function should depend on current function ABI but
5966 builtins.c would need updating then. Therefore we use the
5967 default ABI. */
5968
5969 /* RAX is used as hidden argument to va_arg functions. */
5970 if (ix86_abi == SYSV_ABI && regno == AX_REG)
5971 return true;
5972
5973 if (ix86_abi == MS_ABI)
5974 parm_regs = x86_64_ms_abi_int_parameter_registers;
5975 else
5976 parm_regs = x86_64_int_parameter_registers;
5977 for (i = 0; i < (ix86_abi == MS_ABI
5978 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
5979 if (regno == parm_regs[i])
5980 return true;
5981 return false;
5982 }
5983
5984 /* Return true if we do not know how to pass TYPE solely in registers. */
5985
5986 static bool
5987 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
5988 {
5989 if (must_pass_in_stack_var_size_or_pad (mode, type))
5990 return true;
5991
5992 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
5993 The layout_type routine is crafty and tries to trick us into passing
5994 currently unsupported vector types on the stack by using TImode. */
5995 return (!TARGET_64BIT && mode == TImode
5996 && type && TREE_CODE (type) != VECTOR_TYPE);
5997 }
5998
5999 /* Return the size, in bytes, of the area reserved for arguments passed
6000 in registers for the function represented by FNDECL, depending on the
6001 ABI used. */
6002 int
6003 ix86_reg_parm_stack_space (const_tree fndecl)
6004 {
6005 enum calling_abi call_abi = SYSV_ABI;
6006 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
6007 call_abi = ix86_function_abi (fndecl);
6008 else
6009 call_abi = ix86_function_type_abi (fndecl);
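/* The 64-bit MS ABI requires the caller to reserve a 32-byte register
   parameter area ("shadow space") for the four register arguments.  */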
6010 if (TARGET_64BIT && call_abi == MS_ABI)
6011 return 32;
6012 return 0;
6013 }
6014
6015 /* Return SYSV_ABI or MS_ABI, depending on FNTYPE, specifying the
6016 call ABI used. */
6017 enum calling_abi
6018 ix86_function_type_abi (const_tree fntype)
6019 {
6020 if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
6021 {
6022 enum calling_abi abi = ix86_abi;
6023 if (abi == SYSV_ABI)
6024 {
6025 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
6026 abi = MS_ABI;
6027 }
6028 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
6029 abi = SYSV_ABI;
6030 return abi;
6031 }
6032 return ix86_abi;
6033 }
6034
6035 /* We add this as a workaround in order to use libc_has_function
6036 hook in i386.md. */
6037 bool
6038 ix86_libc_has_function (enum function_class fn_class)
6039 {
6040 return targetm.libc_has_function (fn_class);
6041 }
6042
6043 static bool
6044 ix86_function_ms_hook_prologue (const_tree fn)
6045 {
6046 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
6047 {
6048 if (decl_function_context (fn) != NULL_TREE)
6049 error_at (DECL_SOURCE_LOCATION (fn),
6050 "ms_hook_prologue is not compatible with nested function");
6051 else
6052 return true;
6053 }
6054 return false;
6055 }
6056
6057 static enum calling_abi
6058 ix86_function_abi (const_tree fndecl)
6059 {
6060 if (! fndecl)
6061 return ix86_abi;
6062 return ix86_function_type_abi (TREE_TYPE (fndecl));
6063 }
6064
6065 /* Return SYSV_ABI or MS_ABI, depending on cfun, specifying the
6066 call ABI used. */
6067 enum calling_abi
6068 ix86_cfun_abi (void)
6069 {
6070 if (! cfun)
6071 return ix86_abi;
6072 return cfun->machine->call_abi;
6073 }
6074
6075 /* Write the extra assembler code needed to declare a function properly. */
6076
6077 void
6078 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
6079 tree decl)
6080 {
6081 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
6082
6083 if (is_ms_hook)
6084 {
6085 int i, filler_count = (TARGET_64BIT ? 32 : 16);
6086 unsigned int filler_cc = 0xcccccccc;
6087
6088 for (i = 0; i < filler_count; i += 4)
6089 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
6090 }
6091
6092 #ifdef SUBTARGET_ASM_UNWIND_INIT
6093 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
6094 #endif
6095
6096 ASM_OUTPUT_LABEL (asm_out_file, fname);
6097
6098 /* Output magic byte marker, if hot-patch attribute is set. */
6099 if (is_ms_hook)
6100 {
6101 if (TARGET_64BIT)
6102 {
6103 /* leaq [%rsp + 0], %rsp */
6104 asm_fprintf (asm_out_file, ASM_BYTE
6105 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
6106 }
6107 else
6108 {
6109 /* movl.s %edi, %edi
6110 push %ebp
6111 movl.s %esp, %ebp */
6112 asm_fprintf (asm_out_file, ASM_BYTE
6113 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
6114 }
6115 }
6116 }
6117
6118 /* regclass.c */
6119 extern void init_regs (void);
6120
6121 /* Implementation of the call ABI switching target hook. The call register
6122 sets specific to FNDECL are installed. See also
6123 ix86_conditional_register_usage for more details. */
6124 void
6125 ix86_call_abi_override (const_tree fndecl)
6126 {
6127 if (fndecl == NULL_TREE)
6128 cfun->machine->call_abi = ix86_abi;
6129 else
6130 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
6131 }
6132
6133 /* The 64-bit MS and SYSV ABIs have different sets of call-used registers.
6134 Avoid expensive re-initialization via init_regs each time we switch
6135 function context, since this is needed only during RTL expansion. */
6136 static void
6137 ix86_maybe_switch_abi (void)
6138 {
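/* SI_REG is call-clobbered under the 64-bit SYSV ABI but call-saved under
   the MS ABI, so its call_used_regs entry tells us which ABI the register
   tables were last initialized for.  */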
6139 if (TARGET_64BIT &&
6140 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
6141 reinit_regs ();
6142 }
6143
6144 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6145 for a call to a function whose data type is FNTYPE.
6146 For a library call, FNTYPE is 0. */
6147
6148 void
6149 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
6150 tree fntype, /* tree ptr for function decl */
6151 rtx libname, /* SYMBOL_REF of library name or 0 */
6152 tree fndecl,
6153 int caller)
6154 {
6155 struct cgraph_local_info *i;
6156
6157 memset (cum, 0, sizeof (*cum));
6158
6159 if (fndecl)
6160 {
6161 i = cgraph_node::local_info (fndecl);
6162 cum->call_abi = ix86_function_abi (fndecl);
6163 }
6164 else
6165 {
6166 i = NULL;
6167 cum->call_abi = ix86_function_type_abi (fntype);
6168 }
6169
6170 cum->caller = caller;
6171
6172 /* Set up the number of registers to use for passing arguments. */
6173 cum->nregs = ix86_regparm;
6174 if (TARGET_64BIT)
6175 {
6176 cum->nregs = (cum->call_abi == SYSV_ABI
6177 ? X86_64_REGPARM_MAX
6178 : X86_64_MS_REGPARM_MAX);
6179 }
6180 if (TARGET_SSE)
6181 {
6182 cum->sse_nregs = SSE_REGPARM_MAX;
6183 if (TARGET_64BIT)
6184 {
6185 cum->sse_nregs = (cum->call_abi == SYSV_ABI
6186 ? X86_64_SSE_REGPARM_MAX
6187 : X86_64_MS_SSE_REGPARM_MAX);
6188 }
6189 }
6190 if (TARGET_MMX)
6191 cum->mmx_nregs = MMX_REGPARM_MAX;
6192 cum->warn_avx512f = true;
6193 cum->warn_avx = true;
6194 cum->warn_sse = true;
6195 cum->warn_mmx = true;
6196
6197 /* Because the type might mismatch between caller and callee, we need to
6198 use the actual type of the function for local calls.
6199 FIXME: cgraph_analyze can be told to actually record whether a function
6200 uses va_start, so for local functions maybe_vaarg can be made more
6201 aggressive, helping K&R code.
6202 FIXME: once the type system is fixed, we won't need this code anymore. */
6203 if (i && i->local && i->can_change_signature)
6204 fntype = TREE_TYPE (fndecl);
6205 cum->maybe_vaarg = (fntype
6206 ? (!prototype_p (fntype) || stdarg_p (fntype))
6207 : !libname);
6208
6209 if (!TARGET_64BIT)
6210 {
6211 /* If there are variable arguments, then we won't pass anything
6212 in registers in 32-bit mode. */
6213 if (stdarg_p (fntype))
6214 {
6215 cum->nregs = 0;
6216 cum->sse_nregs = 0;
6217 cum->mmx_nregs = 0;
6218 cum->warn_avx512f = false;
6219 cum->warn_avx = false;
6220 cum->warn_sse = false;
6221 cum->warn_mmx = false;
6222 return;
6223 }
6224
6225 /* Use ecx and edx registers if function has fastcall attribute,
6226 else look for regparm information. */
6227 if (fntype)
6228 {
6229 unsigned int ccvt = ix86_get_callcvt (fntype);
6230 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
6231 {
6232 cum->nregs = 1;
6233 cum->fastcall = 1; /* Same first register as in fastcall. */
6234 }
6235 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
6236 {
6237 cum->nregs = 2;
6238 cum->fastcall = 1;
6239 }
6240 else
6241 cum->nregs = ix86_function_regparm (fntype, fndecl);
6242 }
6243
6244 /* Set up the number of SSE registers used for passing SFmode
6245 and DFmode arguments. Warn for mismatching ABI. */
6246 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
6247 }
6248 }
6249
6250 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
6251 But in the case of vector types, it is some vector mode.
6252
6253 When we have only some of our vector isa extensions enabled, then there
6254 are some modes for which vector_mode_supported_p is false. For these
6255 modes, the generic vector support in gcc will choose some non-vector mode
6256 in order to implement the type. By computing the natural mode, we'll
6257 select the proper ABI location for the operand and not depend on whatever
6258 the middle-end decides to do with these vector types.
6259
6260 The middle-end can't deal with vector types larger than 16 bytes. In
6261 that case, we return the original mode and warn about the ABI change if
6262 CUM isn't NULL.
6263
6264 If IN_RETURN is true, warn about the ABI change if the vector mode isn't
6265 available for the function return value. */
6266
6267 static enum machine_mode
6268 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
6269 bool in_return)
6270 {
6271 enum machine_mode mode = TYPE_MODE (type);
6272
6273 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
6274 {
6275 HOST_WIDE_INT size = int_size_in_bytes (type);
6276 if ((size == 8 || size == 16 || size == 32 || size == 64)
6277 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
6278 && TYPE_VECTOR_SUBPARTS (type) > 1)
6279 {
6280 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
6281
6282 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6283 mode = MIN_MODE_VECTOR_FLOAT;
6284 else
6285 mode = MIN_MODE_VECTOR_INT;
6286
6287 /* Get the mode which has this inner mode and number of units. */
6288 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
6289 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
6290 && GET_MODE_INNER (mode) == innermode)
6291 {
6292 if (size == 64 && !TARGET_AVX512F)
6293 {
6294 static bool warnedavx512f;
6295 static bool warnedavx512f_ret;
6296
6297 if (cum && cum->warn_avx512f && !warnedavx512f)
6298 {
6299 if (warning (OPT_Wpsabi, "AVX512F vector argument "
6300 "without AVX512F enabled changes the ABI"))
6301 warnedavx512f = true;
6302 }
6303 else if (in_return && !warnedavx512f_ret)
6304 {
6305 if (warning (OPT_Wpsabi, "AVX512F vector return "
6306 "without AVX512F enabled changes the ABI"))
6307 warnedavx512f_ret = true;
6308 }
6309
6310 return TYPE_MODE (type);
6311 }
6312 else if (size == 32 && !TARGET_AVX)
6313 {
6314 static bool warnedavx;
6315 static bool warnedavx_ret;
6316
6317 if (cum && cum->warn_avx && !warnedavx)
6318 {
6319 if (warning (OPT_Wpsabi, "AVX vector argument "
6320 "without AVX enabled changes the ABI"))
6321 warnedavx = true;
6322 }
6323 else if (in_return && !warnedavx_ret)
6324 {
6325 if (warning (OPT_Wpsabi, "AVX vector return "
6326 "without AVX enabled changes the ABI"))
6327 warnedavx_ret = true;
6328 }
6329
6330 return TYPE_MODE (type);
6331 }
6332 else if (((size == 8 && TARGET_64BIT) || size == 16)
6333 && !TARGET_SSE)
6334 {
6335 static bool warnedsse;
6336 static bool warnedsse_ret;
6337
6338 if (cum && cum->warn_sse && !warnedsse)
6339 {
6340 if (warning (OPT_Wpsabi, "SSE vector argument "
6341 "without SSE enabled changes the ABI"))
6342 warnedsse = true;
6343 }
6344 else if (!TARGET_64BIT && in_return && !warnedsse_ret)
6345 {
6346 if (warning (OPT_Wpsabi, "SSE vector return "
6347 "without SSE enabled changes the ABI"))
6348 warnedsse_ret = true;
6349 }
6350 }
6351 else if ((size == 8 && !TARGET_64BIT) && !TARGET_MMX)
6352 {
6353 static bool warnedmmx;
6354 static bool warnedmmx_ret;
6355
6356 if (cum && cum->warn_mmx && !warnedmmx)
6357 {
6358 if (warning (OPT_Wpsabi, "MMX vector argument "
6359 "without MMX enabled changes the ABI"))
6360 warnedmmx = true;
6361 }
6362 else if (in_return && !warnedmmx_ret)
6363 {
6364 if (warning (OPT_Wpsabi, "MMX vector return "
6365 "without MMX enabled changes the ABI"))
6366 warnedmmx_ret = true;
6367 }
6368 }
6369 return mode;
6370 }
6371
6372 gcc_unreachable ();
6373 }
6374 }
6375
6376 return mode;
6377 }
6378
6379 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
6380 this may not agree with the mode that the type system has chosen for the
6381 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
6382 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
6383
6384 static rtx
6385 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
6386 unsigned int regno)
6387 {
6388 rtx tmp;
6389
6390 if (orig_mode != BLKmode)
6391 tmp = gen_rtx_REG (orig_mode, regno);
6392 else
6393 {
6394 tmp = gen_rtx_REG (mode, regno);
6395 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
6396 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
6397 }
6398
6399 return tmp;
6400 }
6401
6402 /* x86-64 register passing implementation. See the x86-64 PS ABI for details.
6403 The goal of this code is to classify each eightbyte (8-byte chunk) of an
6404 incoming argument by register class and assign registers accordingly. */
6405
6406 /* Return the union class of CLASS1 and CLASS2.
6407 See the x86-64 PS ABI for details. */
6408
6409 static enum x86_64_reg_class
6410 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
6411 {
6412 /* Rule #1: If both classes are equal, this is the resulting class. */
6413 if (class1 == class2)
6414 return class1;
6415
6416 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
6417 the other class. */
6418 if (class1 == X86_64_NO_CLASS)
6419 return class2;
6420 if (class2 == X86_64_NO_CLASS)
6421 return class1;
6422
6423 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
6424 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
6425 return X86_64_MEMORY_CLASS;
6426
6427 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
6428 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
6429 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
6430 return X86_64_INTEGERSI_CLASS;
6431 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
6432 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
6433 return X86_64_INTEGER_CLASS;
6434
6435 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
6436 MEMORY is used. */
6437 if (class1 == X86_64_X87_CLASS
6438 || class1 == X86_64_X87UP_CLASS
6439 || class1 == X86_64_COMPLEX_X87_CLASS
6440 || class2 == X86_64_X87_CLASS
6441 || class2 == X86_64_X87UP_CLASS
6442 || class2 == X86_64_COMPLEX_X87_CLASS)
6443 return X86_64_MEMORY_CLASS;
6444
6445 /* Rule #6: Otherwise class SSE is used. */
6446 return X86_64_SSE_CLASS;
6447 }
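/* For illustration, the rules above give, for example:

     merge_classes (X86_64_NO_CLASS, X86_64_SSE_CLASS)          -> X86_64_SSE_CLASS
     merge_classes (X86_64_INTEGERSI_CLASS, X86_64_SSESF_CLASS) -> X86_64_INTEGERSI_CLASS
     merge_classes (X86_64_SSE_CLASS, X86_64_X87_CLASS)         -> X86_64_MEMORY_CLASS
     merge_classes (X86_64_SSE_CLASS, X86_64_SSEDF_CLASS)       -> X86_64_SSE_CLASS

   by rules #2, #4, #5 and #6 respectively.  */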
6448
6449 /* Classify the argument of type TYPE and mode MODE.
6450 CLASSES will be filled by the register class used to pass each word
6451 of the operand. The number of words is returned. In case the parameter
6452 should be passed in memory, 0 is returned. As a special case for zero
6453 sized containers, classes[0] will be NO_CLASS and 1 is returned.
6454
6455 BIT_OFFSET is used internally for handling records; it specifies the
6456 offset in bits, taken modulo 512 to avoid overflow cases.
6457
6458 See the x86-64 PS ABI for details.
6459 */
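/* Worked example (illustration only): a hypothetical

     struct s { double d; int i; };   16 bytes, two eightbytes

   classifies as an SSE-class eightbyte followed by an INTEGER-class
   eightbyte, so classify_argument returns 2 and the struct is passed in
   one SSE register plus one general-purpose register.  A struct larger
   than 64 bytes, or one containing an eightbyte that merges to
   X86_64_MEMORY_CLASS, makes the function return 0 and is passed on the
   stack.  */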
6460
6461 static int
6462 classify_argument (enum machine_mode mode, const_tree type,
6463 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
6464 {
6465 HOST_WIDE_INT bytes =
6466 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6467 int words
6468 = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6469
6470 /* Variable sized entities are always passed/returned in memory. */
6471 if (bytes < 0)
6472 return 0;
6473
6474 if (mode != VOIDmode
6475 && targetm.calls.must_pass_in_stack (mode, type))
6476 return 0;
6477
6478 if (type && AGGREGATE_TYPE_P (type))
6479 {
6480 int i;
6481 tree field;
6482 enum x86_64_reg_class subclasses[MAX_CLASSES];
6483
6484 /* On x86-64 we pass structures larger than 64 bytes on the stack. */
6485 if (bytes > 64)
6486 return 0;
6487
6488 for (i = 0; i < words; i++)
6489 classes[i] = X86_64_NO_CLASS;
6490
6491 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
6492 signal the memory class, so handle them as a special case. */
6493 if (!words)
6494 {
6495 classes[0] = X86_64_NO_CLASS;
6496 return 1;
6497 }
6498
6499 /* Classify each field of record and merge classes. */
6500 switch (TREE_CODE (type))
6501 {
6502 case RECORD_TYPE:
6503 /* And now merge the fields of structure. */
6504 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6505 {
6506 if (TREE_CODE (field) == FIELD_DECL)
6507 {
6508 int num;
6509
6510 if (TREE_TYPE (field) == error_mark_node)
6511 continue;
6512
6513 /* Bitfields are always classified as integer. Handle them
6514 early, since later code would consider them to be
6515 misaligned integers. */
6516 if (DECL_BIT_FIELD (field))
6517 {
6518 for (i = (int_bit_position (field)
6519 + (bit_offset % 64)) / 8 / 8;
6520 i < ((int_bit_position (field) + (bit_offset % 64))
6521 + tree_to_shwi (DECL_SIZE (field))
6522 + 63) / 8 / 8; i++)
6523 classes[i] =
6524 merge_classes (X86_64_INTEGER_CLASS,
6525 classes[i]);
6526 }
6527 else
6528 {
6529 int pos;
6530
6531 type = TREE_TYPE (field);
6532
6533 /* A flexible array member is ignored. */
6534 if (TYPE_MODE (type) == BLKmode
6535 && TREE_CODE (type) == ARRAY_TYPE
6536 && TYPE_SIZE (type) == NULL_TREE
6537 && TYPE_DOMAIN (type) != NULL_TREE
6538 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
6539 == NULL_TREE))
6540 {
6541 static bool warned;
6542
6543 if (!warned && warn_psabi)
6544 {
6545 warned = true;
6546 inform (input_location,
6547 "the ABI of passing struct with"
6548 " a flexible array member has"
6549 " changed in GCC 4.4");
6550 }
6551 continue;
6552 }
6553 num = classify_argument (TYPE_MODE (type), type,
6554 subclasses,
6555 (int_bit_position (field)
6556 + bit_offset) % 512);
6557 if (!num)
6558 return 0;
6559 pos = (int_bit_position (field)
6560 + (bit_offset % 64)) / 8 / 8;
6561 for (i = 0; i < num && (i + pos) < words; i++)
6562 classes[i + pos] =
6563 merge_classes (subclasses[i], classes[i + pos]);
6564 }
6565 }
6566 }
6567 break;
6568
6569 case ARRAY_TYPE:
6570 /* Arrays are handled as small records. */
6571 {
6572 int num;
6573 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
6574 TREE_TYPE (type), subclasses, bit_offset);
6575 if (!num)
6576 return 0;
6577
6578 /* The partial classes are now full classes. */
6579 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
6580 subclasses[0] = X86_64_SSE_CLASS;
6581 if (subclasses[0] == X86_64_INTEGERSI_CLASS
6582 && !((bit_offset % 64) == 0 && bytes == 4))
6583 subclasses[0] = X86_64_INTEGER_CLASS;
6584
6585 for (i = 0; i < words; i++)
6586 classes[i] = subclasses[i % num];
6587
6588 break;
6589 }
6590 case UNION_TYPE:
6591 case QUAL_UNION_TYPE:
6592 /* Unions are similar to RECORD_TYPE but offset is always 0.
6593 */
6594 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6595 {
6596 if (TREE_CODE (field) == FIELD_DECL)
6597 {
6598 int num;
6599
6600 if (TREE_TYPE (field) == error_mark_node)
6601 continue;
6602
6603 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
6604 TREE_TYPE (field), subclasses,
6605 bit_offset);
6606 if (!num)
6607 return 0;
6608 for (i = 0; i < num && i < words; i++)
6609 classes[i] = merge_classes (subclasses[i], classes[i]);
6610 }
6611 }
6612 break;
6613
6614 default:
6615 gcc_unreachable ();
6616 }
6617
6618 if (words > 2)
6619 {
6620 /* When size > 16 bytes, if the first eightbyte isn't
6621 X86_64_SSE_CLASS or any of the others isn't
6622 X86_64_SSEUP_CLASS, everything should be passed in
6623 memory. */
6624 if (classes[0] != X86_64_SSE_CLASS)
6625 return 0;
6626
6627 for (i = 1; i < words; i++)
6628 if (classes[i] != X86_64_SSEUP_CLASS)
6629 return 0;
6630 }
6631
6632 /* Final merger cleanup. */
6633 for (i = 0; i < words; i++)
6634 {
6635 /* If one class is MEMORY, everything should be passed in
6636 memory. */
6637 if (classes[i] == X86_64_MEMORY_CLASS)
6638 return 0;
6639
6640 /* X86_64_SSEUP_CLASS should always be preceded by
6641 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
6642 if (classes[i] == X86_64_SSEUP_CLASS
6643 && classes[i - 1] != X86_64_SSE_CLASS
6644 && classes[i - 1] != X86_64_SSEUP_CLASS)
6645 {
6646 /* The first one should never be X86_64_SSEUP_CLASS. */
6647 gcc_assert (i != 0);
6648 classes[i] = X86_64_SSE_CLASS;
6649 }
6650
6651 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
6652 everything should be passed in memory. */
6653 if (classes[i] == X86_64_X87UP_CLASS
6654 && (classes[i - 1] != X86_64_X87_CLASS))
6655 {
6656 static bool warned;
6657
6658 /* The first one should never be X86_64_X87UP_CLASS. */
6659 gcc_assert (i != 0);
6660 if (!warned && warn_psabi)
6661 {
6662 warned = true;
6663 inform (input_location,
6664 "the ABI of passing union with long double"
6665 " has changed in GCC 4.4");
6666 }
6667 return 0;
6668 }
6669 }
6670 return words;
6671 }
6672
6673 /* Compute the alignment needed. We align all types to their natural
6674 boundaries, with the exception of XFmode, which is aligned to 64 bits. */
6675 if (mode != VOIDmode && mode != BLKmode)
6676 {
6677 int mode_alignment = GET_MODE_BITSIZE (mode);
6678
6679 if (mode == XFmode)
6680 mode_alignment = 128;
6681 else if (mode == XCmode)
6682 mode_alignment = 256;
6683 if (COMPLEX_MODE_P (mode))
6684 mode_alignment /= 2;
6685 /* Misaligned fields are always returned in memory. */
6686 if (bit_offset % mode_alignment)
6687 return 0;
6688 }
6689
6690 /* For V1xx modes, just use the base mode. */
6691 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
6692 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
6693 mode = GET_MODE_INNER (mode);
6694
6695 /* Classification of atomic types. */
6696 switch (mode)
6697 {
6698 case SDmode:
6699 case DDmode:
6700 classes[0] = X86_64_SSE_CLASS;
6701 return 1;
6702 case TDmode:
6703 classes[0] = X86_64_SSE_CLASS;
6704 classes[1] = X86_64_SSEUP_CLASS;
6705 return 2;
6706 case DImode:
6707 case SImode:
6708 case HImode:
6709 case QImode:
6710 case CSImode:
6711 case CHImode:
6712 case CQImode:
6713 {
6714 int size = bit_offset + (int) GET_MODE_BITSIZE (mode);
6715
6716 /* Analyze last 128 bits only. */
6717 size = (size - 1) & 0x7f;
6718
6719 if (size < 32)
6720 {
6721 classes[0] = X86_64_INTEGERSI_CLASS;
6722 return 1;
6723 }
6724 else if (size < 64)
6725 {
6726 classes[0] = X86_64_INTEGER_CLASS;
6727 return 1;
6728 }
6729 else if (size < 64+32)
6730 {
6731 classes[0] = X86_64_INTEGER_CLASS;
6732 classes[1] = X86_64_INTEGERSI_CLASS;
6733 return 2;
6734 }
6735 else if (size < 64+64)
6736 {
6737 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6738 return 2;
6739 }
6740 else
6741 gcc_unreachable ();
6742 }
6743 case CDImode:
6744 case TImode:
6745 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6746 return 2;
6747 case COImode:
6748 case OImode:
6749 /* OImode shouldn't be used directly. */
6750 gcc_unreachable ();
6751 case CTImode:
6752 return 0;
6753 case SFmode:
6754 if (!(bit_offset % 64))
6755 classes[0] = X86_64_SSESF_CLASS;
6756 else
6757 classes[0] = X86_64_SSE_CLASS;
6758 return 1;
6759 case DFmode:
6760 classes[0] = X86_64_SSEDF_CLASS;
6761 return 1;
6762 case XFmode:
6763 classes[0] = X86_64_X87_CLASS;
6764 classes[1] = X86_64_X87UP_CLASS;
6765 return 2;
6766 case TFmode:
6767 classes[0] = X86_64_SSE_CLASS;
6768 classes[1] = X86_64_SSEUP_CLASS;
6769 return 2;
6770 case SCmode:
6771 classes[0] = X86_64_SSE_CLASS;
6772 if (!(bit_offset % 64))
6773 return 1;
6774 else
6775 {
6776 static bool warned;
6777
6778 if (!warned && warn_psabi)
6779 {
6780 warned = true;
6781 inform (input_location,
6782 "the ABI of passing structure with complex float"
6783 " member has changed in GCC 4.4");
6784 }
6785 classes[1] = X86_64_SSESF_CLASS;
6786 return 2;
6787 }
6788 case DCmode:
6789 classes[0] = X86_64_SSEDF_CLASS;
6790 classes[1] = X86_64_SSEDF_CLASS;
6791 return 2;
6792 case XCmode:
6793 classes[0] = X86_64_COMPLEX_X87_CLASS;
6794 return 1;
6795 case TCmode:
6796 /* This mode is larger than 16 bytes. */
6797 return 0;
6798 case V8SFmode:
6799 case V8SImode:
6800 case V32QImode:
6801 case V16HImode:
6802 case V4DFmode:
6803 case V4DImode:
6804 classes[0] = X86_64_SSE_CLASS;
6805 classes[1] = X86_64_SSEUP_CLASS;
6806 classes[2] = X86_64_SSEUP_CLASS;
6807 classes[3] = X86_64_SSEUP_CLASS;
6808 return 4;
6809 case V8DFmode:
6810 case V16SFmode:
6811 case V8DImode:
6812 case V16SImode:
6813 case V32HImode:
6814 case V64QImode:
6815 classes[0] = X86_64_SSE_CLASS;
6816 classes[1] = X86_64_SSEUP_CLASS;
6817 classes[2] = X86_64_SSEUP_CLASS;
6818 classes[3] = X86_64_SSEUP_CLASS;
6819 classes[4] = X86_64_SSEUP_CLASS;
6820 classes[5] = X86_64_SSEUP_CLASS;
6821 classes[6] = X86_64_SSEUP_CLASS;
6822 classes[7] = X86_64_SSEUP_CLASS;
6823 return 8;
6824 case V4SFmode:
6825 case V4SImode:
6826 case V16QImode:
6827 case V8HImode:
6828 case V2DFmode:
6829 case V2DImode:
6830 classes[0] = X86_64_SSE_CLASS;
6831 classes[1] = X86_64_SSEUP_CLASS;
6832 return 2;
6833 case V1TImode:
6834 case V1DImode:
6835 case V2SFmode:
6836 case V2SImode:
6837 case V4HImode:
6838 case V8QImode:
6839 classes[0] = X86_64_SSE_CLASS;
6840 return 1;
6841 case BLKmode:
6842 case VOIDmode:
6843 return 0;
6844 default:
6845 gcc_assert (VECTOR_MODE_P (mode));
6846
6847 if (bytes > 16)
6848 return 0;
6849
6850 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
6851
6852 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
6853 classes[0] = X86_64_INTEGERSI_CLASS;
6854 else
6855 classes[0] = X86_64_INTEGER_CLASS;
6856 classes[1] = X86_64_INTEGER_CLASS;
6857 return 1 + (bytes > 8);
6858 }
6859 }
6860
6861 /* Examine the argument and set the number of registers required in each
6862 class. Return true iff the parameter should be passed in memory. */
6863
6864 static bool
6865 examine_argument (enum machine_mode mode, const_tree type, int in_return,
6866 int *int_nregs, int *sse_nregs)
6867 {
6868 enum x86_64_reg_class regclass[MAX_CLASSES];
6869 int n = classify_argument (mode, type, regclass, 0);
6870
6871 *int_nregs = 0;
6872 *sse_nregs = 0;
6873
6874 if (!n)
6875 return true;
6876 for (n--; n >= 0; n--)
6877 switch (regclass[n])
6878 {
6879 case X86_64_INTEGER_CLASS:
6880 case X86_64_INTEGERSI_CLASS:
6881 (*int_nregs)++;
6882 break;
6883 case X86_64_SSE_CLASS:
6884 case X86_64_SSESF_CLASS:
6885 case X86_64_SSEDF_CLASS:
6886 (*sse_nregs)++;
6887 break;
6888 case X86_64_NO_CLASS:
6889 case X86_64_SSEUP_CLASS:
6890 break;
6891 case X86_64_X87_CLASS:
6892 case X86_64_X87UP_CLASS:
6893 case X86_64_COMPLEX_X87_CLASS:
6894 if (!in_return)
6895 return true;
6896 break;
6897 case X86_64_MEMORY_CLASS:
6898 gcc_unreachable ();
6899 }
6900
6901 return false;
6902 }
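/* For illustration: for the 16-byte struct { double d; int i; } example
   above, examine_argument sets *int_nregs = 1 and *sse_nregs = 1 and
   returns false (register passing).  For a long double (XFmode) argument
   it returns true, since the X87 classes force memory passing for
   arguments while still permitting an %st(0) return.  */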
6903
6904 /* Construct container for the argument used by GCC interface. See
6905 FUNCTION_ARG for the detailed description. */
6906
6907 static rtx
6908 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
6909 const_tree type, int in_return, int nintregs, int nsseregs,
6910 const int *intreg, int sse_regno)
6911 {
6912 /* The following variables hold the static issued_error state. */
6913 static bool issued_sse_arg_error;
6914 static bool issued_sse_ret_error;
6915 static bool issued_x87_ret_error;
6916
6917 enum machine_mode tmpmode;
6918 int bytes =
6919 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6920 enum x86_64_reg_class regclass[MAX_CLASSES];
6921 int n;
6922 int i;
6923 int nexps = 0;
6924 int needed_sseregs, needed_intregs;
6925 rtx exp[MAX_CLASSES];
6926 rtx ret;
6927
6928 n = classify_argument (mode, type, regclass, 0);
6929 if (!n)
6930 return NULL;
6931 if (examine_argument (mode, type, in_return, &needed_intregs,
6932 &needed_sseregs))
6933 return NULL;
6934 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
6935 return NULL;
6936
6937 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
6938 some less clueful developer tries to use floating-point anyway. */
6939 if (needed_sseregs && !TARGET_SSE)
6940 {
6941 if (in_return)
6942 {
6943 if (!issued_sse_ret_error)
6944 {
6945 error ("SSE register return with SSE disabled");
6946 issued_sse_ret_error = true;
6947 }
6948 }
6949 else if (!issued_sse_arg_error)
6950 {
6951 error ("SSE register argument with SSE disabled");
6952 issued_sse_arg_error = true;
6953 }
6954 return NULL;
6955 }
6956
6957 /* Likewise, error if the ABI requires us to return values in the
6958 x87 registers and the user specified -mno-80387. */
6959 if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
6960 for (i = 0; i < n; i++)
6961 if (regclass[i] == X86_64_X87_CLASS
6962 || regclass[i] == X86_64_X87UP_CLASS
6963 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
6964 {
6965 if (!issued_x87_ret_error)
6966 {
6967 error ("x87 register return with x87 disabled");
6968 issued_x87_ret_error = true;
6969 }
6970 return NULL;
6971 }
6972
6973 /* First construct the simple cases. Avoid SCmode, since we want to use
6974 a single register to pass this type. */
6975 if (n == 1 && mode != SCmode)
6976 switch (regclass[0])
6977 {
6978 case X86_64_INTEGER_CLASS:
6979 case X86_64_INTEGERSI_CLASS:
6980 return gen_rtx_REG (mode, intreg[0]);
6981 case X86_64_SSE_CLASS:
6982 case X86_64_SSESF_CLASS:
6983 case X86_64_SSEDF_CLASS:
6984 if (mode != BLKmode)
6985 return gen_reg_or_parallel (mode, orig_mode,
6986 SSE_REGNO (sse_regno));
6987 break;
6988 case X86_64_X87_CLASS:
6989 case X86_64_COMPLEX_X87_CLASS:
6990 return gen_rtx_REG (mode, FIRST_STACK_REG);
6991 case X86_64_NO_CLASS:
6992 /* Zero sized array, struct or class. */
6993 return NULL;
6994 default:
6995 gcc_unreachable ();
6996 }
6997 if (n == 2
6998 && regclass[0] == X86_64_SSE_CLASS
6999 && regclass[1] == X86_64_SSEUP_CLASS
7000 && mode != BLKmode)
7001 return gen_reg_or_parallel (mode, orig_mode,
7002 SSE_REGNO (sse_regno));
7003 if (n == 4
7004 && regclass[0] == X86_64_SSE_CLASS
7005 && regclass[1] == X86_64_SSEUP_CLASS
7006 && regclass[2] == X86_64_SSEUP_CLASS
7007 && regclass[3] == X86_64_SSEUP_CLASS
7008 && mode != BLKmode)
7009 return gen_reg_or_parallel (mode, orig_mode,
7010 SSE_REGNO (sse_regno));
7011 if (n == 8
7012 && regclass[0] == X86_64_SSE_CLASS
7013 && regclass[1] == X86_64_SSEUP_CLASS
7014 && regclass[2] == X86_64_SSEUP_CLASS
7015 && regclass[3] == X86_64_SSEUP_CLASS
7016 && regclass[4] == X86_64_SSEUP_CLASS
7017 && regclass[5] == X86_64_SSEUP_CLASS
7018 && regclass[6] == X86_64_SSEUP_CLASS
7019 && regclass[7] == X86_64_SSEUP_CLASS
7020 && mode != BLKmode)
7021 return gen_reg_or_parallel (mode, orig_mode,
7022 SSE_REGNO (sse_regno));
7023 if (n == 2
7024 && regclass[0] == X86_64_X87_CLASS
7025 && regclass[1] == X86_64_X87UP_CLASS)
7026 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
7027
7028 if (n == 2
7029 && regclass[0] == X86_64_INTEGER_CLASS
7030 && regclass[1] == X86_64_INTEGER_CLASS
7031 && (mode == CDImode || mode == TImode)
7032 && intreg[0] + 1 == intreg[1])
7033 return gen_rtx_REG (mode, intreg[0]);
7034
7035 /* Otherwise figure out the entries of the PARALLEL. */
7036 for (i = 0; i < n; i++)
7037 {
7038 int pos;
7039
7040 switch (regclass[i])
7041 {
7042 case X86_64_NO_CLASS:
7043 break;
7044 case X86_64_INTEGER_CLASS:
7045 case X86_64_INTEGERSI_CLASS:
7046 /* Merge TImodes on aligned occasions here too. */
7047 if (i * 8 + 8 > bytes)
7048 tmpmode
7049 = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
7050 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
7051 tmpmode = SImode;
7052 else
7053 tmpmode = DImode;
7054 /* We've requested a chunk size (such as 24 bits) for which
7055 no integer mode exists. Use DImode. */
7056 if (tmpmode == BLKmode)
7057 tmpmode = DImode;
7058 exp [nexps++]
7059 = gen_rtx_EXPR_LIST (VOIDmode,
7060 gen_rtx_REG (tmpmode, *intreg),
7061 GEN_INT (i*8));
7062 intreg++;
7063 break;
7064 case X86_64_SSESF_CLASS:
7065 exp [nexps++]
7066 = gen_rtx_EXPR_LIST (VOIDmode,
7067 gen_rtx_REG (SFmode,
7068 SSE_REGNO (sse_regno)),
7069 GEN_INT (i*8));
7070 sse_regno++;
7071 break;
7072 case X86_64_SSEDF_CLASS:
7073 exp [nexps++]
7074 = gen_rtx_EXPR_LIST (VOIDmode,
7075 gen_rtx_REG (DFmode,
7076 SSE_REGNO (sse_regno)),
7077 GEN_INT (i*8));
7078 sse_regno++;
7079 break;
7080 case X86_64_SSE_CLASS:
7081 pos = i;
7082 switch (n)
7083 {
7084 case 1:
7085 tmpmode = DImode;
7086 break;
7087 case 2:
7088 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
7089 {
7090 tmpmode = TImode;
7091 i++;
7092 }
7093 else
7094 tmpmode = DImode;
7095 break;
7096 case 4:
7097 gcc_assert (i == 0
7098 && regclass[1] == X86_64_SSEUP_CLASS
7099 && regclass[2] == X86_64_SSEUP_CLASS
7100 && regclass[3] == X86_64_SSEUP_CLASS);
7101 tmpmode = OImode;
7102 i += 3;
7103 break;
7104 case 8:
7105 gcc_assert (i == 0
7106 && regclass[1] == X86_64_SSEUP_CLASS
7107 && regclass[2] == X86_64_SSEUP_CLASS
7108 && regclass[3] == X86_64_SSEUP_CLASS
7109 && regclass[4] == X86_64_SSEUP_CLASS
7110 && regclass[5] == X86_64_SSEUP_CLASS
7111 && regclass[6] == X86_64_SSEUP_CLASS
7112 && regclass[7] == X86_64_SSEUP_CLASS);
7113 tmpmode = XImode;
7114 i += 7;
7115 break;
7116 default:
7117 gcc_unreachable ();
7118 }
7119 exp [nexps++]
7120 = gen_rtx_EXPR_LIST (VOIDmode,
7121 gen_rtx_REG (tmpmode,
7122 SSE_REGNO (sse_regno)),
7123 GEN_INT (pos*8));
7124 sse_regno++;
7125 break;
7126 default:
7127 gcc_unreachable ();
7128 }
7129 }
7130
7131 /* Empty aligned struct, union or class. */
7132 if (nexps == 0)
7133 return NULL;
7134
7135 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
7136 for (i = 0; i < nexps; i++)
7137 XVECEXP (ret, 0, i) = exp [i];
7138 return ret;
7139 }
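/* For illustration: for a first argument of type struct { double d; int i; }
   under the 64-bit SYSV ABI, the code above builds roughly

     (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                (expr_list (reg:DI di)   (const_int 8))])

   i.e. the first eightbyte travels in an SSE register and the second in a
   general-purpose register; the exact registers depend on how many
   preceding arguments have already consumed them.  */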
7140
7141 /* Update the data in CUM to advance over an argument of mode MODE
7142 and data type TYPE. (TYPE is null for libcalls where that information
7143 may not be available.) */
7144
7145 static void
7146 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7147 const_tree type, HOST_WIDE_INT bytes,
7148 HOST_WIDE_INT words)
7149 {
7150 switch (mode)
7151 {
7152 default:
7153 break;
7154
7155 case BLKmode:
7156 if (bytes < 0)
7157 break;
7158 /* FALLTHRU */
7159
7160 case DImode:
7161 case SImode:
7162 case HImode:
7163 case QImode:
7164 cum->words += words;
7165 cum->nregs -= words;
7166 cum->regno += words;
7167
7168 if (cum->nregs <= 0)
7169 {
7170 cum->nregs = 0;
7171 cum->regno = 0;
7172 }
7173 break;
7174
7175 case OImode:
7176 /* OImode shouldn't be used directly. */
7177 gcc_unreachable ();
7178
7179 case DFmode:
7180 if (cum->float_in_sse < 2)
7181 break;
7182 case SFmode:
7183 if (cum->float_in_sse < 1)
7184 break;
7185 /* FALLTHRU */
7186
7187 case V8SFmode:
7188 case V8SImode:
7189 case V64QImode:
7190 case V32HImode:
7191 case V16SImode:
7192 case V8DImode:
7193 case V16SFmode:
7194 case V8DFmode:
7195 case V32QImode:
7196 case V16HImode:
7197 case V4DFmode:
7198 case V4DImode:
7199 case TImode:
7200 case V16QImode:
7201 case V8HImode:
7202 case V4SImode:
7203 case V2DImode:
7204 case V4SFmode:
7205 case V2DFmode:
7206 if (!type || !AGGREGATE_TYPE_P (type))
7207 {
7208 cum->sse_words += words;
7209 cum->sse_nregs -= 1;
7210 cum->sse_regno += 1;
7211 if (cum->sse_nregs <= 0)
7212 {
7213 cum->sse_nregs = 0;
7214 cum->sse_regno = 0;
7215 }
7216 }
7217 break;
7218
7219 case V8QImode:
7220 case V4HImode:
7221 case V2SImode:
7222 case V2SFmode:
7223 case V1TImode:
7224 case V1DImode:
7225 if (!type || !AGGREGATE_TYPE_P (type))
7226 {
7227 cum->mmx_words += words;
7228 cum->mmx_nregs -= 1;
7229 cum->mmx_regno += 1;
7230 if (cum->mmx_nregs <= 0)
7231 {
7232 cum->mmx_nregs = 0;
7233 cum->mmx_regno = 0;
7234 }
7235 }
7236 break;
7237 }
7238 }
7239
7240 static void
7241 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7242 const_tree type, HOST_WIDE_INT words, bool named)
7243 {
7244 int int_nregs, sse_nregs;
7245
7246 /* Unnamed 512- and 256-bit vector mode parameters are passed on the stack. */
7247 if (!named && (VALID_AVX512F_REG_MODE (mode)
7248 || VALID_AVX256_REG_MODE (mode)))
7249 return;
7250
7251 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
7252 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
7253 {
7254 cum->nregs -= int_nregs;
7255 cum->sse_nregs -= sse_nregs;
7256 cum->regno += int_nregs;
7257 cum->sse_regno += sse_nregs;
7258 }
7259 else
7260 {
7261 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
7262 cum->words = (cum->words + align - 1) & ~(align - 1);
7263 cum->words += words;
7264 }
7265 }
7266
7267 static void
7268 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
7269 HOST_WIDE_INT words)
7270 {
7271 /* Otherwise, this should be passed indirectly. */
7272 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
7273
7274 cum->words += words;
7275 if (cum->nregs > 0)
7276 {
7277 cum->nregs -= 1;
7278 cum->regno += 1;
7279 }
7280 }
7281
7282 /* Update the data in CUM to advance over an argument of mode MODE and
7283 data type TYPE. (TYPE is null for libcalls where that information
7284 may not be available.) */
7285
7286 static void
7287 ix86_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
7288 const_tree type, bool named)
7289 {
7290 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7291 HOST_WIDE_INT bytes, words;
7292
7293 if (mode == BLKmode)
7294 bytes = int_size_in_bytes (type);
7295 else
7296 bytes = GET_MODE_SIZE (mode);
7297 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7298
7299 if (type)
7300 mode = type_natural_mode (type, NULL, false);
7301
7302 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7303 function_arg_advance_ms_64 (cum, bytes, words);
7304 else if (TARGET_64BIT)
7305 function_arg_advance_64 (cum, mode, type, words, named);
7306 else
7307 function_arg_advance_32 (cum, mode, type, bytes, words);
7308 }
7309
7310 /* Define where to put the arguments to a function.
7311 Value is zero to push the argument on the stack,
7312 or a hard register in which to store the argument.
7313
7314 MODE is the argument's machine mode.
7315 TYPE is the data type of the argument (as a tree).
7316 This is null for libcalls where that information may
7317 not be available.
7318 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7319 the preceding args and about the function being called.
7320 NAMED is nonzero if this argument is a named parameter
7321 (otherwise it is an extra parameter matching an ellipsis). */
7322
7323 static rtx
7324 function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7325 enum machine_mode orig_mode, const_tree type,
7326 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
7327 {
7328 /* Avoid the AL settings for the Unix64 ABI. */
7329 if (mode == VOIDmode)
7330 return constm1_rtx;
7331
7332 switch (mode)
7333 {
7334 default:
7335 break;
7336
7337 case BLKmode:
7338 if (bytes < 0)
7339 break;
7340 /* FALLTHRU */
7341 case DImode:
7342 case SImode:
7343 case HImode:
7344 case QImode:
7345 if (words <= cum->nregs)
7346 {
7347 int regno = cum->regno;
7348
7349 /* Fastcall allocates the first two DWORD (SImode) or
7350 smaller arguments to ECX and EDX, unless the argument
7351 is an aggregate type. */
7352 if (cum->fastcall)
7353 {
7354 if (mode == BLKmode
7355 || mode == DImode
7356 || (type && AGGREGATE_TYPE_P (type)))
7357 break;
7358
7359 /* ECX, not EAX, is the first allocated register. */
7360 if (regno == AX_REG)
7361 regno = CX_REG;
7362 }
7363 return gen_rtx_REG (mode, regno);
7364 }
7365 break;
7366
7367 case DFmode:
7368 if (cum->float_in_sse < 2)
7369 break;
7370 case SFmode:
7371 if (cum->float_in_sse < 1)
7372 break;
7373 /* FALLTHRU */
7374 case TImode:
7375 /* In 32bit, we pass TImode in xmm registers. */
7376 case V16QImode:
7377 case V8HImode:
7378 case V4SImode:
7379 case V2DImode:
7380 case V4SFmode:
7381 case V2DFmode:
7382 if (!type || !AGGREGATE_TYPE_P (type))
7383 {
7384 if (cum->sse_nregs)
7385 return gen_reg_or_parallel (mode, orig_mode,
7386 cum->sse_regno + FIRST_SSE_REG);
7387 }
7388 break;
7389
7390 case OImode:
7391 case XImode:
7392 /* OImode and XImode shouldn't be used directly. */
7393 gcc_unreachable ();
7394
7395 case V64QImode:
7396 case V32HImode:
7397 case V16SImode:
7398 case V8DImode:
7399 case V16SFmode:
7400 case V8DFmode:
7401 case V8SFmode:
7402 case V8SImode:
7403 case V32QImode:
7404 case V16HImode:
7405 case V4DFmode:
7406 case V4DImode:
7407 if (!type || !AGGREGATE_TYPE_P (type))
7408 {
7409 if (cum->sse_nregs)
7410 return gen_reg_or_parallel (mode, orig_mode,
7411 cum->sse_regno + FIRST_SSE_REG);
7412 }
7413 break;
7414
7415 case V8QImode:
7416 case V4HImode:
7417 case V2SImode:
7418 case V2SFmode:
7419 case V1TImode:
7420 case V1DImode:
7421 if (!type || !AGGREGATE_TYPE_P (type))
7422 {
7423 if (cum->mmx_nregs)
7424 return gen_reg_or_parallel (mode, orig_mode,
7425 cum->mmx_regno + FIRST_MMX_REG);
7426 }
7427 break;
7428 }
7429
7430 return NULL_RTX;
7431 }
7432
7433 static rtx
7434 function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7435 enum machine_mode orig_mode, const_tree type, bool named)
7436 {
7437 /* Handle a hidden AL argument containing the number of SSE registers
7438 used for varargs x86-64 functions. */
7439 if (mode == VOIDmode)
7440 return GEN_INT (cum->maybe_vaarg
7441 ? (cum->sse_nregs < 0
7442 ? X86_64_SSE_REGPARM_MAX
7443 : cum->sse_regno)
7444 : -1);
7445
7446 switch (mode)
7447 {
7448 default:
7449 break;
7450
7451 case V8SFmode:
7452 case V8SImode:
7453 case V32QImode:
7454 case V16HImode:
7455 case V4DFmode:
7456 case V4DImode:
7457 case V16SFmode:
7458 case V16SImode:
7459 case V64QImode:
7460 case V32HImode:
7461 case V8DFmode:
7462 case V8DImode:
7463 /* Unnamed 256- and 512-bit vector mode parameters are passed on the stack. */
7464 if (!named)
7465 return NULL;
7466 break;
7467 }
7468
7469 return construct_container (mode, orig_mode, type, 0, cum->nregs,
7470 cum->sse_nregs,
7471 &x86_64_int_parameter_registers [cum->regno],
7472 cum->sse_regno);
7473 }
7474
7475 static rtx
7476 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7477 enum machine_mode orig_mode, bool named,
7478 HOST_WIDE_INT bytes)
7479 {
7480 unsigned int regno;
7481
7482 /* We need to add a clobber for MS_ABI -> SYSV ABI calls in expand_call.
7483 The value -2 specifies that the current function call uses the MS ABI. */
7484 if (mode == VOIDmode)
7485 return GEN_INT (-2);
7486
7487 /* If we've run out of registers, it goes on the stack. */
7488 if (cum->nregs == 0)
7489 return NULL_RTX;
7490
7491 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
7492
7493 /* Only floating point modes are passed in anything but integer regs. */
7494 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
7495 {
7496 if (named)
7497 regno = cum->regno + FIRST_SSE_REG;
7498 else
7499 {
7500 rtx t1, t2;
7501
7502 /* Unnamed floating parameters are passed in both the
7503 SSE and integer registers. */
7504 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
7505 t2 = gen_rtx_REG (mode, regno);
7506 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
7507 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
7508 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
7509 }
7510 }
7511 /* Handle aggregate types passed in registers. */
7512 if (orig_mode == BLKmode)
7513 {
7514 if (bytes > 0 && bytes <= 8)
7515 mode = (bytes > 4 ? DImode : SImode);
7516 if (mode == BLKmode)
7517 mode = DImode;
7518 }
7519
7520 return gen_reg_or_parallel (mode, orig_mode, regno);
7521 }
7522
7523 /* Return where to put the arguments to a function.
7524 Return zero to push the argument on the stack, or a hard register in which to store the argument.
7525
7526 MODE is the argument's machine mode. TYPE is the data type of the
7527 argument. It is null for libcalls where that information may not be
7528 available. CUM gives information about the preceding args and about
7529 the function being called. NAMED is nonzero if this argument is a
7530 named parameter (otherwise it is an extra parameter matching an
7531 ellipsis). */
7532
7533 static rtx
7534 ix86_function_arg (cumulative_args_t cum_v, enum machine_mode omode,
7535 const_tree type, bool named)
7536 {
7537 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7538 enum machine_mode mode = omode;
7539 HOST_WIDE_INT bytes, words;
7540 rtx arg;
7541
7542 if (mode == BLKmode)
7543 bytes = int_size_in_bytes (type);
7544 else
7545 bytes = GET_MODE_SIZE (mode);
7546 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7547
7548 /* To simplify the code below, represent vector types with a vector mode
7549 even if MMX/SSE are not active. */
7550 if (type && TREE_CODE (type) == VECTOR_TYPE)
7551 mode = type_natural_mode (type, cum, false);
7552
7553 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7554 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
7555 else if (TARGET_64BIT)
7556 arg = function_arg_64 (cum, mode, omode, type, named);
7557 else
7558 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
7559
7560 return arg;
7561 }
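/* For illustration (assuming default options): for a call such as
   int f (int a, double b), the 64-bit SYSV path above returns a register
   rtx for %edi for A and %xmm0 for B; the MS 64-bit path returns %ecx and
   %xmm1, since each MS-ABI argument consumes one of the four GPR slots
   rcx/rdx/r8/r9 or the corresponding xmm0-xmm3 slot; and the plain 32-bit
   path returns NULL_RTX for both, so they are pushed on the stack, unless
   regparm/fastcall/sseregparm attributes say otherwise.  */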
7562
7563 /* A C expression that indicates when an argument must be passed by
7564 reference. If nonzero for an argument, a copy of that argument is
7565 made in memory and a pointer to the argument is passed instead of
7566 the argument itself. The pointer is passed in whatever way is
7567 appropriate for passing a pointer to that type. */
7568
7569 static bool
7570 ix86_pass_by_reference (cumulative_args_t cum_v, enum machine_mode mode,
7571 const_tree type, bool)
7572 {
7573 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7574
7575 /* See Windows x64 Software Convention. */
7576 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7577 {
7578 int msize = (int) GET_MODE_SIZE (mode);
7579 if (type)
7580 {
7581 /* Arrays are passed by reference. */
7582 if (TREE_CODE (type) == ARRAY_TYPE)
7583 return true;
7584
7585 if (AGGREGATE_TYPE_P (type))
7586 {
7587 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
7588 are passed by reference. */
7589 msize = int_size_in_bytes (type);
7590 }
7591 }
7592
7593 /* __m128 is passed by reference. */
7594 switch (msize) {
7595 case 1: case 2: case 4: case 8:
7596 break;
7597 default:
7598 return true;
7599 }
7600 }
7601 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
7602 return true;
7603
7604 return false;
7605 }
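/* For illustration: under the MS 64-bit convention implemented above, an
   8-byte struct or a plain double is passed by value, while an __m128, a
   24-byte struct, or any array type is replaced by a pointer to a
   caller-made copy.  Under the 64-bit SYSV ABI only variable-sized types
   (int_size_in_bytes == -1) take the by-reference path here.  */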
7606
7607 /* Return true when TYPE should be 128bit aligned for 32bit argument
7608 passing ABI. XXX: This function is obsolete and is only used for
7609 checking psABI compatibility with previous versions of GCC. */
7610
7611 static bool
7612 ix86_compat_aligned_value_p (const_tree type)
7613 {
7614 enum machine_mode mode = TYPE_MODE (type);
7615 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
7616 || mode == TDmode
7617 || mode == TFmode
7618 || mode == TCmode)
7619 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
7620 return true;
7621 if (TYPE_ALIGN (type) < 128)
7622 return false;
7623
7624 if (AGGREGATE_TYPE_P (type))
7625 {
7626 /* Walk the aggregates recursively. */
7627 switch (TREE_CODE (type))
7628 {
7629 case RECORD_TYPE:
7630 case UNION_TYPE:
7631 case QUAL_UNION_TYPE:
7632 {
7633 tree field;
7634
7635 /* Walk all the structure fields. */
7636 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7637 {
7638 if (TREE_CODE (field) == FIELD_DECL
7639 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
7640 return true;
7641 }
7642 break;
7643 }
7644
7645 case ARRAY_TYPE:
7646 /* Just in case some language passes arrays by value. */
7647 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
7648 return true;
7649 break;
7650
7651 default:
7652 gcc_unreachable ();
7653 }
7654 }
7655 return false;
7656 }
7657
7658 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
7659 XXX: This function is obsolete and is only used for checking psABI
7660 compatibility with previous versions of GCC. */
7661
7662 static unsigned int
7663 ix86_compat_function_arg_boundary (enum machine_mode mode,
7664 const_tree type, unsigned int align)
7665 {
7666 /* In 32bit, only _Decimal128 and __float128 are aligned to their
7667 natural boundaries. */
7668 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
7669 {
7670 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
7671 make an exception for SSE modes since these require 128bit
7672 alignment.
7673
7674 The handling here differs from field_alignment. ICC aligns MMX
7675 arguments to 4 byte boundaries, while structure fields are aligned
7676 to 8 byte boundaries. */
7677 if (!type)
7678 {
7679 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
7680 align = PARM_BOUNDARY;
7681 }
7682 else
7683 {
7684 if (!ix86_compat_aligned_value_p (type))
7685 align = PARM_BOUNDARY;
7686 }
7687 }
7688 if (align > BIGGEST_ALIGNMENT)
7689 align = BIGGEST_ALIGNMENT;
7690 return align;
7691 }
7692
7693 /* Return true when TYPE should be 128bit aligned for 32bit argument
7694 passing ABI. */
7695
7696 static bool
7697 ix86_contains_aligned_value_p (const_tree type)
7698 {
7699 enum machine_mode mode = TYPE_MODE (type);
7700
7701 if (mode == XFmode || mode == XCmode)
7702 return false;
7703
7704 if (TYPE_ALIGN (type) < 128)
7705 return false;
7706
7707 if (AGGREGATE_TYPE_P (type))
7708 {
7709 /* Walk the aggregates recursively. */
7710 switch (TREE_CODE (type))
7711 {
7712 case RECORD_TYPE:
7713 case UNION_TYPE:
7714 case QUAL_UNION_TYPE:
7715 {
7716 tree field;
7717
7718 /* Walk all the structure fields. */
7719 for (field = TYPE_FIELDS (type);
7720 field;
7721 field = DECL_CHAIN (field))
7722 {
7723 if (TREE_CODE (field) == FIELD_DECL
7724 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
7725 return true;
7726 }
7727 break;
7728 }
7729
7730 case ARRAY_TYPE:
7731 /* Just in case some language passes arrays by value. */
7732 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
7733 return true;
7734 break;
7735
7736 default:
7737 gcc_unreachable ();
7738 }
7739 }
7740 else
7741 return TYPE_ALIGN (type) >= 128;
7742
7743 return false;
7744 }
7745
7746 /* Gives the alignment boundary, in bits, of an argument with the
7747 specified mode and type. */
7748
7749 static unsigned int
7750 ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
7751 {
7752 unsigned int align;
7753 if (type)
7754 {
7755 /* Since the main variant type is used for the call, convert TYPE
7756 to its main variant. */
7757 type = TYPE_MAIN_VARIANT (type);
7758 align = TYPE_ALIGN (type);
7759 }
7760 else
7761 align = GET_MODE_ALIGNMENT (mode);
7762 if (align < PARM_BOUNDARY)
7763 align = PARM_BOUNDARY;
7764 else
7765 {
7766 static bool warned;
7767 unsigned int saved_align = align;
7768
7769 if (!TARGET_64BIT)
7770 {
7771 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
7772 if (!type)
7773 {
7774 if (mode == XFmode || mode == XCmode)
7775 align = PARM_BOUNDARY;
7776 }
7777 else if (!ix86_contains_aligned_value_p (type))
7778 align = PARM_BOUNDARY;
7779
7780 if (align < 128)
7781 align = PARM_BOUNDARY;
7782 }
7783
7784 if (warn_psabi
7785 && !warned
7786 && align != ix86_compat_function_arg_boundary (mode, type,
7787 saved_align))
7788 {
7789 warned = true;
7790 inform (input_location,
7791 "The ABI for passing parameters with %d-byte"
7792 " alignment has changed in GCC 4.6",
7793 align / BITS_PER_UNIT);
7794 }
7795 }
7796
7797 return align;
7798 }
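/* For illustration: with the logic above, a double argument in 32-bit code
   ends up on a 4-byte (PARM_BOUNDARY) boundary while an __m128 argument
   keeps its 16-byte alignment; when the result differs from what the
   pre-GCC 4.6 rules in ix86_compat_function_arg_boundary would have
   produced, a single -Wpsabi note is emitted.  */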
7799
7800 /* Return true if N is a possible register number of function value. */
7801
7802 static bool
7803 ix86_function_value_regno_p (const unsigned int regno)
7804 {
7805 switch (regno)
7806 {
7807 case AX_REG:
7808 return true;
7809 case DX_REG:
7810 return (!TARGET_64BIT || ix86_abi != MS_ABI);
7811 case DI_REG:
7812 case SI_REG:
7813 return TARGET_64BIT && ix86_abi != MS_ABI;
7814
7815 /* Complex values are returned in %st(0)/%st(1) pair. */
7816 case ST0_REG:
7817 case ST1_REG:
7818 /* TODO: The function should depend on the current function's ABI, but
7819 builtins.c would then need updating. Therefore we use the
7820 default ABI. */
7821 if (TARGET_64BIT && ix86_abi == MS_ABI)
7822 return false;
7823 return TARGET_FLOAT_RETURNS_IN_80387;
7824
7825 /* Complex values are returned in %xmm0/%xmm1 pair. */
7826 case XMM0_REG:
7827 case XMM1_REG:
7828 return TARGET_SSE;
7829
7830 case MM0_REG:
7831 if (TARGET_MACHO || TARGET_64BIT)
7832 return false;
7833 return TARGET_MMX;
7834 }
7835
7836 return false;
7837 }
7838
7839 /* Define how to find the value returned by a function.
7840 VALTYPE is the data type of the value (as a tree).
7841 If the precise function being called is known, FUNC is its FUNCTION_DECL;
7842 otherwise, FUNC is 0. */
7843
7844 static rtx
7845 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
7846 const_tree fntype, const_tree fn)
7847 {
7848 unsigned int regno;
7849
7850 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
7851 we normally prevent this case when mmx is not available. However
7852 some ABIs may require the result to be returned like DImode. */
7853 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7854 regno = FIRST_MMX_REG;
7855
7856 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
7857 we prevent this case when sse is not available. However some ABIs
7858 may require the result to be returned like integer TImode. */
7859 else if (mode == TImode
7860 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7861 regno = FIRST_SSE_REG;
7862
7863 /* 32-byte vector modes in %ymm0. */
7864 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
7865 regno = FIRST_SSE_REG;
7866
7867 /* 64-byte vector modes in %zmm0. */
7868 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
7869 regno = FIRST_SSE_REG;
7870
7871 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
7872 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
7873 regno = FIRST_FLOAT_REG;
7874 else
7875 /* Most things go in %eax. */
7876 regno = AX_REG;
7877
7878 /* Override FP return register with %xmm0 for local functions when
7879 SSE math is enabled or for functions with sseregparm attribute. */
7880 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
7881 {
7882 int sse_level = ix86_function_sseregparm (fntype, fn, false);
7883 if ((sse_level >= 1 && mode == SFmode)
7884 || (sse_level == 2 && mode == DFmode))
7885 regno = FIRST_SSE_REG;
7886 }
7887
7888 /* OImode shouldn't be used directly. */
7889 gcc_assert (mode != OImode);
7890
7891 return gen_rtx_REG (orig_mode, regno);
7892 }
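/* For illustration: in 32-bit code a float or double return value thus
   lands in %st(0) (unless -mno-fp-ret-in-387), while the sseregparm
   attribute, or a local function compiled with SSE math, moves it into
   %xmm0; an 8-byte vector goes to %mm0 and a 16-byte vector to %xmm0.  */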
7893
7894 static rtx
7895 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
7896 const_tree valtype)
7897 {
7898 rtx ret;
7899
7900 /* Handle libcalls, which don't provide a type node. */
7901 if (valtype == NULL)
7902 {
7903 unsigned int regno;
7904
7905 switch (mode)
7906 {
7907 case SFmode:
7908 case SCmode:
7909 case DFmode:
7910 case DCmode:
7911 case TFmode:
7912 case SDmode:
7913 case DDmode:
7914 case TDmode:
7915 regno = FIRST_SSE_REG;
7916 break;
7917 case XFmode:
7918 case XCmode:
7919 regno = FIRST_FLOAT_REG;
7920 break;
7921 case TCmode:
7922 return NULL;
7923 default:
7924 regno = AX_REG;
7925 }
7926
7927 return gen_rtx_REG (mode, regno);
7928 }
7929 else if (POINTER_TYPE_P (valtype))
7930 {
7931 /* Pointers are always returned in word_mode. */
7932 mode = word_mode;
7933 }
7934
7935 ret = construct_container (mode, orig_mode, valtype, 1,
7936 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
7937 x86_64_int_return_registers, 0);
7938
7939 /* For zero sized structures, construct_container returns NULL, but we
7940 need to keep the rest of the compiler happy by returning a meaningful value. */
7941 if (!ret)
7942 ret = gen_rtx_REG (orig_mode, AX_REG);
7943
7944 return ret;
7945 }
7946
7947 static rtx
7948 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode,
7949 const_tree valtype)
7950 {
7951 unsigned int regno = AX_REG;
7952
7953 if (TARGET_SSE)
7954 {
7955 switch (GET_MODE_SIZE (mode))
7956 {
7957 case 16:
7958 if (valtype != NULL_TREE
7959 && !VECTOR_INTEGER_TYPE_P (valtype)
7961 && !INTEGRAL_TYPE_P (valtype)
7962 && !VECTOR_FLOAT_TYPE_P (valtype))
7963 break;
7964 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
7965 && !COMPLEX_MODE_P (mode))
7966 regno = FIRST_SSE_REG;
7967 break;
7968 case 8:
7969 case 4:
7970 if (mode == SFmode || mode == DFmode)
7971 regno = FIRST_SSE_REG;
7972 break;
7973 default:
7974 break;
7975 }
7976 }
7977 return gen_rtx_REG (orig_mode, regno);
7978 }
7979
7980 static rtx
7981 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
7982 enum machine_mode orig_mode, enum machine_mode mode)
7983 {
7984 const_tree fn, fntype;
7985
7986 fn = NULL_TREE;
7987 if (fntype_or_decl && DECL_P (fntype_or_decl))
7988 fn = fntype_or_decl;
7989 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
7990
7991 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
7992 return function_value_ms_64 (orig_mode, mode, valtype);
7993 else if (TARGET_64BIT)
7994 return function_value_64 (orig_mode, mode, valtype);
7995 else
7996 return function_value_32 (orig_mode, mode, fntype, fn);
7997 }
7998
7999 static rtx
8000 ix86_function_value (const_tree valtype, const_tree fntype_or_decl, bool)
8001 {
8002 enum machine_mode mode, orig_mode;
8003
8004 orig_mode = TYPE_MODE (valtype);
8005 mode = type_natural_mode (valtype, NULL, true);
8006 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
8007 }
8008
8009 /* Pointer function arguments and return values are promoted to
8010 word_mode. */
8011
8012 static enum machine_mode
8013 ix86_promote_function_mode (const_tree type, enum machine_mode mode,
8014 int *punsignedp, const_tree fntype,
8015 int for_return)
8016 {
8017 if (type != NULL_TREE && POINTER_TYPE_P (type))
8018 {
8019 *punsignedp = POINTERS_EXTEND_UNSIGNED;
8020 return word_mode;
8021 }
8022 return default_promote_function_mode (type, mode, punsignedp, fntype,
8023 for_return);
8024 }
8025
8026 /* Return true if a structure, union or array with MODE containing FIELD
8027 should be accessed using BLKmode. */
8028
8029 static bool
8030 ix86_member_type_forces_blk (const_tree field, enum machine_mode mode)
8031 {
8032 /* Union with XFmode must be in BLKmode. */
8033 return (mode == XFmode
8034 && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
8035 || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
8036 }
8037
8038 rtx
8039 ix86_libcall_value (enum machine_mode mode)
8040 {
8041 return ix86_function_value_1 (NULL, NULL, mode, mode);
8042 }
8043
8044 /* Return true iff type is returned in memory. */
8045
8046 static bool
8047 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
8048 {
8049 #ifdef SUBTARGET_RETURN_IN_MEMORY
8050 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
8051 #else
8052 const enum machine_mode mode = type_natural_mode (type, NULL, true);
8053 HOST_WIDE_INT size;
8054
8055 if (TARGET_64BIT)
8056 {
8057 if (ix86_function_type_abi (fntype) == MS_ABI)
8058 {
8059 size = int_size_in_bytes (type);
8060
8061 /* __m128 is returned in xmm0. */
8062 if ((!type || VECTOR_INTEGER_TYPE_P (type)
8063 || INTEGRAL_TYPE_P (type)
8064 || VECTOR_FLOAT_TYPE_P (type))
8065 && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
8066 && !COMPLEX_MODE_P (mode)
8067 && (GET_MODE_SIZE (mode) == 16 || size == 16))
8068 return false;
8069
8070 /* Otherwise, the size must be exactly 1, 2, 4, or 8 bytes. */
8071 return size != 1 && size != 2 && size != 4 && size != 8;
8072 }
8073 else
8074 {
8075 int needed_intregs, needed_sseregs;
8076
8077 return examine_argument (mode, type, 1,
8078 &needed_intregs, &needed_sseregs);
8079 }
8080 }
8081 else
8082 {
8083 if (mode == BLKmode)
8084 return true;
8085
8086 size = int_size_in_bytes (type);
8087
8088 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
8089 return false;
8090
8091 if (VECTOR_MODE_P (mode) || mode == TImode)
8092 {
8093 /* User-created vectors small enough to fit in EAX. */
8094 if (size < 8)
8095 return false;
8096
8097 /* Unless the ABI prescribes otherwise,
8098 MMX/3dNow values are returned in MM0 if available. */
8099
8100 if (size == 8)
8101 return TARGET_VECT8_RETURNS || !TARGET_MMX;
8102
8103 /* SSE values are returned in XMM0 if available. */
8104 if (size == 16)
8105 return !TARGET_SSE;
8106
8107 /* AVX values are returned in YMM0 if available. */
8108 if (size == 32)
8109 return !TARGET_AVX;
8110
8111 /* AVX512F values are returned in ZMM0 if available. */
8112 if (size == 64)
8113 return !TARGET_AVX512F;
8114 }
8115
8116 if (mode == XFmode)
8117 return false;
8118
8119 if (size > 12)
8120 return true;
8121
8122 /* OImode shouldn't be used directly. */
8123 gcc_assert (mode != OImode);
8124
8125 return false;
8126 }
8127 #endif
8128 }
8129
8130 \f
8131 /* Create the va_list data type. */
8132
8133 /* Returns the calling-convention-specific va_list data type.
8134 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
8135
8136 static tree
8137 ix86_build_builtin_va_list_abi (enum calling_abi abi)
8138 {
8139 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
8140
8141 /* For i386 we use a plain pointer to the argument area. */
8142 if (!TARGET_64BIT || abi == MS_ABI)
8143 return build_pointer_type (char_type_node);
8144
8145 record = lang_hooks.types.make_type (RECORD_TYPE);
8146 type_decl = build_decl (BUILTINS_LOCATION,
8147 TYPE_DECL, get_identifier ("__va_list_tag"), record);
8148
8149 f_gpr = build_decl (BUILTINS_LOCATION,
8150 FIELD_DECL, get_identifier ("gp_offset"),
8151 unsigned_type_node);
8152 f_fpr = build_decl (BUILTINS_LOCATION,
8153 FIELD_DECL, get_identifier ("fp_offset"),
8154 unsigned_type_node);
8155 f_ovf = build_decl (BUILTINS_LOCATION,
8156 FIELD_DECL, get_identifier ("overflow_arg_area"),
8157 ptr_type_node);
8158 f_sav = build_decl (BUILTINS_LOCATION,
8159 FIELD_DECL, get_identifier ("reg_save_area"),
8160 ptr_type_node);
8161
8162 va_list_gpr_counter_field = f_gpr;
8163 va_list_fpr_counter_field = f_fpr;
8164
8165 DECL_FIELD_CONTEXT (f_gpr) = record;
8166 DECL_FIELD_CONTEXT (f_fpr) = record;
8167 DECL_FIELD_CONTEXT (f_ovf) = record;
8168 DECL_FIELD_CONTEXT (f_sav) = record;
8169
8170 TYPE_STUB_DECL (record) = type_decl;
8171 TYPE_NAME (record) = type_decl;
8172 TYPE_FIELDS (record) = f_gpr;
8173 DECL_CHAIN (f_gpr) = f_fpr;
8174 DECL_CHAIN (f_fpr) = f_ovf;
8175 DECL_CHAIN (f_ovf) = f_sav;
8176
8177 layout_type (record);
8178
8179 /* The correct type is an array type of one element. */
8180 return build_array_type (record, build_index_type (size_zero_node));
8181 }
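/* For illustration: for the 64-bit SYSV ABI the record built above
   corresponds to the familiar

     typedef struct __va_list_tag {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } __builtin_va_list[1];

   while i386 and the 64-bit MS ABI keep va_list a plain char pointer.  */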
8182
8183 /* Set up the builtin va_list data type and, for 64-bit, the additional
8184 calling-convention-specific va_list data types. */
8185
8186 static tree
8187 ix86_build_builtin_va_list (void)
8188 {
8189 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
8190
8191 /* Initialize abi specific va_list builtin types. */
8192 if (TARGET_64BIT)
8193 {
8194 tree t;
8195 if (ix86_abi == MS_ABI)
8196 {
8197 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
8198 if (TREE_CODE (t) != RECORD_TYPE)
8199 t = build_variant_type_copy (t);
8200 sysv_va_list_type_node = t;
8201 }
8202 else
8203 {
8204 t = ret;
8205 if (TREE_CODE (t) != RECORD_TYPE)
8206 t = build_variant_type_copy (t);
8207 sysv_va_list_type_node = t;
8208 }
8209 if (ix86_abi != MS_ABI)
8210 {
8211 t = ix86_build_builtin_va_list_abi (MS_ABI);
8212 if (TREE_CODE (t) != RECORD_TYPE)
8213 t = build_variant_type_copy (t);
8214 ms_va_list_type_node = t;
8215 }
8216 else
8217 {
8218 t = ret;
8219 if (TREE_CODE (t) != RECORD_TYPE)
8220 t = build_variant_type_copy (t);
8221 ms_va_list_type_node = t;
8222 }
8223 }
8224
8225 return ret;
8226 }
8227
8228 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
8229
8230 static void
8231 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
8232 {
8233 rtx save_area, mem;
8234 alias_set_type set;
8235 int i, max;
8236
8237 /* GPR size of varargs save area. */
8238 if (cfun->va_list_gpr_size)
8239 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
8240 else
8241 ix86_varargs_gpr_size = 0;
8242
8243 /* FPR size of varargs save area. We don't need it if we don't pass
8244 anything in SSE registers. */
8245 if (TARGET_SSE && cfun->va_list_fpr_size)
8246 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
8247 else
8248 ix86_varargs_fpr_size = 0;
8249
8250 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
8251 return;
8252
8253 save_area = frame_pointer_rtx;
8254 set = get_varargs_alias_set ();
8255
8256 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
8257 if (max > X86_64_REGPARM_MAX)
8258 max = X86_64_REGPARM_MAX;
8259
8260 for (i = cum->regno; i < max; i++)
8261 {
8262 mem = gen_rtx_MEM (word_mode,
8263 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
8264 MEM_NOTRAP_P (mem) = 1;
8265 set_mem_alias_set (mem, set);
8266 emit_move_insn (mem,
8267 gen_rtx_REG (word_mode,
8268 x86_64_int_parameter_registers[i]));
8269 }
8270
8271 if (ix86_varargs_fpr_size)
8272 {
8273 enum machine_mode smode;
8274 rtx_code_label *label;
8275 rtx test;
8276
8277 /* Now emit code to save SSE registers. The AX parameter contains number
8278 of SSE parameter registers used to call this function, though all we
8279 actually check here is the zero/non-zero status. */
8280
8281 label = gen_label_rtx ();
8282 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
8283 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
8284 label));
8285
8286 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
8287 we used movdqa (i.e. TImode) instead? Perhaps even better would
8288 be if we could determine the real mode of the data, via a hook
8289 into pass_stdarg. Ignore all that for now. */
8290 smode = V4SFmode;
8291 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
8292 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
8293
8294 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
8295 if (max > X86_64_SSE_REGPARM_MAX)
8296 max = X86_64_SSE_REGPARM_MAX;
8297
8298 for (i = cum->sse_regno; i < max; ++i)
8299 {
8300 mem = plus_constant (Pmode, save_area,
8301 i * 16 + ix86_varargs_gpr_size);
8302 mem = gen_rtx_MEM (smode, mem);
8303 MEM_NOTRAP_P (mem) = 1;
8304 set_mem_alias_set (mem, set);
8305 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
8306
8307 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
8308 }
8309
8310 emit_label (label);
8311 }
8312 }
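
/* A sketch of the register save area that the loops above fill in,
   assuming the default maxima of 6 integer and 8 SSE argument registers:

       reg_save_area +   0:  rdi rsi rdx rcx r8 r9    (6 x 8 bytes)
       reg_save_area +  48:  xmm0 ... xmm7            (8 x 16 bytes)

   The gp_offset and fp_offset fields of the va_list index into this
   block.  The SSE half is only stored when %al (AX_REG above) is
   non-zero, i.e. when the caller indicated that vector registers were
   used for the call.  */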
8313
8314 static void
8315 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
8316 {
8317 alias_set_type set = get_varargs_alias_set ();
8318 int i;
8319
8320   /* Reset to zero, as there might be a SysV va_arg used
8321      before.  */
8322 ix86_varargs_gpr_size = 0;
8323 ix86_varargs_fpr_size = 0;
8324
8325 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
8326 {
8327 rtx reg, mem;
8328
8329 mem = gen_rtx_MEM (Pmode,
8330 plus_constant (Pmode, virtual_incoming_args_rtx,
8331 i * UNITS_PER_WORD));
8332 MEM_NOTRAP_P (mem) = 1;
8333 set_mem_alias_set (mem, set);
8334
8335 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
8336 emit_move_insn (mem, reg);
8337 }
8338 }
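
/* Illustration (not authoritative) of the effect of the MS 64-bit ABI
   variant above: the up to four integer argument registers are spilled
   into the caller-allocated home area just above the return address,
   roughly

       mov %rcx,  0(incoming-args)
       mov %rdx,  8(incoming-args)
       mov %r8,  16(incoming-args)
       mov %r9,  24(incoming-args)

   so that va_arg can later walk the arguments with a plain char *
   pointer.  */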
8339
8340 static void
8341 ix86_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
8342 tree type, int *, int no_rtl)
8343 {
8344 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8345 CUMULATIVE_ARGS next_cum;
8346 tree fntype;
8347
8348 /* This argument doesn't appear to be used anymore. Which is good,
8349 because the old code here didn't suppress rtl generation. */
8350 gcc_assert (!no_rtl);
8351
8352 if (!TARGET_64BIT)
8353 return;
8354
8355 fntype = TREE_TYPE (current_function_decl);
8356
8357 /* For varargs, we do not want to skip the dummy va_dcl argument.
8358 For stdargs, we do want to skip the last named argument. */
8359 next_cum = *cum;
8360 if (stdarg_p (fntype))
8361 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
8362 true);
8363
8364 if (cum->call_abi == MS_ABI)
8365 setup_incoming_varargs_ms_64 (&next_cum);
8366 else
8367 setup_incoming_varargs_64 (&next_cum);
8368 }
8369
8370 /* Checks if TYPE is of kind va_list char *. */
8371
8372 static bool
8373 is_va_list_char_pointer (tree type)
8374 {
8375 tree canonic;
8376
8377 /* For 32-bit it is always true. */
8378 if (!TARGET_64BIT)
8379 return true;
8380 canonic = ix86_canonical_va_list_type (type);
8381 return (canonic == ms_va_list_type_node
8382 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
8383 }
8384
8385 /* Implement va_start. */
8386
8387 static void
8388 ix86_va_start (tree valist, rtx nextarg)
8389 {
8390 HOST_WIDE_INT words, n_gpr, n_fpr;
8391 tree f_gpr, f_fpr, f_ovf, f_sav;
8392 tree gpr, fpr, ovf, sav, t;
8393 tree type;
8394 rtx ovf_rtx;
8395
8396 if (flag_split_stack
8397 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8398 {
8399 unsigned int scratch_regno;
8400
8401 /* When we are splitting the stack, we can't refer to the stack
8402 arguments using internal_arg_pointer, because they may be on
8403 the old stack. The split stack prologue will arrange to
8404 leave a pointer to the old stack arguments in a scratch
8405 register, which we here copy to a pseudo-register. The split
8406 stack prologue can't set the pseudo-register directly because
8407 it (the prologue) runs before any registers have been saved. */
8408
8409 scratch_regno = split_stack_prologue_scratch_regno ();
8410 if (scratch_regno != INVALID_REGNUM)
8411 {
8412 rtx reg;
8413 rtx_insn *seq;
8414
8415 reg = gen_reg_rtx (Pmode);
8416 cfun->machine->split_stack_varargs_pointer = reg;
8417
8418 start_sequence ();
8419 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
8420 seq = get_insns ();
8421 end_sequence ();
8422
8423 push_topmost_sequence ();
8424 emit_insn_after (seq, entry_of_function ());
8425 pop_topmost_sequence ();
8426 }
8427 }
8428
8429   /* Only 64-bit targets need something special.  */
8430 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8431 {
8432 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8433 std_expand_builtin_va_start (valist, nextarg);
8434 else
8435 {
8436 rtx va_r, next;
8437
8438 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
8439 next = expand_binop (ptr_mode, add_optab,
8440 cfun->machine->split_stack_varargs_pointer,
8441 crtl->args.arg_offset_rtx,
8442 NULL_RTX, 0, OPTAB_LIB_WIDEN);
8443 convert_move (va_r, next, 0);
8444 }
8445 return;
8446 }
8447
8448 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8449 f_fpr = DECL_CHAIN (f_gpr);
8450 f_ovf = DECL_CHAIN (f_fpr);
8451 f_sav = DECL_CHAIN (f_ovf);
8452
8453 valist = build_simple_mem_ref (valist);
8454 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
8455 /* The following should be folded into the MEM_REF offset. */
8456 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
8457 f_gpr, NULL_TREE);
8458 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
8459 f_fpr, NULL_TREE);
8460 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
8461 f_ovf, NULL_TREE);
8462 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
8463 f_sav, NULL_TREE);
8464
8465 /* Count number of gp and fp argument registers used. */
8466 words = crtl->args.info.words;
8467 n_gpr = crtl->args.info.regno;
8468 n_fpr = crtl->args.info.sse_regno;
8469
8470 if (cfun->va_list_gpr_size)
8471 {
8472 type = TREE_TYPE (gpr);
8473 t = build2 (MODIFY_EXPR, type,
8474 gpr, build_int_cst (type, n_gpr * 8));
8475 TREE_SIDE_EFFECTS (t) = 1;
8476 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8477 }
8478
8479 if (TARGET_SSE && cfun->va_list_fpr_size)
8480 {
8481 type = TREE_TYPE (fpr);
8482 t = build2 (MODIFY_EXPR, type, fpr,
8483 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
8484 TREE_SIDE_EFFECTS (t) = 1;
8485 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8486 }
8487
8488 /* Find the overflow area. */
8489 type = TREE_TYPE (ovf);
8490 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8491 ovf_rtx = crtl->args.internal_arg_pointer;
8492 else
8493 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
8494 t = make_tree (type, ovf_rtx);
8495 if (words != 0)
8496 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
8497 t = build2 (MODIFY_EXPR, type, ovf, t);
8498 TREE_SIDE_EFFECTS (t) = 1;
8499 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8500
8501 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
8502 {
8503 /* Find the register save area.
8504          The function prologue saves it right above the stack frame.  */
8505 type = TREE_TYPE (sav);
8506 t = make_tree (type, frame_pointer_rtx);
8507 if (!ix86_varargs_gpr_size)
8508 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
8509 t = build2 (MODIFY_EXPR, type, sav, t);
8510 TREE_SIDE_EFFECTS (t) = 1;
8511 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8512 }
8513 }
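
/* In pseudo-C, the 64-bit SysV va_start expansion above amounts to the
   following, where n_gpr and n_fpr are the numbers of named arguments
   already passed in integer and SSE registers (sketch only; the real
   code also handles -fsplit-stack and a missing register save area):

       ap->gp_offset = n_gpr * 8;
       ap->fp_offset = 48 + n_fpr * 16;
       ap->overflow_arg_area = address of the first stack-passed argument;
       ap->reg_save_area = start of the register save area;
*/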
8514
8515 /* Implement va_arg. */
8516
8517 static tree
8518 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
8519 gimple_seq *post_p)
8520 {
8521 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
8522 tree f_gpr, f_fpr, f_ovf, f_sav;
8523 tree gpr, fpr, ovf, sav, t;
8524 int size, rsize;
8525 tree lab_false, lab_over = NULL_TREE;
8526 tree addr, t2;
8527 rtx container;
8528 int indirect_p = 0;
8529 tree ptrtype;
8530 enum machine_mode nat_mode;
8531 unsigned int arg_boundary;
8532
8533   /* Only 64-bit targets need something special.  */
8534 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8535 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
8536
8537 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8538 f_fpr = DECL_CHAIN (f_gpr);
8539 f_ovf = DECL_CHAIN (f_fpr);
8540 f_sav = DECL_CHAIN (f_ovf);
8541
8542 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
8543 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
8544 valist = build_va_arg_indirect_ref (valist);
8545 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
8546 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
8547 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
8548
8549 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
8550 if (indirect_p)
8551 type = build_pointer_type (type);
8552 size = int_size_in_bytes (type);
8553 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
8554
8555 nat_mode = type_natural_mode (type, NULL, false);
8556 switch (nat_mode)
8557 {
8558 case V8SFmode:
8559 case V8SImode:
8560 case V32QImode:
8561 case V16HImode:
8562 case V4DFmode:
8563 case V4DImode:
8564 case V16SFmode:
8565 case V16SImode:
8566 case V64QImode:
8567 case V32HImode:
8568 case V8DFmode:
8569 case V8DImode:
8570       /* Unnamed 256- and 512-bit vector mode parameters are passed on the stack.  */
8571 if (!TARGET_64BIT_MS_ABI)
8572 {
8573 container = NULL;
8574 break;
8575 }
8576
8577 default:
8578 container = construct_container (nat_mode, TYPE_MODE (type),
8579 type, 0, X86_64_REGPARM_MAX,
8580 X86_64_SSE_REGPARM_MAX, intreg,
8581 0);
8582 break;
8583 }
8584
8585 /* Pull the value out of the saved registers. */
8586
8587 addr = create_tmp_var (ptr_type_node, "addr");
8588
8589 if (container)
8590 {
8591 int needed_intregs, needed_sseregs;
8592 bool need_temp;
8593 tree int_addr, sse_addr;
8594
8595 lab_false = create_artificial_label (UNKNOWN_LOCATION);
8596 lab_over = create_artificial_label (UNKNOWN_LOCATION);
8597
8598 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
8599
8600 need_temp = (!REG_P (container)
8601 && ((needed_intregs && TYPE_ALIGN (type) > 64)
8602 || TYPE_ALIGN (type) > 128));
8603
8604       /* In case we are passing a structure, verify that it is a consecutive
8605          block in the register save area.  If not, we need to do moves.  */
8606 if (!need_temp && !REG_P (container))
8607 {
8608           /* Verify that all registers are strictly consecutive.  */
8609 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
8610 {
8611 int i;
8612
8613 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
8614 {
8615 rtx slot = XVECEXP (container, 0, i);
8616 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
8617 || INTVAL (XEXP (slot, 1)) != i * 16)
8618 need_temp = 1;
8619 }
8620 }
8621 else
8622 {
8623 int i;
8624
8625 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
8626 {
8627 rtx slot = XVECEXP (container, 0, i);
8628 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
8629 || INTVAL (XEXP (slot, 1)) != i * 8)
8630 need_temp = 1;
8631 }
8632 }
8633 }
8634 if (!need_temp)
8635 {
8636 int_addr = addr;
8637 sse_addr = addr;
8638 }
8639 else
8640 {
8641 int_addr = create_tmp_var (ptr_type_node, "int_addr");
8642 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
8643 }
8644
8645 /* First ensure that we fit completely in registers. */
8646 if (needed_intregs)
8647 {
8648 t = build_int_cst (TREE_TYPE (gpr),
8649 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
8650 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
8651 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
8652 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
8653 gimplify_and_add (t, pre_p);
8654 }
8655 if (needed_sseregs)
8656 {
8657 t = build_int_cst (TREE_TYPE (fpr),
8658 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
8659 + X86_64_REGPARM_MAX * 8);
8660 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
8661 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
8662 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
8663 gimplify_and_add (t, pre_p);
8664 }
8665
8666 /* Compute index to start of area used for integer regs. */
8667 if (needed_intregs)
8668 {
8669 /* int_addr = gpr + sav; */
8670 t = fold_build_pointer_plus (sav, gpr);
8671 gimplify_assign (int_addr, t, pre_p);
8672 }
8673 if (needed_sseregs)
8674 {
8675 /* sse_addr = fpr + sav; */
8676 t = fold_build_pointer_plus (sav, fpr);
8677 gimplify_assign (sse_addr, t, pre_p);
8678 }
8679 if (need_temp)
8680 {
8681 int i, prev_size = 0;
8682 tree temp = create_tmp_var (type, "va_arg_tmp");
8683
8684 /* addr = &temp; */
8685 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
8686 gimplify_assign (addr, t, pre_p);
8687
8688 for (i = 0; i < XVECLEN (container, 0); i++)
8689 {
8690 rtx slot = XVECEXP (container, 0, i);
8691 rtx reg = XEXP (slot, 0);
8692 enum machine_mode mode = GET_MODE (reg);
8693 tree piece_type;
8694 tree addr_type;
8695 tree daddr_type;
8696 tree src_addr, src;
8697 int src_offset;
8698 tree dest_addr, dest;
8699 int cur_size = GET_MODE_SIZE (mode);
8700
8701 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
8702 prev_size = INTVAL (XEXP (slot, 1));
8703 if (prev_size + cur_size > size)
8704 {
8705 cur_size = size - prev_size;
8706 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
8707 if (mode == BLKmode)
8708 mode = QImode;
8709 }
8710 piece_type = lang_hooks.types.type_for_mode (mode, 1);
8711 if (mode == GET_MODE (reg))
8712 addr_type = build_pointer_type (piece_type);
8713 else
8714 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8715 true);
8716 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8717 true);
8718
8719 if (SSE_REGNO_P (REGNO (reg)))
8720 {
8721 src_addr = sse_addr;
8722 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
8723 }
8724 else
8725 {
8726 src_addr = int_addr;
8727 src_offset = REGNO (reg) * 8;
8728 }
8729 src_addr = fold_convert (addr_type, src_addr);
8730 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
8731
8732 dest_addr = fold_convert (daddr_type, addr);
8733 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
8734 if (cur_size == GET_MODE_SIZE (mode))
8735 {
8736 src = build_va_arg_indirect_ref (src_addr);
8737 dest = build_va_arg_indirect_ref (dest_addr);
8738
8739 gimplify_assign (dest, src, pre_p);
8740 }
8741 else
8742 {
8743 tree copy
8744 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
8745 3, dest_addr, src_addr,
8746 size_int (cur_size));
8747 gimplify_and_add (copy, pre_p);
8748 }
8749 prev_size += cur_size;
8750 }
8751 }
8752
8753 if (needed_intregs)
8754 {
8755 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
8756 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
8757 gimplify_assign (gpr, t, pre_p);
8758 }
8759
8760 if (needed_sseregs)
8761 {
8762 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
8763 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
8764 gimplify_assign (fpr, t, pre_p);
8765 }
8766
8767 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
8768
8769 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
8770 }
8771
8772 /* ... otherwise out of the overflow area. */
8773
8774   /* When we align a parameter on the stack for the caller, if the
8775      parameter alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will
8776      be aligned at MAX_SUPPORTED_STACK_ALIGNMENT.  We match the callee
8777      here with the caller.  */
8778 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
8779 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
8780 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
8781
8782 /* Care for on-stack alignment if needed. */
8783 if (arg_boundary <= 64 || size == 0)
8784 t = ovf;
8785 else
8786 {
8787 HOST_WIDE_INT align = arg_boundary / 8;
8788 t = fold_build_pointer_plus_hwi (ovf, align - 1);
8789 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
8790 build_int_cst (TREE_TYPE (t), -align));
8791 }
8792
8793 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
8794 gimplify_assign (addr, t, pre_p);
8795
8796 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
8797 gimplify_assign (unshare_expr (ovf), t, pre_p);
8798
8799 if (container)
8800 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
8801
8802 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
8803 addr = fold_convert (ptrtype, addr);
8804
8805 if (indirect_p)
8806 addr = build_va_arg_indirect_ref (addr);
8807 return build_va_arg_indirect_ref (addr);
8808 }
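
/* For a simple integer argument, the gimple generated above boils down
   to this pseudo-C (a sketch; aggregates, SSE values and over-aligned
   types add further cases):

       if (ap->gp_offset < 48)
         {
           addr = ap->reg_save_area + ap->gp_offset;
           ap->gp_offset += 8;
         }
       else
         {
           addr = ap->overflow_arg_area;
           ap->overflow_arg_area += 8;
         }
       result = *(TYPE *) addr;
*/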
8809 \f
8810 /* Return true if OPNUM's MEM should be matched
8811 in movabs* patterns. */
8812
8813 bool
8814 ix86_check_movabs (rtx insn, int opnum)
8815 {
8816 rtx set, mem;
8817
8818 set = PATTERN (insn);
8819 if (GET_CODE (set) == PARALLEL)
8820 set = XVECEXP (set, 0, 0);
8821 gcc_assert (GET_CODE (set) == SET);
8822 mem = XEXP (set, opnum);
8823 while (GET_CODE (mem) == SUBREG)
8824 mem = SUBREG_REG (mem);
8825 gcc_assert (MEM_P (mem));
8826 return volatile_ok || !MEM_VOLATILE_P (mem);
8827 }
8828 \f
8829 /* Initialize the table of extra 80387 mathematical constants. */
8830
8831 static void
8832 init_ext_80387_constants (void)
8833 {
8834 static const char * cst[5] =
8835 {
8836 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
8837 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
8838 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
8839 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
8840 "3.1415926535897932385128089594061862044", /* 4: fldpi */
8841 };
8842 int i;
8843
8844 for (i = 0; i < 5; i++)
8845 {
8846 real_from_string (&ext_80387_constants_table[i], cst[i]);
8847 /* Ensure each constant is rounded to XFmode precision. */
8848 real_convert (&ext_80387_constants_table[i],
8849 XFmode, &ext_80387_constants_table[i]);
8850 }
8851
8852 ext_80387_constants_init = 1;
8853 }
8854
8855 /* Return non-zero if the constant is something that
8856 can be loaded with a special instruction. */
8857
8858 int
8859 standard_80387_constant_p (rtx x)
8860 {
8861 enum machine_mode mode = GET_MODE (x);
8862
8863 REAL_VALUE_TYPE r;
8864
8865 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
8866 return -1;
8867
8868 if (x == CONST0_RTX (mode))
8869 return 1;
8870 if (x == CONST1_RTX (mode))
8871 return 2;
8872
8873 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8874
8875 /* For XFmode constants, try to find a special 80387 instruction when
8876 optimizing for size or on those CPUs that benefit from them. */
8877 if (mode == XFmode
8878 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
8879 {
8880 int i;
8881
8882 if (! ext_80387_constants_init)
8883 init_ext_80387_constants ();
8884
8885 for (i = 0; i < 5; i++)
8886 if (real_identical (&r, &ext_80387_constants_table[i]))
8887 return i + 3;
8888 }
8889
8890 /* Load of the constant -0.0 or -1.0 will be split as
8891 fldz;fchs or fld1;fchs sequence. */
8892 if (real_isnegzero (&r))
8893 return 8;
8894 if (real_identical (&r, &dconstm1))
8895 return 9;
8896
8897 return 0;
8898 }
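
/* Summary of the return values of standard_80387_constant_p, as consumed
   by standard_80387_constant_opcode and standard_80387_constant_rtx:

       -1  not an x87 floating constant
        0  no special load instruction available
        1  0.0       (fldz)        6  log2(10)  (fldl2t)
        2  1.0       (fld1)        7  pi        (fldpi)
        3  log10(2)  (fldlg2)      8  -0.0      (fldz; fchs)
        4  ln(2)     (fldln2)      9  -1.0      (fld1; fchs)
        5  log2(e)   (fldl2e)
*/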
8899
8900 /* Return the opcode of the special instruction to be used to load
8901 the constant X. */
8902
8903 const char *
8904 standard_80387_constant_opcode (rtx x)
8905 {
8906 switch (standard_80387_constant_p (x))
8907 {
8908 case 1:
8909 return "fldz";
8910 case 2:
8911 return "fld1";
8912 case 3:
8913 return "fldlg2";
8914 case 4:
8915 return "fldln2";
8916 case 5:
8917 return "fldl2e";
8918 case 6:
8919 return "fldl2t";
8920 case 7:
8921 return "fldpi";
8922 case 8:
8923 case 9:
8924 return "#";
8925 default:
8926 gcc_unreachable ();
8927 }
8928 }
8929
8930 /* Return the CONST_DOUBLE representing the 80387 constant that is
8931 loaded by the specified special instruction. The argument IDX
8932 matches the return value from standard_80387_constant_p. */
8933
8934 rtx
8935 standard_80387_constant_rtx (int idx)
8936 {
8937 int i;
8938
8939 if (! ext_80387_constants_init)
8940 init_ext_80387_constants ();
8941
8942 switch (idx)
8943 {
8944 case 3:
8945 case 4:
8946 case 5:
8947 case 6:
8948 case 7:
8949 i = idx - 3;
8950 break;
8951
8952 default:
8953 gcc_unreachable ();
8954 }
8955
8956 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
8957 XFmode);
8958 }
8959
8960 /* Return 1 if X is all 0s and 2 if X is all 1s
8961    in a supported SSE/AVX vector mode.  */
8962
8963 int
8964 standard_sse_constant_p (rtx x)
8965 {
8966 enum machine_mode mode = GET_MODE (x);
8967
8968 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
8969 return 1;
8970 if (vector_all_ones_operand (x, mode))
8971 switch (mode)
8972 {
8973 case V16QImode:
8974 case V8HImode:
8975 case V4SImode:
8976 case V2DImode:
8977 if (TARGET_SSE2)
8978 return 2;
8979 case V32QImode:
8980 case V16HImode:
8981 case V8SImode:
8982 case V4DImode:
8983 if (TARGET_AVX2)
8984 return 2;
8985 case V64QImode:
8986 case V32HImode:
8987 case V16SImode:
8988 case V8DImode:
8989 if (TARGET_AVX512F)
8990 return 2;
8991 default:
8992 break;
8993 }
8994
8995 return 0;
8996 }
8997
8998 /* Return the opcode of the special instruction to be used to load
8999 the constant X. */
9000
9001 const char *
9002 standard_sse_constant_opcode (rtx_insn *insn, rtx x)
9003 {
9004 switch (standard_sse_constant_p (x))
9005 {
9006 case 1:
9007 switch (get_attr_mode (insn))
9008 {
9009 case MODE_XI:
9010 return "vpxord\t%g0, %g0, %g0";
9011 case MODE_V16SF:
9012 return TARGET_AVX512DQ ? "vxorps\t%g0, %g0, %g0"
9013 : "vpxord\t%g0, %g0, %g0";
9014 case MODE_V8DF:
9015 return TARGET_AVX512DQ ? "vxorpd\t%g0, %g0, %g0"
9016 : "vpxorq\t%g0, %g0, %g0";
9017 case MODE_TI:
9018 return TARGET_AVX512VL ? "vpxord\t%t0, %t0, %t0"
9019 : "%vpxor\t%0, %d0";
9020 case MODE_V2DF:
9021 return "%vxorpd\t%0, %d0";
9022 case MODE_V4SF:
9023 return "%vxorps\t%0, %d0";
9024
9025 case MODE_OI:
9026 return TARGET_AVX512VL ? "vpxord\t%x0, %x0, %x0"
9027 : "vpxor\t%x0, %x0, %x0";
9028 case MODE_V4DF:
9029 return "vxorpd\t%x0, %x0, %x0";
9030 case MODE_V8SF:
9031 return "vxorps\t%x0, %x0, %x0";
9032
9033 default:
9034 break;
9035 }
9036
9037 case 2:
9038 if (TARGET_AVX512VL
9039 || get_attr_mode (insn) == MODE_XI
9040 || get_attr_mode (insn) == MODE_V8DF
9041 || get_attr_mode (insn) == MODE_V16SF)
9042 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
9043 if (TARGET_AVX)
9044 return "vpcmpeqd\t%0, %0, %0";
9045 else
9046 return "pcmpeqd\t%0, %0";
9047
9048 default:
9049 break;
9050 }
9051 gcc_unreachable ();
9052 }
9053
9054 /* Returns true if OP contains a symbol reference.  */
9055
9056 bool
9057 symbolic_reference_mentioned_p (rtx op)
9058 {
9059 const char *fmt;
9060 int i;
9061
9062 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
9063 return true;
9064
9065 fmt = GET_RTX_FORMAT (GET_CODE (op));
9066 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
9067 {
9068 if (fmt[i] == 'E')
9069 {
9070 int j;
9071
9072 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
9073 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
9074 return true;
9075 }
9076
9077 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
9078 return true;
9079 }
9080
9081 return false;
9082 }
9083
9084 /* Return true if it is appropriate to emit `ret' instructions in the
9085 body of a function. Do this only if the epilogue is simple, needing a
9086 couple of insns. Prior to reloading, we can't tell how many registers
9087 must be saved, so return false then. Return false if there is no frame
9088 marker to de-allocate. */
9089
9090 bool
9091 ix86_can_use_return_insn_p (void)
9092 {
9093 struct ix86_frame frame;
9094
9095 if (! reload_completed || frame_pointer_needed)
9096 return 0;
9097
9098 /* Don't allow more than 32k pop, since that's all we can do
9099 with one instruction. */
9100 if (crtl->args.pops_args && crtl->args.size >= 32768)
9101 return 0;
9102
9103 ix86_compute_frame_layout (&frame);
9104 return (frame.stack_pointer_offset == UNITS_PER_WORD
9105 && (frame.nregs + frame.nsseregs) == 0);
9106 }
9107 \f
9108 /* Value should be nonzero if functions must have frame pointers.
9109 Zero means the frame pointer need not be set up (and parms may
9110 be accessed via the stack pointer) in functions that seem suitable. */
9111
9112 static bool
9113 ix86_frame_pointer_required (void)
9114 {
9115 /* If we accessed previous frames, then the generated code expects
9116 to be able to access the saved ebp value in our frame. */
9117 if (cfun->machine->accesses_prev_frame)
9118 return true;
9119
9120   /* Several x86 OSes need a frame pointer for other reasons,
9121      usually pertaining to setjmp.  */
9122 if (SUBTARGET_FRAME_POINTER_REQUIRED)
9123 return true;
9124
9125 /* For older 32-bit runtimes setjmp requires valid frame-pointer. */
9126 if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
9127 return true;
9128
9129   /* For Win64 SEH, very large frames need a frame pointer, as the maximum
9130      stack allocation is 4GB.  */
9131 if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
9132 return true;
9133
9134 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
9135 turns off the frame pointer by default. Turn it back on now if
9136 we've not got a leaf function. */
9137 if (TARGET_OMIT_LEAF_FRAME_POINTER
9138 && (!crtl->is_leaf
9139 || ix86_current_function_calls_tls_descriptor))
9140 return true;
9141
9142 if (crtl->profile && !flag_fentry)
9143 return true;
9144
9145 return false;
9146 }
9147
9148 /* Record that the current function accesses previous call frames. */
9149
9150 void
9151 ix86_setup_frame_addresses (void)
9152 {
9153 cfun->machine->accesses_prev_frame = 1;
9154 }
9155 \f
9156 #ifndef USE_HIDDEN_LINKONCE
9157 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
9158 # define USE_HIDDEN_LINKONCE 1
9159 # else
9160 # define USE_HIDDEN_LINKONCE 0
9161 # endif
9162 #endif
9163
9164 static int pic_labels_used;
9165
9166 /* Fills in the label name that should be used for a pc thunk for
9167 the given register. */
9168
9169 static void
9170 get_pc_thunk_name (char name[32], unsigned int regno)
9171 {
9172 gcc_assert (!TARGET_64BIT);
9173
9174 if (USE_HIDDEN_LINKONCE)
9175 sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
9176 else
9177 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
9178 }
9179
9180
9181 /* This function generates code for -fpic that loads %ebx with
9182 the return address of the caller and then returns. */
9183
9184 static void
9185 ix86_code_end (void)
9186 {
9187 rtx xops[2];
9188 int regno;
9189
9190 for (regno = AX_REG; regno <= SP_REG; regno++)
9191 {
9192 char name[32];
9193 tree decl;
9194
9195 if (!(pic_labels_used & (1 << regno)))
9196 continue;
9197
9198 get_pc_thunk_name (name, regno);
9199
9200 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
9201 get_identifier (name),
9202 build_function_type_list (void_type_node, NULL_TREE));
9203 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
9204 NULL_TREE, void_type_node);
9205 TREE_PUBLIC (decl) = 1;
9206 TREE_STATIC (decl) = 1;
9207 DECL_IGNORED_P (decl) = 1;
9208
9209 #if TARGET_MACHO
9210 if (TARGET_MACHO)
9211 {
9212 switch_to_section (darwin_sections[text_coal_section]);
9213 fputs ("\t.weak_definition\t", asm_out_file);
9214 assemble_name (asm_out_file, name);
9215 fputs ("\n\t.private_extern\t", asm_out_file);
9216 assemble_name (asm_out_file, name);
9217 putc ('\n', asm_out_file);
9218 ASM_OUTPUT_LABEL (asm_out_file, name);
9219 DECL_WEAK (decl) = 1;
9220 }
9221 else
9222 #endif
9223 if (USE_HIDDEN_LINKONCE)
9224 {
9225 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
9226
9227 targetm.asm_out.unique_section (decl, 0);
9228 switch_to_section (get_named_section (decl, NULL, 0));
9229
9230 targetm.asm_out.globalize_label (asm_out_file, name);
9231 fputs ("\t.hidden\t", asm_out_file);
9232 assemble_name (asm_out_file, name);
9233 putc ('\n', asm_out_file);
9234 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
9235 }
9236 else
9237 {
9238 switch_to_section (text_section);
9239 ASM_OUTPUT_LABEL (asm_out_file, name);
9240 }
9241
9242 DECL_INITIAL (decl) = make_node (BLOCK);
9243 current_function_decl = decl;
9244 init_function_start (decl);
9245 first_function_block_is_cold = false;
9246 /* Make sure unwind info is emitted for the thunk if needed. */
9247 final_start_function (emit_barrier (), asm_out_file, 1);
9248
9249 /* Pad stack IP move with 4 instructions (two NOPs count
9250 as one instruction). */
9251 if (TARGET_PAD_SHORT_FUNCTION)
9252 {
9253 int i = 8;
9254
9255 while (i--)
9256 fputs ("\tnop\n", asm_out_file);
9257 }
9258
9259 xops[0] = gen_rtx_REG (Pmode, regno);
9260 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
9261 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
9262 fputs ("\tret\n", asm_out_file);
9263 final_end_function ();
9264 init_insn_lengths ();
9265 free_after_compilation (cfun);
9266 set_cfun (NULL);
9267 current_function_decl = NULL;
9268 }
9269
9270 if (flag_split_stack)
9271 file_end_indicate_split_stack ();
9272 }
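
/* For reference, each pc thunk emitted above looks like this (shown for
   %ebx, AT&T syntax):

       __x86.get_pc_thunk.bx:
               movl    (%esp), %ebx
               ret

   i.e. it copies its own return address (the caller's PC) into the
   requested register.  */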
9273
9274 /* Emit code for the SET_GOT patterns. */
9275
9276 const char *
9277 output_set_got (rtx dest, rtx label)
9278 {
9279 rtx xops[3];
9280
9281 xops[0] = dest;
9282
9283 if (TARGET_VXWORKS_RTP && flag_pic)
9284 {
9285 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
9286 xops[2] = gen_rtx_MEM (Pmode,
9287 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
9288 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
9289
9290 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
9291 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
9292 an unadorned address. */
9293 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
9294 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
9295 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
9296 return "";
9297 }
9298
9299 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
9300
9301 if (!flag_pic)
9302 {
9303 if (TARGET_MACHO)
9304 /* We don't need a pic base, we're not producing pic. */
9305 gcc_unreachable ();
9306
9307 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
9308 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
9309 targetm.asm_out.internal_label (asm_out_file, "L",
9310 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
9311 }
9312 else
9313 {
9314 char name[32];
9315 get_pc_thunk_name (name, REGNO (dest));
9316 pic_labels_used |= 1 << REGNO (dest);
9317
9318 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
9319 xops[2] = gen_rtx_MEM (QImode, xops[2]);
9320 output_asm_insn ("call\t%X2", xops);
9321
9322 #if TARGET_MACHO
9323 /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
9324 This is what will be referenced by the Mach-O PIC subsystem. */
9325 if (machopic_should_output_picbase_label () || !label)
9326 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
9327
9328 /* When we are restoring the pic base at the site of a nonlocal label,
9329 and we decided to emit the pic base above, we will still output a
9330 local label used for calculating the correction offset (even though
9331 the offset will be 0 in that case). */
9332 if (label)
9333 targetm.asm_out.internal_label (asm_out_file, "L",
9334 CODE_LABEL_NUMBER (label));
9335 #endif
9336 }
9337
9338 if (!TARGET_MACHO)
9339 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
9340
9341 return "";
9342 }
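
/* A typical sequence produced by output_set_got for 32-bit -fpic code
   (a sketch, AT&T syntax, with %ebx as the PIC register):

       call    __x86.get_pc_thunk.bx
       addl    $_GLOBAL_OFFSET_TABLE_, %ebx

   The thunk leaves the address of the addl in %ebx; the special symbol
   _GLOBAL_OFFSET_TABLE_ then resolves (via a GOTPC relocation) to the
   displacement from that point to the GOT.  */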
9343
9344 /* Generate a "push" pattern for input ARG.  */
9345
9346 static rtx
9347 gen_push (rtx arg)
9348 {
9349 struct machine_function *m = cfun->machine;
9350
9351 if (m->fs.cfa_reg == stack_pointer_rtx)
9352 m->fs.cfa_offset += UNITS_PER_WORD;
9353 m->fs.sp_offset += UNITS_PER_WORD;
9354
9355 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9356 arg = gen_rtx_REG (word_mode, REGNO (arg));
9357
9358 return gen_rtx_SET (VOIDmode,
9359 gen_rtx_MEM (word_mode,
9360 gen_rtx_PRE_DEC (Pmode,
9361 stack_pointer_rtx)),
9362 arg);
9363 }
9364
9365 /* Generate a "pop" pattern for input ARG.  */
9366
9367 static rtx
9368 gen_pop (rtx arg)
9369 {
9370 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9371 arg = gen_rtx_REG (word_mode, REGNO (arg));
9372
9373 return gen_rtx_SET (VOIDmode,
9374 arg,
9375 gen_rtx_MEM (word_mode,
9376 gen_rtx_POST_INC (Pmode,
9377 stack_pointer_rtx)));
9378 }
9379
9380 /* Return >= 0 if there is an unused call-clobbered register available
9381 for the entire function. */
9382
9383 static unsigned int
9384 ix86_select_alt_pic_regnum (void)
9385 {
9386 if (crtl->is_leaf
9387 && !crtl->profile
9388 && !ix86_current_function_calls_tls_descriptor)
9389 {
9390 int i, drap;
9391 /* Can't use the same register for both PIC and DRAP. */
9392 if (crtl->drap_reg)
9393 drap = REGNO (crtl->drap_reg);
9394 else
9395 drap = -1;
9396 for (i = 2; i >= 0; --i)
9397 if (i != drap && !df_regs_ever_live_p (i))
9398 return i;
9399 }
9400
9401 return INVALID_REGNUM;
9402 }
9403
9404 /* Return TRUE if we need to save REGNO. */
9405
9406 static bool
9407 ix86_save_reg (unsigned int regno, bool maybe_eh_return)
9408 {
9409 if (pic_offset_table_rtx
9410 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
9411 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
9412 || crtl->profile
9413 || crtl->calls_eh_return
9414 || crtl->uses_const_pool
9415 || cfun->has_nonlocal_label))
9416 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
9417
9418 if (crtl->calls_eh_return && maybe_eh_return)
9419 {
9420 unsigned i;
9421 for (i = 0; ; i++)
9422 {
9423 unsigned test = EH_RETURN_DATA_REGNO (i);
9424 if (test == INVALID_REGNUM)
9425 break;
9426 if (test == regno)
9427 return true;
9428 }
9429 }
9430
9431 if (crtl->drap_reg
9432 && regno == REGNO (crtl->drap_reg)
9433 && !cfun->machine->no_drap_save_restore)
9434 return true;
9435
9436 return (df_regs_ever_live_p (regno)
9437 && !call_used_regs[regno]
9438 && !fixed_regs[regno]
9439 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
9440 }
9441
9442 /* Return the number of saved general purpose registers.  */
9443
9444 static int
9445 ix86_nsaved_regs (void)
9446 {
9447 int nregs = 0;
9448 int regno;
9449
9450 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9451 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9452 nregs ++;
9453 return nregs;
9454 }
9455
9456 /* Return the number of saved SSE registers.  */
9457
9458 static int
9459 ix86_nsaved_sseregs (void)
9460 {
9461 int nregs = 0;
9462 int regno;
9463
9464 if (!TARGET_64BIT_MS_ABI)
9465 return 0;
9466 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9467 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9468 nregs ++;
9469 return nregs;
9470 }
9471
9472 /* Given FROM and TO register numbers, say whether this elimination is
9473 allowed. If stack alignment is needed, we can only replace argument
9474 pointer with hard frame pointer, or replace frame pointer with stack
9475 pointer. Otherwise, frame pointer elimination is automatically
9476 handled and all other eliminations are valid. */
9477
9478 static bool
9479 ix86_can_eliminate (const int from, const int to)
9480 {
9481 if (stack_realign_fp)
9482 return ((from == ARG_POINTER_REGNUM
9483 && to == HARD_FRAME_POINTER_REGNUM)
9484 || (from == FRAME_POINTER_REGNUM
9485 && to == STACK_POINTER_REGNUM));
9486 else
9487 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
9488 }
9489
9490 /* Return the offset between two registers, one to be eliminated, and the other
9491 its replacement, at the start of a routine. */
9492
9493 HOST_WIDE_INT
9494 ix86_initial_elimination_offset (int from, int to)
9495 {
9496 struct ix86_frame frame;
9497 ix86_compute_frame_layout (&frame);
9498
9499 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
9500 return frame.hard_frame_pointer_offset;
9501 else if (from == FRAME_POINTER_REGNUM
9502 && to == HARD_FRAME_POINTER_REGNUM)
9503 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
9504 else
9505 {
9506 gcc_assert (to == STACK_POINTER_REGNUM);
9507
9508 if (from == ARG_POINTER_REGNUM)
9509 return frame.stack_pointer_offset;
9510
9511 gcc_assert (from == FRAME_POINTER_REGNUM);
9512 return frame.stack_pointer_offset - frame.frame_pointer_offset;
9513 }
9514 }
9515
9516 /* In a dynamically-aligned function, we can't know the offset from
9517 stack pointer to frame pointer, so we must ensure that setjmp
9518 eliminates fp against the hard fp (%ebp) rather than trying to
9519 index from %esp up to the top of the frame across a gap that is
9520 of unknown (at compile-time) size. */
9521 static rtx
9522 ix86_builtin_setjmp_frame_value (void)
9523 {
9524 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
9525 }
9526
9527 /* When using -fsplit-stack, the allocation routines set a field in
9528 the TCB to the bottom of the stack plus this much space, measured
9529 in bytes. */
9530
9531 #define SPLIT_STACK_AVAILABLE 256
9532
9533 /* Fill structure ix86_frame about frame of currently computed function. */
9534
9535 static void
9536 ix86_compute_frame_layout (struct ix86_frame *frame)
9537 {
9538 unsigned HOST_WIDE_INT stack_alignment_needed;
9539 HOST_WIDE_INT offset;
9540 unsigned HOST_WIDE_INT preferred_alignment;
9541 HOST_WIDE_INT size = get_frame_size ();
9542 HOST_WIDE_INT to_allocate;
9543
9544 frame->nregs = ix86_nsaved_regs ();
9545 frame->nsseregs = ix86_nsaved_sseregs ();
9546
9547   /* The 64-bit MS ABI seems to require stack alignment to always be 16,
9548      except for function prologues and leaf functions.  */
9549 if ((TARGET_64BIT_MS_ABI && crtl->preferred_stack_boundary < 128)
9550 && (!crtl->is_leaf || cfun->calls_alloca != 0
9551 || ix86_current_function_calls_tls_descriptor))
9552 {
9553 crtl->preferred_stack_boundary = 128;
9554 crtl->stack_alignment_needed = 128;
9555 }
9556   /* preferred_stack_boundary is never updated for calls expanded from a
9557      TLS descriptor.  Update it here.  We don't update it in the expand
9558      stage because, according to the comments before
9559      ix86_current_function_calls_tls_descriptor, TLS calls may be optimized
9560      away.  */
9561 else if (ix86_current_function_calls_tls_descriptor
9562 && crtl->preferred_stack_boundary < PREFERRED_STACK_BOUNDARY)
9563 {
9564 crtl->preferred_stack_boundary = PREFERRED_STACK_BOUNDARY;
9565 if (crtl->stack_alignment_needed < PREFERRED_STACK_BOUNDARY)
9566 crtl->stack_alignment_needed = PREFERRED_STACK_BOUNDARY;
9567 }
9568
9569 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
9570 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
9571
9572 gcc_assert (!size || stack_alignment_needed);
9573 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
9574 gcc_assert (preferred_alignment <= stack_alignment_needed);
9575
9576 /* For SEH we have to limit the amount of code movement into the prologue.
9577 At present we do this via a BLOCKAGE, at which point there's very little
9578 scheduling that can be done, which means that there's very little point
9579 in doing anything except PUSHs. */
9580 if (TARGET_SEH)
9581 cfun->machine->use_fast_prologue_epilogue = false;
9582
9583   /* During reload iterations the number of registers saved can change.
9584      Recompute the value as needed.  Do not recompute when the number of
9585      registers didn't change, as reload makes multiple calls to the function
9586      and does not expect the decision to change within a single iteration.  */
9587 else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun))
9588 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
9589 {
9590 int count = frame->nregs;
9591 struct cgraph_node *node = cgraph_node::get (current_function_decl);
9592
9593 cfun->machine->use_fast_prologue_epilogue_nregs = count;
9594
9595 /* The fast prologue uses move instead of push to save registers. This
9596 is significantly longer, but also executes faster as modern hardware
9597 can execute the moves in parallel, but can't do that for push/pop.
9598
9599          Be careful about choosing which prologue to emit: when the function
9600          takes many instructions to execute we may use the slow version, as
9601          well as when the function is known to be outside a hot spot (this is
9602          known with feedback only).  Weight the size of the function by the
9603          number of registers to save, as it is cheap to use one or two push
9604          instructions but very slow to use many of them.  */
9605 if (count)
9606 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
9607 if (node->frequency < NODE_FREQUENCY_NORMAL
9608 || (flag_branch_probabilities
9609 && node->frequency < NODE_FREQUENCY_HOT))
9610 cfun->machine->use_fast_prologue_epilogue = false;
9611 else
9612 cfun->machine->use_fast_prologue_epilogue
9613 = !expensive_function_p (count);
9614 }
9615
9616 frame->save_regs_using_mov
9617 = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
9618 /* If static stack checking is enabled and done with probes,
9619 the registers need to be saved before allocating the frame. */
9620 && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
9621
9622 /* Skip return address. */
9623 offset = UNITS_PER_WORD;
9624
9625 /* Skip pushed static chain. */
9626 if (ix86_static_chain_on_stack)
9627 offset += UNITS_PER_WORD;
9628
9629 /* Skip saved base pointer. */
9630 if (frame_pointer_needed)
9631 offset += UNITS_PER_WORD;
9632 frame->hfp_save_offset = offset;
9633
9634 /* The traditional frame pointer location is at the top of the frame. */
9635 frame->hard_frame_pointer_offset = offset;
9636
9637 /* Register save area */
9638 offset += frame->nregs * UNITS_PER_WORD;
9639 frame->reg_save_offset = offset;
9640
9641 /* On SEH target, registers are pushed just before the frame pointer
9642 location. */
9643 if (TARGET_SEH)
9644 frame->hard_frame_pointer_offset = offset;
9645
9646 /* Align and set SSE register save area. */
9647 if (frame->nsseregs)
9648 {
9649 /* The only ABI that has saved SSE registers (Win64) also has a
9650 16-byte aligned default stack, and thus we don't need to be
9651 within the re-aligned local stack frame to save them. */
9652 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
9653 offset = (offset + 16 - 1) & -16;
9654 offset += frame->nsseregs * 16;
9655 }
9656 frame->sse_reg_save_offset = offset;
9657
9658 /* The re-aligned stack starts here. Values before this point are not
9659 directly comparable with values below this point. In order to make
9660 sure that no value happens to be the same before and after, force
9661 the alignment computation below to add a non-zero value. */
9662 if (stack_realign_fp)
9663 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
9664
9665 /* Va-arg area */
9666 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
9667 offset += frame->va_arg_size;
9668
9669 /* Align start of frame for local function. */
9670 if (stack_realign_fp
9671 || offset != frame->sse_reg_save_offset
9672 || size != 0
9673 || !crtl->is_leaf
9674 || cfun->calls_alloca
9675 || ix86_current_function_calls_tls_descriptor)
9676 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
9677
9678 /* Frame pointer points here. */
9679 frame->frame_pointer_offset = offset;
9680
9681 offset += size;
9682
9683   /* Add the outgoing arguments area.  It can be skipped if we eliminated
9684      all the function calls as dead code.
9685      Skipping is however impossible when the function calls alloca.  The
9686      alloca expander assumes that the last crtl->outgoing_args_size bytes
9687      of the stack frame are unused.  */
9688 if (ACCUMULATE_OUTGOING_ARGS
9689 && (!crtl->is_leaf || cfun->calls_alloca
9690 || ix86_current_function_calls_tls_descriptor))
9691 {
9692 offset += crtl->outgoing_args_size;
9693 frame->outgoing_arguments_size = crtl->outgoing_args_size;
9694 }
9695 else
9696 frame->outgoing_arguments_size = 0;
9697
9698 /* Align stack boundary. Only needed if we're calling another function
9699 or using alloca. */
9700 if (!crtl->is_leaf || cfun->calls_alloca
9701 || ix86_current_function_calls_tls_descriptor)
9702 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
9703
9704 /* We've reached end of stack frame. */
9705 frame->stack_pointer_offset = offset;
9706
9707 /* Size prologue needs to allocate. */
9708 to_allocate = offset - frame->sse_reg_save_offset;
9709
9710 if ((!to_allocate && frame->nregs <= 1)
9711 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
9712 frame->save_regs_using_mov = false;
9713
9714 if (ix86_using_red_zone ()
9715 && crtl->sp_is_unchanging
9716 && crtl->is_leaf
9717 && !ix86_current_function_calls_tls_descriptor)
9718 {
9719 frame->red_zone_size = to_allocate;
9720 if (frame->save_regs_using_mov)
9721 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
9722 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
9723 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
9724 }
9725 else
9726 frame->red_zone_size = 0;
9727 frame->stack_pointer_offset -= frame->red_zone_size;
9728
9729 /* The SEH frame pointer location is near the bottom of the frame.
9730 This is enforced by the fact that the difference between the
9731 stack pointer and the frame pointer is limited to 240 bytes in
9732 the unwind data structure. */
9733 if (TARGET_SEH)
9734 {
9735 HOST_WIDE_INT diff;
9736
9737       /* If we can leave the frame pointer where it is, do so.  This also
9738          returns the establisher frame for __builtin_frame_address (0).  */
9739 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
9740 if (diff <= SEH_MAX_FRAME_SIZE
9741 && (diff > 240 || (diff & 15) != 0)
9742 && !crtl->accesses_prior_frames)
9743 {
9744 /* Ideally we'd determine what portion of the local stack frame
9745 (within the constraint of the lowest 240) is most heavily used.
9746 But without that complication, simply bias the frame pointer
9747 by 128 bytes so as to maximize the amount of the local stack
9748 frame that is addressable with 8-bit offsets. */
9749 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
9750 }
9751 }
9752 }
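
/* Sketch of the frame layout computed above, from higher towards lower
   addresses; any of the regions may be empty:

       return address
       pushed static chain
       saved frame pointer
       general register save area     (nregs * UNITS_PER_WORD)
       SSE register save area         (16-byte aligned, nsseregs * 16)
       va_arg register save area
       padding for stack realignment
       local variables
       outgoing argument area
       red zone                       (64-bit only, below the stack pointer)

   The *_offset fields filled in above record where these boundaries lie;
   see also the SEH special case, which moves the hard frame pointer down
   near the bottom of the frame.  */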
9753
9754 /* This is semi-inlined memory_address_length, but simplified
9755 since we know that we're always dealing with reg+offset, and
9756 to avoid having to create and discard all that rtl. */
9757
9758 static inline int
9759 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
9760 {
9761 int len = 4;
9762
9763 if (offset == 0)
9764 {
9765 /* EBP and R13 cannot be encoded without an offset. */
9766 len = (regno == BP_REG || regno == R13_REG);
9767 }
9768 else if (IN_RANGE (offset, -128, 127))
9769 len = 1;
9770
9771 /* ESP and R12 must be encoded with a SIB byte. */
9772 if (regno == SP_REG || regno == R12_REG)
9773 len++;
9774
9775 return len;
9776 }
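
/* A few examples of the length estimate returned above (displacement
   plus SIB bytes only, not counting the opcode and modrm byte):

       (%eax)   -> 0      8(%eax)     -> 1      1024(%eax) -> 4
       (%ebp)   -> 1      (%esp)      -> 1      8(%esp)    -> 2
       (%r13)   -> 1      1024(%r12)  -> 5
*/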
9777
9778 /* Return an RTX that points to CFA_OFFSET within the stack frame.
9779 The valid base registers are taken from CFUN->MACHINE->FS. */
9780
9781 static rtx
9782 choose_baseaddr (HOST_WIDE_INT cfa_offset)
9783 {
9784 const struct machine_function *m = cfun->machine;
9785 rtx base_reg = NULL;
9786 HOST_WIDE_INT base_offset = 0;
9787
9788 if (m->use_fast_prologue_epilogue)
9789 {
9790 /* Choose the base register most likely to allow the most scheduling
9791 opportunities. Generally FP is valid throughout the function,
9792 while DRAP must be reloaded within the epilogue. But choose either
9793 over the SP due to increased encoding size. */
9794
9795 if (m->fs.fp_valid)
9796 {
9797 base_reg = hard_frame_pointer_rtx;
9798 base_offset = m->fs.fp_offset - cfa_offset;
9799 }
9800 else if (m->fs.drap_valid)
9801 {
9802 base_reg = crtl->drap_reg;
9803 base_offset = 0 - cfa_offset;
9804 }
9805 else if (m->fs.sp_valid)
9806 {
9807 base_reg = stack_pointer_rtx;
9808 base_offset = m->fs.sp_offset - cfa_offset;
9809 }
9810 }
9811 else
9812 {
9813 HOST_WIDE_INT toffset;
9814 int len = 16, tlen;
9815
9816 /* Choose the base register with the smallest address encoding.
9817 With a tie, choose FP > DRAP > SP. */
9818 if (m->fs.sp_valid)
9819 {
9820 base_reg = stack_pointer_rtx;
9821 base_offset = m->fs.sp_offset - cfa_offset;
9822 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
9823 }
9824 if (m->fs.drap_valid)
9825 {
9826 toffset = 0 - cfa_offset;
9827 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
9828 if (tlen <= len)
9829 {
9830 base_reg = crtl->drap_reg;
9831 base_offset = toffset;
9832 len = tlen;
9833 }
9834 }
9835 if (m->fs.fp_valid)
9836 {
9837 toffset = m->fs.fp_offset - cfa_offset;
9838 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
9839 if (tlen <= len)
9840 {
9841 base_reg = hard_frame_pointer_rtx;
9842 base_offset = toffset;
9843 len = tlen;
9844 }
9845 }
9846 }
9847 gcc_assert (base_reg != NULL);
9848
9849 return plus_constant (Pmode, base_reg, base_offset);
9850 }
9851
9852 /* Emit code to save registers in the prologue. */
9853
9854 static void
9855 ix86_emit_save_regs (void)
9856 {
9857 unsigned int regno;
9858 rtx insn;
9859
9860 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
9861 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9862 {
9863 insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
9864 RTX_FRAME_RELATED_P (insn) = 1;
9865 }
9866 }
9867
9868 /* Emit a single register save at CFA - CFA_OFFSET. */
9869
9870 static void
9871 ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno,
9872 HOST_WIDE_INT cfa_offset)
9873 {
9874 struct machine_function *m = cfun->machine;
9875 rtx reg = gen_rtx_REG (mode, regno);
9876 rtx mem, addr, base, insn;
9877
9878 addr = choose_baseaddr (cfa_offset);
9879 mem = gen_frame_mem (mode, addr);
9880
9881 /* For SSE saves, we need to indicate the 128-bit alignment. */
9882 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
9883
9884 insn = emit_move_insn (mem, reg);
9885 RTX_FRAME_RELATED_P (insn) = 1;
9886
9887 base = addr;
9888 if (GET_CODE (base) == PLUS)
9889 base = XEXP (base, 0);
9890 gcc_checking_assert (REG_P (base));
9891
9892 /* When saving registers into a re-aligned local stack frame, avoid
9893 any tricky guessing by dwarf2out. */
9894 if (m->fs.realigned)
9895 {
9896 gcc_checking_assert (stack_realign_drap);
9897
9898 if (regno == REGNO (crtl->drap_reg))
9899 {
9900 /* A bit of a hack. We force the DRAP register to be saved in
9901 the re-aligned stack frame, which provides us with a copy
9902 of the CFA that will last past the prologue. Install it. */
9903 gcc_checking_assert (cfun->machine->fs.fp_valid);
9904 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
9905 cfun->machine->fs.fp_offset - cfa_offset);
9906 mem = gen_rtx_MEM (mode, addr);
9907 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
9908 }
9909 else
9910 {
9911 /* The frame pointer is a stable reference within the
9912 aligned frame. Use it. */
9913 gcc_checking_assert (cfun->machine->fs.fp_valid);
9914 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
9915 cfun->machine->fs.fp_offset - cfa_offset);
9916 mem = gen_rtx_MEM (mode, addr);
9917 add_reg_note (insn, REG_CFA_EXPRESSION,
9918 gen_rtx_SET (VOIDmode, mem, reg));
9919 }
9920 }
9921
9922 /* The memory may not be relative to the current CFA register,
9923 which means that we may need to generate a new pattern for
9924 use by the unwind info. */
9925 else if (base != m->fs.cfa_reg)
9926 {
9927 addr = plus_constant (Pmode, m->fs.cfa_reg,
9928 m->fs.cfa_offset - cfa_offset);
9929 mem = gen_rtx_MEM (mode, addr);
9930 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
9931 }
9932 }
9933
9934 /* Emit code to save registers using MOV insns.
9935 First register is stored at CFA - CFA_OFFSET. */
9936 static void
9937 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
9938 {
9939 unsigned int regno;
9940
9941 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9942 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9943 {
9944 ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
9945 cfa_offset -= UNITS_PER_WORD;
9946 }
9947 }
9948
9949 /* Emit code to save SSE registers using MOV insns.
9950 First register is stored at CFA - CFA_OFFSET. */
9951 static void
9952 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
9953 {
9954 unsigned int regno;
9955
9956 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9957 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9958 {
9959 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
9960 cfa_offset -= 16;
9961 }
9962 }
9963
9964 static GTY(()) rtx queued_cfa_restores;
9965
9966 /* Add a REG_CFA_RESTORE REG note to INSN, or queue it until the next stack
9967    manipulation insn.  The value is on the stack at CFA - CFA_OFFSET.
9968    Don't add the note if the previously saved value will be left untouched
9969    within the stack red zone until return, as unwinders can find the same
9970    value in the register and on the stack.  */
9971
9972 static void
9973 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
9974 {
9975 if (!crtl->shrink_wrapped
9976 && cfa_offset <= cfun->machine->fs.red_zone_offset)
9977 return;
9978
9979 if (insn)
9980 {
9981 add_reg_note (insn, REG_CFA_RESTORE, reg);
9982 RTX_FRAME_RELATED_P (insn) = 1;
9983 }
9984 else
9985 queued_cfa_restores
9986 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
9987 }
9988
9989 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
9990
9991 static void
9992 ix86_add_queued_cfa_restore_notes (rtx insn)
9993 {
9994 rtx last;
9995 if (!queued_cfa_restores)
9996 return;
9997 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
9998 ;
9999 XEXP (last, 1) = REG_NOTES (insn);
10000 REG_NOTES (insn) = queued_cfa_restores;
10001 queued_cfa_restores = NULL_RTX;
10002 RTX_FRAME_RELATED_P (insn) = 1;
10003 }
10004
10005 /* Expand prologue or epilogue stack adjustment.
10006    The pattern exists to put a dependency on all ebp-based memory accesses.
10007 STYLE should be negative if instructions should be marked as frame related,
10008 zero if %r11 register is live and cannot be freely used and positive
10009 otherwise. */
10010
10011 static void
10012 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
10013 int style, bool set_cfa)
10014 {
10015 struct machine_function *m = cfun->machine;
10016 rtx insn;
10017 bool add_frame_related_expr = false;
10018
10019 if (Pmode == SImode)
10020 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
10021 else if (x86_64_immediate_operand (offset, DImode))
10022 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
10023 else
10024 {
10025 rtx tmp;
10026 /* r11 is used by indirect sibcall return as well, set before the
10027 epilogue and used after the epilogue. */
10028 if (style)
10029 tmp = gen_rtx_REG (DImode, R11_REG);
10030 else
10031 {
10032 gcc_assert (src != hard_frame_pointer_rtx
10033 && dest != hard_frame_pointer_rtx);
10034 tmp = hard_frame_pointer_rtx;
10035 }
10036 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
10037 if (style < 0)
10038 add_frame_related_expr = true;
10039
10040 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
10041 }
10042
10043 insn = emit_insn (insn);
10044 if (style >= 0)
10045 ix86_add_queued_cfa_restore_notes (insn);
10046
10047 if (set_cfa)
10048 {
10049 rtx r;
10050
10051 gcc_assert (m->fs.cfa_reg == src);
10052 m->fs.cfa_offset += INTVAL (offset);
10053 m->fs.cfa_reg = dest;
10054
10055 r = gen_rtx_PLUS (Pmode, src, offset);
10056 r = gen_rtx_SET (VOIDmode, dest, r);
10057 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
10058 RTX_FRAME_RELATED_P (insn) = 1;
10059 }
10060 else if (style < 0)
10061 {
10062 RTX_FRAME_RELATED_P (insn) = 1;
10063 if (add_frame_related_expr)
10064 {
10065 rtx r = gen_rtx_PLUS (Pmode, src, offset);
10066 r = gen_rtx_SET (VOIDmode, dest, r);
10067 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
10068 }
10069 }
10070
10071 if (dest == stack_pointer_rtx)
10072 {
10073 HOST_WIDE_INT ooffset = m->fs.sp_offset;
10074 bool valid = m->fs.sp_valid;
10075
10076 if (src == hard_frame_pointer_rtx)
10077 {
10078 valid = m->fs.fp_valid;
10079 ooffset = m->fs.fp_offset;
10080 }
10081 else if (src == crtl->drap_reg)
10082 {
10083 valid = m->fs.drap_valid;
10084 ooffset = 0;
10085 }
10086 else
10087 {
10088 /* Else there are two possibilities: SP itself, which we set
10089 up as the default above. Or EH_RETURN_STACKADJ_RTX, which is
10090 taken care of by hand along the eh_return path. */
10091 gcc_checking_assert (src == stack_pointer_rtx
10092 || offset == const0_rtx);
10093 }
10094
10095 m->fs.sp_offset = ooffset - INTVAL (offset);
10096 m->fs.sp_valid = valid;
10097 }
10098 }
10099
10100 /* Find an available register to be used as dynamic realign argument
10101 pointer register. Such a register will be written in the prologue and
10102 used at the beginning of the body, so it must not be
10103 1. a parameter passing register.
10104 2. the GOT pointer.
10105 We reuse the static-chain register if it is available. Otherwise, we
10106 use DI for i386 and R13 for x86-64. We chose R13 since it has
10107 shorter encoding.
10108
10109 Return: the regno of chosen register. */
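/* Concretely, the code below returns R10 for an ordinary 64-bit function and
   R13 when a static chain or tail call is involved; in 32-bit mode it prefers
   CX when that register is not needed for fastcall/thiscall/regparm argument
   passing and falls back to DI otherwise.  */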
10110
10111 static unsigned int
10112 find_drap_reg (void)
10113 {
10114 tree decl = cfun->decl;
10115
10116 if (TARGET_64BIT)
10117 {
10118 /* Use R13 for a nested function or a function that needs a static chain.
10119 Since a function with a tail call may use any caller-saved
10120 register in the epilogue, DRAP must not use a caller-saved
10121 register in such a case. */
10122 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10123 return R13_REG;
10124
10125 return R10_REG;
10126 }
10127 else
10128 {
10129 /* Use DI for a nested function or a function that needs a static chain.
10130 Since a function with a tail call may use any caller-saved
10131 register in the epilogue, DRAP must not use a caller-saved
10132 register in such a case. */
10133 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10134 return DI_REG;
10135
10136 /* Reuse static chain register if it isn't used for parameter
10137 passing. */
10138 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
10139 {
10140 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
10141 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
10142 return CX_REG;
10143 }
10144 return DI_REG;
10145 }
10146 }
10147
10148 /* Return minimum incoming stack alignment. */
10149
10150 static unsigned int
10151 ix86_minimum_incoming_stack_boundary (bool sibcall)
10152 {
10153 unsigned int incoming_stack_boundary;
10154
10155 /* Prefer the one specified at command line. */
10156 if (ix86_user_incoming_stack_boundary)
10157 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
10158 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
10159 if -mstackrealign is used, this is not a sibcall check, and the
10160 estimated stack alignment is 128 bits. */
10161 else if (!sibcall
10162 && !TARGET_64BIT
10163 && ix86_force_align_arg_pointer
10164 && crtl->stack_alignment_estimated == 128)
10165 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10166 else
10167 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
10168
10169 /* Incoming stack alignment can be changed on individual functions
10170 via force_align_arg_pointer attribute. We use the smallest
10171 incoming stack boundary. */
10172 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
10173 && lookup_attribute (ix86_force_align_arg_pointer_string,
10174 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
10175 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10176
10177 /* The incoming stack frame has to be aligned at least at
10178 parm_stack_boundary. */
10179 if (incoming_stack_boundary < crtl->parm_stack_boundary)
10180 incoming_stack_boundary = crtl->parm_stack_boundary;
10181
10182 /* The stack at the entry point of main is aligned by the runtime. We use the
10183 smallest incoming stack boundary. */
10184 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
10185 && DECL_NAME (current_function_decl)
10186 && MAIN_NAME_P (DECL_NAME (current_function_decl))
10187 && DECL_FILE_SCOPE_P (current_function_decl))
10188 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
10189
10190 return incoming_stack_boundary;
10191 }
10192
10193 /* Update incoming stack boundary and estimated stack alignment. */
10194
10195 static void
10196 ix86_update_stack_boundary (void)
10197 {
10198 ix86_incoming_stack_boundary
10199 = ix86_minimum_incoming_stack_boundary (false);
10200
10201 /* x86_64 varargs need 16-byte stack alignment for the register save
10202 area. */
10203 if (TARGET_64BIT
10204 && cfun->stdarg
10205 && crtl->stack_alignment_estimated < 128)
10206 crtl->stack_alignment_estimated = 128;
10207 }
10208
10209 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
10210 needed or an rtx for DRAP otherwise. */
10211
10212 static rtx
10213 ix86_get_drap_rtx (void)
10214 {
10215 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
10216 crtl->need_drap = true;
10217
10218 if (stack_realign_drap)
10219 {
10220 /* Assign DRAP to vDRAP and return vDRAP. */
10221 unsigned int regno = find_drap_reg ();
10222 rtx drap_vreg;
10223 rtx arg_ptr;
10224 rtx_insn *seq, *insn;
10225
10226 arg_ptr = gen_rtx_REG (Pmode, regno);
10227 crtl->drap_reg = arg_ptr;
10228
10229 start_sequence ();
10230 drap_vreg = copy_to_reg (arg_ptr);
10231 seq = get_insns ();
10232 end_sequence ();
10233
10234 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
10235 if (!optimize)
10236 {
10237 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
10238 RTX_FRAME_RELATED_P (insn) = 1;
10239 }
10240 return drap_vreg;
10241 }
10242 else
10243 return NULL;
10244 }
10245
10246 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
10247
10248 static rtx
10249 ix86_internal_arg_pointer (void)
10250 {
10251 return virtual_incoming_args_rtx;
10252 }
10253
10254 struct scratch_reg {
10255 rtx reg;
10256 bool saved;
10257 };
10258
10259 /* Return a short-lived scratch register for use on function entry.
10260 In 32-bit mode, it is valid only after the registers are saved
10261 in the prologue. This register must be released by means of
10262 release_scratch_register_on_entry once it is dead. */
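/* In 64-bit mode this is always R11. In 32-bit mode the code below tries
   eax, edx and ecx first, skipping any of them that carries arguments
   (regparm/fastcall/thiscall), the static chain or the DRAP register;
   failing that it uses a call-saved register that the prologue saves anyway,
   and as a last resort it pushes a scratch register, to be popped again by
   release_scratch_register_on_entry.  */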
10263
10264 static void
10265 get_scratch_register_on_entry (struct scratch_reg *sr)
10266 {
10267 int regno;
10268
10269 sr->saved = false;
10270
10271 if (TARGET_64BIT)
10272 {
10273 /* We always use R11 in 64-bit mode. */
10274 regno = R11_REG;
10275 }
10276 else
10277 {
10278 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
10279 bool fastcall_p
10280 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10281 bool thiscall_p
10282 = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10283 bool static_chain_p = DECL_STATIC_CHAIN (decl);
10284 int regparm = ix86_function_regparm (fntype, decl);
10285 int drap_regno
10286 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
10287
10288 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
10289 for the static chain register. */
10290 if ((regparm < 1 || (fastcall_p && !static_chain_p))
10291 && drap_regno != AX_REG)
10292 regno = AX_REG;
10293 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
10294 for the static chain register. */
10295 else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
10296 regno = AX_REG;
10297 else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
10298 regno = DX_REG;
10299 /* ecx is the static chain register. */
10300 else if (regparm < 3 && !fastcall_p && !thiscall_p
10301 && !static_chain_p
10302 && drap_regno != CX_REG)
10303 regno = CX_REG;
10304 else if (ix86_save_reg (BX_REG, true))
10305 regno = BX_REG;
10306 /* esi is the static chain register. */
10307 else if (!(regparm == 3 && static_chain_p)
10308 && ix86_save_reg (SI_REG, true))
10309 regno = SI_REG;
10310 else if (ix86_save_reg (DI_REG, true))
10311 regno = DI_REG;
10312 else
10313 {
10314 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
10315 sr->saved = true;
10316 }
10317 }
10318
10319 sr->reg = gen_rtx_REG (Pmode, regno);
10320 if (sr->saved)
10321 {
10322 rtx insn = emit_insn (gen_push (sr->reg));
10323 RTX_FRAME_RELATED_P (insn) = 1;
10324 }
10325 }
10326
10327 /* Release a scratch register obtained from the preceding function. */
10328
10329 static void
10330 release_scratch_register_on_entry (struct scratch_reg *sr)
10331 {
10332 if (sr->saved)
10333 {
10334 struct machine_function *m = cfun->machine;
10335 rtx x, insn = emit_insn (gen_pop (sr->reg));
10336
10337 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
10338 RTX_FRAME_RELATED_P (insn) = 1;
10339 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
10340 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
10341 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
10342 m->fs.sp_offset -= UNITS_PER_WORD;
10343 }
10344 }
10345
10346 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
10347
10348 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
10349
10350 static void
10351 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
10352 {
10353 /* We skip the probe for the first interval + a small dope of 4 words and
10354 probe that many bytes past the specified size to maintain a protection
10355 area at the bottom of the stack. */
10356 const int dope = 4 * UNITS_PER_WORD;
10357 rtx size_rtx = GEN_INT (size), last;
10358
10359 /* See if we have a constant small number of probes to generate. If so,
10360 that's the easy case. The run-time loop is made up of 11 insns in the
10361 generic case while the compile-time loop is made up of 3+2*(n-1) insns
10362 for n # of intervals. */
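/* A hypothetical illustration, assuming the default PROBE_INTERVAL of 4096
   bytes (STACK_CHECK_PROBE_INTERVAL_EXP == 12): a 12288-byte allocation
   spans 3 intervals and takes the unrolled path below at a cost of
   3 + 2*(3-1) = 7 insns, whereas anything above 5*4096 = 20480 bytes falls
   through to the run-time loop.  */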
10363 if (size <= 5 * PROBE_INTERVAL)
10364 {
10365 HOST_WIDE_INT i, adjust;
10366 bool first_probe = true;
10367
10368 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
10369 values of N from 1 until it exceeds SIZE. If only one probe is
10370 needed, this will not generate any code. Then adjust and probe
10371 to PROBE_INTERVAL + SIZE. */
10372 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10373 {
10374 if (first_probe)
10375 {
10376 adjust = 2 * PROBE_INTERVAL + dope;
10377 first_probe = false;
10378 }
10379 else
10380 adjust = PROBE_INTERVAL;
10381
10382 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10383 plus_constant (Pmode, stack_pointer_rtx,
10384 -adjust)));
10385 emit_stack_probe (stack_pointer_rtx);
10386 }
10387
10388 if (first_probe)
10389 adjust = size + PROBE_INTERVAL + dope;
10390 else
10391 adjust = size + PROBE_INTERVAL - i;
10392
10393 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10394 plus_constant (Pmode, stack_pointer_rtx,
10395 -adjust)));
10396 emit_stack_probe (stack_pointer_rtx);
10397
10398 /* Adjust back to account for the additional first interval. */
10399 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10400 plus_constant (Pmode, stack_pointer_rtx,
10401 PROBE_INTERVAL + dope)));
10402 }
10403
10404 /* Otherwise, do the same as above, but in a loop. Note that we must be
10405 extra careful with variables wrapping around because we might be at
10406 the very top (or the very bottom) of the address space and we have
10407 to be able to handle this case properly; in particular, we use an
10408 equality test for the loop condition. */
10409 else
10410 {
10411 HOST_WIDE_INT rounded_size;
10412 struct scratch_reg sr;
10413
10414 get_scratch_register_on_entry (&sr);
10415
10416
10417 /* Step 1: round SIZE to the previous multiple of the interval. */
10418
10419 rounded_size = size & -PROBE_INTERVAL;
10420
10421
10422 /* Step 2: compute initial and final value of the loop counter. */
10423
10424 /* SP = SP_0 + PROBE_INTERVAL. */
10425 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10426 plus_constant (Pmode, stack_pointer_rtx,
10427 - (PROBE_INTERVAL + dope))));
10428
10429 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
10430 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
10431 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
10432 gen_rtx_PLUS (Pmode, sr.reg,
10433 stack_pointer_rtx)));
10434
10435
10436 /* Step 3: the loop
10437
10438 while (SP != LAST_ADDR)
10439 {
10440 SP = SP + PROBE_INTERVAL
10441 probe at SP
10442 }
10443
10444 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
10445 values of N from 1 until it is equal to ROUNDED_SIZE. */
10446
10447 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
10448
10449
10450 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
10451 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
10452
10453 if (size != rounded_size)
10454 {
10455 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10456 plus_constant (Pmode, stack_pointer_rtx,
10457 rounded_size - size)));
10458 emit_stack_probe (stack_pointer_rtx);
10459 }
10460
10461 /* Adjust back to account for the additional first interval. */
10462 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10463 plus_constant (Pmode, stack_pointer_rtx,
10464 PROBE_INTERVAL + dope)));
10465
10466 release_scratch_register_on_entry (&sr);
10467 }
10468
10469 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
10470
10471 /* Even if the stack pointer isn't the CFA register, we need to correctly
10472 describe the adjustments made to it, in particular differentiate the
10473 frame-related ones from the frame-unrelated ones. */
10474 if (size > 0)
10475 {
10476 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
10477 XVECEXP (expr, 0, 0)
10478 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10479 plus_constant (Pmode, stack_pointer_rtx, -size));
10480 XVECEXP (expr, 0, 1)
10481 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10482 plus_constant (Pmode, stack_pointer_rtx,
10483 PROBE_INTERVAL + dope + size));
10484 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
10485 RTX_FRAME_RELATED_P (last) = 1;
10486
10487 cfun->machine->fs.sp_offset += size;
10488 }
10489
10490 /* Make sure nothing is scheduled before we are done. */
10491 emit_insn (gen_blockage ());
10492 }
10493
10494 /* Adjust the stack pointer up to REG while probing it. */
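/* A rough sketch of the loop this emits, in 64-bit AT&T syntax and assuming
   PROBE_INTERVAL is 4096 and the limit register is %r11:

	.LPSRL0:
		cmpq	%r11, %rsp
		je	.LPSRE0
		subq	$4096, %rsp
		orq	$0, (%rsp)
		jmp	.LPSRL0
	.LPSRE0:
*/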
10495
10496 const char *
10497 output_adjust_stack_and_probe (rtx reg)
10498 {
10499 static int labelno = 0;
10500 char loop_lab[32], end_lab[32];
10501 rtx xops[2];
10502
10503 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
10504 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
10505
10506 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
10507
10508 /* Jump to END_LAB if SP == LAST_ADDR. */
10509 xops[0] = stack_pointer_rtx;
10510 xops[1] = reg;
10511 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
10512 fputs ("\tje\t", asm_out_file);
10513 assemble_name_raw (asm_out_file, end_lab);
10514 fputc ('\n', asm_out_file);
10515
10516 /* SP = SP + PROBE_INTERVAL. */
10517 xops[1] = GEN_INT (PROBE_INTERVAL);
10518 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
10519
10520 /* Probe at SP. */
10521 xops[1] = const0_rtx;
10522 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
10523
10524 fprintf (asm_out_file, "\tjmp\t");
10525 assemble_name_raw (asm_out_file, loop_lab);
10526 fputc ('\n', asm_out_file);
10527
10528 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
10529
10530 return "";
10531 }
10532
10533 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
10534 inclusive. These are offsets from the current stack pointer. */
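/* A hypothetical example, assuming PROBE_INTERVAL is 4096: for FIRST == 4096
   and SIZE == 8192 the unrolled path below emits probes at sp - 8192 and
   sp - 12288, i.e. at FIRST + N * PROBE_INTERVAL below the stack pointer and
   finally at FIRST + SIZE.  */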
10535
10536 static void
10537 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
10538 {
10539 /* See if we have a constant small number of probes to generate. If so,
10540 that's the easy case. The run-time loop is made up of 7 insns in the
10541 generic case while the compile-time loop is made up of n insns for n #
10542 of intervals. */
10543 if (size <= 7 * PROBE_INTERVAL)
10544 {
10545 HOST_WIDE_INT i;
10546
10547 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
10548 it exceeds SIZE. If only one probe is needed, this will not
10549 generate any code. Then probe at FIRST + SIZE. */
10550 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10551 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
10552 -(first + i)));
10553
10554 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
10555 -(first + size)));
10556 }
10557
10558 /* Otherwise, do the same as above, but in a loop. Note that we must be
10559 extra careful with variables wrapping around because we might be at
10560 the very top (or the very bottom) of the address space and we have
10561 to be able to handle this case properly; in particular, we use an
10562 equality test for the loop condition. */
10563 else
10564 {
10565 HOST_WIDE_INT rounded_size, last;
10566 struct scratch_reg sr;
10567
10568 get_scratch_register_on_entry (&sr);
10569
10570
10571 /* Step 1: round SIZE to the previous multiple of the interval. */
10572
10573 rounded_size = size & -PROBE_INTERVAL;
10574
10575
10576 /* Step 2: compute initial and final value of the loop counter. */
10577
10578 /* TEST_OFFSET = FIRST. */
10579 emit_move_insn (sr.reg, GEN_INT (-first));
10580
10581 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
10582 last = first + rounded_size;
10583
10584
10585 /* Step 3: the loop
10586
10587 while (TEST_ADDR != LAST_ADDR)
10588 {
10589 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
10590 probe at TEST_ADDR
10591 }
10592
10593 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
10594 until it is equal to ROUNDED_SIZE. */
10595
10596 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
10597
10598
10599 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
10600 that SIZE is equal to ROUNDED_SIZE. */
10601
10602 if (size != rounded_size)
10603 emit_stack_probe (plus_constant (Pmode,
10604 gen_rtx_PLUS (Pmode,
10605 stack_pointer_rtx,
10606 sr.reg),
10607 rounded_size - size));
10608
10609 release_scratch_register_on_entry (&sr);
10610 }
10611
10612 /* Make sure nothing is scheduled before we are done. */
10613 emit_insn (gen_blockage ());
10614 }
10615
10616 /* Probe a range of stack addresses from REG to END, inclusive. These are
10617 offsets from the current stack pointer. */
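/* A rough sketch of the emitted loop, in 64-bit AT&T syntax and assuming
   PROBE_INTERVAL is 4096, the test register is %r11 and the final offset
   is -12288:

	.LPSRL1:
		cmpq	$-12288, %r11
		je	.LPSRE1
		subq	$4096, %r11
		orq	$0, (%rsp,%r11)
		jmp	.LPSRL1
	.LPSRE1:
*/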
10618
10619 const char *
10620 output_probe_stack_range (rtx reg, rtx end)
10621 {
10622 static int labelno = 0;
10623 char loop_lab[32], end_lab[32];
10624 rtx xops[3];
10625
10626 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
10627 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
10628
10629 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
10630
10631 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
10632 xops[0] = reg;
10633 xops[1] = end;
10634 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
10635 fputs ("\tje\t", asm_out_file);
10636 assemble_name_raw (asm_out_file, end_lab);
10637 fputc ('\n', asm_out_file);
10638
10639 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
10640 xops[1] = GEN_INT (PROBE_INTERVAL);
10641 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
10642
10643 /* Probe at TEST_ADDR. */
10644 xops[0] = stack_pointer_rtx;
10645 xops[1] = reg;
10646 xops[2] = const0_rtx;
10647 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
10648
10649 fprintf (asm_out_file, "\tjmp\t");
10650 assemble_name_raw (asm_out_file, loop_lab);
10651 fputc ('\n', asm_out_file);
10652
10653 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
10654
10655 return "";
10656 }
10657
10658 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
10659 to be generated in correct form. */
10660 static void
10661 ix86_finalize_stack_realign_flags (void)
10662 {
10663 /* Check if stack realignment is really needed after reload, and
10664 store the result in cfun. */
10665 unsigned int incoming_stack_boundary
10666 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
10667 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
10668 unsigned int stack_realign = (incoming_stack_boundary
10669 < (crtl->is_leaf
10670 ? crtl->max_used_stack_slot_alignment
10671 : crtl->stack_alignment_needed));
10672
10673 if (crtl->stack_realign_finalized)
10674 {
10675 /* After stack_realign_needed is finalized, we can no longer
10676 change it. */
10677 gcc_assert (crtl->stack_realign_needed == stack_realign);
10678 return;
10679 }
10680
10681 /* If the only reason for frame_pointer_needed is that we conservatively
10682 assumed stack realignment might be needed, but in the end nothing that
10683 needed the stack alignment had been spilled, clear frame_pointer_needed
10684 and say we don't need stack realignment. */
10685 if (stack_realign
10686 && frame_pointer_needed
10687 && crtl->is_leaf
10688 && flag_omit_frame_pointer
10689 && crtl->sp_is_unchanging
10690 && !ix86_current_function_calls_tls_descriptor
10691 && !crtl->accesses_prior_frames
10692 && !cfun->calls_alloca
10693 && !crtl->calls_eh_return
10694 && !(flag_stack_check && STACK_CHECK_MOVING_SP)
10695 && !ix86_frame_pointer_required ()
10696 && get_frame_size () == 0
10697 && ix86_nsaved_sseregs () == 0
10698 && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
10699 {
10700 HARD_REG_SET set_up_by_prologue, prologue_used;
10701 basic_block bb;
10702
10703 CLEAR_HARD_REG_SET (prologue_used);
10704 CLEAR_HARD_REG_SET (set_up_by_prologue);
10705 add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
10706 add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
10707 add_to_hard_reg_set (&set_up_by_prologue, Pmode,
10708 HARD_FRAME_POINTER_REGNUM);
10709 FOR_EACH_BB_FN (bb, cfun)
10710 {
10711 rtx_insn *insn;
10712 FOR_BB_INSNS (bb, insn)
10713 if (NONDEBUG_INSN_P (insn)
10714 && requires_stack_frame_p (insn, prologue_used,
10715 set_up_by_prologue))
10716 {
10717 crtl->stack_realign_needed = stack_realign;
10718 crtl->stack_realign_finalized = true;
10719 return;
10720 }
10721 }
10722
10723 /* If drap has been set, but it actually isn't live at the start
10724 of the function, there is no reason to set it up. */
10725 if (crtl->drap_reg)
10726 {
10727 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
10728 if (! REGNO_REG_SET_P (DF_LR_IN (bb), REGNO (crtl->drap_reg)))
10729 {
10730 crtl->drap_reg = NULL_RTX;
10731 crtl->need_drap = false;
10732 }
10733 }
10734 else
10735 cfun->machine->no_drap_save_restore = true;
10736
10737 frame_pointer_needed = false;
10738 stack_realign = false;
10739 crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
10740 crtl->stack_alignment_needed = incoming_stack_boundary;
10741 crtl->stack_alignment_estimated = incoming_stack_boundary;
10742 if (crtl->preferred_stack_boundary > incoming_stack_boundary)
10743 crtl->preferred_stack_boundary = incoming_stack_boundary;
10744 df_finish_pass (true);
10745 df_scan_alloc (NULL);
10746 df_scan_blocks ();
10747 df_compute_regs_ever_live (true);
10748 df_analyze ();
10749 }
10750
10751 crtl->stack_realign_needed = stack_realign;
10752 crtl->stack_realign_finalized = true;
10753 }
10754
10755 /* Expand the prologue into a bunch of separate insns. */
10756
10757 void
10758 ix86_expand_prologue (void)
10759 {
10760 struct machine_function *m = cfun->machine;
10761 rtx insn, t;
10762 bool pic_reg_used;
10763 struct ix86_frame frame;
10764 HOST_WIDE_INT allocate;
10765 bool int_registers_saved;
10766 bool sse_registers_saved;
10767
10768 ix86_finalize_stack_realign_flags ();
10769
10770 /* DRAP should not coexist with stack_realign_fp */
10771 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
10772
10773 memset (&m->fs, 0, sizeof (m->fs));
10774
10775 /* Initialize CFA state for before the prologue. */
10776 m->fs.cfa_reg = stack_pointer_rtx;
10777 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
10778
10779 /* Track SP offset to the CFA. We continue tracking this after we've
10780 swapped the CFA register away from SP. In the case of re-alignment
10781 this is fudged; we're interested in offsets within the local frame. */
10782 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10783 m->fs.sp_valid = true;
10784
10785 ix86_compute_frame_layout (&frame);
10786
10787 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
10788 {
10789 /* We should have already generated an error for any use of
10790 ms_hook on a nested function. */
10791 gcc_checking_assert (!ix86_static_chain_on_stack);
10792
10793 /* Check if profiling is active and we shall use the profiling before
10794 prologue variant. If so, issue a sorry. */
10795 if (crtl->profile && flag_fentry != 0)
10796 sorry ("ms_hook_prologue attribute isn%'t compatible "
10797 "with -mfentry for 32-bit");
10798
10799 /* In ix86_asm_output_function_label we emitted:
10800 8b ff movl.s %edi,%edi
10801 55 push %ebp
10802 8b ec movl.s %esp,%ebp
10803
10804 This matches the hookable function prologue in Win32 API
10805 functions in Microsoft Windows XP Service Pack 2 and newer.
10806 Wine uses this to enable Windows apps to hook the Win32 API
10807 functions provided by Wine.
10808
10809 What that means is that we've already set up the frame pointer. */
10810
10811 if (frame_pointer_needed
10812 && !(crtl->drap_reg && crtl->stack_realign_needed))
10813 {
10814 rtx push, mov;
10815
10816 /* We've decided to use the frame pointer already set up.
10817 Describe this to the unwinder by pretending that both
10818 push and mov insns happen right here.
10819
10820 Putting the unwind info here at the end of the ms_hook
10821 is done so that we can make absolutely certain we get
10822 the required byte sequence at the start of the function,
10823 rather than relying on an assembler that can produce
10824 the exact encoding required.
10825
10826 However it does mean (in the unpatched case) that we have
10827 a 1 insn window where the asynchronous unwind info is
10828 incorrect. However, if we placed the unwind info at
10829 its correct location we would have incorrect unwind info
10830 in the patched case. Which is probably all moot since
10831 I don't expect Wine generates dwarf2 unwind info for the
10832 system libraries that use this feature. */
10833
10834 insn = emit_insn (gen_blockage ());
10835
10836 push = gen_push (hard_frame_pointer_rtx);
10837 mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
10838 stack_pointer_rtx);
10839 RTX_FRAME_RELATED_P (push) = 1;
10840 RTX_FRAME_RELATED_P (mov) = 1;
10841
10842 RTX_FRAME_RELATED_P (insn) = 1;
10843 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10844 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
10845
10846 /* Note that gen_push incremented m->fs.cfa_offset, even
10847 though we didn't emit the push insn here. */
10848 m->fs.cfa_reg = hard_frame_pointer_rtx;
10849 m->fs.fp_offset = m->fs.cfa_offset;
10850 m->fs.fp_valid = true;
10851 }
10852 else
10853 {
10854 /* The frame pointer is not needed so pop %ebp again.
10855 This leaves us with a pristine state. */
10856 emit_insn (gen_pop (hard_frame_pointer_rtx));
10857 }
10858 }
10859
10860 /* The first insn of a function that accepts its static chain on the
10861 stack is to push the register that would be filled in by a direct
10862 call. This insn will be skipped by the trampoline. */
10863 else if (ix86_static_chain_on_stack)
10864 {
10865 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
10866 emit_insn (gen_blockage ());
10867
10868 /* We don't want to interpret this push insn as a register save,
10869 only as a stack adjustment. The real copy of the register as
10870 a save will be done later, if needed. */
10871 t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
10872 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
10873 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
10874 RTX_FRAME_RELATED_P (insn) = 1;
10875 }
10876
10877 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
10878 DRAP is needed and stack realignment is really needed after reload. */
10879 if (stack_realign_drap)
10880 {
10881 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10882
10883 /* Only need to push the parameter pointer reg if it is callee-saved. */
10884 if (!call_used_regs[REGNO (crtl->drap_reg)])
10885 {
10886 /* Push arg pointer reg */
10887 insn = emit_insn (gen_push (crtl->drap_reg));
10888 RTX_FRAME_RELATED_P (insn) = 1;
10889 }
10890
10891 /* Grab the argument pointer. */
10892 t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
10893 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
10894 RTX_FRAME_RELATED_P (insn) = 1;
10895 m->fs.cfa_reg = crtl->drap_reg;
10896 m->fs.cfa_offset = 0;
10897
10898 /* Align the stack. */
10899 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10900 stack_pointer_rtx,
10901 GEN_INT (-align_bytes)));
10902 RTX_FRAME_RELATED_P (insn) = 1;
10903
10904 /* Replicate the return address on the stack so that return
10905 address can be reached via (argp - 1) slot. This is needed
10906 to implement macro RETURN_ADDR_RTX and intrinsic function
10907 expand_builtin_return_addr etc. */
10908 t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
10909 t = gen_frame_mem (word_mode, t);
10910 insn = emit_insn (gen_push (t));
10911 RTX_FRAME_RELATED_P (insn) = 1;
10912
10913 /* For the purposes of frame and register save area addressing,
10914 we've started over with a new frame. */
10915 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10916 m->fs.realigned = true;
10917 }
10918
10919 int_registers_saved = (frame.nregs == 0);
10920 sse_registers_saved = (frame.nsseregs == 0);
10921
10922 if (frame_pointer_needed && !m->fs.fp_valid)
10923 {
10924 /* Note: AT&T enter does NOT have reversed args. Enter is probably
10925 slower on all targets. Also sdb doesn't like it. */
10926 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
10927 RTX_FRAME_RELATED_P (insn) = 1;
10928
10929 /* Push registers now, before setting the frame pointer
10930 on SEH target. */
10931 if (!int_registers_saved
10932 && TARGET_SEH
10933 && !frame.save_regs_using_mov)
10934 {
10935 ix86_emit_save_regs ();
10936 int_registers_saved = true;
10937 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
10938 }
10939
10940 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
10941 {
10942 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
10943 RTX_FRAME_RELATED_P (insn) = 1;
10944
10945 if (m->fs.cfa_reg == stack_pointer_rtx)
10946 m->fs.cfa_reg = hard_frame_pointer_rtx;
10947 m->fs.fp_offset = m->fs.sp_offset;
10948 m->fs.fp_valid = true;
10949 }
10950 }
10951
10952 if (!int_registers_saved)
10953 {
10954 /* If saving registers via PUSH, do so now. */
10955 if (!frame.save_regs_using_mov)
10956 {
10957 ix86_emit_save_regs ();
10958 int_registers_saved = true;
10959 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
10960 }
10961
10962 /* When using the red zone we may start saving registers before allocating
10963 the stack frame, saving one cycle of the prologue. However, avoid
10964 doing this if we have to probe the stack; at least on x86_64 the
10965 stack probe can turn into a call that clobbers a red zone location. */
10966 else if (ix86_using_red_zone ()
10967 && (! TARGET_STACK_PROBE
10968 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
10969 {
10970 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
10971 int_registers_saved = true;
10972 }
10973 }
10974
10975 if (stack_realign_fp)
10976 {
10977 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10978 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
10979
10980 /* The computation of the size of the re-aligned stack frame means
10981 that we must allocate the size of the register save area before
10982 performing the actual alignment. Otherwise we cannot guarantee
10983 that there's enough storage above the realignment point. */
10984 if (m->fs.sp_offset != frame.sse_reg_save_offset)
10985 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10986 GEN_INT (m->fs.sp_offset
10987 - frame.sse_reg_save_offset),
10988 -1, false);
10989
10990 /* Align the stack. */
10991 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10992 stack_pointer_rtx,
10993 GEN_INT (-align_bytes)));
10994
10995 /* For the purposes of register save area addressing, the stack
10996 pointer is no longer valid. As for the value of sp_offset,
10997 see ix86_compute_frame_layout, which we need to match in order
10998 to pass verification of stack_pointer_offset at the end. */
10999 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
11000 m->fs.sp_valid = false;
11001 }
11002
11003 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
11004
11005 if (flag_stack_usage_info)
11006 {
11007 /* We start to count from ARG_POINTER. */
11008 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
11009
11010 /* If it was realigned, take into account the fake frame. */
11011 if (stack_realign_drap)
11012 {
11013 if (ix86_static_chain_on_stack)
11014 stack_size += UNITS_PER_WORD;
11015
11016 if (!call_used_regs[REGNO (crtl->drap_reg)])
11017 stack_size += UNITS_PER_WORD;
11018
11019 /* This over-estimates by 1 minimal-stack-alignment-unit but
11020 mitigates that by counting in the new return address slot. */
11021 current_function_dynamic_stack_size
11022 += crtl->stack_alignment_needed / BITS_PER_UNIT;
11023 }
11024
11025 current_function_static_stack_size = stack_size;
11026 }
11027
11028 /* On SEH target with very large frame size, allocate an area to save
11029 SSE registers (as the very large allocation won't be described). */
11030 if (TARGET_SEH
11031 && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
11032 && !sse_registers_saved)
11033 {
11034 HOST_WIDE_INT sse_size =
11035 frame.sse_reg_save_offset - frame.reg_save_offset;
11036
11037 gcc_assert (int_registers_saved);
11038
11039 /* No need to do stack checking as the area will be immediately
11040 written. */
11041 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11042 GEN_INT (-sse_size), -1,
11043 m->fs.cfa_reg == stack_pointer_rtx);
11044 allocate -= sse_size;
11045 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11046 sse_registers_saved = true;
11047 }
11048
11049 /* The stack has already been decremented by the instruction calling us
11050 so probe if the size is non-negative to preserve the protection area. */
11051 if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
11052 {
11053 /* We expect the registers to be saved when probes are used. */
11054 gcc_assert (int_registers_saved);
11055
11056 if (STACK_CHECK_MOVING_SP)
11057 {
11058 if (!(crtl->is_leaf && !cfun->calls_alloca
11059 && allocate <= PROBE_INTERVAL))
11060 {
11061 ix86_adjust_stack_and_probe (allocate);
11062 allocate = 0;
11063 }
11064 }
11065 else
11066 {
11067 HOST_WIDE_INT size = allocate;
11068
11069 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
11070 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
11071
11072 if (TARGET_STACK_PROBE)
11073 {
11074 if (crtl->is_leaf && !cfun->calls_alloca)
11075 {
11076 if (size > PROBE_INTERVAL)
11077 ix86_emit_probe_stack_range (0, size);
11078 }
11079 else
11080 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
11081 }
11082 else
11083 {
11084 if (crtl->is_leaf && !cfun->calls_alloca)
11085 {
11086 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
11087 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT,
11088 size - STACK_CHECK_PROTECT);
11089 }
11090 else
11091 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
11092 }
11093 }
11094 }
11095
11096 if (allocate == 0)
11097 ;
11098 else if (!ix86_target_stack_probe ()
11099 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
11100 {
11101 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11102 GEN_INT (-allocate), -1,
11103 m->fs.cfa_reg == stack_pointer_rtx);
11104 }
11105 else
11106 {
11107 rtx eax = gen_rtx_REG (Pmode, AX_REG);
11108 rtx r10 = NULL;
11109 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
11110 const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
11111 bool eax_live = ix86_eax_live_at_start_p ();
11112 bool r10_live = false;
11113
11114 if (TARGET_64BIT)
11115 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
11116
11117 if (eax_live)
11118 {
11119 insn = emit_insn (gen_push (eax));
11120 allocate -= UNITS_PER_WORD;
11121 /* Note that SEH directives need to continue tracking the stack
11122 pointer even after the frame pointer has been set up. */
11123 if (sp_is_cfa_reg || TARGET_SEH)
11124 {
11125 if (sp_is_cfa_reg)
11126 m->fs.cfa_offset += UNITS_PER_WORD;
11127 RTX_FRAME_RELATED_P (insn) = 1;
11128 }
11129 }
11130
11131 if (r10_live)
11132 {
11133 r10 = gen_rtx_REG (Pmode, R10_REG);
11134 insn = emit_insn (gen_push (r10));
11135 allocate -= UNITS_PER_WORD;
11136 if (sp_is_cfa_reg || TARGET_SEH)
11137 {
11138 if (sp_is_cfa_reg)
11139 m->fs.cfa_offset += UNITS_PER_WORD;
11140 RTX_FRAME_RELATED_P (insn) = 1;
11141 }
11142 }
11143
11144 emit_move_insn (eax, GEN_INT (allocate));
11145 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
11146
11147 /* Use the fact that AX still contains ALLOCATE. */
11148 adjust_stack_insn = (Pmode == DImode
11149 ? gen_pro_epilogue_adjust_stack_di_sub
11150 : gen_pro_epilogue_adjust_stack_si_sub);
11151
11152 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
11153 stack_pointer_rtx, eax));
11154
11155 if (sp_is_cfa_reg || TARGET_SEH)
11156 {
11157 if (sp_is_cfa_reg)
11158 m->fs.cfa_offset += allocate;
11159 RTX_FRAME_RELATED_P (insn) = 1;
11160 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11161 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11162 plus_constant (Pmode, stack_pointer_rtx,
11163 -allocate)));
11164 }
11165 m->fs.sp_offset += allocate;
11166
11167 /* Use stack_pointer_rtx for relative addressing so that code
11168 works for realigned stack, too. */
11169 if (r10_live && eax_live)
11170 {
11171 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11172 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
11173 gen_frame_mem (word_mode, t));
11174 t = plus_constant (Pmode, t, UNITS_PER_WORD);
11175 emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
11176 gen_frame_mem (word_mode, t));
11177 }
11178 else if (eax_live || r10_live)
11179 {
11180 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11181 emit_move_insn (gen_rtx_REG (word_mode,
11182 (eax_live ? AX_REG : R10_REG)),
11183 gen_frame_mem (word_mode, t));
11184 }
11185 }
11186 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
11187
11188 /* If we haven't already set up the frame pointer, do so now. */
11189 if (frame_pointer_needed && !m->fs.fp_valid)
11190 {
11191 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
11192 GEN_INT (frame.stack_pointer_offset
11193 - frame.hard_frame_pointer_offset));
11194 insn = emit_insn (insn);
11195 RTX_FRAME_RELATED_P (insn) = 1;
11196 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
11197
11198 if (m->fs.cfa_reg == stack_pointer_rtx)
11199 m->fs.cfa_reg = hard_frame_pointer_rtx;
11200 m->fs.fp_offset = frame.hard_frame_pointer_offset;
11201 m->fs.fp_valid = true;
11202 }
11203
11204 if (!int_registers_saved)
11205 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
11206 if (!sse_registers_saved)
11207 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11208
11209 pic_reg_used = false;
11210 /* We don't use the PIC register for the pe-coff target. */
11211 if (pic_offset_table_rtx
11212 && !TARGET_PECOFF
11213 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
11214 || crtl->profile))
11215 {
11216 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
11217
11218 if (alt_pic_reg_used != INVALID_REGNUM)
11219 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
11220
11221 pic_reg_used = true;
11222 }
11223
11224 if (pic_reg_used)
11225 {
11226 if (TARGET_64BIT)
11227 {
11228 if (ix86_cmodel == CM_LARGE_PIC)
11229 {
11230 rtx_code_label *label;
11231 rtx tmp_reg;
11232
11233 gcc_assert (Pmode == DImode);
11234 label = gen_label_rtx ();
11235 emit_label (label);
11236 LABEL_PRESERVE_P (label) = 1;
11237 tmp_reg = gen_rtx_REG (Pmode, R11_REG);
11238 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
11239 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
11240 label));
11241 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
11242 insn = emit_insn (ix86_gen_add3 (pic_offset_table_rtx,
11243 pic_offset_table_rtx, tmp_reg));
11244 }
11245 else
11246 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
11247 }
11248 else
11249 {
11250 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
11251 RTX_FRAME_RELATED_P (insn) = 1;
11252 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
11253 }
11254 }
11255
11256 /* In the pic_reg_used case, make sure that the got load isn't deleted
11257 when mcount needs it. Blockage to avoid call movement across mcount
11258 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
11259 note. */
11260 if (crtl->profile && !flag_fentry && pic_reg_used)
11261 emit_insn (gen_prologue_use (pic_offset_table_rtx));
11262
11263 if (crtl->drap_reg && !crtl->stack_realign_needed)
11264 {
11265 /* vDRAP is set up, but after reload it turns out stack realignment
11266 isn't necessary. Here we emit prologue code to set up DRAP
11267 without the stack realignment adjustment. */
11268 t = choose_baseaddr (0);
11269 emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
11270 }
11271
11272 /* Prevent instructions from being scheduled into register save push
11273 sequence when access to the redzone area is done through frame pointer.
11274 The offset between the frame pointer and the stack pointer is calculated
11275 relative to the value of the stack pointer at the end of the function
11276 prologue, and moving instructions that access redzone area via frame
11277 pointer inside push sequence violates this assumption. */
11278 if (frame_pointer_needed && frame.red_zone_size)
11279 emit_insn (gen_memory_blockage ());
11280
11281 /* Emit cld instruction if stringops are used in the function. */
11282 if (TARGET_CLD && ix86_current_function_needs_cld)
11283 emit_insn (gen_cld ());
11284
11285 /* SEH requires that the prologue end within 256 bytes of the start of
11286 the function. Prevent instruction schedules that would extend that.
11287 Further, prevent alloca modifications to the stack pointer from being
11288 combined with prologue modifications. */
11289 if (TARGET_SEH)
11290 emit_insn (gen_prologue_use (stack_pointer_rtx));
11291 }
11292
11293 /* Emit code to restore REG using a POP insn. */
11294
11295 static void
11296 ix86_emit_restore_reg_using_pop (rtx reg)
11297 {
11298 struct machine_function *m = cfun->machine;
11299 rtx insn = emit_insn (gen_pop (reg));
11300
11301 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
11302 m->fs.sp_offset -= UNITS_PER_WORD;
11303
11304 if (m->fs.cfa_reg == crtl->drap_reg
11305 && REGNO (reg) == REGNO (crtl->drap_reg))
11306 {
11307 /* Previously we'd represented the CFA as an expression
11308 like *(%ebp - 8). We've just popped that value from
11309 the stack, which means we need to reset the CFA to
11310 the drap register. This will remain until we restore
11311 the stack pointer. */
11312 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11313 RTX_FRAME_RELATED_P (insn) = 1;
11314
11315 /* This means that the DRAP register is valid for addressing too. */
11316 m->fs.drap_valid = true;
11317 return;
11318 }
11319
11320 if (m->fs.cfa_reg == stack_pointer_rtx)
11321 {
11322 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
11323 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
11324 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
11325 RTX_FRAME_RELATED_P (insn) = 1;
11326
11327 m->fs.cfa_offset -= UNITS_PER_WORD;
11328 }
11329
11330 /* When the frame pointer is the CFA, and we pop it, we are
11331 swapping back to the stack pointer as the CFA. This happens
11332 for stack frames that don't allocate other data, so we assume
11333 the stack pointer is now pointing at the return address, i.e.
11334 the function entry state, which makes the offset one word. */
11335 if (reg == hard_frame_pointer_rtx)
11336 {
11337 m->fs.fp_valid = false;
11338 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11339 {
11340 m->fs.cfa_reg = stack_pointer_rtx;
11341 m->fs.cfa_offset -= UNITS_PER_WORD;
11342
11343 add_reg_note (insn, REG_CFA_DEF_CFA,
11344 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11345 GEN_INT (m->fs.cfa_offset)));
11346 RTX_FRAME_RELATED_P (insn) = 1;
11347 }
11348 }
11349 }
11350
11351 /* Emit code to restore saved registers using POP insns. */
11352
11353 static void
11354 ix86_emit_restore_regs_using_pop (void)
11355 {
11356 unsigned int regno;
11357
11358 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11359 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
11360 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
11361 }
11362
11363 /* Emit code and notes for the LEAVE instruction. */
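/* The leave instruction is equivalent to "movq %rbp, %rsp; popq %rbp"
   (movl/popl in 32-bit mode), which is why the frame state below ends up
   with sp_offset == fp_offset - UNITS_PER_WORD and an invalidated frame
   pointer.  */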
11364
11365 static void
11366 ix86_emit_leave (void)
11367 {
11368 struct machine_function *m = cfun->machine;
11369 rtx insn = emit_insn (ix86_gen_leave ());
11370
11371 ix86_add_queued_cfa_restore_notes (insn);
11372
11373 gcc_assert (m->fs.fp_valid);
11374 m->fs.sp_valid = true;
11375 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
11376 m->fs.fp_valid = false;
11377
11378 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11379 {
11380 m->fs.cfa_reg = stack_pointer_rtx;
11381 m->fs.cfa_offset = m->fs.sp_offset;
11382
11383 add_reg_note (insn, REG_CFA_DEF_CFA,
11384 plus_constant (Pmode, stack_pointer_rtx,
11385 m->fs.sp_offset));
11386 RTX_FRAME_RELATED_P (insn) = 1;
11387 }
11388 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
11389 m->fs.fp_offset);
11390 }
11391
11392 /* Emit code to restore saved registers using MOV insns.
11393 First register is restored from CFA - CFA_OFFSET. */
11394 static void
11395 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
11396 bool maybe_eh_return)
11397 {
11398 struct machine_function *m = cfun->machine;
11399 unsigned int regno;
11400
11401 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11402 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11403 {
11404 rtx reg = gen_rtx_REG (word_mode, regno);
11405 rtx insn, mem;
11406
11407 mem = choose_baseaddr (cfa_offset);
11408 mem = gen_frame_mem (word_mode, mem);
11409 insn = emit_move_insn (reg, mem);
11410
11411 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
11412 {
11413 /* Previously we'd represented the CFA as an expression
11414 like *(%ebp - 8). We've just loaded that value from
11415 the stack, which means we need to reset the CFA to
11416 the drap register. This will remain until we restore
11417 the stack pointer. */
11418 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11419 RTX_FRAME_RELATED_P (insn) = 1;
11420
11421 /* This means that the DRAP register is valid for addressing. */
11422 m->fs.drap_valid = true;
11423 }
11424 else
11425 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
11426
11427 cfa_offset -= UNITS_PER_WORD;
11428 }
11429 }
11430
11431 /* Emit code to restore saved SSE registers using MOV insns.
11432 First register is restored from CFA - CFA_OFFSET. */
11433 static void
11434 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
11435 bool maybe_eh_return)
11436 {
11437 unsigned int regno;
11438
11439 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11440 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11441 {
11442 rtx reg = gen_rtx_REG (V4SFmode, regno);
11443 rtx mem;
11444
11445 mem = choose_baseaddr (cfa_offset);
11446 mem = gen_rtx_MEM (V4SFmode, mem);
11447 set_mem_align (mem, 128);
11448 emit_move_insn (reg, mem);
11449
11450 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
11451
11452 cfa_offset -= 16;
11453 }
11454 }
11455
11456 /* Restore function stack, frame, and registers. */
11457
11458 void
11459 ix86_expand_epilogue (int style)
11460 {
11461 struct machine_function *m = cfun->machine;
11462 struct machine_frame_state frame_state_save = m->fs;
11463 struct ix86_frame frame;
11464 bool restore_regs_via_mov;
11465 bool using_drap;
11466
11467 ix86_finalize_stack_realign_flags ();
11468 ix86_compute_frame_layout (&frame);
11469
11470 m->fs.sp_valid = (!frame_pointer_needed
11471 || (crtl->sp_is_unchanging
11472 && !stack_realign_fp));
11473 gcc_assert (!m->fs.sp_valid
11474 || m->fs.sp_offset == frame.stack_pointer_offset);
11475
11476 /* The FP must be valid if the frame pointer is present. */
11477 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
11478 gcc_assert (!m->fs.fp_valid
11479 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
11480
11481 /* We must have *some* valid pointer to the stack frame. */
11482 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
11483
11484 /* The DRAP is never valid at this point. */
11485 gcc_assert (!m->fs.drap_valid);
11486
11487 /* See the comment about red zone and frame
11488 pointer usage in ix86_expand_prologue. */
11489 if (frame_pointer_needed && frame.red_zone_size)
11490 emit_insn (gen_memory_blockage ());
11491
11492 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
11493 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
11494
11495 /* Determine the CFA offset of the end of the red-zone. */
11496 m->fs.red_zone_offset = 0;
11497 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
11498 {
11499 /* The red-zone begins below the return address. */
11500 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
11501
11502 /* When the register save area is in the aligned portion of
11503 the stack, determine the maximum runtime displacement that
11504 matches up with the aligned frame. */
11505 if (stack_realign_drap)
11506 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
11507 + UNITS_PER_WORD);
11508 }
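/* As an illustration (assuming the usual x86-64 values RED_ZONE_SIZE == 128
   and UNITS_PER_WORD == 8), red_zone_offset becomes 136, so any register
   whose save slot lies within 136 bytes of the CFA stays untouched until
   return and ix86_add_cfa_restore_note skips its REG_CFA_RESTORE note.  */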
11509
11510 /* Special care must be taken for the normal return case of a function
11511 using eh_return: the eax and edx registers are marked as saved, but
11512 not restored along this path. Adjust the save location to match. */
11513 if (crtl->calls_eh_return && style != 2)
11514 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
11515
11516 /* EH_RETURN requires the use of moves to function properly. */
11517 if (crtl->calls_eh_return)
11518 restore_regs_via_mov = true;
11519 /* SEH requires the use of pops to identify the epilogue. */
11520 else if (TARGET_SEH)
11521 restore_regs_via_mov = false;
11522 /* If we're only restoring one register and sp is not valid, then
11523 use a move instruction to restore the register, since it's
11524 less work than reloading sp and popping the register. */
11525 else if (!m->fs.sp_valid && frame.nregs <= 1)
11526 restore_regs_via_mov = true;
11527 else if (TARGET_EPILOGUE_USING_MOVE
11528 && cfun->machine->use_fast_prologue_epilogue
11529 && (frame.nregs > 1
11530 || m->fs.sp_offset != frame.reg_save_offset))
11531 restore_regs_via_mov = true;
11532 else if (frame_pointer_needed
11533 && !frame.nregs
11534 && m->fs.sp_offset != frame.reg_save_offset)
11535 restore_regs_via_mov = true;
11536 else if (frame_pointer_needed
11537 && TARGET_USE_LEAVE
11538 && cfun->machine->use_fast_prologue_epilogue
11539 && frame.nregs == 1)
11540 restore_regs_via_mov = true;
11541 else
11542 restore_regs_via_mov = false;
11543
11544 if (restore_regs_via_mov || frame.nsseregs)
11545 {
11546 /* Ensure that the entire register save area is addressable via
11547 the stack pointer, if we will restore via sp. */
11548 if (TARGET_64BIT
11549 && m->fs.sp_offset > 0x7fffffff
11550 && !(m->fs.fp_valid || m->fs.drap_valid)
11551 && (frame.nsseregs + frame.nregs) != 0)
11552 {
11553 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11554 GEN_INT (m->fs.sp_offset
11555 - frame.sse_reg_save_offset),
11556 style,
11557 m->fs.cfa_reg == stack_pointer_rtx);
11558 }
11559 }
11560
11561 /* If there are any SSE registers to restore, then we have to do it
11562 via moves, since there's obviously no pop for SSE regs. */
11563 if (frame.nsseregs)
11564 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
11565 style == 2);
11566
11567 if (restore_regs_via_mov)
11568 {
11569 rtx t;
11570
11571 if (frame.nregs)
11572 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
11573
11574 /* eh_return epilogues need %ecx added to the stack pointer. */
11575 if (style == 2)
11576 {
11577 rtx insn, sa = EH_RETURN_STACKADJ_RTX;
11578
11579 /* Stack align doesn't work with eh_return. */
11580 gcc_assert (!stack_realign_drap);
11581 /* Neither do regparm nested functions. */
11582 gcc_assert (!ix86_static_chain_on_stack);
11583
11584 if (frame_pointer_needed)
11585 {
11586 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
11587 t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
11588 emit_insn (gen_rtx_SET (VOIDmode, sa, t));
11589
11590 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
11591 insn = emit_move_insn (hard_frame_pointer_rtx, t);
11592
11593 /* Note that we use SA as a temporary CFA, as the return
11594 address is at the proper place relative to it. We
11595 pretend this happens at the FP restore insn because
11596 prior to this insn the FP would be stored at the wrong
11597 offset relative to SA, and after this insn we have no
11598 other reasonable register to use for the CFA. We don't
11599 bother resetting the CFA to the SP for the duration of
11600 the return insn. */
11601 add_reg_note (insn, REG_CFA_DEF_CFA,
11602 plus_constant (Pmode, sa, UNITS_PER_WORD));
11603 ix86_add_queued_cfa_restore_notes (insn);
11604 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
11605 RTX_FRAME_RELATED_P (insn) = 1;
11606
11607 m->fs.cfa_reg = sa;
11608 m->fs.cfa_offset = UNITS_PER_WORD;
11609 m->fs.fp_valid = false;
11610
11611 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
11612 const0_rtx, style, false);
11613 }
11614 else
11615 {
11616 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
11617 t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
11618 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
11619 ix86_add_queued_cfa_restore_notes (insn);
11620
11621 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
11622 if (m->fs.cfa_offset != UNITS_PER_WORD)
11623 {
11624 m->fs.cfa_offset = UNITS_PER_WORD;
11625 add_reg_note (insn, REG_CFA_DEF_CFA,
11626 plus_constant (Pmode, stack_pointer_rtx,
11627 UNITS_PER_WORD));
11628 RTX_FRAME_RELATED_P (insn) = 1;
11629 }
11630 }
11631 m->fs.sp_offset = UNITS_PER_WORD;
11632 m->fs.sp_valid = true;
11633 }
11634 }
11635 else
11636 {
11637 /* SEH requires that the function end with (1) a stack adjustment
11638 if necessary, (2) a sequence of pops, and (3) a return or
11639 jump instruction. Prevent insns from the function body from
11640 being scheduled into this sequence. */
11641 if (TARGET_SEH)
11642 {
11643 /* Prevent a catch region from being adjacent to the standard
11644 epilogue sequence. Unfortunately, neither crtl->uses_eh_lsda nor
11645 several other flags that would be interesting to test are
11646 set up yet. */
11647 if (flag_non_call_exceptions)
11648 emit_insn (gen_nops (const1_rtx));
11649 else
11650 emit_insn (gen_blockage ());
11651 }
11652
11653 /* First step is to deallocate the stack frame so that we can
11654 pop the registers. Also do it on the SEH target for very large
11655 frames, as the emitted instructions aren't allowed by the ABI in
11656 epilogues. */
11657 if (!m->fs.sp_valid
11658 || (TARGET_SEH
11659 && (m->fs.sp_offset - frame.reg_save_offset
11660 >= SEH_MAX_FRAME_SIZE)))
11661 {
11662 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
11663 GEN_INT (m->fs.fp_offset
11664 - frame.reg_save_offset),
11665 style, false);
11666 }
11667 else if (m->fs.sp_offset != frame.reg_save_offset)
11668 {
11669 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11670 GEN_INT (m->fs.sp_offset
11671 - frame.reg_save_offset),
11672 style,
11673 m->fs.cfa_reg == stack_pointer_rtx);
11674 }
11675
11676 ix86_emit_restore_regs_using_pop ();
11677 }
11678
11679   /* If we used a frame pointer and haven't already got rid of it,
11680      then do so now.  */
11681 if (m->fs.fp_valid)
11682 {
11683 /* If the stack pointer is valid and pointing at the frame
11684 pointer store address, then we only need a pop. */
11685 if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
11686 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
11687 /* Leave results in shorter dependency chains on CPUs that are
11688 able to grok it fast. */
11689 else if (TARGET_USE_LEAVE
11690 || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
11691 || !cfun->machine->use_fast_prologue_epilogue)
11692 ix86_emit_leave ();
11693 else
11694 {
11695 pro_epilogue_adjust_stack (stack_pointer_rtx,
11696 hard_frame_pointer_rtx,
11697 const0_rtx, style, !using_drap);
11698 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
11699 }
11700 }
11701
11702 if (using_drap)
11703 {
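      /* Restore the stack pointer from the DRAP register.  The offset skips
	 the saved return address, plus the saved static chain and the saved
	 DRAP register itself when the prologue pushed them.  */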
11704 int param_ptr_offset = UNITS_PER_WORD;
11705 rtx insn;
11706
11707 gcc_assert (stack_realign_drap);
11708
11709 if (ix86_static_chain_on_stack)
11710 param_ptr_offset += UNITS_PER_WORD;
11711 if (!call_used_regs[REGNO (crtl->drap_reg)])
11712 param_ptr_offset += UNITS_PER_WORD;
11713
11714 insn = emit_insn (gen_rtx_SET
11715 (VOIDmode, stack_pointer_rtx,
11716 gen_rtx_PLUS (Pmode,
11717 crtl->drap_reg,
11718 GEN_INT (-param_ptr_offset))));
11719 m->fs.cfa_reg = stack_pointer_rtx;
11720 m->fs.cfa_offset = param_ptr_offset;
11721 m->fs.sp_offset = param_ptr_offset;
11722 m->fs.realigned = false;
11723
11724 add_reg_note (insn, REG_CFA_DEF_CFA,
11725 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11726 GEN_INT (param_ptr_offset)));
11727 RTX_FRAME_RELATED_P (insn) = 1;
11728
11729 if (!call_used_regs[REGNO (crtl->drap_reg)])
11730 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
11731 }
11732
11733 /* At this point the stack pointer must be valid, and we must have
11734 restored all of the registers. We may not have deallocated the
11735 entire stack frame. We've delayed this until now because it may
11736 be possible to merge the local stack deallocation with the
11737 deallocation forced by ix86_static_chain_on_stack. */
11738 gcc_assert (m->fs.sp_valid);
11739 gcc_assert (!m->fs.fp_valid);
11740 gcc_assert (!m->fs.realigned);
11741 if (m->fs.sp_offset != UNITS_PER_WORD)
11742 {
11743 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11744 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
11745 style, true);
11746 }
11747 else
11748 ix86_add_queued_cfa_restore_notes (get_last_insn ());
11749
11750 /* Sibcall epilogues don't want a return instruction. */
11751 if (style == 0)
11752 {
11753 m->fs = frame_state_save;
11754 return;
11755 }
11756
11757 if (crtl->args.pops_args && crtl->args.size)
11758 {
11759 rtx popc = GEN_INT (crtl->args.pops_args);
11760
11761 /* i386 can only pop 64K bytes. If asked to pop more, pop return
11762 address, do explicit add, and jump indirectly to the caller. */
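
      /* Roughly, the emitted sequence in that case is:

	     popl	%ecx
	     addl	$N, %esp	(N = crtl->args.pops_args)
	     jmp	*%ecx

	 This can only happen for 32-bit targets; no 64-bit ABI has a
	 callee-pop convention that large.  */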
11763
11764 if (crtl->args.pops_args >= 65536)
11765 {
11766 rtx ecx = gen_rtx_REG (SImode, CX_REG);
11767 rtx insn;
11768
11769 /* There is no "pascal" calling convention in any 64bit ABI. */
11770 gcc_assert (!TARGET_64BIT);
11771
11772 insn = emit_insn (gen_pop (ecx));
11773 m->fs.cfa_offset -= UNITS_PER_WORD;
11774 m->fs.sp_offset -= UNITS_PER_WORD;
11775
11776 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
11777 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
11778 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
11779 add_reg_note (insn, REG_CFA_REGISTER,
11780 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
11781 RTX_FRAME_RELATED_P (insn) = 1;
11782
11783 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11784 popc, -1, true);
11785 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
11786 }
11787 else
11788 emit_jump_insn (gen_simple_return_pop_internal (popc));
11789 }
11790 else
11791 emit_jump_insn (gen_simple_return_internal ());
11792
11793 /* Restore the state back to the state from the prologue,
11794 so that it's correct for the next epilogue. */
11795 m->fs = frame_state_save;
11796 }
11797
11798 /* Reset from the function's potential modifications. */
11799
11800 static void
11801 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED, HOST_WIDE_INT)
11802 {
11803 if (pic_offset_table_rtx)
11804 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
11805 #if TARGET_MACHO
11806 /* Mach-O doesn't support labels at the end of objects, so if
11807 it looks like we might want one, insert a NOP. */
11808 {
11809 rtx_insn *insn = get_last_insn ();
11810 rtx_insn *deleted_debug_label = NULL;
11811 while (insn
11812 && NOTE_P (insn)
11813 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
11814 {
11815 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
11816 notes only, instead set their CODE_LABEL_NUMBER to -1,
11817 otherwise there would be code generation differences
11818 in between -g and -g0. */
11819 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
11820 deleted_debug_label = insn;
11821 insn = PREV_INSN (insn);
11822 }
11823 if (insn
11824 && (LABEL_P (insn)
11825 || (NOTE_P (insn)
11826 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
11827 fputs ("\tnop\n", file);
11828 else if (deleted_debug_label)
11829 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
11830 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
11831 CODE_LABEL_NUMBER (insn) = -1;
11832 }
11833 #endif
11834
11835 }
11836
11837 /* Return a scratch register to use in the split stack prologue. The
11838    split stack prologue is used for -fsplit-stack.  These are the first
11839    instructions in the function, even before the regular prologue.
11840 The scratch register can be any caller-saved register which is not
11841 used for parameters or for the static chain. */
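
/* Concretely: %r11 in 64-bit mode; in 32-bit mode, %eax for fastcall, %edx
   for thiscall (%eax when a static chain is live), and %ecx for the normal
   cases (%edx when a static chain is live); the remaining combinations get
   a "sorry" diagnostic below.  */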
11842
11843 static unsigned int
11844 split_stack_prologue_scratch_regno (void)
11845 {
11846 if (TARGET_64BIT)
11847 return R11_REG;
11848 else
11849 {
11850 bool is_fastcall, is_thiscall;
11851 int regparm;
11852
11853 is_fastcall = (lookup_attribute ("fastcall",
11854 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
11855 != NULL);
11856 is_thiscall = (lookup_attribute ("thiscall",
11857 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
11858 != NULL);
11859 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
11860
11861 if (is_fastcall)
11862 {
11863 if (DECL_STATIC_CHAIN (cfun->decl))
11864 {
11865 sorry ("-fsplit-stack does not support fastcall with "
11866 "nested function");
11867 return INVALID_REGNUM;
11868 }
11869 return AX_REG;
11870 }
11871 else if (is_thiscall)
11872 {
11873 if (!DECL_STATIC_CHAIN (cfun->decl))
11874 return DX_REG;
11875 return AX_REG;
11876 }
11877 else if (regparm < 3)
11878 {
11879 if (!DECL_STATIC_CHAIN (cfun->decl))
11880 return CX_REG;
11881 else
11882 {
11883 if (regparm >= 2)
11884 {
11885 sorry ("-fsplit-stack does not support 2 register "
11886 "parameters for a nested function");
11887 return INVALID_REGNUM;
11888 }
11889 return DX_REG;
11890 }
11891 }
11892 else
11893 {
11894 /* FIXME: We could make this work by pushing a register
11895 around the addition and comparison. */
11896 sorry ("-fsplit-stack does not support 3 register parameters");
11897 return INVALID_REGNUM;
11898 }
11899 }
11900 }
11901
11902 /* A SYMBOL_REF for the function which allocates new stack space for
11903 -fsplit-stack. */
11904
11905 static GTY(()) rtx split_stack_fn;
11906
11907 /* A SYMBOL_REF for the __morestack function variant used with the large
11908    model.  */
11909
11910 static GTY(()) rtx split_stack_fn_large;
11911
11912 /* Handle -fsplit-stack. These are the first instructions in the
11913 function, even before the regular prologue. */
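
/* For the common 64-bit small-model case the emitted code looks roughly
   like this (the TCB slot is reached through the UNSPEC_STACK_CHECK
   address built below):

	cmpq	%fs:<stack_limit>, %rsp
	jae	.Lhave_enough
	movq	$<frame_size>, %r10
	movq	$<args_size>, %r11
	callq	__morestack
	retq
   .Lhave_enough:

   Frames larger than SPLIT_STACK_AVAILABLE first compute %rsp minus the
   frame size into a scratch register and compare that instead.  */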
11914
11915 void
11916 ix86_expand_split_stack_prologue (void)
11917 {
11918 struct ix86_frame frame;
11919 HOST_WIDE_INT allocate;
11920 unsigned HOST_WIDE_INT args_size;
11921 rtx_code_label *label;
11922 rtx limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
11923 rtx scratch_reg = NULL_RTX;
11924 rtx_code_label *varargs_label = NULL;
11925 rtx fn;
11926
11927 gcc_assert (flag_split_stack && reload_completed);
11928
11929 ix86_finalize_stack_realign_flags ();
11930 ix86_compute_frame_layout (&frame);
11931 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
11932
11933 /* This is the label we will branch to if we have enough stack
11934 space. We expect the basic block reordering pass to reverse this
11935 branch if optimizing, so that we branch in the unlikely case. */
11936 label = gen_label_rtx ();
11937
11938 /* We need to compare the stack pointer minus the frame size with
11939 the stack boundary in the TCB. The stack boundary always gives
11940 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
11941 can compare directly. Otherwise we need to do an addition. */
11942
11943 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
11944 UNSPEC_STACK_CHECK);
11945 limit = gen_rtx_CONST (Pmode, limit);
11946 limit = gen_rtx_MEM (Pmode, limit);
11947 if (allocate < SPLIT_STACK_AVAILABLE)
11948 current = stack_pointer_rtx;
11949 else
11950 {
11951 unsigned int scratch_regno;
11952 rtx offset;
11953
11954 /* We need a scratch register to hold the stack pointer minus
11955 the required frame size. Since this is the very start of the
11956 function, the scratch register can be any caller-saved
11957 register which is not used for parameters. */
11958 offset = GEN_INT (- allocate);
11959 scratch_regno = split_stack_prologue_scratch_regno ();
11960 if (scratch_regno == INVALID_REGNUM)
11961 return;
11962 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
11963 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
11964 {
11965 /* We don't use ix86_gen_add3 in this case because it will
11966 want to split to lea, but when not optimizing the insn
11967 will not be split after this point. */
11968 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11969 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11970 offset)));
11971 }
11972 else
11973 {
11974 emit_move_insn (scratch_reg, offset);
11975 emit_insn (ix86_gen_add3 (scratch_reg, scratch_reg,
11976 stack_pointer_rtx));
11977 }
11978 current = scratch_reg;
11979 }
11980
11981 ix86_expand_branch (GEU, current, limit, label);
11982 jump_insn = get_last_insn ();
11983 JUMP_LABEL (jump_insn) = label;
11984
11985 /* Mark the jump as very likely to be taken. */
11986 add_int_reg_note (jump_insn, REG_BR_PROB,
11987 REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100);
11988
11989 if (split_stack_fn == NULL_RTX)
11990 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
11991 fn = split_stack_fn;
11992
11993 /* Get more stack space. We pass in the desired stack space and the
11994 size of the arguments to copy to the new stack. In 32-bit mode
11995 we push the parameters; __morestack will return on a new stack
11996 anyhow. In 64-bit mode we pass the parameters in r10 and
11997 r11. */
11998 allocate_rtx = GEN_INT (allocate);
11999 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
12000 call_fusage = NULL_RTX;
12001 if (TARGET_64BIT)
12002 {
12003 rtx reg10, reg11;
12004
12005 reg10 = gen_rtx_REG (Pmode, R10_REG);
12006 reg11 = gen_rtx_REG (Pmode, R11_REG);
12007
12008 /* If this function uses a static chain, it will be in %r10.
12009 Preserve it across the call to __morestack. */
12010 if (DECL_STATIC_CHAIN (cfun->decl))
12011 {
12012 rtx rax;
12013
12014 rax = gen_rtx_REG (word_mode, AX_REG);
12015 emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
12016 use_reg (&call_fusage, rax);
12017 }
12018
12019 if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
12020 && !TARGET_PECOFF)
12021 {
12022 HOST_WIDE_INT argval;
12023
12024 gcc_assert (Pmode == DImode);
12025 /* When using the large model we need to load the address
12026 into a register, and we've run out of registers. So we
12027 switch to a different calling convention, and we call a
12028 	     different function: __morestack_large_model.  We pass the
12029 argument size in the upper 32 bits of r10 and pass the
12030 frame size in the lower 32 bits. */
12031 gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
12032 gcc_assert ((args_size & 0xffffffff) == args_size);
12033
12034 if (split_stack_fn_large == NULL_RTX)
12035 split_stack_fn_large =
12036 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
12037
12038 if (ix86_cmodel == CM_LARGE_PIC)
12039 {
12040 rtx_code_label *label;
12041 rtx x;
12042
12043 label = gen_label_rtx ();
12044 emit_label (label);
12045 LABEL_PRESERVE_P (label) = 1;
12046 emit_insn (gen_set_rip_rex64 (reg10, label));
12047 emit_insn (gen_set_got_offset_rex64 (reg11, label));
12048 emit_insn (ix86_gen_add3 (reg10, reg10, reg11));
12049 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
12050 UNSPEC_GOT);
12051 x = gen_rtx_CONST (Pmode, x);
12052 emit_move_insn (reg11, x);
12053 x = gen_rtx_PLUS (Pmode, reg10, reg11);
12054 x = gen_const_mem (Pmode, x);
12055 emit_move_insn (reg11, x);
12056 }
12057 else
12058 emit_move_insn (reg11, split_stack_fn_large);
12059
12060 fn = reg11;
12061
12062 argval = ((args_size << 16) << 16) + allocate;
12063 emit_move_insn (reg10, GEN_INT (argval));
12064 }
12065 else
12066 {
12067 emit_move_insn (reg10, allocate_rtx);
12068 emit_move_insn (reg11, GEN_INT (args_size));
12069 use_reg (&call_fusage, reg11);
12070 }
12071
12072 use_reg (&call_fusage, reg10);
12073 }
12074 else
12075 {
12076 emit_insn (gen_push (GEN_INT (args_size)));
12077 emit_insn (gen_push (allocate_rtx));
12078 }
12079 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
12080 GEN_INT (UNITS_PER_WORD), constm1_rtx,
12081 NULL_RTX, false);
12082 add_function_usage_to (call_insn, call_fusage);
12083
12084 /* In order to make call/return prediction work right, we now need
12085 to execute a return instruction. See
12086 libgcc/config/i386/morestack.S for the details on how this works.
12087
12088 For flow purposes gcc must not see this as a return
12089 instruction--we need control flow to continue at the subsequent
12090 label. Therefore, we use an unspec. */
12091 gcc_assert (crtl->args.pops_args < 65536);
12092 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
12093
12094 /* If we are in 64-bit mode and this function uses a static chain,
12095      we saved %r10 in %rax before calling __morestack.  */
12096 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
12097 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
12098 gen_rtx_REG (word_mode, AX_REG));
12099
12100 /* If this function calls va_start, we need to store a pointer to
12101 the arguments on the old stack, because they may not have been
12102 all copied to the new stack. At this point the old stack can be
12103 found at the frame pointer value used by __morestack, because
12104 __morestack has set that up before calling back to us. Here we
12105 store that pointer in a scratch register, and in
12106 ix86_expand_prologue we store the scratch register in a stack
12107 slot. */
12108 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12109 {
12110 unsigned int scratch_regno;
12111 rtx frame_reg;
12112 int words;
12113
12114 scratch_regno = split_stack_prologue_scratch_regno ();
12115 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
12116 frame_reg = gen_rtx_REG (Pmode, BP_REG);
12117
12118 /* 64-bit:
12119 fp -> old fp value
12120 return address within this function
12121 return address of caller of this function
12122 stack arguments
12123 So we add three words to get to the stack arguments.
12124
12125 32-bit:
12126 fp -> old fp value
12127 return address within this function
12128 first argument to __morestack
12129 second argument to __morestack
12130 return address of caller of this function
12131 stack arguments
12132 So we add five words to get to the stack arguments.
12133 */
12134 words = TARGET_64BIT ? 3 : 5;
12135 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12136 gen_rtx_PLUS (Pmode, frame_reg,
12137 GEN_INT (words * UNITS_PER_WORD))));
12138
12139 varargs_label = gen_label_rtx ();
12140 emit_jump_insn (gen_jump (varargs_label));
12141 JUMP_LABEL (get_last_insn ()) = varargs_label;
12142
12143 emit_barrier ();
12144 }
12145
12146 emit_label (label);
12147 LABEL_NUSES (label) = 1;
12148
12149 /* If this function calls va_start, we now have to set the scratch
12150 register for the case where we do not call __morestack. In this
12151 case we need to set it based on the stack pointer. */
12152 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12153 {
12154 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12155 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12156 GEN_INT (UNITS_PER_WORD))));
12157
12158 emit_label (varargs_label);
12159 LABEL_NUSES (varargs_label) = 1;
12160 }
12161 }
12162
12163 /* We may have to tell the dataflow pass that the split stack prologue
12164 is initializing a scratch register. */
12165
12166 static void
12167 ix86_live_on_entry (bitmap regs)
12168 {
12169 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12170 {
12171 gcc_assert (flag_split_stack);
12172 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
12173 }
12174 }
12175 \f
12176 /* Extract the parts of an RTL expression that is a valid memory address
12177 for an instruction. Return 0 if the structure of the address is
12178 grossly off. Return -1 if the address contains ASHIFT, so it is not
12179    strictly valid, but still used for computing the length of the lea instruction.  */
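
/* For example, (plus (plus (mult (reg A) (const_int 4)) (reg B))
   (const_int 8)) decomposes into index = A, scale = 4, base = B and
   disp = 8, matching the x86 addressing form 8(B,A,4).  */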
12180
12181 int
12182 ix86_decompose_address (rtx addr, struct ix86_address *out)
12183 {
12184 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
12185 rtx base_reg, index_reg;
12186 HOST_WIDE_INT scale = 1;
12187 rtx scale_rtx = NULL_RTX;
12188 rtx tmp;
12189 int retval = 1;
12190 enum ix86_address_seg seg = SEG_DEFAULT;
12191
12192 /* Allow zero-extended SImode addresses,
12193 they will be emitted with addr32 prefix. */
12194 if (TARGET_64BIT && GET_MODE (addr) == DImode)
12195 {
12196 if (GET_CODE (addr) == ZERO_EXTEND
12197 && GET_MODE (XEXP (addr, 0)) == SImode)
12198 {
12199 addr = XEXP (addr, 0);
12200 if (CONST_INT_P (addr))
12201 return 0;
12202 }
12203 else if (GET_CODE (addr) == AND
12204 && const_32bit_mask (XEXP (addr, 1), DImode))
12205 {
12206 addr = simplify_gen_subreg (SImode, XEXP (addr, 0), DImode, 0);
12207 if (addr == NULL_RTX)
12208 return 0;
12209
12210 if (CONST_INT_P (addr))
12211 return 0;
12212 }
12213 }
12214
12215 /* Allow SImode subregs of DImode addresses,
12216 they will be emitted with addr32 prefix. */
12217 if (TARGET_64BIT && GET_MODE (addr) == SImode)
12218 {
12219 if (GET_CODE (addr) == SUBREG
12220 && GET_MODE (SUBREG_REG (addr)) == DImode)
12221 {
12222 addr = SUBREG_REG (addr);
12223 if (CONST_INT_P (addr))
12224 return 0;
12225 }
12226 }
12227
12228 if (REG_P (addr))
12229 base = addr;
12230 else if (GET_CODE (addr) == SUBREG)
12231 {
12232 if (REG_P (SUBREG_REG (addr)))
12233 base = addr;
12234 else
12235 return 0;
12236 }
12237 else if (GET_CODE (addr) == PLUS)
12238 {
12239 rtx addends[4], op;
12240 int n = 0, i;
12241
12242 op = addr;
12243 do
12244 {
12245 if (n >= 4)
12246 return 0;
12247 addends[n++] = XEXP (op, 1);
12248 op = XEXP (op, 0);
12249 }
12250 while (GET_CODE (op) == PLUS);
12251 if (n >= 4)
12252 return 0;
12253 addends[n] = op;
12254
12255 for (i = n; i >= 0; --i)
12256 {
12257 op = addends[i];
12258 switch (GET_CODE (op))
12259 {
12260 case MULT:
12261 if (index)
12262 return 0;
12263 index = XEXP (op, 0);
12264 scale_rtx = XEXP (op, 1);
12265 break;
12266
12267 case ASHIFT:
12268 if (index)
12269 return 0;
12270 index = XEXP (op, 0);
12271 tmp = XEXP (op, 1);
12272 if (!CONST_INT_P (tmp))
12273 return 0;
12274 scale = INTVAL (tmp);
12275 if ((unsigned HOST_WIDE_INT) scale > 3)
12276 return 0;
12277 scale = 1 << scale;
12278 break;
12279
12280 case ZERO_EXTEND:
12281 op = XEXP (op, 0);
12282 if (GET_CODE (op) != UNSPEC)
12283 return 0;
12284 /* FALLTHRU */
12285
12286 case UNSPEC:
12287 if (XINT (op, 1) == UNSPEC_TP
12288 && TARGET_TLS_DIRECT_SEG_REFS
12289 && seg == SEG_DEFAULT)
12290 seg = DEFAULT_TLS_SEG_REG;
12291 else
12292 return 0;
12293 break;
12294
12295 case SUBREG:
12296 if (!REG_P (SUBREG_REG (op)))
12297 return 0;
12298 /* FALLTHRU */
12299
12300 case REG:
12301 if (!base)
12302 base = op;
12303 else if (!index)
12304 index = op;
12305 else
12306 return 0;
12307 break;
12308
12309 case CONST:
12310 case CONST_INT:
12311 case SYMBOL_REF:
12312 case LABEL_REF:
12313 if (disp)
12314 return 0;
12315 disp = op;
12316 break;
12317
12318 default:
12319 return 0;
12320 }
12321 }
12322 }
12323 else if (GET_CODE (addr) == MULT)
12324 {
12325 index = XEXP (addr, 0); /* index*scale */
12326 scale_rtx = XEXP (addr, 1);
12327 }
12328 else if (GET_CODE (addr) == ASHIFT)
12329 {
12330 /* We're called for lea too, which implements ashift on occasion. */
12331 index = XEXP (addr, 0);
12332 tmp = XEXP (addr, 1);
12333 if (!CONST_INT_P (tmp))
12334 return 0;
12335 scale = INTVAL (tmp);
12336 if ((unsigned HOST_WIDE_INT) scale > 3)
12337 return 0;
12338 scale = 1 << scale;
12339 retval = -1;
12340 }
12341 else
12342 disp = addr; /* displacement */
12343
12344 if (index)
12345 {
12346 if (REG_P (index))
12347 ;
12348 else if (GET_CODE (index) == SUBREG
12349 && REG_P (SUBREG_REG (index)))
12350 ;
12351 else
12352 return 0;
12353 }
12354
12355 /* Extract the integral value of scale. */
12356 if (scale_rtx)
12357 {
12358 if (!CONST_INT_P (scale_rtx))
12359 return 0;
12360 scale = INTVAL (scale_rtx);
12361 }
12362
12363 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
12364 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
12365
12366 /* Avoid useless 0 displacement. */
12367 if (disp == const0_rtx && (base || index))
12368 disp = NULL_RTX;
12369
12370   /* Allow the arg pointer and the stack pointer as index if there is no scaling.  */
12371 if (base_reg && index_reg && scale == 1
12372 && (index_reg == arg_pointer_rtx
12373 || index_reg == frame_pointer_rtx
12374 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
12375 {
12376 rtx tmp;
12377 tmp = base, base = index, index = tmp;
12378 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
12379 }
12380
12381 /* Special case: %ebp cannot be encoded as a base without a displacement.
12382 Similarly %r13. */
12383 if (!disp
12384 && base_reg
12385 && (base_reg == hard_frame_pointer_rtx
12386 || base_reg == frame_pointer_rtx
12387 || base_reg == arg_pointer_rtx
12388 || (REG_P (base_reg)
12389 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
12390 || REGNO (base_reg) == R13_REG))))
12391 disp = const0_rtx;
12392
12393   /* Special case: on K6, [%esi] causes the instruction to be vector decoded.
12394 Avoid this by transforming to [%esi+0].
12395 Reload calls address legitimization without cfun defined, so we need
12396 to test cfun for being non-NULL. */
12397 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
12398 && base_reg && !index_reg && !disp
12399 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
12400 disp = const0_rtx;
12401
12402 /* Special case: encode reg+reg instead of reg*2. */
12403 if (!base && index && scale == 2)
12404 base = index, base_reg = index_reg, scale = 1;
12405
12406 /* Special case: scaling cannot be encoded without base or displacement. */
12407 if (!base && !disp && index && scale != 1)
12408 disp = const0_rtx;
12409
12410 out->base = base;
12411 out->index = index;
12412 out->disp = disp;
12413 out->scale = scale;
12414 out->seg = seg;
12415
12416 return retval;
12417 }
12418 \f
12419 /* Return cost of the memory address x.
12420 For i386, it is better to use a complex address than let gcc copy
12421 the address into a reg and make a new pseudo. But not if the address
12422    requires two regs - that would mean more pseudos with longer
12423 lifetimes. */
12424 static int
12425 ix86_address_cost (rtx x, enum machine_mode, addr_space_t, bool)
12426 {
12427 struct ix86_address parts;
12428 int cost = 1;
12429 int ok = ix86_decompose_address (x, &parts);
12430
12431 gcc_assert (ok);
12432
12433 if (parts.base && GET_CODE (parts.base) == SUBREG)
12434 parts.base = SUBREG_REG (parts.base);
12435 if (parts.index && GET_CODE (parts.index) == SUBREG)
12436 parts.index = SUBREG_REG (parts.index);
12437
12438 /* Attempt to minimize number of registers in the address. */
12439 if ((parts.base
12440 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
12441 || (parts.index
12442 && (!REG_P (parts.index)
12443 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
12444 cost++;
12445
12446 if (parts.base
12447 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
12448 && parts.index
12449 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
12450 && parts.base != parts.index)
12451 cost++;
12452
12453   /* The AMD-K6 doesn't like addresses with the ModR/M byte set to 00_xxx_100b,
12454      since its predecode logic can't determine the length of instructions
12455      and decoding degenerates to the vector decoder.  Increase the cost of such
12456      addresses here.  The penalty is at least 2 cycles.  It may be worthwhile
12457 to split such addresses or even refuse such addresses at all.
12458
12459 Following addressing modes are affected:
12460 [base+scale*index]
12461 [scale*index+disp]
12462 [base+index]
12463
12464 The first and last case may be avoidable by explicitly coding the zero in
12465      the memory address, but I don't have an AMD-K6 machine handy to check this
12466 theory. */
12467
12468 if (TARGET_K6
12469 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
12470 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
12471 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
12472 cost += 10;
12473
12474 return cost;
12475 }
12476 \f
12477 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
12478    this is used to form addresses to local data when -fPIC is in
12479 use. */
12480
12481 static bool
12482 darwin_local_data_pic (rtx disp)
12483 {
12484 return (GET_CODE (disp) == UNSPEC
12485 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
12486 }
12487
12488 /* Determine if a given RTX is a valid constant. We already know this
12489 satisfies CONSTANT_P. */
12490
12491 static bool
12492 ix86_legitimate_constant_p (enum machine_mode, rtx x)
12493 {
12494 switch (GET_CODE (x))
12495 {
12496 case CONST:
12497 x = XEXP (x, 0);
12498
12499 if (GET_CODE (x) == PLUS)
12500 {
12501 if (!CONST_INT_P (XEXP (x, 1)))
12502 return false;
12503 x = XEXP (x, 0);
12504 }
12505
12506 if (TARGET_MACHO && darwin_local_data_pic (x))
12507 return true;
12508
12509 /* Only some unspecs are valid as "constants". */
12510 if (GET_CODE (x) == UNSPEC)
12511 switch (XINT (x, 1))
12512 {
12513 case UNSPEC_GOT:
12514 case UNSPEC_GOTOFF:
12515 case UNSPEC_PLTOFF:
12516 return TARGET_64BIT;
12517 case UNSPEC_TPOFF:
12518 case UNSPEC_NTPOFF:
12519 x = XVECEXP (x, 0, 0);
12520 return (GET_CODE (x) == SYMBOL_REF
12521 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
12522 case UNSPEC_DTPOFF:
12523 x = XVECEXP (x, 0, 0);
12524 return (GET_CODE (x) == SYMBOL_REF
12525 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
12526 default:
12527 return false;
12528 }
12529
12530 /* We must have drilled down to a symbol. */
12531 if (GET_CODE (x) == LABEL_REF)
12532 return true;
12533 if (GET_CODE (x) != SYMBOL_REF)
12534 return false;
12535 /* FALLTHRU */
12536
12537 case SYMBOL_REF:
12538 /* TLS symbols are never valid. */
12539 if (SYMBOL_REF_TLS_MODEL (x))
12540 return false;
12541
12542 /* DLLIMPORT symbols are never valid. */
12543 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12544 && SYMBOL_REF_DLLIMPORT_P (x))
12545 return false;
12546
12547 #if TARGET_MACHO
12548 /* mdynamic-no-pic */
12549 if (MACHO_DYNAMIC_NO_PIC_P)
12550 return machopic_symbol_defined_p (x);
12551 #endif
12552 break;
12553
12554 case CONST_DOUBLE:
12555 if (GET_MODE (x) == TImode
12556 && x != CONST0_RTX (TImode)
12557 && !TARGET_64BIT)
12558 return false;
12559 break;
12560
12561 case CONST_VECTOR:
12562 if (!standard_sse_constant_p (x))
12563 return false;
12564
12565 default:
12566 break;
12567 }
12568
12569 /* Otherwise we handle everything else in the move patterns. */
12570 return true;
12571 }
12572
12573 /* Determine if it's legal to put X into the constant pool. This
12574 is not possible for the address of thread-local symbols, which
12575 is checked above. */
12576
12577 static bool
12578 ix86_cannot_force_const_mem (enum machine_mode mode, rtx x)
12579 {
12580 /* We can always put integral constants and vectors in memory. */
12581 switch (GET_CODE (x))
12582 {
12583 case CONST_INT:
12584 case CONST_DOUBLE:
12585 case CONST_VECTOR:
12586 return false;
12587
12588 default:
12589 break;
12590 }
12591 return !ix86_legitimate_constant_p (mode, x);
12592 }
12593
12594 /* Nonzero if the symbol is marked as dllimport, or as a stub-variable,
12595 otherwise zero. */
12596
12597 static bool
12598 is_imported_p (rtx x)
12599 {
12600 if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
12601 || GET_CODE (x) != SYMBOL_REF)
12602 return false;
12603
12604 return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x);
12605 }
12606
12607
12608 /* Nonzero if the constant value X is a legitimate general operand
12609 when generating PIC code. It is given that flag_pic is on and
12610 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
12611
12612 bool
12613 legitimate_pic_operand_p (rtx x)
12614 {
12615 rtx inner;
12616
12617 switch (GET_CODE (x))
12618 {
12619 case CONST:
12620 inner = XEXP (x, 0);
12621 if (GET_CODE (inner) == PLUS
12622 && CONST_INT_P (XEXP (inner, 1)))
12623 inner = XEXP (inner, 0);
12624
12625 /* Only some unspecs are valid as "constants". */
12626 if (GET_CODE (inner) == UNSPEC)
12627 switch (XINT (inner, 1))
12628 {
12629 case UNSPEC_GOT:
12630 case UNSPEC_GOTOFF:
12631 case UNSPEC_PLTOFF:
12632 return TARGET_64BIT;
12633 case UNSPEC_TPOFF:
12634 x = XVECEXP (inner, 0, 0);
12635 return (GET_CODE (x) == SYMBOL_REF
12636 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
12637 case UNSPEC_MACHOPIC_OFFSET:
12638 return legitimate_pic_address_disp_p (x);
12639 default:
12640 return false;
12641 }
12642 /* FALLTHRU */
12643
12644 case SYMBOL_REF:
12645 case LABEL_REF:
12646 return legitimate_pic_address_disp_p (x);
12647
12648 default:
12649 return true;
12650 }
12651 }
12652
12653 /* Determine if a given CONST RTX is a valid memory displacement
12654 in PIC mode. */
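
/* For 32-bit PIC, valid displacements are essentially our own pic-related
   UNSPECs (@GOT, @GOTOFF, the TLS @...TPOFF forms and @DTPOFF), some of
   them optionally combined with a constant offset.  In 64-bit mode local
   symbols and labels may also be addressed directly, which the code below
   checks first.  */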
12655
12656 bool
12657 legitimate_pic_address_disp_p (rtx disp)
12658 {
12659 bool saw_plus;
12660
12661 /* In 64bit mode we can allow direct addresses of symbols and labels
12662 when they are not dynamic symbols. */
12663 if (TARGET_64BIT)
12664 {
12665 rtx op0 = disp, op1;
12666
12667 switch (GET_CODE (disp))
12668 {
12669 case LABEL_REF:
12670 return true;
12671
12672 case CONST:
12673 if (GET_CODE (XEXP (disp, 0)) != PLUS)
12674 break;
12675 op0 = XEXP (XEXP (disp, 0), 0);
12676 op1 = XEXP (XEXP (disp, 0), 1);
12677 if (!CONST_INT_P (op1)
12678 || INTVAL (op1) >= 16*1024*1024
12679 || INTVAL (op1) < -16*1024*1024)
12680 break;
12681 if (GET_CODE (op0) == LABEL_REF)
12682 return true;
12683 if (GET_CODE (op0) == CONST
12684 && GET_CODE (XEXP (op0, 0)) == UNSPEC
12685 && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
12686 return true;
12687 if (GET_CODE (op0) == UNSPEC
12688 && XINT (op0, 1) == UNSPEC_PCREL)
12689 return true;
12690 if (GET_CODE (op0) != SYMBOL_REF)
12691 break;
12692 /* FALLTHRU */
12693
12694 case SYMBOL_REF:
12695 /* TLS references should always be enclosed in UNSPEC.
12696 	       A dllimported symbol always needs to be resolved.  */
12697 if (SYMBOL_REF_TLS_MODEL (op0)
12698 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
12699 return false;
12700
12701 if (TARGET_PECOFF)
12702 {
12703 if (is_imported_p (op0))
12704 return true;
12705
12706 if (SYMBOL_REF_FAR_ADDR_P (op0)
12707 || !SYMBOL_REF_LOCAL_P (op0))
12708 break;
12709
12710 	      /* Function symbols need to be resolved only for
12711 		 the large model.
12712 		 For the small model we don't need to resolve anything
12713 		 here.  */
12714 if ((ix86_cmodel != CM_LARGE_PIC
12715 && SYMBOL_REF_FUNCTION_P (op0))
12716 || ix86_cmodel == CM_SMALL_PIC)
12717 return true;
12718 /* Non-external symbols don't need to be resolved for
12719 		 the large and medium models.  */
12720 if ((ix86_cmodel == CM_LARGE_PIC
12721 || ix86_cmodel == CM_MEDIUM_PIC)
12722 && !SYMBOL_REF_EXTERNAL_P (op0))
12723 return true;
12724 }
12725 else if (!SYMBOL_REF_FAR_ADDR_P (op0)
12726 && SYMBOL_REF_LOCAL_P (op0)
12727 && ix86_cmodel != CM_LARGE_PIC)
12728 return true;
12729 break;
12730
12731 default:
12732 break;
12733 }
12734 }
12735 if (GET_CODE (disp) != CONST)
12736 return false;
12737 disp = XEXP (disp, 0);
12738
12739 if (TARGET_64BIT)
12740 {
12741       /* It is unsafe to allow PLUS expressions here.  This limits the allowed
12742 	 distance of GOT tables.  We should not need these anyway.  */
12743 if (GET_CODE (disp) != UNSPEC
12744 || (XINT (disp, 1) != UNSPEC_GOTPCREL
12745 && XINT (disp, 1) != UNSPEC_GOTOFF
12746 && XINT (disp, 1) != UNSPEC_PCREL
12747 && XINT (disp, 1) != UNSPEC_PLTOFF))
12748 return false;
12749
12750 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
12751 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
12752 return false;
12753 return true;
12754 }
12755
12756 saw_plus = false;
12757 if (GET_CODE (disp) == PLUS)
12758 {
12759 if (!CONST_INT_P (XEXP (disp, 1)))
12760 return false;
12761 disp = XEXP (disp, 0);
12762 saw_plus = true;
12763 }
12764
12765 if (TARGET_MACHO && darwin_local_data_pic (disp))
12766 return true;
12767
12768 if (GET_CODE (disp) != UNSPEC)
12769 return false;
12770
12771 switch (XINT (disp, 1))
12772 {
12773 case UNSPEC_GOT:
12774 if (saw_plus)
12775 return false;
12776 /* We need to check for both symbols and labels because VxWorks loads
12777 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
12778 details. */
12779 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
12780 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
12781 case UNSPEC_GOTOFF:
12782 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
12783 	 While the ABI also specifies a 32bit relocation, we don't produce it in
12784 	 the small PIC model at all.  */
12785 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
12786 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
12787 && !TARGET_64BIT)
12788 return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
12789 return false;
12790 case UNSPEC_GOTTPOFF:
12791 case UNSPEC_GOTNTPOFF:
12792 case UNSPEC_INDNTPOFF:
12793 if (saw_plus)
12794 return false;
12795 disp = XVECEXP (disp, 0, 0);
12796 return (GET_CODE (disp) == SYMBOL_REF
12797 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
12798 case UNSPEC_NTPOFF:
12799 disp = XVECEXP (disp, 0, 0);
12800 return (GET_CODE (disp) == SYMBOL_REF
12801 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
12802 case UNSPEC_DTPOFF:
12803 disp = XVECEXP (disp, 0, 0);
12804 return (GET_CODE (disp) == SYMBOL_REF
12805 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
12806 }
12807
12808 return false;
12809 }
12810
12811 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
12812 replace the input X, or the original X if no replacement is called for.
12813 The output parameter *WIN is 1 if the calling macro should goto WIN,
12814 0 if it should not. */
12815
12816 bool
12817 ix86_legitimize_reload_address (rtx x, enum machine_mode, int opnum, int type,
12818 int)
12819 {
12820 /* Reload can generate:
12821
12822 (plus:DI (plus:DI (unspec:DI [(const_int 0 [0])] UNSPEC_TP)
12823 (reg:DI 97))
12824 (reg:DI 2 cx))
12825
12826      This RTX is rejected by ix86_legitimate_address_p due to
12827      non-strictness of base register 97.  Following this rejection,
12828      reload pushes all three components into separate registers,
12829      creating an invalid memory address RTX.
12830
12831      The following code reloads only the invalid part of the
12832 memory address RTX. */
12833
12834 if (GET_CODE (x) == PLUS
12835 && REG_P (XEXP (x, 1))
12836 && GET_CODE (XEXP (x, 0)) == PLUS
12837 && REG_P (XEXP (XEXP (x, 0), 1)))
12838 {
12839 rtx base, index;
12840 bool something_reloaded = false;
12841
12842 base = XEXP (XEXP (x, 0), 1);
12843 if (!REG_OK_FOR_BASE_STRICT_P (base))
12844 {
12845 push_reload (base, NULL_RTX, &XEXP (XEXP (x, 0), 1), NULL,
12846 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
12847 opnum, (enum reload_type) type);
12848 something_reloaded = true;
12849 }
12850
12851 index = XEXP (x, 1);
12852 if (!REG_OK_FOR_INDEX_STRICT_P (index))
12853 {
12854 push_reload (index, NULL_RTX, &XEXP (x, 1), NULL,
12855 INDEX_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
12856 opnum, (enum reload_type) type);
12857 something_reloaded = true;
12858 }
12859
12860 gcc_assert (something_reloaded);
12861 return true;
12862 }
12863
12864 return false;
12865 }
12866
12867 /* Determine if OP is a suitable RTX for an address register.
12868    Return the naked register if a register or a register subreg is
12869 found, otherwise return NULL_RTX. */
12870
12871 static rtx
12872 ix86_validate_address_register (rtx op)
12873 {
12874 enum machine_mode mode = GET_MODE (op);
12875
12876 /* Only SImode or DImode registers can form the address. */
12877 if (mode != SImode && mode != DImode)
12878 return NULL_RTX;
12879
12880 if (REG_P (op))
12881 return op;
12882 else if (GET_CODE (op) == SUBREG)
12883 {
12884 rtx reg = SUBREG_REG (op);
12885
12886 if (!REG_P (reg))
12887 return NULL_RTX;
12888
12889 mode = GET_MODE (reg);
12890
12891 /* Don't allow SUBREGs that span more than a word. It can
12892 lead to spill failures when the register is one word out
12893 of a two word structure. */
12894 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
12895 return NULL_RTX;
12896
12897 /* Allow only SUBREGs of non-eliminable hard registers. */
12898 if (register_no_elim_operand (reg, mode))
12899 return reg;
12900 }
12901
12902 /* Op is not a register. */
12903 return NULL_RTX;
12904 }
12905
12906 /* Recognizes RTL expressions that are valid memory addresses for an
12907 instruction. The MODE argument is the machine mode for the MEM
12908 expression that wants to use this address.
12909
12910    It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS should
12911 convert common non-canonical forms to canonical form so that they will
12912 be recognized. */
12913
12914 static bool
12915 ix86_legitimate_address_p (enum machine_mode, rtx addr, bool strict)
12916 {
12917 struct ix86_address parts;
12918 rtx base, index, disp;
12919 HOST_WIDE_INT scale;
12920 enum ix86_address_seg seg;
12921
12922 if (ix86_decompose_address (addr, &parts) <= 0)
12923 /* Decomposition failed. */
12924 return false;
12925
12926 base = parts.base;
12927 index = parts.index;
12928 disp = parts.disp;
12929 scale = parts.scale;
12930 seg = parts.seg;
12931
12932 /* Validate base register. */
12933 if (base)
12934 {
12935 rtx reg = ix86_validate_address_register (base);
12936
12937 if (reg == NULL_RTX)
12938 return false;
12939
12940 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
12941 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
12942 /* Base is not valid. */
12943 return false;
12944 }
12945
12946 /* Validate index register. */
12947 if (index)
12948 {
12949 rtx reg = ix86_validate_address_register (index);
12950
12951 if (reg == NULL_RTX)
12952 return false;
12953
12954 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
12955 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
12956 /* Index is not valid. */
12957 return false;
12958 }
12959
12960 /* Index and base should have the same mode. */
12961 if (base && index
12962 && GET_MODE (base) != GET_MODE (index))
12963 return false;
12964
12965 /* Address override works only on the (%reg) part of %fs:(%reg). */
12966 if (seg != SEG_DEFAULT
12967 && ((base && GET_MODE (base) != word_mode)
12968 || (index && GET_MODE (index) != word_mode)))
12969 return false;
12970
12971 /* Validate scale factor. */
12972 if (scale != 1)
12973 {
12974 if (!index)
12975 /* Scale without index. */
12976 return false;
12977
12978 if (scale != 2 && scale != 4 && scale != 8)
12979 /* Scale is not a valid multiplier. */
12980 return false;
12981 }
12982
12983 /* Validate displacement. */
12984 if (disp)
12985 {
12986 if (GET_CODE (disp) == CONST
12987 && GET_CODE (XEXP (disp, 0)) == UNSPEC
12988 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
12989 switch (XINT (XEXP (disp, 0), 1))
12990 {
12991 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
12992 	       used.  While the ABI also specifies 32bit relocations, we don't produce
12993 	       them at all and use IP-relative addressing instead.  */
12994 case UNSPEC_GOT:
12995 case UNSPEC_GOTOFF:
12996 gcc_assert (flag_pic);
12997 if (!TARGET_64BIT)
12998 goto is_legitimate_pic;
12999
13000 /* 64bit address unspec. */
13001 return false;
13002
13003 case UNSPEC_GOTPCREL:
13004 case UNSPEC_PCREL:
13005 gcc_assert (flag_pic);
13006 goto is_legitimate_pic;
13007
13008 case UNSPEC_GOTTPOFF:
13009 case UNSPEC_GOTNTPOFF:
13010 case UNSPEC_INDNTPOFF:
13011 case UNSPEC_NTPOFF:
13012 case UNSPEC_DTPOFF:
13013 break;
13014
13015 case UNSPEC_STACK_CHECK:
13016 gcc_assert (flag_split_stack);
13017 break;
13018
13019 default:
13020 /* Invalid address unspec. */
13021 return false;
13022 }
13023
13024 else if (SYMBOLIC_CONST (disp)
13025 && (flag_pic
13026 || (TARGET_MACHO
13027 #if TARGET_MACHO
13028 && MACHOPIC_INDIRECT
13029 && !machopic_operand_p (disp)
13030 #endif
13031 )))
13032 {
13033
13034 is_legitimate_pic:
13035 if (TARGET_64BIT && (index || base))
13036 {
13037 /* foo@dtpoff(%rX) is ok. */
13038 if (GET_CODE (disp) != CONST
13039 || GET_CODE (XEXP (disp, 0)) != PLUS
13040 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
13041 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
13042 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
13043 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
13044 /* Non-constant pic memory reference. */
13045 return false;
13046 }
13047 else if ((!TARGET_MACHO || flag_pic)
13048 && ! legitimate_pic_address_disp_p (disp))
13049 /* Displacement is an invalid pic construct. */
13050 return false;
13051 #if TARGET_MACHO
13052 else if (MACHO_DYNAMIC_NO_PIC_P
13053 && !ix86_legitimate_constant_p (Pmode, disp))
13054 	    /* The displacement must be referenced via a non_lazy_pointer.  */
13055 return false;
13056 #endif
13057
13058 /* This code used to verify that a symbolic pic displacement
13059 includes the pic_offset_table_rtx register.
13060
13061 		While this is a good idea, unfortunately these constructs may
13062 be created by "adds using lea" optimization for incorrect
13063 code like:
13064
13065 int a;
13066 int foo(int i)
13067 {
13068 return *(&a+i);
13069 }
13070
13071 		This code is nonsensical, but results in addressing the
13072 		GOT table with pic_offset_table_rtx as the base.  We can't
13073 		just refuse it easily, since it gets matched by the
13074 		"addsi3" pattern, which later gets split to lea when the
13075 		output register differs from the input.  While this
13076 		could be handled by a separate addsi pattern for this case
13077 		that never results in lea, disabling this test seems to be
13078 		the easier and correct fix for the crash.  */
13079 }
13080 else if (GET_CODE (disp) != LABEL_REF
13081 && !CONST_INT_P (disp)
13082 && (GET_CODE (disp) != CONST
13083 || !ix86_legitimate_constant_p (Pmode, disp))
13084 && (GET_CODE (disp) != SYMBOL_REF
13085 || !ix86_legitimate_constant_p (Pmode, disp)))
13086 /* Displacement is not constant. */
13087 return false;
13088 else if (TARGET_64BIT
13089 && !x86_64_immediate_operand (disp, VOIDmode))
13090 /* Displacement is out of range. */
13091 return false;
13092 /* In x32 mode, constant addresses are sign extended to 64bit, so
13093 we have to prevent addresses from 0x80000000 to 0xffffffff. */
13094 else if (TARGET_X32 && !(index || base)
13095 && CONST_INT_P (disp)
13096 && val_signbit_known_set_p (SImode, INTVAL (disp)))
13097 return false;
13098 }
13099
13100 /* Everything looks valid. */
13101 return true;
13102 }
13103
13104 /* Determine if a given RTX is a valid constant address. */
13105
13106 bool
13107 constant_address_p (rtx x)
13108 {
13109 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
13110 }
13111 \f
13112 /* Return a unique alias set for the GOT. */
13113
13114 static alias_set_type
13115 ix86_GOT_alias_set (void)
13116 {
13117 static alias_set_type set = -1;
13118 if (set == -1)
13119 set = new_alias_set ();
13120 return set;
13121 }
13122
13123 /* Return a legitimate reference for ORIG (an address) using the
13124 register REG. If REG is 0, a new pseudo is generated.
13125
13126 There are two types of references that must be handled:
13127
13128 1. Global data references must load the address from the GOT, via
13129 the PIC reg. An insn is emitted to do this load, and the reg is
13130 returned.
13131
13132 2. Static data references, constant pool addresses, and code labels
13133 compute the address as an offset from the GOT, whose base is in
13134 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
13135 differentiate them from global data objects. The returned
13136 address is the PIC reg + an unspec constant.
13137
13138 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
13139 reg also appears in the address. */
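
/* As an example of case 2, with -fPIC on 32-bit ELF a reference to a local
   symbol FOO typically legitimizes to

	(plus:SI (reg:SI pic_offset_table)
		 (const:SI (unspec:SI [(symbol_ref:SI FOO)] UNSPEC_GOTOFF)))

   while a global symbol instead goes through a (mem ...) load of its GOT
   slot, as described in case 1 above.  */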
13140
13141 static rtx
13142 legitimize_pic_address (rtx orig, rtx reg)
13143 {
13144 rtx addr = orig;
13145 rtx new_rtx = orig;
13146
13147 #if TARGET_MACHO
13148 if (TARGET_MACHO && !TARGET_64BIT)
13149 {
13150 if (reg == 0)
13151 reg = gen_reg_rtx (Pmode);
13152 /* Use the generic Mach-O PIC machinery. */
13153 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
13154 }
13155 #endif
13156
13157 if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13158 {
13159 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13160 if (tmp)
13161 return tmp;
13162 }
13163
13164 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
13165 new_rtx = addr;
13166 else if (TARGET_64BIT && !TARGET_PECOFF
13167 && ix86_cmodel != CM_SMALL_PIC && gotoff_operand (addr, Pmode))
13168 {
13169 rtx tmpreg;
13170 /* This symbol may be referenced via a displacement from the PIC
13171 base address (@GOTOFF). */
13172
13173 if (reload_in_progress)
13174 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13175 if (GET_CODE (addr) == CONST)
13176 addr = XEXP (addr, 0);
13177 if (GET_CODE (addr) == PLUS)
13178 {
13179 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13180 UNSPEC_GOTOFF);
13181 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13182 }
13183 else
13184 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13185 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13186 if (!reg)
13187 tmpreg = gen_reg_rtx (Pmode);
13188 else
13189 tmpreg = reg;
13190 emit_move_insn (tmpreg, new_rtx);
13191
13192 if (reg != 0)
13193 {
13194 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
13195 tmpreg, 1, OPTAB_DIRECT);
13196 new_rtx = reg;
13197 }
13198 else
13199 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
13200 }
13201 else if (!TARGET_64BIT && !TARGET_PECOFF && gotoff_operand (addr, Pmode))
13202 {
13203 /* This symbol may be referenced via a displacement from the PIC
13204 base address (@GOTOFF). */
13205
13206 if (reload_in_progress)
13207 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13208 if (GET_CODE (addr) == CONST)
13209 addr = XEXP (addr, 0);
13210 if (GET_CODE (addr) == PLUS)
13211 {
13212 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13213 UNSPEC_GOTOFF);
13214 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13215 }
13216 else
13217 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13218 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13219 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13220
13221 if (reg != 0)
13222 {
13223 emit_move_insn (reg, new_rtx);
13224 new_rtx = reg;
13225 }
13226 }
13227 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
13228 /* We can't use @GOTOFF for text labels on VxWorks;
13229 see gotoff_operand. */
13230 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
13231 {
13232 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13233 if (tmp)
13234 return tmp;
13235
13236       /* For x64 PE-COFF there is no GOT table, so we use the address
13237 	 directly.  */
13238 if (TARGET_64BIT && TARGET_PECOFF)
13239 {
13240 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
13241 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13242
13243 if (reg == 0)
13244 reg = gen_reg_rtx (Pmode);
13245 emit_move_insn (reg, new_rtx);
13246 new_rtx = reg;
13247 }
13248 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
13249 {
13250 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
13251 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13252 new_rtx = gen_const_mem (Pmode, new_rtx);
13253 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13254
13255 if (reg == 0)
13256 reg = gen_reg_rtx (Pmode);
13257 	  /* Use gen_movsi directly, otherwise the address is loaded
13258 	     into a register for CSE.  We don't want to CSE these addresses;
13259 	     instead we CSE addresses from the GOT table, so skip this.  */
13260 emit_insn (gen_movsi (reg, new_rtx));
13261 new_rtx = reg;
13262 }
13263 else
13264 {
13265 /* This symbol must be referenced via a load from the
13266 Global Offset Table (@GOT). */
13267
13268 if (reload_in_progress)
13269 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13270 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
13271 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13272 if (TARGET_64BIT)
13273 new_rtx = force_reg (Pmode, new_rtx);
13274 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13275 new_rtx = gen_const_mem (Pmode, new_rtx);
13276 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13277
13278 if (reg == 0)
13279 reg = gen_reg_rtx (Pmode);
13280 emit_move_insn (reg, new_rtx);
13281 new_rtx = reg;
13282 }
13283 }
13284 else
13285 {
13286 if (CONST_INT_P (addr)
13287 && !x86_64_immediate_operand (addr, VOIDmode))
13288 {
13289 if (reg)
13290 {
13291 emit_move_insn (reg, addr);
13292 new_rtx = reg;
13293 }
13294 else
13295 new_rtx = force_reg (Pmode, addr);
13296 }
13297 else if (GET_CODE (addr) == CONST)
13298 {
13299 addr = XEXP (addr, 0);
13300
13301 /* We must match stuff we generate before. Assume the only
13302 unspecs that can get here are ours. Not that we could do
13303 anything with them anyway.... */
13304 if (GET_CODE (addr) == UNSPEC
13305 || (GET_CODE (addr) == PLUS
13306 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
13307 return orig;
13308 gcc_assert (GET_CODE (addr) == PLUS);
13309 }
13310 if (GET_CODE (addr) == PLUS)
13311 {
13312 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
13313
13314 /* Check first to see if this is a constant offset from a @GOTOFF
13315 symbol reference. */
13316 if (!TARGET_PECOFF && gotoff_operand (op0, Pmode)
13317 && CONST_INT_P (op1))
13318 {
13319 if (!TARGET_64BIT)
13320 {
13321 if (reload_in_progress)
13322 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13323 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
13324 UNSPEC_GOTOFF);
13325 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
13326 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13327 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13328
13329 if (reg != 0)
13330 {
13331 emit_move_insn (reg, new_rtx);
13332 new_rtx = reg;
13333 }
13334 }
13335 else
13336 {
13337 if (INTVAL (op1) < -16*1024*1024
13338 || INTVAL (op1) >= 16*1024*1024)
13339 {
13340 if (!x86_64_immediate_operand (op1, Pmode))
13341 op1 = force_reg (Pmode, op1);
13342 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
13343 }
13344 }
13345 }
13346 else
13347 {
13348 rtx base = legitimize_pic_address (op0, reg);
13349 enum machine_mode mode = GET_MODE (base);
13350 new_rtx
13351 = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);
13352
13353 if (CONST_INT_P (new_rtx))
13354 {
13355 if (INTVAL (new_rtx) < -16*1024*1024
13356 || INTVAL (new_rtx) >= 16*1024*1024)
13357 {
13358 if (!x86_64_immediate_operand (new_rtx, mode))
13359 new_rtx = force_reg (mode, new_rtx);
13360 new_rtx
13361 = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
13362 }
13363 else
13364 new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
13365 }
13366 else
13367 {
13368 if (GET_CODE (new_rtx) == PLUS
13369 && CONSTANT_P (XEXP (new_rtx, 1)))
13370 {
13371 base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
13372 new_rtx = XEXP (new_rtx, 1);
13373 }
13374 new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
13375 }
13376 }
13377 }
13378 }
13379 return new_rtx;
13380 }
13381 \f
13382 /* Load the thread pointer. If TO_REG is true, force it into a register. */
13383
13384 static rtx
13385 get_thread_pointer (enum machine_mode tp_mode, bool to_reg)
13386 {
13387 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
13388
13389 if (GET_MODE (tp) != tp_mode)
13390 {
13391 gcc_assert (GET_MODE (tp) == SImode);
13392 gcc_assert (tp_mode == DImode);
13393
13394 tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
13395 }
13396
13397 if (to_reg)
13398 tp = copy_to_mode_reg (tp_mode, tp);
13399
13400 return tp;
13401 }
13402
13403 /* Construct the SYMBOL_REF for the tls_get_addr function. */
13404
13405 static GTY(()) rtx ix86_tls_symbol;
13406
13407 static rtx
13408 ix86_tls_get_addr (void)
13409 {
13410 if (!ix86_tls_symbol)
13411 {
13412 const char *sym
13413 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
13414 ? "___tls_get_addr" : "__tls_get_addr");
13415
13416 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
13417 }
13418
13419 if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
13420 {
13421 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
13422 UNSPEC_PLTOFF);
13423 return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
13424 gen_rtx_CONST (Pmode, unspec));
13425 }
13426
13427 return ix86_tls_symbol;
13428 }
13429
13430 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
13431
13432 static GTY(()) rtx ix86_tls_module_base_symbol;
13433
13434 rtx
13435 ix86_tls_module_base (void)
13436 {
13437 if (!ix86_tls_module_base_symbol)
13438 {
13439 ix86_tls_module_base_symbol
13440 = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
13441
13442 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
13443 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
13444 }
13445
13446 return ix86_tls_module_base_symbol;
13447 }
13448
13449 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
13450 false if we expect this to be used for a memory address and true if
13451 we expect to load the address into a register. */
13452
13453 static rtx
13454 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
13455 {
13456 rtx dest, base, off;
13457 rtx pic = NULL_RTX, tp = NULL_RTX;
13458 enum machine_mode tp_mode = Pmode;
13459 int type;
13460
13461   /* Fall back to the global dynamic model if the toolchain cannot support local
13462 dynamic. */
13463 if (TARGET_SUN_TLS && !TARGET_64BIT
13464 && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
13465 && model == TLS_MODEL_LOCAL_DYNAMIC)
13466 model = TLS_MODEL_GLOBAL_DYNAMIC;
13467
13468 switch (model)
13469 {
13470 case TLS_MODEL_GLOBAL_DYNAMIC:
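      /* Global dynamic: obtain the address of X's TLS block by calling the
	 tls_get_addr runtime routine (or, with GNU2 TLS, through the
	 descriptor-based sequence below); in 64-bit mode the libcall result
	 is returned in %rax and copied into DEST.  */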
13471 dest = gen_reg_rtx (Pmode);
13472
13473 if (!TARGET_64BIT)
13474 {
13475 if (flag_pic && !TARGET_PECOFF)
13476 pic = pic_offset_table_rtx;
13477 else
13478 {
13479 pic = gen_reg_rtx (Pmode);
13480 emit_insn (gen_set_got (pic));
13481 }
13482 }
13483
13484 if (TARGET_GNU2_TLS)
13485 {
13486 if (TARGET_64BIT)
13487 emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
13488 else
13489 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
13490
13491 tp = get_thread_pointer (Pmode, true);
13492 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
13493
13494 if (GET_MODE (x) != Pmode)
13495 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13496
13497 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
13498 }
13499 else
13500 {
13501 rtx caddr = ix86_tls_get_addr ();
13502
13503 if (TARGET_64BIT)
13504 {
13505 rtx rax = gen_rtx_REG (Pmode, AX_REG);
13506 rtx_insn *insns;
13507
13508 start_sequence ();
13509 emit_call_insn
13510 (ix86_gen_tls_global_dynamic_64 (rax, x, caddr));
13511 insns = get_insns ();
13512 end_sequence ();
13513
13514 if (GET_MODE (x) != Pmode)
13515 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13516
13517 RTL_CONST_CALL_P (insns) = 1;
13518 emit_libcall_block (insns, dest, rax, x);
13519 }
13520 else
13521 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
13522 }
13523 break;
13524
13525 case TLS_MODEL_LOCAL_DYNAMIC:
13526 base = gen_reg_rtx (Pmode);
13527
13528 if (!TARGET_64BIT)
13529 {
13530 if (flag_pic)
13531 pic = pic_offset_table_rtx;
13532 else
13533 {
13534 pic = gen_reg_rtx (Pmode);
13535 emit_insn (gen_set_got (pic));
13536 }
13537 }
13538
13539 if (TARGET_GNU2_TLS)
13540 {
13541 rtx tmp = ix86_tls_module_base ();
13542
13543 if (TARGET_64BIT)
13544 emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
13545 else
13546 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
13547
13548 tp = get_thread_pointer (Pmode, true);
13549 set_unique_reg_note (get_last_insn (), REG_EQUAL,
13550 gen_rtx_MINUS (Pmode, tmp, tp));
13551 }
13552 else
13553 {
13554 rtx caddr = ix86_tls_get_addr ();
13555
13556 if (TARGET_64BIT)
13557 {
13558 rtx rax = gen_rtx_REG (Pmode, AX_REG);
13559 rtx_insn *insns;
13560 rtx eqv;
13561
13562 start_sequence ();
13563 emit_call_insn
13564 (ix86_gen_tls_local_dynamic_base_64 (rax, caddr));
13565 insns = get_insns ();
13566 end_sequence ();
13567
13568 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
13569 share the LD_BASE result with other LD model accesses. */
13570 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
13571 UNSPEC_TLS_LD_BASE);
13572
13573 RTL_CONST_CALL_P (insns) = 1;
13574 emit_libcall_block (insns, base, rax, eqv);
13575 }
13576 else
13577 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
13578 }
13579
13580 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
13581 off = gen_rtx_CONST (Pmode, off);
13582
13583 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
13584
13585 if (TARGET_GNU2_TLS)
13586 {
13587 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
13588
13589 if (GET_MODE (x) != Pmode)
13590 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13591
13592 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
13593 }
13594 break;
13595
13596 case TLS_MODEL_INITIAL_EXEC:
13597 if (TARGET_64BIT)
13598 {
13599 if (TARGET_SUN_TLS && !TARGET_X32)
13600 {
13601 /* The Sun linker took the AMD64 TLS spec literally
13602 and can only handle %rax as destination of the
13603 initial executable code sequence. */
13604
13605 dest = gen_reg_rtx (DImode);
13606 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
13607 return dest;
13608 }
13609
13610 /* Generate DImode references to avoid %fs:(%reg32)
13611 problems and the linker IE->LE relaxation bug. */
13612 tp_mode = DImode;
13613 pic = NULL;
13614 type = UNSPEC_GOTNTPOFF;
13615 }
13616 else if (flag_pic)
13617 {
13618 if (reload_in_progress)
13619 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13620 pic = pic_offset_table_rtx;
13621 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
13622 }
13623 else if (!TARGET_ANY_GNU_TLS)
13624 {
13625 pic = gen_reg_rtx (Pmode);
13626 emit_insn (gen_set_got (pic));
13627 type = UNSPEC_GOTTPOFF;
13628 }
13629 else
13630 {
13631 pic = NULL;
13632 type = UNSPEC_INDNTPOFF;
13633 }
13634
13635 off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
13636 off = gen_rtx_CONST (tp_mode, off);
13637 if (pic)
13638 off = gen_rtx_PLUS (tp_mode, pic, off);
13639 off = gen_const_mem (tp_mode, off);
13640 set_mem_alias_set (off, ix86_GOT_alias_set ());
13641
13642 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
13643 {
13644 base = get_thread_pointer (tp_mode,
13645 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
13646 off = force_reg (tp_mode, off);
13647 return gen_rtx_PLUS (tp_mode, base, off);
13648 }
13649 else
13650 {
13651 base = get_thread_pointer (Pmode, true);
13652 dest = gen_reg_rtx (Pmode);
13653 emit_insn (ix86_gen_sub3 (dest, base, off));
13654 }
13655 break;
13656
13657 case TLS_MODEL_LOCAL_EXEC:
13658 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
13659 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
13660 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
13661 off = gen_rtx_CONST (Pmode, off);
13662
13663 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
13664 {
13665 base = get_thread_pointer (Pmode,
13666 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
13667 return gen_rtx_PLUS (Pmode, base, off);
13668 }
13669 else
13670 {
13671 base = get_thread_pointer (Pmode, true);
13672 dest = gen_reg_rtx (Pmode);
13673 emit_insn (ix86_gen_sub3 (dest, base, off));
13674 }
13675 break;
13676
13677 default:
13678 gcc_unreachable ();
13679 }
13680
13681 return dest;
13682 }
13683
13684 /* Create or return the unique __imp_DECL dllimport symbol corresponding
13685 to symbol DECL if BEIMPORT is true. Otherwise create or return the
13686 unique refptr-DECL symbol corresponding to symbol DECL. */
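/* For instance, a reference to a dllimported "foo" is redirected through
   the pointer variable "__imp__foo" (or "__imp_foo" when no user label
   prefix is in use).  */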
13687
13688 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
13689 htab_t dllimport_map;
13690
13691 static tree
13692 get_dllimport_decl (tree decl, bool beimport)
13693 {
13694 struct tree_map *h, in;
13695 void **loc;
13696 const char *name;
13697 const char *prefix;
13698 size_t namelen, prefixlen;
13699 char *imp_name;
13700 tree to;
13701 rtx rtl;
13702
13703 if (!dllimport_map)
13704 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
13705
13706 in.hash = htab_hash_pointer (decl);
13707 in.base.from = decl;
13708 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
13709 h = (struct tree_map *) *loc;
13710 if (h)
13711 return h->to;
13712
13713 *loc = h = ggc_alloc<tree_map> ();
13714 h->hash = in.hash;
13715 h->base.from = decl;
13716 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
13717 VAR_DECL, NULL, ptr_type_node);
13718 DECL_ARTIFICIAL (to) = 1;
13719 DECL_IGNORED_P (to) = 1;
13720 DECL_EXTERNAL (to) = 1;
13721 TREE_READONLY (to) = 1;
13722
13723 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
13724 name = targetm.strip_name_encoding (name);
13725 if (beimport)
13726 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
13727 ? "*__imp_" : "*__imp__";
13728 else
13729 prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr.";
13730 namelen = strlen (name);
13731 prefixlen = strlen (prefix);
13732 imp_name = (char *) alloca (namelen + prefixlen + 1);
13733 memcpy (imp_name, prefix, prefixlen);
13734 memcpy (imp_name + prefixlen, name, namelen + 1);
13735
13736 name = ggc_alloc_string (imp_name, namelen + prefixlen);
13737 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
13738 SET_SYMBOL_REF_DECL (rtl, to);
13739 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR;
13740 if (!beimport)
13741 {
13742 SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL;
13743 #ifdef SUB_TARGET_RECORD_STUB
13744 SUB_TARGET_RECORD_STUB (name);
13745 #endif
13746 }
13747
13748 rtl = gen_const_mem (Pmode, rtl);
13749 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
13750
13751 SET_DECL_RTL (to, rtl);
13752 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
13753
13754 return to;
13755 }
13756
13757 /* Expand SYMBOL into its corresponding far-address symbol.
13758 WANT_REG is true if we require the result be a register. */
13759
13760 static rtx
13761 legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg)
13762 {
13763 tree imp_decl;
13764 rtx x;
13765
13766 gcc_assert (SYMBOL_REF_DECL (symbol));
13767 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false);
13768
13769 x = DECL_RTL (imp_decl);
13770 if (want_reg)
13771 x = force_reg (Pmode, x);
13772 return x;
13773 }
13774
13775 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
13776 true if we require the result be a register. */
13777
13778 static rtx
13779 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
13780 {
13781 tree imp_decl;
13782 rtx x;
13783
13784 gcc_assert (SYMBOL_REF_DECL (symbol));
13785 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true);
13786
13787 x = DECL_RTL (imp_decl);
13788 if (want_reg)
13789 x = force_reg (Pmode, x);
13790 return x;
13791 }
13792
13793 /* Expand SYMBOL into its corresponding dllimport or refptr symbol. INREG
13794 is true if we require the result be a register. */
13795
13796 static rtx
13797 legitimize_pe_coff_symbol (rtx addr, bool inreg)
13798 {
13799 if (!TARGET_PECOFF)
13800 return NULL_RTX;
13801
13802 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13803 {
13804 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
13805 return legitimize_dllimport_symbol (addr, inreg);
13806 if (GET_CODE (addr) == CONST
13807 && GET_CODE (XEXP (addr, 0)) == PLUS
13808 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
13809 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
13810 {
13811 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), inreg);
13812 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
13813 }
13814 }
13815
13816 if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC)
13817 return NULL_RTX;
13818 if (GET_CODE (addr) == SYMBOL_REF
13819 && !is_imported_p (addr)
13820 && SYMBOL_REF_EXTERNAL_P (addr)
13821 && SYMBOL_REF_DECL (addr))
13822 return legitimize_pe_coff_extern_decl (addr, inreg);
13823
13824 if (GET_CODE (addr) == CONST
13825 && GET_CODE (XEXP (addr, 0)) == PLUS
13826 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
13827 && !is_imported_p (XEXP (XEXP (addr, 0), 0))
13828 && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0))
13829 && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0)))
13830 {
13831 rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), inreg);
13832 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
13833 }
13834 return NULL_RTX;
13835 }
13836
13837 /* Try machine-dependent ways of modifying an illegitimate address
13838 to be legitimate. If we find one, return the new, valid address.
13839 This macro is used in only one place: `memory_address' in explow.c.
13840
13841 OLDX is the address as it was before break_out_memory_refs was called.
13842 In some cases it is useful to look at this to decide what needs to be done.
13843
13844 It is always safe for this macro to do nothing. It exists to recognize
13845 opportunities to optimize the output.
13846
13847 For the 80386, we handle X+REG by loading X into a register R and
13848 using R+REG. R will go in a general reg and indexing will be used.
13849 However, if REG is a broken-out memory address or multiplication,
13850 nothing needs to be done because REG can certainly go in a general reg.
13851
13852 When -fpic is used, special handling is needed for symbolic references.
13853 See comments by legitimize_pic_address in i386.c for details. */
13854
13855 static rtx
13856 ix86_legitimize_address (rtx x, rtx, enum machine_mode mode)
13857 {
13858 int changed = 0;
13859 unsigned log;
13860
13861 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
13862 if (log)
13863 return legitimize_tls_address (x, (enum tls_model) log, false);
13864 if (GET_CODE (x) == CONST
13865 && GET_CODE (XEXP (x, 0)) == PLUS
13866 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
13867 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
13868 {
13869 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
13870 (enum tls_model) log, false);
13871 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
13872 }
13873
13874 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13875 {
13876 rtx tmp = legitimize_pe_coff_symbol (x, true);
13877 if (tmp)
13878 return tmp;
13879 }
13880
13881 if (flag_pic && SYMBOLIC_CONST (x))
13882 return legitimize_pic_address (x, 0);
13883
13884 #if TARGET_MACHO
13885 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
13886 return machopic_indirect_data_reference (x, 0);
13887 #endif
13888
13889 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
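  /* For example, (ashift (reg) (const_int 3)) becomes
     (mult (reg) (const_int 8)), which matches the scaled-index forms
     accepted by ix86_legitimate_address_p.  */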
13890 if (GET_CODE (x) == ASHIFT
13891 && CONST_INT_P (XEXP (x, 1))
13892 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
13893 {
13894 changed = 1;
13895 log = INTVAL (XEXP (x, 1));
13896 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
13897 GEN_INT (1 << log));
13898 }
13899
13900 if (GET_CODE (x) == PLUS)
13901 {
13902 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
13903
13904 if (GET_CODE (XEXP (x, 0)) == ASHIFT
13905 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
13906 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
13907 {
13908 changed = 1;
13909 log = INTVAL (XEXP (XEXP (x, 0), 1));
13910 XEXP (x, 0) = gen_rtx_MULT (Pmode,
13911 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
13912 GEN_INT (1 << log));
13913 }
13914
13915 if (GET_CODE (XEXP (x, 1)) == ASHIFT
13916 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
13917 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
13918 {
13919 changed = 1;
13920 log = INTVAL (XEXP (XEXP (x, 1), 1));
13921 XEXP (x, 1) = gen_rtx_MULT (Pmode,
13922 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
13923 GEN_INT (1 << log));
13924 }
13925
13926 /* Put multiply first if it isn't already. */
13927 if (GET_CODE (XEXP (x, 1)) == MULT)
13928 {
13929 rtx tmp = XEXP (x, 0);
13930 XEXP (x, 0) = XEXP (x, 1);
13931 XEXP (x, 1) = tmp;
13932 changed = 1;
13933 }
13934
13935 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
13936 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
13937 created by virtual register instantiation, register elimination, and
13938 similar optimizations. */
13939 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
13940 {
13941 changed = 1;
13942 x = gen_rtx_PLUS (Pmode,
13943 gen_rtx_PLUS (Pmode, XEXP (x, 0),
13944 XEXP (XEXP (x, 1), 0)),
13945 XEXP (XEXP (x, 1), 1));
13946 }
13947
13948 /* Canonicalize
13949 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
13950 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
13951 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
13952 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
13953 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
13954 && CONSTANT_P (XEXP (x, 1)))
13955 {
13956 rtx constant;
13957 rtx other = NULL_RTX;
13958
13959 if (CONST_INT_P (XEXP (x, 1)))
13960 {
13961 constant = XEXP (x, 1);
13962 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
13963 }
13964 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
13965 {
13966 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
13967 other = XEXP (x, 1);
13968 }
13969 else
13970 constant = 0;
13971
13972 if (constant)
13973 {
13974 changed = 1;
13975 x = gen_rtx_PLUS (Pmode,
13976 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
13977 XEXP (XEXP (XEXP (x, 0), 1), 0)),
13978 plus_constant (Pmode, other,
13979 INTVAL (constant)));
13980 }
13981 }
13982
13983 if (changed && ix86_legitimate_address_p (mode, x, false))
13984 return x;
13985
13986 if (GET_CODE (XEXP (x, 0)) == MULT)
13987 {
13988 changed = 1;
13989 XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
13990 }
13991
13992 if (GET_CODE (XEXP (x, 1)) == MULT)
13993 {
13994 changed = 1;
13995 XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
13996 }
13997
13998 if (changed
13999 && REG_P (XEXP (x, 1))
14000 && REG_P (XEXP (x, 0)))
14001 return x;
14002
14003 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
14004 {
14005 changed = 1;
14006 x = legitimize_pic_address (x, 0);
14007 }
14008
14009 if (changed && ix86_legitimate_address_p (mode, x, false))
14010 return x;
14011
14012 if (REG_P (XEXP (x, 0)))
14013 {
14014 rtx temp = gen_reg_rtx (Pmode);
14015 rtx val = force_operand (XEXP (x, 1), temp);
14016 if (val != temp)
14017 {
14018 val = convert_to_mode (Pmode, val, 1);
14019 emit_move_insn (temp, val);
14020 }
14021
14022 XEXP (x, 1) = temp;
14023 return x;
14024 }
14025
14026 else if (REG_P (XEXP (x, 1)))
14027 {
14028 rtx temp = gen_reg_rtx (Pmode);
14029 rtx val = force_operand (XEXP (x, 0), temp);
14030 if (val != temp)
14031 {
14032 val = convert_to_mode (Pmode, val, 1);
14033 emit_move_insn (temp, val);
14034 }
14035
14036 XEXP (x, 0) = temp;
14037 return x;
14038 }
14039 }
14040
14041 return x;
14042 }
14043 \f
14044 /* Print an integer constant expression in assembler syntax. Addition
14045 and subtraction are the only arithmetic that may appear in these
14046 expressions. FILE is the stdio stream to write to, X is the rtx, and
14047 CODE is the operand print code from the output string. */
14048
14049 static void
14050 output_pic_addr_const (FILE *file, rtx x, int code)
14051 {
14052 char buf[256];
14053
14054 switch (GET_CODE (x))
14055 {
14056 case PC:
14057 gcc_assert (flag_pic);
14058 putc ('.', file);
14059 break;
14060
14061 case SYMBOL_REF:
14062 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
14063 output_addr_const (file, x);
14064 else
14065 {
14066 const char *name = XSTR (x, 0);
14067
14068 /* Mark the decl as referenced so that cgraph will
14069 output the function. */
14070 if (SYMBOL_REF_DECL (x))
14071 mark_decl_referenced (SYMBOL_REF_DECL (x));
14072
14073 #if TARGET_MACHO
14074 if (MACHOPIC_INDIRECT
14075 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
14076 name = machopic_indirection_name (x, /*stub_p=*/true);
14077 #endif
14078 assemble_name (file, name);
14079 }
14080 if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
14081 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
14082 fputs ("@PLT", file);
14083 break;
14084
14085 case LABEL_REF:
14086 x = XEXP (x, 0);
14087 /* FALLTHRU */
14088 case CODE_LABEL:
14089 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
14090 assemble_name (asm_out_file, buf);
14091 break;
14092
14093 case CONST_INT:
14094 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14095 break;
14096
14097 case CONST:
14098 /* This used to output parentheses around the expression,
14099 but that does not work on the 386 (either ATT or BSD assembler). */
14100 output_pic_addr_const (file, XEXP (x, 0), code);
14101 break;
14102
14103 case CONST_DOUBLE:
14104 if (GET_MODE (x) == VOIDmode)
14105 {
14106 /* We can use %d if the number is <32 bits and positive. */
14107 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
14108 fprintf (file, "0x%lx%08lx",
14109 (unsigned long) CONST_DOUBLE_HIGH (x),
14110 (unsigned long) CONST_DOUBLE_LOW (x));
14111 else
14112 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
14113 }
14114 else
14115 /* We can't handle floating point constants;
14116 TARGET_PRINT_OPERAND must handle them. */
14117 output_operand_lossage ("floating constant misused");
14118 break;
14119
14120 case PLUS:
14121 /* Some assemblers need integer constants to appear first. */
14122 if (CONST_INT_P (XEXP (x, 0)))
14123 {
14124 output_pic_addr_const (file, XEXP (x, 0), code);
14125 putc ('+', file);
14126 output_pic_addr_const (file, XEXP (x, 1), code);
14127 }
14128 else
14129 {
14130 gcc_assert (CONST_INT_P (XEXP (x, 1)));
14131 output_pic_addr_const (file, XEXP (x, 1), code);
14132 putc ('+', file);
14133 output_pic_addr_const (file, XEXP (x, 0), code);
14134 }
14135 break;
14136
14137 case MINUS:
14138 if (!TARGET_MACHO)
14139 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
14140 output_pic_addr_const (file, XEXP (x, 0), code);
14141 putc ('-', file);
14142 output_pic_addr_const (file, XEXP (x, 1), code);
14143 if (!TARGET_MACHO)
14144 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
14145 break;
14146
14147 case UNSPEC:
14148 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
14149 {
14150 bool f = i386_asm_output_addr_const_extra (file, x);
14151 gcc_assert (f);
14152 break;
14153 }
14154
14155 gcc_assert (XVECLEN (x, 0) == 1);
14156 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
14157 switch (XINT (x, 1))
14158 {
14159 case UNSPEC_GOT:
14160 fputs ("@GOT", file);
14161 break;
14162 case UNSPEC_GOTOFF:
14163 fputs ("@GOTOFF", file);
14164 break;
14165 case UNSPEC_PLTOFF:
14166 fputs ("@PLTOFF", file);
14167 break;
14168 case UNSPEC_PCREL:
14169 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14170 "(%rip)" : "[rip]", file);
14171 break;
14172 case UNSPEC_GOTPCREL:
14173 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14174 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
14175 break;
14176 case UNSPEC_GOTTPOFF:
14177 /* FIXME: This might be @TPOFF in Sun ld too. */
14178 fputs ("@gottpoff", file);
14179 break;
14180 case UNSPEC_TPOFF:
14181 fputs ("@tpoff", file);
14182 break;
14183 case UNSPEC_NTPOFF:
14184 if (TARGET_64BIT)
14185 fputs ("@tpoff", file);
14186 else
14187 fputs ("@ntpoff", file);
14188 break;
14189 case UNSPEC_DTPOFF:
14190 fputs ("@dtpoff", file);
14191 break;
14192 case UNSPEC_GOTNTPOFF:
14193 if (TARGET_64BIT)
14194 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14195 "@gottpoff(%rip)": "@gottpoff[rip]", file);
14196 else
14197 fputs ("@gotntpoff", file);
14198 break;
14199 case UNSPEC_INDNTPOFF:
14200 fputs ("@indntpoff", file);
14201 break;
14202 #if TARGET_MACHO
14203 case UNSPEC_MACHOPIC_OFFSET:
14204 putc ('-', file);
14205 machopic_output_function_base_name (file);
14206 break;
14207 #endif
14208 default:
14209 output_operand_lossage ("invalid UNSPEC as operand");
14210 break;
14211 }
14212 break;
14213
14214 default:
14215 output_operand_lossage ("invalid expression as operand");
14216 }
14217 }
14218
14219 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
14220 We need to emit DTP-relative relocations. */
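/* E.g. a 4-byte entry comes out as ".long foo@dtpoff"; the 8-byte case
   appends a zero upper half, ".long foo@dtpoff, 0".  */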
14221
14222 static void ATTRIBUTE_UNUSED
14223 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
14224 {
14225 fputs (ASM_LONG, file);
14226 output_addr_const (file, x);
14227 fputs ("@dtpoff", file);
14228 switch (size)
14229 {
14230 case 4:
14231 break;
14232 case 8:
14233 fputs (", 0", file);
14234 break;
14235 default:
14236 gcc_unreachable ();
14237 }
14238 }
14239
14240 /* Return true if X is a representation of the PIC register. This copes
14241 with calls from ix86_find_base_term, where the register might have
14242 been replaced by a cselib value. */
14243
14244 static bool
14245 ix86_pic_register_p (rtx x)
14246 {
14247 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
14248 return (pic_offset_table_rtx
14249 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
14250 else
14251 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
14252 }
14253
14254 /* Helper function for ix86_delegitimize_address.
14255 Attempt to delegitimize TLS local-exec accesses. */
14256
14257 static rtx
14258 ix86_delegitimize_tls_address (rtx orig_x)
14259 {
14260 rtx x = orig_x, unspec;
14261 struct ix86_address addr;
14262
14263 if (!TARGET_TLS_DIRECT_SEG_REFS)
14264 return orig_x;
14265 if (MEM_P (x))
14266 x = XEXP (x, 0);
14267 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
14268 return orig_x;
14269 if (ix86_decompose_address (x, &addr) == 0
14270 || addr.seg != DEFAULT_TLS_SEG_REG
14271 || addr.disp == NULL_RTX
14272 || GET_CODE (addr.disp) != CONST)
14273 return orig_x;
14274 unspec = XEXP (addr.disp, 0);
14275 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
14276 unspec = XEXP (unspec, 0);
14277 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
14278 return orig_x;
14279 x = XVECEXP (unspec, 0, 0);
14280 gcc_assert (GET_CODE (x) == SYMBOL_REF);
14281 if (unspec != XEXP (addr.disp, 0))
14282 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
14283 if (addr.index)
14284 {
14285 rtx idx = addr.index;
14286 if (addr.scale != 1)
14287 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
14288 x = gen_rtx_PLUS (Pmode, idx, x);
14289 }
14290 if (addr.base)
14291 x = gen_rtx_PLUS (Pmode, addr.base, x);
14292 if (MEM_P (orig_x))
14293 x = replace_equiv_address_nv (orig_x, x);
14294 return x;
14295 }
14296
14297 /* In the name of slightly smaller debug output, and to cater to
14298 general assembler lossage, recognize PIC+GOTOFF and turn it back
14299 into a direct symbol reference.
14300
14301 On Darwin, this is necessary to avoid a crash, because Darwin
14302 has a different PIC label for each routine but the DWARF debugging
14303 information is not associated with any particular routine, so it's
14304 necessary to remove references to the PIC label from RTL stored by
14305 the DWARF output code. */
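/* For example, (plus (reg ebx) (const (unspec [(symbol_ref "foo")]
   UNSPEC_GOTOFF))) is folded back to the bare (symbol_ref "foo") when the
   original expression is not itself a memory load from the GOT.  */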
14306
14307 static rtx
14308 ix86_delegitimize_address (rtx x)
14309 {
14310 rtx orig_x = delegitimize_mem_from_attrs (x);
14311 /* addend is NULL or some rtx if x is something+GOTOFF where
14312 something doesn't include the PIC register. */
14313 rtx addend = NULL_RTX;
14314 /* reg_addend is NULL or a multiple of some register. */
14315 rtx reg_addend = NULL_RTX;
14316 /* const_addend is NULL or a const_int. */
14317 rtx const_addend = NULL_RTX;
14318 /* This is the result, or NULL. */
14319 rtx result = NULL_RTX;
14320
14321 x = orig_x;
14322
14323 if (MEM_P (x))
14324 x = XEXP (x, 0);
14325
14326 if (TARGET_64BIT)
14327 {
14328 if (GET_CODE (x) == CONST
14329 && GET_CODE (XEXP (x, 0)) == PLUS
14330 && GET_MODE (XEXP (x, 0)) == Pmode
14331 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
14332 && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
14333 && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
14334 {
14335 rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
14336 x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
14337 if (MEM_P (orig_x))
14338 x = replace_equiv_address_nv (orig_x, x);
14339 return x;
14340 }
14341
14342 if (GET_CODE (x) == CONST
14343 && GET_CODE (XEXP (x, 0)) == UNSPEC
14344 && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
14345 || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
14346 && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
14347 {
14348 x = XVECEXP (XEXP (x, 0), 0, 0);
14349 if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
14350 {
14351 x = simplify_gen_subreg (GET_MODE (orig_x), x,
14352 GET_MODE (x), 0);
14353 if (x == NULL_RTX)
14354 return orig_x;
14355 }
14356 return x;
14357 }
14358
14359 if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
14360 return ix86_delegitimize_tls_address (orig_x);
14361
14362 /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
14363 and -mcmodel=medium -fpic. */
14364 }
14365
14366 if (GET_CODE (x) != PLUS
14367 || GET_CODE (XEXP (x, 1)) != CONST)
14368 return ix86_delegitimize_tls_address (orig_x);
14369
14370 if (ix86_pic_register_p (XEXP (x, 0)))
14371 /* %ebx + GOT/GOTOFF */
14372 ;
14373 else if (GET_CODE (XEXP (x, 0)) == PLUS)
14374 {
14375 /* %ebx + %reg * scale + GOT/GOTOFF */
14376 reg_addend = XEXP (x, 0);
14377 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
14378 reg_addend = XEXP (reg_addend, 1);
14379 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
14380 reg_addend = XEXP (reg_addend, 0);
14381 else
14382 {
14383 reg_addend = NULL_RTX;
14384 addend = XEXP (x, 0);
14385 }
14386 }
14387 else
14388 addend = XEXP (x, 0);
14389
14390 x = XEXP (XEXP (x, 1), 0);
14391 if (GET_CODE (x) == PLUS
14392 && CONST_INT_P (XEXP (x, 1)))
14393 {
14394 const_addend = XEXP (x, 1);
14395 x = XEXP (x, 0);
14396 }
14397
14398 if (GET_CODE (x) == UNSPEC
14399 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
14400 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
14401 || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
14402 && !MEM_P (orig_x) && !addend)))
14403 result = XVECEXP (x, 0, 0);
14404
14405 if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
14406 && !MEM_P (orig_x))
14407 result = XVECEXP (x, 0, 0);
14408
14409 if (! result)
14410 return ix86_delegitimize_tls_address (orig_x);
14411
14412 if (const_addend)
14413 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
14414 if (reg_addend)
14415 result = gen_rtx_PLUS (Pmode, reg_addend, result);
14416 if (addend)
14417 {
14418 /* If the rest of original X doesn't involve the PIC register, add
14419 addend and subtract pic_offset_table_rtx. This can happen e.g.
14420 for code like:
14421 leal (%ebx, %ecx, 4), %ecx
14422 ...
14423 movl foo@GOTOFF(%ecx), %edx
14424 in which case we return (%ecx - %ebx) + foo. */
14425 if (pic_offset_table_rtx)
14426 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
14427 pic_offset_table_rtx),
14428 result);
14429 else
14430 return orig_x;
14431 }
14432 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
14433 {
14434 result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
14435 if (result == NULL_RTX)
14436 return orig_x;
14437 }
14438 return result;
14439 }
14440
14441 /* If X is a machine specific address (i.e. a symbol or label being
14442 referenced as a displacement from the GOT implemented using an
14443 UNSPEC), then return the base term. Otherwise return X. */
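/* E.g. for (const (plus (unspec [(symbol_ref "foo")] UNSPEC_GOTPCREL)
   (const_int 8))) in 64-bit mode the base term is (symbol_ref "foo").  */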
14444
14445 rtx
14446 ix86_find_base_term (rtx x)
14447 {
14448 rtx term;
14449
14450 if (TARGET_64BIT)
14451 {
14452 if (GET_CODE (x) != CONST)
14453 return x;
14454 term = XEXP (x, 0);
14455 if (GET_CODE (term) == PLUS
14456 && (CONST_INT_P (XEXP (term, 1))
14457 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
14458 term = XEXP (term, 0);
14459 if (GET_CODE (term) != UNSPEC
14460 || (XINT (term, 1) != UNSPEC_GOTPCREL
14461 && XINT (term, 1) != UNSPEC_PCREL))
14462 return x;
14463
14464 return XVECEXP (term, 0, 0);
14465 }
14466
14467 return ix86_delegitimize_address (x);
14468 }
14469 \f
14470 static void
14471 put_condition_code (enum rtx_code code, enum machine_mode mode, bool reverse,
14472 bool fp, FILE *file)
14473 {
14474 const char *suffix;
14475
14476 if (mode == CCFPmode || mode == CCFPUmode)
14477 {
14478 code = ix86_fp_compare_code_to_integer (code);
14479 mode = CCmode;
14480 }
14481 if (reverse)
14482 code = reverse_condition (code);
14483
14484 switch (code)
14485 {
14486 case EQ:
14487 switch (mode)
14488 {
14489 case CCAmode:
14490 suffix = "a";
14491 break;
14492
14493 case CCCmode:
14494 suffix = "c";
14495 break;
14496
14497 case CCOmode:
14498 suffix = "o";
14499 break;
14500
14501 case CCSmode:
14502 suffix = "s";
14503 break;
14504
14505 default:
14506 suffix = "e";
14507 }
14508 break;
14509 case NE:
14510 switch (mode)
14511 {
14512 case CCAmode:
14513 suffix = "na";
14514 break;
14515
14516 case CCCmode:
14517 suffix = "nc";
14518 break;
14519
14520 case CCOmode:
14521 suffix = "no";
14522 break;
14523
14524 case CCSmode:
14525 suffix = "ns";
14526 break;
14527
14528 default:
14529 suffix = "ne";
14530 }
14531 break;
14532 case GT:
14533 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
14534 suffix = "g";
14535 break;
14536 case GTU:
14537 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
14538 Those same assemblers have the same but opposite lossage on cmov. */
14539 if (mode == CCmode)
14540 suffix = fp ? "nbe" : "a";
14541 else
14542 gcc_unreachable ();
14543 break;
14544 case LT:
14545 switch (mode)
14546 {
14547 case CCNOmode:
14548 case CCGOCmode:
14549 suffix = "s";
14550 break;
14551
14552 case CCmode:
14553 case CCGCmode:
14554 suffix = "l";
14555 break;
14556
14557 default:
14558 gcc_unreachable ();
14559 }
14560 break;
14561 case LTU:
14562 if (mode == CCmode)
14563 suffix = "b";
14564 else if (mode == CCCmode)
14565 suffix = "c";
14566 else
14567 gcc_unreachable ();
14568 break;
14569 case GE:
14570 switch (mode)
14571 {
14572 case CCNOmode:
14573 case CCGOCmode:
14574 suffix = "ns";
14575 break;
14576
14577 case CCmode:
14578 case CCGCmode:
14579 suffix = "ge";
14580 break;
14581
14582 default:
14583 gcc_unreachable ();
14584 }
14585 break;
14586 case GEU:
14587 if (mode == CCmode)
14588 suffix = fp ? "nb" : "ae";
14589 else if (mode == CCCmode)
14590 suffix = "nc";
14591 else
14592 gcc_unreachable ();
14593 break;
14594 case LE:
14595 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
14596 suffix = "le";
14597 break;
14598 case LEU:
14599 if (mode == CCmode)
14600 suffix = "be";
14601 else
14602 gcc_unreachable ();
14603 break;
14604 case UNORDERED:
14605 suffix = fp ? "u" : "p";
14606 break;
14607 case ORDERED:
14608 suffix = fp ? "nu" : "np";
14609 break;
14610 default:
14611 gcc_unreachable ();
14612 }
14613 fputs (suffix, file);
14614 }
14615
14616 /* Print the name of register X to FILE based on its machine mode and number.
14617 If CODE is 'w', pretend the mode is HImode.
14618 If CODE is 'b', pretend the mode is QImode.
14619 If CODE is 'k', pretend the mode is SImode.
14620 If CODE is 'q', pretend the mode is DImode.
14621 If CODE is 'x', pretend the mode is V4SFmode.
14622 If CODE is 't', pretend the mode is V8SFmode.
14623 If CODE is 'g', pretend the mode is V16SFmode.
14624 If CODE is 'h', pretend the reg is the 'high' byte register.
14625 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
14626 If CODE is 'd', duplicate the operand for AVX instruction.
14627 */
14628
14629 void
14630 print_reg (rtx x, int code, FILE *file)
14631 {
14632 const char *reg;
14633 unsigned int regno;
14634 bool duplicated = code == 'd' && TARGET_AVX;
14635
14636 if (ASSEMBLER_DIALECT == ASM_ATT)
14637 putc ('%', file);
14638
14639 if (x == pc_rtx)
14640 {
14641 gcc_assert (TARGET_64BIT);
14642 fputs ("rip", file);
14643 return;
14644 }
14645
14646 regno = true_regnum (x);
14647 gcc_assert (regno != ARG_POINTER_REGNUM
14648 && regno != FRAME_POINTER_REGNUM
14649 && regno != FLAGS_REG
14650 && regno != FPSR_REG
14651 && regno != FPCR_REG);
14652
14653 if (code == 'w' || MMX_REG_P (x))
14654 code = 2;
14655 else if (code == 'b')
14656 code = 1;
14657 else if (code == 'k')
14658 code = 4;
14659 else if (code == 'q')
14660 code = 8;
14661 else if (code == 'y')
14662 code = 3;
14663 else if (code == 'h')
14664 code = 0;
14665 else if (code == 'x')
14666 code = 16;
14667 else if (code == 't')
14668 code = 32;
14669 else if (code == 'g')
14670 code = 64;
14671 else
14672 code = GET_MODE_SIZE (GET_MODE (x));
14673
14674 /* Irritatingly, AMD extended registers use a different naming convention
14675 from the normal registers: "r%d[bwd]" */
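  /* E.g. register r8 prints as %r8b, %r8w, %r8d or %r8 depending on the
     requested operand size.  */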
14676 if (REX_INT_REGNO_P (regno))
14677 {
14678 gcc_assert (TARGET_64BIT);
14679 putc ('r', file);
14680 fprint_ul (file, regno - FIRST_REX_INT_REG + 8);
14681 switch (code)
14682 {
14683 case 0:
14684 error ("extended registers have no high halves");
14685 break;
14686 case 1:
14687 putc ('b', file);
14688 break;
14689 case 2:
14690 putc ('w', file);
14691 break;
14692 case 4:
14693 putc ('d', file);
14694 break;
14695 case 8:
14696 /* no suffix */
14697 break;
14698 default:
14699 error ("unsupported operand size for extended register");
14700 break;
14701 }
14702 return;
14703 }
14704
14705 reg = NULL;
14706 switch (code)
14707 {
14708 case 3:
14709 if (STACK_TOP_P (x))
14710 {
14711 reg = "st(0)";
14712 break;
14713 }
14714 /* FALLTHRU */
14715 case 8:
14716 case 4:
14717 case 12:
14718 if (! ANY_FP_REG_P (x) && ! ANY_MASK_REG_P (x))
14719 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
14720 /* FALLTHRU */
14721 case 16:
14722 case 2:
14723 normal:
14724 reg = hi_reg_name[regno];
14725 break;
14726 case 1:
14727 if (regno >= ARRAY_SIZE (qi_reg_name))
14728 goto normal;
14729 reg = qi_reg_name[regno];
14730 break;
14731 case 0:
14732 if (regno >= ARRAY_SIZE (qi_high_reg_name))
14733 goto normal;
14734 reg = qi_high_reg_name[regno];
14735 break;
14736 case 32:
14737 if (SSE_REG_P (x))
14738 {
14739 gcc_assert (!duplicated);
14740 putc ('y', file);
14741 fputs (hi_reg_name[regno] + 1, file);
14742 return;
14743 }
14744 case 64:
14745 if (SSE_REG_P (x))
14746 {
14747 gcc_assert (!duplicated);
14748 putc ('z', file);
14749 fputs (hi_reg_name[REGNO (x)] + 1, file);
14750 return;
14751 }
14752 break;
14753 default:
14754 gcc_unreachable ();
14755 }
14756
14757 fputs (reg, file);
14758 if (duplicated)
14759 {
14760 if (ASSEMBLER_DIALECT == ASM_ATT)
14761 fprintf (file, ", %%%s", reg);
14762 else
14763 fprintf (file, ", %s", reg);
14764 }
14765 }
14766
14767 /* Meaning of CODE:
14768 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
14769 C -- print opcode suffix for set/cmov insn.
14770 c -- like C, but print reversed condition
14771 F,f -- likewise, but for floating-point.
14772 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
14773 otherwise nothing
14774 R -- print embedded rounding and sae.
14775 r -- print only sae.
14776 z -- print the opcode suffix for the size of the current operand.
14777 Z -- likewise, with special suffixes for x87 instructions.
14778 * -- print a star (in certain assembler syntax)
14779 A -- print an absolute memory reference.
14780 E -- print address with DImode register names if TARGET_64BIT.
14781 w -- print the operand as if it's a "word" (HImode) even if it isn't.
14782 s -- print a shift double count, followed by the assembler's argument
14783 delimiter.
14784 b -- print the QImode name of the register for the indicated operand.
14785 %b0 would print %al if operands[0] is reg 0.
14786 w -- likewise, print the HImode name of the register.
14787 k -- likewise, print the SImode name of the register.
14788 q -- likewise, print the DImode name of the register.
14789 x -- likewise, print the V4SFmode name of the register.
14790 t -- likewise, print the V8SFmode name of the register.
14791 g -- likewise, print the V16SFmode name of the register.
14792 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
14793 y -- print "st(0)" instead of "st" as a register.
14794 d -- print duplicated register operand for AVX instruction.
14795 D -- print condition for SSE cmp instruction.
14796 P -- if PIC, print an @PLT suffix.
14797 p -- print raw symbol name.
14798 X -- don't print any sort of PIC '@' suffix for a symbol.
14799 & -- print some in-use local-dynamic symbol name.
14800 H -- print a memory address offset by 8; used for sse high-parts
14801 Y -- print condition for XOP pcom* instruction.
14802 + -- print a branch hint as 'cs' or 'ds' prefix
14803 ; -- print a semicolon (after prefixes due to bug in older gas).
14804 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
14805 @ -- print a segment register of thread base pointer load
14806 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
14807 */
14808
14809 void
14810 ix86_print_operand (FILE *file, rtx x, int code)
14811 {
14812 if (code)
14813 {
14814 switch (code)
14815 {
14816 case 'A':
14817 switch (ASSEMBLER_DIALECT)
14818 {
14819 case ASM_ATT:
14820 putc ('*', file);
14821 break;
14822
14823 case ASM_INTEL:
14824 /* Intel syntax. For absolute addresses, registers should not
14825 be surrounded by brackets. */
14826 if (!REG_P (x))
14827 {
14828 putc ('[', file);
14829 ix86_print_operand (file, x, 0);
14830 putc (']', file);
14831 return;
14832 }
14833 break;
14834
14835 default:
14836 gcc_unreachable ();
14837 }
14838
14839 ix86_print_operand (file, x, 0);
14840 return;
14841
14842 case 'E':
14843 /* Wrap address in an UNSPEC to declare special handling. */
14844 if (TARGET_64BIT)
14845 x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
14846
14847 output_address (x);
14848 return;
14849
14850 case 'L':
14851 if (ASSEMBLER_DIALECT == ASM_ATT)
14852 putc ('l', file);
14853 return;
14854
14855 case 'W':
14856 if (ASSEMBLER_DIALECT == ASM_ATT)
14857 putc ('w', file);
14858 return;
14859
14860 case 'B':
14861 if (ASSEMBLER_DIALECT == ASM_ATT)
14862 putc ('b', file);
14863 return;
14864
14865 case 'Q':
14866 if (ASSEMBLER_DIALECT == ASM_ATT)
14867 putc ('l', file);
14868 return;
14869
14870 case 'S':
14871 if (ASSEMBLER_DIALECT == ASM_ATT)
14872 putc ('s', file);
14873 return;
14874
14875 case 'T':
14876 if (ASSEMBLER_DIALECT == ASM_ATT)
14877 putc ('t', file);
14878 return;
14879
14880 case 'O':
14881 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14882 if (ASSEMBLER_DIALECT != ASM_ATT)
14883 return;
14884
14885 switch (GET_MODE_SIZE (GET_MODE (x)))
14886 {
14887 case 2:
14888 putc ('w', file);
14889 break;
14890
14891 case 4:
14892 putc ('l', file);
14893 break;
14894
14895 case 8:
14896 putc ('q', file);
14897 break;
14898
14899 default:
14900 output_operand_lossage
14901 ("invalid operand size for operand code 'O'");
14902 return;
14903 }
14904
14905 putc ('.', file);
14906 #endif
14907 return;
14908
14909 case 'z':
14910 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
14911 {
14912 /* Opcodes don't get size suffixes when using Intel syntax. */
14913 if (ASSEMBLER_DIALECT == ASM_INTEL)
14914 return;
14915
14916 switch (GET_MODE_SIZE (GET_MODE (x)))
14917 {
14918 case 1:
14919 putc ('b', file);
14920 return;
14921
14922 case 2:
14923 putc ('w', file);
14924 return;
14925
14926 case 4:
14927 putc ('l', file);
14928 return;
14929
14930 case 8:
14931 putc ('q', file);
14932 return;
14933
14934 default:
14935 output_operand_lossage
14936 ("invalid operand size for operand code 'z'");
14937 return;
14938 }
14939 }
14940
14941 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14942 warning
14943 (0, "non-integer operand used with operand code 'z'");
14944 /* FALLTHRU */
14945
14946 case 'Z':
14947 /* 387 opcodes don't get size suffixes when using Intel syntax. */
14948 if (ASSEMBLER_DIALECT == ASM_INTEL)
14949 return;
14950
14951 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
14952 {
14953 switch (GET_MODE_SIZE (GET_MODE (x)))
14954 {
14955 case 2:
14956 #ifdef HAVE_AS_IX86_FILDS
14957 putc ('s', file);
14958 #endif
14959 return;
14960
14961 case 4:
14962 putc ('l', file);
14963 return;
14964
14965 case 8:
14966 #ifdef HAVE_AS_IX86_FILDQ
14967 putc ('q', file);
14968 #else
14969 fputs ("ll", file);
14970 #endif
14971 return;
14972
14973 default:
14974 break;
14975 }
14976 }
14977 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14978 {
14979 /* 387 opcodes don't get size suffixes
14980 if the operands are registers. */
14981 if (STACK_REG_P (x))
14982 return;
14983
14984 switch (GET_MODE_SIZE (GET_MODE (x)))
14985 {
14986 case 4:
14987 putc ('s', file);
14988 return;
14989
14990 case 8:
14991 putc ('l', file);
14992 return;
14993
14994 case 12:
14995 case 16:
14996 putc ('t', file);
14997 return;
14998
14999 default:
15000 break;
15001 }
15002 }
15003 else
15004 {
15005 output_operand_lossage
15006 ("invalid operand type used with operand code 'Z'");
15007 return;
15008 }
15009
15010 output_operand_lossage
15011 ("invalid operand size for operand code 'Z'");
15012 return;
15013
15014 case 'd':
15015 case 'b':
15016 case 'w':
15017 case 'k':
15018 case 'q':
15019 case 'h':
15020 case 't':
15021 case 'g':
15022 case 'y':
15023 case 'x':
15024 case 'X':
15025 case 'P':
15026 case 'p':
15027 break;
15028
15029 case 's':
15030 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
15031 {
15032 ix86_print_operand (file, x, 0);
15033 fputs (", ", file);
15034 }
15035 return;
15036
15037 case 'Y':
15038 switch (GET_CODE (x))
15039 {
15040 case NE:
15041 fputs ("neq", file);
15042 break;
15043 case EQ:
15044 fputs ("eq", file);
15045 break;
15046 case GE:
15047 case GEU:
15048 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
15049 break;
15050 case GT:
15051 case GTU:
15052 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
15053 break;
15054 case LE:
15055 case LEU:
15056 fputs ("le", file);
15057 break;
15058 case LT:
15059 case LTU:
15060 fputs ("lt", file);
15061 break;
15062 case UNORDERED:
15063 fputs ("unord", file);
15064 break;
15065 case ORDERED:
15066 fputs ("ord", file);
15067 break;
15068 case UNEQ:
15069 fputs ("ueq", file);
15070 break;
15071 case UNGE:
15072 fputs ("nlt", file);
15073 break;
15074 case UNGT:
15075 fputs ("nle", file);
15076 break;
15077 case UNLE:
15078 fputs ("ule", file);
15079 break;
15080 case UNLT:
15081 fputs ("ult", file);
15082 break;
15083 case LTGT:
15084 fputs ("une", file);
15085 break;
15086 default:
15087 output_operand_lossage ("operand is not a condition code, "
15088 "invalid operand code 'Y'");
15089 return;
15090 }
15091 return;
15092
15093 case 'D':
15094 /* A little bit of braindamage here. The SSE compare instructions
15095 use completely different names for the comparisons than the
15096 fp conditional moves do. */
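      /* These names become the suffix of the SSE compare mnemonics, e.g.
	 LT prints as "lt" and ends up in cmpltps/cmpltss, while the AVX
	 forms below map to the extended vcmpps predicates.  */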
15097 switch (GET_CODE (x))
15098 {
15099 case UNEQ:
15100 if (TARGET_AVX)
15101 {
15102 fputs ("eq_us", file);
15103 break;
15104 }
15105 case EQ:
15106 fputs ("eq", file);
15107 break;
15108 case UNLT:
15109 if (TARGET_AVX)
15110 {
15111 fputs ("nge", file);
15112 break;
15113 }
15114 case LT:
15115 fputs ("lt", file);
15116 break;
15117 case UNLE:
15118 if (TARGET_AVX)
15119 {
15120 fputs ("ngt", file);
15121 break;
15122 }
15123 case LE:
15124 fputs ("le", file);
15125 break;
15126 case UNORDERED:
15127 fputs ("unord", file);
15128 break;
15129 case LTGT:
15130 if (TARGET_AVX)
15131 {
15132 fputs ("neq_oq", file);
15133 break;
15134 }
15135 case NE:
15136 fputs ("neq", file);
15137 break;
15138 case GE:
15139 if (TARGET_AVX)
15140 {
15141 fputs ("ge", file);
15142 break;
15143 }
15144 case UNGE:
15145 fputs ("nlt", file);
15146 break;
15147 case GT:
15148 if (TARGET_AVX)
15149 {
15150 fputs ("gt", file);
15151 break;
15152 }
15153 case UNGT:
15154 fputs ("nle", file);
15155 break;
15156 case ORDERED:
15157 fputs ("ord", file);
15158 break;
15159 default:
15160 output_operand_lossage ("operand is not a condition code, "
15161 "invalid operand code 'D'");
15162 return;
15163 }
15164 return;
15165
15166 case 'F':
15167 case 'f':
15168 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
15169 if (ASSEMBLER_DIALECT == ASM_ATT)
15170 putc ('.', file);
15171 #endif
15172
15173 case 'C':
15174 case 'c':
15175 if (!COMPARISON_P (x))
15176 {
15177 output_operand_lossage ("operand is not a condition code, "
15178 "invalid operand code '%c'", code);
15179 return;
15180 }
15181 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
15182 code == 'c' || code == 'f',
15183 code == 'F' || code == 'f',
15184 file);
15185 return;
15186
15187 case 'H':
15188 if (!offsettable_memref_p (x))
15189 {
15190 output_operand_lossage ("operand is not an offsettable memory "
15191 "reference, invalid operand code 'H'");
15192 return;
15193 }
15194 /* It doesn't actually matter what mode we use here, as we're
15195 only going to use this for printing. */
15196 x = adjust_address_nv (x, DImode, 8);
15197 /* Output 'qword ptr' for intel assembler dialect. */
15198 if (ASSEMBLER_DIALECT == ASM_INTEL)
15199 code = 'q';
15200 break;
15201
15202 case 'K':
15203 gcc_assert (CONST_INT_P (x));
15204
15205 if (INTVAL (x) & IX86_HLE_ACQUIRE)
15206 #ifdef HAVE_AS_IX86_HLE
15207 fputs ("xacquire ", file);
15208 #else
15209 fputs ("\n" ASM_BYTE "0xf2\n\t", file);
15210 #endif
15211 else if (INTVAL (x) & IX86_HLE_RELEASE)
15212 #ifdef HAVE_AS_IX86_HLE
15213 fputs ("xrelease ", file);
15214 #else
15215 fputs ("\n" ASM_BYTE "0xf3\n\t", file);
15216 #endif
15217 /* We do not want to print the value of the operand. */
15218 return;
15219
15220 case 'N':
15221 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
15222 fputs ("{z}", file);
15223 return;
15224
15225 case 'r':
15226 gcc_assert (CONST_INT_P (x));
15227 gcc_assert (INTVAL (x) == ROUND_SAE);
15228
15229 if (ASSEMBLER_DIALECT == ASM_INTEL)
15230 fputs (", ", file);
15231
15232 fputs ("{sae}", file);
15233
15234 if (ASSEMBLER_DIALECT == ASM_ATT)
15235 fputs (", ", file);
15236
15237 return;
15238
15239 case 'R':
15240 gcc_assert (CONST_INT_P (x));
15241
15242 if (ASSEMBLER_DIALECT == ASM_INTEL)
15243 fputs (", ", file);
15244
15245 switch (INTVAL (x))
15246 {
15247 case ROUND_NEAREST_INT | ROUND_SAE:
15248 fputs ("{rn-sae}", file);
15249 break;
15250 case ROUND_NEG_INF | ROUND_SAE:
15251 fputs ("{rd-sae}", file);
15252 break;
15253 case ROUND_POS_INF | ROUND_SAE:
15254 fputs ("{ru-sae}", file);
15255 break;
15256 case ROUND_ZERO | ROUND_SAE:
15257 fputs ("{rz-sae}", file);
15258 break;
15259 default:
15260 gcc_unreachable ();
15261 }
15262
15263 if (ASSEMBLER_DIALECT == ASM_ATT)
15264 fputs (", ", file);
15265
15266 return;
15267
15268 case '*':
15269 if (ASSEMBLER_DIALECT == ASM_ATT)
15270 putc ('*', file);
15271 return;
15272
15273 case '&':
15274 {
15275 const char *name = get_some_local_dynamic_name ();
15276 if (name == NULL)
15277 output_operand_lossage ("'%%&' used without any "
15278 "local dynamic TLS references");
15279 else
15280 assemble_name (file, name);
15281 return;
15282 }
15283
15284 case '+':
15285 {
15286 rtx x;
15287
15288 if (!optimize
15289 || optimize_function_for_size_p (cfun)
15290 || !TARGET_BRANCH_PREDICTION_HINTS)
15291 return;
15292
15293 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
15294 if (x)
15295 {
15296 int pred_val = XINT (x, 0);
15297
15298 if (pred_val < REG_BR_PROB_BASE * 45 / 100
15299 || pred_val > REG_BR_PROB_BASE * 55 / 100)
15300 {
15301 bool taken = pred_val > REG_BR_PROB_BASE / 2;
15302 bool cputaken
15303 = final_forward_branch_p (current_output_insn) == 0;
15304
15305 /* Emit hints only in the case where the default branch
15306 prediction heuristics would fail. */
15307 if (taken != cputaken)
15308 {
15309 /* We use 3e (DS) prefix for taken branches and
15310 2e (CS) prefix for not taken branches. */
15311 if (taken)
15312 fputs ("ds ; ", file);
15313 else
15314 fputs ("cs ; ", file);
15315 }
15316 }
15317 }
15318 return;
15319 }
15320
15321 case ';':
15322 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
15323 putc (';', file);
15324 #endif
15325 return;
15326
15327 case '@':
15328 if (ASSEMBLER_DIALECT == ASM_ATT)
15329 putc ('%', file);
15330
15331 /* The kernel uses a different segment register for performance
15332 reasons; a system call would not have to trash the userspace
15333 segment register, which would be expensive. */
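  /* On 64-bit userland this yields e.g. "movq %fs:0, %rax" for a thread
     pointer load, whereas -mcmodel=kernel and 32-bit code use %gs.  */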
15334 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
15335 fputs ("fs", file);
15336 else
15337 fputs ("gs", file);
15338 return;
15339
15340 case '~':
15341 putc (TARGET_AVX2 ? 'i' : 'f', file);
15342 return;
15343
15344 case '^':
15345 if (TARGET_64BIT && Pmode != word_mode)
15346 fputs ("addr32 ", file);
15347 return;
15348
15349 default:
15350 output_operand_lossage ("invalid operand code '%c'", code);
15351 }
15352 }
15353
15354 if (REG_P (x))
15355 print_reg (x, code, file);
15356
15357 else if (MEM_P (x))
15358 {
15359 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
15360 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
15361 && GET_MODE (x) != BLKmode)
15362 {
15363 const char * size;
15364 switch (GET_MODE_SIZE (GET_MODE (x)))
15365 {
15366 case 1: size = "BYTE"; break;
15367 case 2: size = "WORD"; break;
15368 case 4: size = "DWORD"; break;
15369 case 8: size = "QWORD"; break;
15370 case 12: size = "TBYTE"; break;
15371 case 16:
15372 if (GET_MODE (x) == XFmode)
15373 size = "TBYTE";
15374 else
15375 size = "XMMWORD";
15376 break;
15377 case 32: size = "YMMWORD"; break;
15378 case 64: size = "ZMMWORD"; break;
15379 default:
15380 gcc_unreachable ();
15381 }
15382
15383 /* Check for explicit size override (codes 'b', 'w', 'k',
15384 'q' and 'x') */
15385 if (code == 'b')
15386 size = "BYTE";
15387 else if (code == 'w')
15388 size = "WORD";
15389 else if (code == 'k')
15390 size = "DWORD";
15391 else if (code == 'q')
15392 size = "QWORD";
15393 else if (code == 'x')
15394 size = "XMMWORD";
15395
15396 fputs (size, file);
15397 fputs (" PTR ", file);
15398 }
15399
15400 x = XEXP (x, 0);
15401 /* Avoid (%rip) for call operands. */
15402 if (CONSTANT_ADDRESS_P (x) && code == 'P'
15403 && !CONST_INT_P (x))
15404 output_addr_const (file, x);
15405 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
15406 output_operand_lossage ("invalid constraints for operand");
15407 else
15408 output_address (x);
15409 }
15410
15411 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
15412 {
15413 REAL_VALUE_TYPE r;
15414 long l;
15415
15416 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15417 REAL_VALUE_TO_TARGET_SINGLE (r, l);
15418
15419 if (ASSEMBLER_DIALECT == ASM_ATT)
15420 putc ('$', file);
15421 /* Sign extend 32bit SFmode immediate to 8 bytes. */
15422 if (code == 'q')
15423 fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
15424 (unsigned long long) (int) l);
15425 else
15426 fprintf (file, "0x%08x", (unsigned int) l);
15427 }
15428
15429 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
15430 {
15431 REAL_VALUE_TYPE r;
15432 long l[2];
15433
15434 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15435 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
15436
15437 if (ASSEMBLER_DIALECT == ASM_ATT)
15438 putc ('$', file);
15439 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
15440 }
15441
15442 /* These float cases don't actually occur as immediate operands. */
15443 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
15444 {
15445 char dstr[30];
15446
15447 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
15448 fputs (dstr, file);
15449 }
15450
15451 else
15452 {
15453 /* We have patterns that allow zero sets of memory, for instance.
15454 In 64-bit mode, we should probably support all 8-byte vectors,
15455 since we can in fact encode that into an immediate. */
15456 if (GET_CODE (x) == CONST_VECTOR)
15457 {
15458 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
15459 x = const0_rtx;
15460 }
15461
15462 if (code != 'P' && code != 'p')
15463 {
15464 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
15465 {
15466 if (ASSEMBLER_DIALECT == ASM_ATT)
15467 putc ('$', file);
15468 }
15469 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
15470 || GET_CODE (x) == LABEL_REF)
15471 {
15472 if (ASSEMBLER_DIALECT == ASM_ATT)
15473 putc ('$', file);
15474 else
15475 fputs ("OFFSET FLAT:", file);
15476 }
15477 }
15478 if (CONST_INT_P (x))
15479 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
15480 else if (flag_pic || MACHOPIC_INDIRECT)
15481 output_pic_addr_const (file, x, code);
15482 else
15483 output_addr_const (file, x);
15484 }
15485 }
15486
15487 static bool
15488 ix86_print_operand_punct_valid_p (unsigned char code)
15489 {
15490 return (code == '@' || code == '*' || code == '+' || code == '&'
15491 || code == ';' || code == '~' || code == '^');
15492 }
15493 \f
15494 /* Print a memory operand whose address is ADDR. */
15495
15496 static void
15497 ix86_print_operand_address (FILE *file, rtx addr)
15498 {
15499 struct ix86_address parts;
15500 rtx base, index, disp;
15501 int scale;
15502 int ok;
15503 bool vsib = false;
15504 int code = 0;
15505
15506 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
15507 {
15508 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
15509 gcc_assert (parts.index == NULL_RTX);
15510 parts.index = XVECEXP (addr, 0, 1);
15511 parts.scale = INTVAL (XVECEXP (addr, 0, 2));
15512 addr = XVECEXP (addr, 0, 0);
15513 vsib = true;
15514 }
15515 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
15516 {
15517 gcc_assert (TARGET_64BIT);
15518 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
15519 code = 'q';
15520 }
15521 else
15522 ok = ix86_decompose_address (addr, &parts);
15523
15524 gcc_assert (ok);
15525
15526 base = parts.base;
15527 index = parts.index;
15528 disp = parts.disp;
15529 scale = parts.scale;
15530
15531 switch (parts.seg)
15532 {
15533 case SEG_DEFAULT:
15534 break;
15535 case SEG_FS:
15536 case SEG_GS:
15537 if (ASSEMBLER_DIALECT == ASM_ATT)
15538 putc ('%', file);
15539 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
15540 break;
15541 default:
15542 gcc_unreachable ();
15543 }
15544
15545 /* Use one byte shorter RIP relative addressing for 64bit mode. */
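  /* A RIP-relative access needs only ModRM + disp32, while an absolute
     disp32 in 64-bit mode also requires a SIB byte.  */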
15546 if (TARGET_64BIT && !base && !index)
15547 {
15548 rtx symbol = disp;
15549
15550 if (GET_CODE (disp) == CONST
15551 && GET_CODE (XEXP (disp, 0)) == PLUS
15552 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
15553 symbol = XEXP (XEXP (disp, 0), 0);
15554
15555 if (GET_CODE (symbol) == LABEL_REF
15556 || (GET_CODE (symbol) == SYMBOL_REF
15557 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
15558 base = pc_rtx;
15559 }
15560 if (!base && !index)
15561 {
15562 /* Displacement only requires special attention. */
15563
15564 if (CONST_INT_P (disp))
15565 {
15566 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
15567 fputs ("ds:", file);
15568 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
15569 }
15570 else if (flag_pic)
15571 output_pic_addr_const (file, disp, 0);
15572 else
15573 output_addr_const (file, disp);
15574 }
15575 else
15576 {
15577 /* Print SImode register names to force addr32 prefix. */
15578 if (SImode_address_operand (addr, VOIDmode))
15579 {
15580 #ifdef ENABLE_CHECKING
15581 gcc_assert (TARGET_64BIT);
15582 switch (GET_CODE (addr))
15583 {
15584 case SUBREG:
15585 gcc_assert (GET_MODE (addr) == SImode);
15586 gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
15587 break;
15588 case ZERO_EXTEND:
15589 case AND:
15590 gcc_assert (GET_MODE (addr) == DImode);
15591 break;
15592 default:
15593 gcc_unreachable ();
15594 }
15595 #endif
15596 gcc_assert (!code);
15597 code = 'k';
15598 }
15599 else if (code == 0
15600 && TARGET_X32
15601 && disp
15602 && CONST_INT_P (disp)
15603 && INTVAL (disp) < -16*1024*1024)
15604 {
15605 /* X32 runs in 64-bit mode, where displacement, DISP, in
15606 address DISP(%r64), is encoded as 32-bit immediate sign-
15607 extended from 32-bit to 64-bit. For -0x40000300(%r64),
15608 address is %r64 + 0xffffffffbffffd00. When %r64 <
15609 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
15610 which is invalid for x32. The correct address is %r64
15611 - 0x40000300 == 0xf7ffdd64. To properly encode
15612 -0x40000300(%r64) for x32, we zero-extend negative
15613 displacement by forcing addr32 prefix which truncates
15614 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
15615 zero-extend all negative displacements, including -1(%rsp).
15616 However, for small negative displacements, sign-extension
15617 won't cause overflow. We only zero-extend negative
15618 displacements if they are < -16*1024*1024, which is also used
15619 to check legitimate address displacements for PIC. */
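/* For example, with code 'k' the register printer below emits
   "-0x40000300(%eax)" rather than "-0x40000300(%rax)"; the 32-bit base
   register makes the assembler add the 0x67 address-size prefix, so the
   effective address is computed in 32 bits and zero-extended.  */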
15620 code = 'k';
15621 }
15622
15623 if (ASSEMBLER_DIALECT == ASM_ATT)
15624 {
15625 if (disp)
15626 {
15627 if (flag_pic)
15628 output_pic_addr_const (file, disp, 0);
15629 else if (GET_CODE (disp) == LABEL_REF)
15630 output_asm_label (disp);
15631 else
15632 output_addr_const (file, disp);
15633 }
15634
15635 putc ('(', file);
15636 if (base)
15637 print_reg (base, code, file);
15638 if (index)
15639 {
15640 putc (',', file);
15641 print_reg (index, vsib ? 0 : code, file);
15642 if (scale != 1 || vsib)
15643 fprintf (file, ",%d", scale);
15644 }
15645 putc (')', file);
15646 }
15647 else
15648 {
15649 rtx offset = NULL_RTX;
15650
15651 if (disp)
15652 {
15653 /* Pull out the offset of a symbol; print any symbol itself. */
15654 if (GET_CODE (disp) == CONST
15655 && GET_CODE (XEXP (disp, 0)) == PLUS
15656 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
15657 {
15658 offset = XEXP (XEXP (disp, 0), 1);
15659 disp = gen_rtx_CONST (VOIDmode,
15660 XEXP (XEXP (disp, 0), 0));
15661 }
15662
15663 if (flag_pic)
15664 output_pic_addr_const (file, disp, 0);
15665 else if (GET_CODE (disp) == LABEL_REF)
15666 output_asm_label (disp);
15667 else if (CONST_INT_P (disp))
15668 offset = disp;
15669 else
15670 output_addr_const (file, disp);
15671 }
15672
15673 putc ('[', file);
15674 if (base)
15675 {
15676 print_reg (base, code, file);
15677 if (offset)
15678 {
15679 if (INTVAL (offset) >= 0)
15680 putc ('+', file);
15681 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
15682 }
15683 }
15684 else if (offset)
15685 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
15686 else
15687 putc ('0', file);
15688
15689 if (index)
15690 {
15691 putc ('+', file);
15692 print_reg (index, vsib ? 0 : code, file);
15693 if (scale != 1 || vsib)
15694 fprintf (file, "*%d", scale);
15695 }
15696 putc (']', file);
15697 }
15698 }
15699 }
15700
15701 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
15702
15703 static bool
15704 i386_asm_output_addr_const_extra (FILE *file, rtx x)
15705 {
15706 rtx op;
15707
15708 if (GET_CODE (x) != UNSPEC)
15709 return false;
15710
15711 op = XVECEXP (x, 0, 0);
15712 switch (XINT (x, 1))
15713 {
15714 case UNSPEC_GOTTPOFF:
15715 output_addr_const (file, op);
15716 /* FIXME: This might be @TPOFF in Sun ld. */
15717 fputs ("@gottpoff", file);
15718 break;
15719 case UNSPEC_TPOFF:
15720 output_addr_const (file, op);
15721 fputs ("@tpoff", file);
15722 break;
15723 case UNSPEC_NTPOFF:
15724 output_addr_const (file, op);
15725 if (TARGET_64BIT)
15726 fputs ("@tpoff", file);
15727 else
15728 fputs ("@ntpoff", file);
15729 break;
15730 case UNSPEC_DTPOFF:
15731 output_addr_const (file, op);
15732 fputs ("@dtpoff", file);
15733 break;
15734 case UNSPEC_GOTNTPOFF:
15735 output_addr_const (file, op);
15736 if (TARGET_64BIT)
15737 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
15738 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
15739 else
15740 fputs ("@gotntpoff", file);
15741 break;
15742 case UNSPEC_INDNTPOFF:
15743 output_addr_const (file, op);
15744 fputs ("@indntpoff", file);
15745 break;
15746 #if TARGET_MACHO
15747 case UNSPEC_MACHOPIC_OFFSET:
15748 output_addr_const (file, op);
15749 putc ('-', file);
15750 machopic_output_function_base_name (file);
15751 break;
15752 #endif
15753
15754 case UNSPEC_STACK_CHECK:
15755 {
15756 int offset;
15757
15758 gcc_assert (flag_split_stack);
15759
15760 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
15761 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
15762 #else
15763 gcc_unreachable ();
15764 #endif
15765
15766 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
15767 }
15768 break;
15769
15770 default:
15771 return false;
15772 }
15773
15774 return true;
15775 }
15776 \f
15777 /* Split one or more double-mode RTL references into pairs of half-mode
15778 references. The RTL can be REG, offsettable MEM, integer constant, or
15779 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
15780 split and "num" is its length. lo_half and hi_half are output arrays
15781 that parallel "operands". */
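/* For example, a DImode MEM at address A is split into the SImode MEMs at
   A and A+4 (low and high halves, x86 being little endian), while a DImode
   REG or constant is split with simplify_gen_subreg at offsets 0 and 4.  */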
15782
15783 void
15784 split_double_mode (enum machine_mode mode, rtx operands[],
15785 int num, rtx lo_half[], rtx hi_half[])
15786 {
15787 enum machine_mode half_mode;
15788 unsigned int byte;
15789
15790 switch (mode)
15791 {
15792 case TImode:
15793 half_mode = DImode;
15794 break;
15795 case DImode:
15796 half_mode = SImode;
15797 break;
15798 default:
15799 gcc_unreachable ();
15800 }
15801
15802 byte = GET_MODE_SIZE (half_mode);
15803
15804 while (num--)
15805 {
15806 rtx op = operands[num];
15807
15808 /* simplify_subreg refuses to split volatile memory addresses,
15809 but we still have to handle them. */
15810 if (MEM_P (op))
15811 {
15812 lo_half[num] = adjust_address (op, half_mode, 0);
15813 hi_half[num] = adjust_address (op, half_mode, byte);
15814 }
15815 else
15816 {
15817 lo_half[num] = simplify_gen_subreg (half_mode, op,
15818 GET_MODE (op) == VOIDmode
15819 ? mode : GET_MODE (op), 0);
15820 hi_half[num] = simplify_gen_subreg (half_mode, op,
15821 GET_MODE (op) == VOIDmode
15822 ? mode : GET_MODE (op), byte);
15823 }
15824 }
15825 }
15826 \f
15827 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
15828 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
15829 is the expression of the binary operation. The output may either be
15830 emitted here, or returned to the caller, like all output_* functions.
15831
15832 There is no guarantee that the operands are the same mode, as they
15833 might be within FLOAT or FLOAT_EXTEND expressions. */
15834
15835 #ifndef SYSV386_COMPAT
15836 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
15837 wants to fix the assemblers because that causes incompatibility
15838 with gcc. No-one wants to fix gcc because that causes
15839 incompatibility with assemblers... You can use the option of
15840 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
15841 #define SYSV386_COMPAT 1
15842 #endif
15843
15844 const char *
15845 output_387_binary_op (rtx insn, rtx *operands)
15846 {
15847 static char buf[40];
15848 const char *p;
15849 const char *ssep;
15850 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
15851
15852 #ifdef ENABLE_CHECKING
15853 /* Even if we do not want to check the inputs, this documents the input
15854 constraints, which helps in understanding the following code. */
15855 if (STACK_REG_P (operands[0])
15856 && ((REG_P (operands[1])
15857 && REGNO (operands[0]) == REGNO (operands[1])
15858 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
15859 || (REG_P (operands[2])
15860 && REGNO (operands[0]) == REGNO (operands[2])
15861 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
15862 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
15863 ; /* ok */
15864 else
15865 gcc_assert (is_sse);
15866 #endif
15867
15868 switch (GET_CODE (operands[3]))
15869 {
15870 case PLUS:
15871 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15872 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15873 p = "fiadd";
15874 else
15875 p = "fadd";
15876 ssep = "vadd";
15877 break;
15878
15879 case MINUS:
15880 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15881 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15882 p = "fisub";
15883 else
15884 p = "fsub";
15885 ssep = "vsub";
15886 break;
15887
15888 case MULT:
15889 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15890 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15891 p = "fimul";
15892 else
15893 p = "fmul";
15894 ssep = "vmul";
15895 break;
15896
15897 case DIV:
15898 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15899 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15900 p = "fidiv";
15901 else
15902 p = "fdiv";
15903 ssep = "vdiv";
15904 break;
15905
15906 default:
15907 gcc_unreachable ();
15908 }
15909
15910 if (is_sse)
15911 {
15912 if (TARGET_AVX)
15913 {
15914 strcpy (buf, ssep);
15915 if (GET_MODE (operands[0]) == SFmode)
15916 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
15917 else
15918 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
15919 }
15920 else
15921 {
15922 strcpy (buf, ssep + 1);
15923 if (GET_MODE (operands[0]) == SFmode)
15924 strcat (buf, "ss\t{%2, %0|%0, %2}");
15925 else
15926 strcat (buf, "sd\t{%2, %0|%0, %2}");
15927 }
15928 return buf;
15929 }
15930 strcpy (buf, p);
15931
15932 switch (GET_CODE (operands[3]))
15933 {
15934 case MULT:
15935 case PLUS:
15936 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
15937 {
15938 rtx temp = operands[2];
15939 operands[2] = operands[1];
15940 operands[1] = temp;
15941 }
15942
15943 /* We know operands[0] == operands[1]. */
15944
15945 if (MEM_P (operands[2]))
15946 {
15947 p = "%Z2\t%2";
15948 break;
15949 }
15950
15951 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
15952 {
15953 if (STACK_TOP_P (operands[0]))
15954 /* How is it that we are storing to a dead operand[2]?
15955 Well, presumably operands[1] is dead too. We can't
15956 store the result to st(0) as st(0) gets popped on this
15957 instruction. Instead store to operands[2] (which I
15958 think has to be st(1)). st(1) will be popped later.
15959 gcc <= 2.8.1 didn't have this check and generated
15960 assembly code that the Unixware assembler rejected. */
15961 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
15962 else
15963 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
15964 break;
15965 }
15966
15967 if (STACK_TOP_P (operands[0]))
15968 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
15969 else
15970 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
15971 break;
15972
15973 case MINUS:
15974 case DIV:
15975 if (MEM_P (operands[1]))
15976 {
15977 p = "r%Z1\t%1";
15978 break;
15979 }
15980
15981 if (MEM_P (operands[2]))
15982 {
15983 p = "%Z2\t%2";
15984 break;
15985 }
15986
15987 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
15988 {
15989 #if SYSV386_COMPAT
15990 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
15991 derived assemblers, confusingly reverse the direction of
15992 the operation for fsub{r} and fdiv{r} when the
15993 destination register is not st(0). The Intel assembler
15994 doesn't have this brain damage. Read !SYSV386_COMPAT to
15995 figure out what the hardware really does. */
15996 if (STACK_TOP_P (operands[0]))
15997 p = "{p\t%0, %2|rp\t%2, %0}";
15998 else
15999 p = "{rp\t%2, %0|p\t%0, %2}";
16000 #else
16001 if (STACK_TOP_P (operands[0]))
16002 /* As above for fmul/fadd, we can't store to st(0). */
16003 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
16004 else
16005 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
16006 #endif
16007 break;
16008 }
16009
16010 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
16011 {
16012 #if SYSV386_COMPAT
16013 if (STACK_TOP_P (operands[0]))
16014 p = "{rp\t%0, %1|p\t%1, %0}";
16015 else
16016 p = "{p\t%1, %0|rp\t%0, %1}";
16017 #else
16018 if (STACK_TOP_P (operands[0]))
16019 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
16020 else
16021 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
16022 #endif
16023 break;
16024 }
16025
16026 if (STACK_TOP_P (operands[0]))
16027 {
16028 if (STACK_TOP_P (operands[1]))
16029 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
16030 else
16031 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
16032 break;
16033 }
16034 else if (STACK_TOP_P (operands[1]))
16035 {
16036 #if SYSV386_COMPAT
16037 p = "{\t%1, %0|r\t%0, %1}";
16038 #else
16039 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
16040 #endif
16041 }
16042 else
16043 {
16044 #if SYSV386_COMPAT
16045 p = "{r\t%2, %0|\t%0, %2}";
16046 #else
16047 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
16048 #endif
16049 }
16050 break;
16051
16052 default:
16053 gcc_unreachable ();
16054 }
16055
16056 strcat (buf, p);
16057 return buf;
16058 }
16059
16060 /* Check if a 256bit AVX register is referenced inside of EXP. */
16061
16062 static int
16063 ix86_check_avx256_register (rtx *pexp, void *)
16064 {
16065 rtx exp = *pexp;
16066
16067 if (GET_CODE (exp) == SUBREG)
16068 exp = SUBREG_REG (exp);
16069
16070 if (REG_P (exp)
16071 && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp)))
16072 return 1;
16073
16074 return 0;
16075 }
16076
16077 /* Return needed mode for entity in optimize_mode_switching pass. */
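/* For the AVX_U128 entity the modes are: AVX_U128_CLEAN (the upper 128 bits
   of all YMM registers are known to be zero), AVX_U128_DIRTY (some upper
   half may hold live or stale data) and AVX_U128_ANY (no requirement).  */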
16078
16079 static int
16080 ix86_avx_u128_mode_needed (rtx_insn *insn)
16081 {
16082 if (CALL_P (insn))
16083 {
16084 rtx link;
16085
16086 /* Needed mode is set to AVX_U128_CLEAN if there are
16087 no 256bit modes used in function arguments. */
16088 for (link = CALL_INSN_FUNCTION_USAGE (insn);
16089 link;
16090 link = XEXP (link, 1))
16091 {
16092 if (GET_CODE (XEXP (link, 0)) == USE)
16093 {
16094 rtx arg = XEXP (XEXP (link, 0), 0);
16095
16096 if (ix86_check_avx256_register (&arg, NULL))
16097 return AVX_U128_DIRTY;
16098 }
16099 }
16100
16101 return AVX_U128_CLEAN;
16102 }
16103
16104 /* Require DIRTY mode if a 256bit AVX register is referenced. Hardware
16105 changes state only when a 256bit register is written to, but we need
16106 to prevent the compiler from moving the optimal insertion point above
16107 an eventual read from a 256bit register. */
16108 if (for_each_rtx (&PATTERN (insn), ix86_check_avx256_register, NULL))
16109 return AVX_U128_DIRTY;
16110
16111 return AVX_U128_ANY;
16112 }
16113
16114 /* Return mode that i387 must be switched into
16115 prior to the execution of insn. */
16116
16117 static int
16118 ix86_i387_mode_needed (int entity, rtx_insn *insn)
16119 {
16120 enum attr_i387_cw mode;
16121
16122 /* The mode UNINITIALIZED is used to store the control word after a
16123 function call or ASM pattern. The mode ANY specifies that the function
16124 has no requirements on the control word and makes no changes in the
16125 bits we are interested in. */
16126
16127 if (CALL_P (insn)
16128 || (NONJUMP_INSN_P (insn)
16129 && (asm_noperands (PATTERN (insn)) >= 0
16130 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
16131 return I387_CW_UNINITIALIZED;
16132
16133 if (recog_memoized (insn) < 0)
16134 return I387_CW_ANY;
16135
16136 mode = get_attr_i387_cw (insn);
16137
16138 switch (entity)
16139 {
16140 case I387_TRUNC:
16141 if (mode == I387_CW_TRUNC)
16142 return mode;
16143 break;
16144
16145 case I387_FLOOR:
16146 if (mode == I387_CW_FLOOR)
16147 return mode;
16148 break;
16149
16150 case I387_CEIL:
16151 if (mode == I387_CW_CEIL)
16152 return mode;
16153 break;
16154
16155 case I387_MASK_PM:
16156 if (mode == I387_CW_MASK_PM)
16157 return mode;
16158 break;
16159
16160 default:
16161 gcc_unreachable ();
16162 }
16163
16164 return I387_CW_ANY;
16165 }
16166
16167 /* Return mode that entity must be switched into
16168 prior to the execution of insn. */
16169
16170 static int
16171 ix86_mode_needed (int entity, rtx_insn *insn)
16172 {
16173 switch (entity)
16174 {
16175 case AVX_U128:
16176 return ix86_avx_u128_mode_needed (insn);
16177 case I387_TRUNC:
16178 case I387_FLOOR:
16179 case I387_CEIL:
16180 case I387_MASK_PM:
16181 return ix86_i387_mode_needed (entity, insn);
16182 default:
16183 gcc_unreachable ();
16184 }
16185 return 0;
16186 }
16187
16188 /* Check if a 256bit AVX register is referenced in stores. */
16189
16190 static void
16191 ix86_check_avx256_stores (rtx dest, const_rtx, void *data)
16192 {
16193 if (ix86_check_avx256_register (&dest, NULL))
16194 {
16195 bool *used = (bool *) data;
16196 *used = true;
16197 }
16198 }
16199
16200 /* Calculate mode of upper 128bit AVX registers after the insn. */
16201
16202 static int
16203 ix86_avx_u128_mode_after (int mode, rtx_insn *insn)
16204 {
16205 rtx pat = PATTERN (insn);
16206
16207 if (vzeroupper_operation (pat, VOIDmode)
16208 || vzeroall_operation (pat, VOIDmode))
16209 return AVX_U128_CLEAN;
16210
16211 /* We know that the state is clean after a CALL insn if no 256bit
16212 register is used to hold the function return value. */
16213 if (CALL_P (insn))
16214 {
16215 bool avx_reg256_found = false;
16216 note_stores (pat, ix86_check_avx256_stores, &avx_reg256_found);
16217
16218 return avx_reg256_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
16219 }
16220
16221 /* Otherwise, return current mode. Remember that if insn
16222 references AVX 256bit registers, the mode was already changed
16223 to DIRTY from MODE_NEEDED. */
16224 return mode;
16225 }
16226
16227 /* Return the mode that an insn results in. */
16228
16229 int
16230 ix86_mode_after (int entity, int mode, rtx_insn *insn)
16231 {
16232 switch (entity)
16233 {
16234 case AVX_U128:
16235 return ix86_avx_u128_mode_after (mode, insn);
16236 case I387_TRUNC:
16237 case I387_FLOOR:
16238 case I387_CEIL:
16239 case I387_MASK_PM:
16240 return mode;
16241 default:
16242 gcc_unreachable ();
16243 }
16244 }
16245
16246 static int
16247 ix86_avx_u128_mode_entry (void)
16248 {
16249 tree arg;
16250
16251 /* Entry mode is set to AVX_U128_DIRTY if there are
16252 256bit modes used in function arguments. */
16253 for (arg = DECL_ARGUMENTS (current_function_decl); arg;
16254 arg = TREE_CHAIN (arg))
16255 {
16256 rtx incoming = DECL_INCOMING_RTL (arg);
16257
16258 if (incoming && ix86_check_avx256_register (&incoming, NULL))
16259 return AVX_U128_DIRTY;
16260 }
16261
16262 return AVX_U128_CLEAN;
16263 }
16264
16265 /* Return a mode that ENTITY is assumed to be
16266 switched to at function entry. */
16267
16268 static int
16269 ix86_mode_entry (int entity)
16270 {
16271 switch (entity)
16272 {
16273 case AVX_U128:
16274 return ix86_avx_u128_mode_entry ();
16275 case I387_TRUNC:
16276 case I387_FLOOR:
16277 case I387_CEIL:
16278 case I387_MASK_PM:
16279 return I387_CW_ANY;
16280 default:
16281 gcc_unreachable ();
16282 }
16283 }
16284
16285 static int
16286 ix86_avx_u128_mode_exit (void)
16287 {
16288 rtx reg = crtl->return_rtx;
16289
16290 /* Exit mode is set to AVX_U128_DIRTY if there are
16291 256bit modes used in the function return register. */
16292 if (reg && ix86_check_avx256_register (&reg, NULL))
16293 return AVX_U128_DIRTY;
16294
16295 return AVX_U128_CLEAN;
16296 }
16297
16298 /* Return a mode that ENTITY is assumed to be
16299 switched to at function exit. */
16300
16301 static int
16302 ix86_mode_exit (int entity)
16303 {
16304 switch (entity)
16305 {
16306 case AVX_U128:
16307 return ix86_avx_u128_mode_exit ();
16308 case I387_TRUNC:
16309 case I387_FLOOR:
16310 case I387_CEIL:
16311 case I387_MASK_PM:
16312 return I387_CW_ANY;
16313 default:
16314 gcc_unreachable ();
16315 }
16316 }
16317
16318 static int
16319 ix86_mode_priority (int, int n)
16320 {
16321 return n;
16322 }
16323
16324 /* Output code to initialize the control word copies used by the trunc?f?i
16325 and rounding patterns. MODE selects the rounding mode to prepare; a
16326 modified copy of the saved control word is stored in its stack slot. */
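/* In the x87 control word, bits 10-11 form the rounding-control field
   (00 = to nearest, 01 = down, 10 = up, 11 = toward zero) and bit 5 is the
   precision-exception mask, which is why the code below ORs in 0x0c00 for
   truncation, 0x0400 for floor, 0x0800 for ceil and 0x0020 for nearbyint.  */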
16327
16328 static void
16329 emit_i387_cw_initialization (int mode)
16330 {
16331 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
16332 rtx new_mode;
16333
16334 enum ix86_stack_slot slot;
16335
16336 rtx reg = gen_reg_rtx (HImode);
16337
16338 emit_insn (gen_x86_fnstcw_1 (stored_mode));
16339 emit_move_insn (reg, copy_rtx (stored_mode));
16340
16341 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
16342 || optimize_insn_for_size_p ())
16343 {
16344 switch (mode)
16345 {
16346 case I387_CW_TRUNC:
16347 /* round toward zero (truncate) */
16348 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
16349 slot = SLOT_CW_TRUNC;
16350 break;
16351
16352 case I387_CW_FLOOR:
16353 /* round down toward -oo */
16354 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16355 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
16356 slot = SLOT_CW_FLOOR;
16357 break;
16358
16359 case I387_CW_CEIL:
16360 /* round up toward +oo */
16361 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16362 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
16363 slot = SLOT_CW_CEIL;
16364 break;
16365
16366 case I387_CW_MASK_PM:
16367 /* mask precision exception for nearbyint() */
16368 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16369 slot = SLOT_CW_MASK_PM;
16370 break;
16371
16372 default:
16373 gcc_unreachable ();
16374 }
16375 }
16376 else
16377 {
16378 switch (mode)
16379 {
16380 case I387_CW_TRUNC:
16381 /* round toward zero (truncate) */
16382 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
16383 slot = SLOT_CW_TRUNC;
16384 break;
16385
16386 case I387_CW_FLOOR:
16387 /* round down toward -oo */
16388 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
16389 slot = SLOT_CW_FLOOR;
16390 break;
16391
16392 case I387_CW_CEIL:
16393 /* round up toward +oo */
16394 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
16395 slot = SLOT_CW_CEIL;
16396 break;
16397
16398 case I387_CW_MASK_PM:
16399 /* mask precision exception for nearbyint() */
16400 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16401 slot = SLOT_CW_MASK_PM;
16402 break;
16403
16404 default:
16405 gcc_unreachable ();
16406 }
16407 }
16408
16409 gcc_assert (slot < MAX_386_STACK_LOCALS);
16410
16411 new_mode = assign_386_stack_local (HImode, slot);
16412 emit_move_insn (new_mode, reg);
16413 }
16414
16415 /* Emit vzeroupper. */
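/* vzeroupper clears the upper 128 bits of all YMM registers. Emitting it
   while the upper halves are dirty, before code that may use legacy SSE
   encodings, avoids the costly AVX/SSE transition penalty.  */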
16416
16417 void
16418 ix86_avx_emit_vzeroupper (HARD_REG_SET regs_live)
16419 {
16420 int i;
16421
16422 /* Cancel automatic vzeroupper insertion if there are
16423 live call-saved SSE registers at the insertion point. */
16424
16425 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
16426 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
16427 return;
16428
16429 if (TARGET_64BIT)
16430 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
16431 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
16432 return;
16433
16434 emit_insn (gen_avx_vzeroupper ());
16435 }
16436
16437 /* Generate one or more insns to set ENTITY to MODE. REGS_LIVE is the
16438 set of hard registers live at the point where the insn(s) are to be
16439 inserted. */
16442
16443 static void
16444 ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED,
16445 HARD_REG_SET regs_live)
16446 {
16447 switch (entity)
16448 {
16449 case AVX_U128:
16450 if (mode == AVX_U128_CLEAN)
16451 ix86_avx_emit_vzeroupper (regs_live);
16452 break;
16453 case I387_TRUNC:
16454 case I387_FLOOR:
16455 case I387_CEIL:
16456 case I387_MASK_PM:
16457 if (mode != I387_CW_ANY
16458 && mode != I387_CW_UNINITIALIZED)
16459 emit_i387_cw_initialization (mode);
16460 break;
16461 default:
16462 gcc_unreachable ();
16463 }
16464 }
16465
16466 /* Output code for INSN to convert a float to a signed int. OPERANDS
16467 are the insn operands. The output may be [HSD]Imode and the input
16468 operand may be [SDX]Fmode. */
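/* A typical sequence for SImode when the stack top dies and a truncating
   control word is needed is roughly (operand numbers as in the pattern):
	fldcw	%3	; switch rounding mode
	fistpl	%0	; convert, store and pop
	fldcw	%2	; restore the previous control word  */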
16469
16470 const char *
16471 output_fix_trunc (rtx_insn *insn, rtx *operands, bool fisttp)
16472 {
16473 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
16474 int dimode_p = GET_MODE (operands[0]) == DImode;
16475 int round_mode = get_attr_i387_cw (insn);
16476
16477 /* Jump through a hoop or two for DImode, since the hardware has no
16478 non-popping instruction. We used to do this a different way, but
16479 that was somewhat fragile and broke with post-reload splitters. */
16480 if ((dimode_p || fisttp) && !stack_top_dies)
16481 output_asm_insn ("fld\t%y1", operands);
16482
16483 gcc_assert (STACK_TOP_P (operands[1]));
16484 gcc_assert (MEM_P (operands[0]));
16485 gcc_assert (GET_MODE (operands[1]) != TFmode);
16486
16487 if (fisttp)
16488 output_asm_insn ("fisttp%Z0\t%0", operands);
16489 else
16490 {
16491 if (round_mode != I387_CW_ANY)
16492 output_asm_insn ("fldcw\t%3", operands);
16493 if (stack_top_dies || dimode_p)
16494 output_asm_insn ("fistp%Z0\t%0", operands);
16495 else
16496 output_asm_insn ("fist%Z0\t%0", operands);
16497 if (round_mode != I387_CW_ANY)
16498 output_asm_insn ("fldcw\t%2", operands);
16499 }
16500
16501 return "";
16502 }
16503
16504 /* Output code for x87 ffreep insn. The OPNO argument, which may only
16505 have the values zero or one, indicates the ffreep insn's operand
16506 from the OPERANDS array. */
16507
16508 static const char *
16509 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
16510 {
16511 if (TARGET_USE_FFREEP)
16512 #ifdef HAVE_AS_IX86_FFREEP
16513 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
16514 #else
16515 {
16516 static char retval[32];
16517 int regno = REGNO (operands[opno]);
16518
16519 gcc_assert (STACK_REGNO_P (regno));
16520
16521 regno -= FIRST_STACK_REG;
16522
16523 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
16524 return retval;
16525 }
16526 #endif
16527
16528 return opno ? "fstp\t%y1" : "fstp\t%y0";
16529 }
16530
16531
16532 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
16533 should be used. UNORDERED_P is true when fucom should be used. */
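/* The fnstsw variants leave the comparison result in the FPU status word
   and copy it to the output operand (normally %ax); the caller then moves
   it into the CPU flags, e.g. with sahf. The fcomi/fucomi variants set
   the CPU flags directly.  */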
16534
16535 const char *
16536 output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
16537 {
16538 int stack_top_dies;
16539 rtx cmp_op0, cmp_op1;
16540 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
16541
16542 if (eflags_p)
16543 {
16544 cmp_op0 = operands[0];
16545 cmp_op1 = operands[1];
16546 }
16547 else
16548 {
16549 cmp_op0 = operands[1];
16550 cmp_op1 = operands[2];
16551 }
16552
16553 if (is_sse)
16554 {
16555 if (GET_MODE (operands[0]) == SFmode)
16556 if (unordered_p)
16557 return "%vucomiss\t{%1, %0|%0, %1}";
16558 else
16559 return "%vcomiss\t{%1, %0|%0, %1}";
16560 else
16561 if (unordered_p)
16562 return "%vucomisd\t{%1, %0|%0, %1}";
16563 else
16564 return "%vcomisd\t{%1, %0|%0, %1}";
16565 }
16566
16567 gcc_assert (STACK_TOP_P (cmp_op0));
16568
16569 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
16570
16571 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
16572 {
16573 if (stack_top_dies)
16574 {
16575 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
16576 return output_387_ffreep (operands, 1);
16577 }
16578 else
16579 return "ftst\n\tfnstsw\t%0";
16580 }
16581
16582 if (STACK_REG_P (cmp_op1)
16583 && stack_top_dies
16584 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
16585 && REGNO (cmp_op1) != FIRST_STACK_REG)
16586 {
16587 /* If the top of the 387 stack dies, and the other operand
16588 is also a stack register that dies, then this must be a
16589 `fcompp' float compare. */
16590
16591 if (eflags_p)
16592 {
16593 /* There is no double popping fcomi variant. Fortunately,
16594 eflags is immune from the fstp's cc clobbering. */
16595 if (unordered_p)
16596 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
16597 else
16598 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
16599 return output_387_ffreep (operands, 0);
16600 }
16601 else
16602 {
16603 if (unordered_p)
16604 return "fucompp\n\tfnstsw\t%0";
16605 else
16606 return "fcompp\n\tfnstsw\t%0";
16607 }
16608 }
16609 else
16610 {
16611 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
16612
16613 static const char * const alt[16] =
16614 {
16615 "fcom%Z2\t%y2\n\tfnstsw\t%0",
16616 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
16617 "fucom%Z2\t%y2\n\tfnstsw\t%0",
16618 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
16619
16620 "ficom%Z2\t%y2\n\tfnstsw\t%0",
16621 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
16622 NULL,
16623 NULL,
16624
16625 "fcomi\t{%y1, %0|%0, %y1}",
16626 "fcomip\t{%y1, %0|%0, %y1}",
16627 "fucomi\t{%y1, %0|%0, %y1}",
16628 "fucomip\t{%y1, %0|%0, %y1}",
16629
16630 NULL,
16631 NULL,
16632 NULL,
16633 NULL
16634 };
16635
16636 int mask;
16637 const char *ret;
16638
16639 mask = eflags_p << 3;
16640 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
16641 mask |= unordered_p << 1;
16642 mask |= stack_top_dies;
16643
16644 gcc_assert (mask < 16);
16645 ret = alt[mask];
16646 gcc_assert (ret);
16647
16648 return ret;
16649 }
16650 }
16651
16652 void
16653 ix86_output_addr_vec_elt (FILE *file, int value)
16654 {
16655 const char *directive = ASM_LONG;
16656
16657 #ifdef ASM_QUAD
16658 if (TARGET_LP64)
16659 directive = ASM_QUAD;
16660 #else
16661 gcc_assert (!TARGET_64BIT);
16662 #endif
16663
16664 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
16665 }
16666
16667 void
16668 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
16669 {
16670 const char *directive = ASM_LONG;
16671
16672 #ifdef ASM_QUAD
16673 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
16674 directive = ASM_QUAD;
16675 #else
16676 gcc_assert (!TARGET_64BIT);
16677 #endif
16678 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
16679 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
16680 fprintf (file, "%s%s%d-%s%d\n",
16681 directive, LPREFIX, value, LPREFIX, rel);
16682 else if (HAVE_AS_GOTOFF_IN_DATA)
16683 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
16684 #if TARGET_MACHO
16685 else if (TARGET_MACHO)
16686 {
16687 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
16688 machopic_output_function_base_name (file);
16689 putc ('\n', file);
16690 }
16691 #endif
16692 else
16693 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
16694 GOT_SYMBOL_NAME, LPREFIX, value);
16695 }
16696 \f
16697 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
16698 for the target. */
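/* The xor form is shorter and breaks dependency chains but clobbers the
   flags, which is why a CLOBBER of FLAGS_REG is attached below; the plain
   "mov $0, reg" is used only when TARGET_USE_MOV0 is set and we are not
   optimizing for size.  */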
16699
16700 void
16701 ix86_expand_clear (rtx dest)
16702 {
16703 rtx tmp;
16704
16705 /* We play register width games, which are only valid after reload. */
16706 gcc_assert (reload_completed);
16707
16708 /* Avoid HImode and its attendant prefix byte. */
16709 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
16710 dest = gen_rtx_REG (SImode, REGNO (dest));
16711 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
16712
16713 if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
16714 {
16715 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
16716 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
16717 }
16718
16719 emit_insn (tmp);
16720 }
16721
16722 /* X is an unchanging MEM. If it is a constant pool reference, return
16723 the constant pool rtx, else NULL. */
16724
16725 rtx
16726 maybe_get_pool_constant (rtx x)
16727 {
16728 x = ix86_delegitimize_address (XEXP (x, 0));
16729
16730 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
16731 return get_pool_constant (x);
16732
16733 return NULL_RTX;
16734 }
16735
16736 void
16737 ix86_expand_move (enum machine_mode mode, rtx operands[])
16738 {
16739 rtx op0, op1;
16740 enum tls_model model;
16741
16742 op0 = operands[0];
16743 op1 = operands[1];
16744
16745 if (GET_CODE (op1) == SYMBOL_REF)
16746 {
16747 rtx tmp;
16748
16749 model = SYMBOL_REF_TLS_MODEL (op1);
16750 if (model)
16751 {
16752 op1 = legitimize_tls_address (op1, model, true);
16753 op1 = force_operand (op1, op0);
16754 if (op1 == op0)
16755 return;
16756 op1 = convert_to_mode (mode, op1, 1);
16757 }
16758 else if ((tmp = legitimize_pe_coff_symbol (op1, false)) != NULL_RTX)
16759 op1 = tmp;
16760 }
16761 else if (GET_CODE (op1) == CONST
16762 && GET_CODE (XEXP (op1, 0)) == PLUS
16763 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
16764 {
16765 rtx addend = XEXP (XEXP (op1, 0), 1);
16766 rtx symbol = XEXP (XEXP (op1, 0), 0);
16767 rtx tmp;
16768
16769 model = SYMBOL_REF_TLS_MODEL (symbol);
16770 if (model)
16771 tmp = legitimize_tls_address (symbol, model, true);
16772 else
16773 tmp = legitimize_pe_coff_symbol (symbol, true);
16774
16775 if (tmp)
16776 {
16777 tmp = force_operand (tmp, NULL);
16778 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
16779 op0, 1, OPTAB_DIRECT);
16780 if (tmp == op0)
16781 return;
16782 op1 = convert_to_mode (mode, tmp, 1);
16783 }
16784 }
16785
16786 if ((flag_pic || MACHOPIC_INDIRECT)
16787 && symbolic_operand (op1, mode))
16788 {
16789 if (TARGET_MACHO && !TARGET_64BIT)
16790 {
16791 #if TARGET_MACHO
16792 /* dynamic-no-pic */
16793 if (MACHOPIC_INDIRECT)
16794 {
16795 rtx temp = ((reload_in_progress
16796 || ((op0 && REG_P (op0))
16797 && mode == Pmode))
16798 ? op0 : gen_reg_rtx (Pmode));
16799 op1 = machopic_indirect_data_reference (op1, temp);
16800 if (MACHOPIC_PURE)
16801 op1 = machopic_legitimize_pic_address (op1, mode,
16802 temp == op1 ? 0 : temp);
16803 }
16804 if (op0 != op1 && GET_CODE (op0) != MEM)
16805 {
16806 rtx insn = gen_rtx_SET (VOIDmode, op0, op1);
16807 emit_insn (insn);
16808 return;
16809 }
16810 if (GET_CODE (op0) == MEM)
16811 op1 = force_reg (Pmode, op1);
16812 else
16813 {
16814 rtx temp = op0;
16815 if (GET_CODE (temp) != REG)
16816 temp = gen_reg_rtx (Pmode);
16817 temp = legitimize_pic_address (op1, temp);
16818 if (temp == op0)
16819 return;
16820 op1 = temp;
16821 }
16822 /* dynamic-no-pic */
16823 #endif
16824 }
16825 else
16826 {
16827 if (MEM_P (op0))
16828 op1 = force_reg (mode, op1);
16829 else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
16830 {
16831 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
16832 op1 = legitimize_pic_address (op1, reg);
16833 if (op0 == op1)
16834 return;
16835 op1 = convert_to_mode (mode, op1, 1);
16836 }
16837 }
16838 }
16839 else
16840 {
16841 if (MEM_P (op0)
16842 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
16843 || !push_operand (op0, mode))
16844 && MEM_P (op1))
16845 op1 = force_reg (mode, op1);
16846
16847 if (push_operand (op0, mode)
16848 && ! general_no_elim_operand (op1, mode))
16849 op1 = copy_to_mode_reg (mode, op1);
16850
16851 /* Force large constants in 64bit compilation into a register
16852 to get them CSEed. */
16853 if (can_create_pseudo_p ()
16854 && (mode == DImode) && TARGET_64BIT
16855 && immediate_operand (op1, mode)
16856 && !x86_64_zext_immediate_operand (op1, VOIDmode)
16857 && !register_operand (op0, mode)
16858 && optimize)
16859 op1 = copy_to_mode_reg (mode, op1);
16860
16861 if (can_create_pseudo_p ()
16862 && FLOAT_MODE_P (mode)
16863 && GET_CODE (op1) == CONST_DOUBLE)
16864 {
16865 /* If we are loading a floating point constant to a register,
16866 force the value to memory now, since we'll get better code
16867 out of the back end. */
16868
16869 op1 = validize_mem (force_const_mem (mode, op1));
16870 if (!register_operand (op0, mode))
16871 {
16872 rtx temp = gen_reg_rtx (mode);
16873 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
16874 emit_move_insn (op0, temp);
16875 return;
16876 }
16877 }
16878 }
16879
16880 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
16881 }
16882
16883 void
16884 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
16885 {
16886 rtx op0 = operands[0], op1 = operands[1];
16887 unsigned int align = GET_MODE_ALIGNMENT (mode);
16888
16889 if (push_operand (op0, VOIDmode))
16890 op0 = emit_move_resolve_push (mode, op0);
16891
16892 /* Force constants other than zero into memory. We do not know how
16893 the instructions used to build constants modify the upper 64 bits
16894 of the register; once we have that information we may be able
16895 to handle some of them more efficiently. */
16896 if (can_create_pseudo_p ()
16897 && register_operand (op0, mode)
16898 && (CONSTANT_P (op1)
16899 || (GET_CODE (op1) == SUBREG
16900 && CONSTANT_P (SUBREG_REG (op1))))
16901 && !standard_sse_constant_p (op1))
16902 op1 = validize_mem (force_const_mem (mode, op1));
16903
16904 /* We need to check memory alignment for SSE mode since an attribute
16905 can make operands unaligned. */
16906 if (can_create_pseudo_p ()
16907 && SSE_REG_MODE_P (mode)
16908 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
16909 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
16910 {
16911 rtx tmp[2];
16912
16913 /* ix86_expand_vector_move_misalign() does not like constants ... */
16914 if (CONSTANT_P (op1)
16915 || (GET_CODE (op1) == SUBREG
16916 && CONSTANT_P (SUBREG_REG (op1))))
16917 op1 = validize_mem (force_const_mem (mode, op1));
16918
16919 /* ... nor both arguments in memory. */
16920 if (!register_operand (op0, mode)
16921 && !register_operand (op1, mode))
16922 op1 = force_reg (mode, op1);
16923
16924 tmp[0] = op0; tmp[1] = op1;
16925 ix86_expand_vector_move_misalign (mode, tmp);
16926 return;
16927 }
16928
16929 /* Make operand1 a register if it isn't already. */
16930 if (can_create_pseudo_p ()
16931 && !register_operand (op0, mode)
16932 && !register_operand (op1, mode))
16933 {
16934 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
16935 return;
16936 }
16937
16938 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
16939 }
16940
16941 /* Split 32-byte AVX unaligned load and store if needed. */
16942
16943 static void
16944 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
16945 {
16946 rtx m;
16947 rtx (*extract) (rtx, rtx, rtx);
16948 rtx (*load_unaligned) (rtx, rtx);
16949 rtx (*store_unaligned) (rtx, rtx);
16950 enum machine_mode mode;
16951
16952 switch (GET_MODE (op0))
16953 {
16954 default:
16955 gcc_unreachable ();
16956 case V32QImode:
16957 extract = gen_avx_vextractf128v32qi;
16958 load_unaligned = gen_avx_loaddquv32qi;
16959 store_unaligned = gen_avx_storedquv32qi;
16960 mode = V16QImode;
16961 break;
16962 case V8SFmode:
16963 extract = gen_avx_vextractf128v8sf;
16964 load_unaligned = gen_avx_loadups256;
16965 store_unaligned = gen_avx_storeups256;
16966 mode = V4SFmode;
16967 break;
16968 case V4DFmode:
16969 extract = gen_avx_vextractf128v4df;
16970 load_unaligned = gen_avx_loadupd256;
16971 store_unaligned = gen_avx_storeupd256;
16972 mode = V2DFmode;
16973 break;
16974 }
16975
16976 if (MEM_P (op1))
16977 {
16978 if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
16979 {
16980 rtx r = gen_reg_rtx (mode);
16981 m = adjust_address (op1, mode, 0);
16982 emit_move_insn (r, m);
16983 m = adjust_address (op1, mode, 16);
16984 r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
16985 emit_move_insn (op0, r);
16986 }
16987 /* Normal *mov<mode>_internal pattern will handle
16988 unaligned loads just fine if misaligned_operand
16989 is true, and without the UNSPEC it can be combined
16990 with arithmetic instructions. */
16991 else if (misaligned_operand (op1, GET_MODE (op1)))
16992 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
16993 else
16994 emit_insn (load_unaligned (op0, op1));
16995 }
16996 else if (MEM_P (op0))
16997 {
16998 if (TARGET_AVX256_SPLIT_UNALIGNED_STORE)
16999 {
17000 m = adjust_address (op0, mode, 0);
17001 emit_insn (extract (m, op1, const0_rtx));
17002 m = adjust_address (op0, mode, 16);
17003 emit_insn (extract (m, op1, const1_rtx));
17004 }
17005 else
17006 emit_insn (store_unaligned (op0, op1));
17007 }
17008 else
17009 gcc_unreachable ();
17010 }
17011
17012 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
17013 straight to ix86_expand_vector_move. */
17014 /* Code generation for scalar reg-reg moves of single and double precision data:
17015 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
17016 movaps reg, reg
17017 else
17018 movss reg, reg
17019 if (x86_sse_partial_reg_dependency == true)
17020 movapd reg, reg
17021 else
17022 movsd reg, reg
17023
17024 Code generation for scalar loads of double precision data:
17025 if (x86_sse_split_regs == true)
17026 movlpd mem, reg (gas syntax)
17027 else
17028 movsd mem, reg
17029
17030 Code generation for unaligned packed loads of single precision data
17031 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
17032 if (x86_sse_unaligned_move_optimal)
17033 movups mem, reg
17034
17035 if (x86_sse_partial_reg_dependency == true)
17036 {
17037 xorps reg, reg
17038 movlps mem, reg
17039 movhps mem+8, reg
17040 }
17041 else
17042 {
17043 movlps mem, reg
17044 movhps mem+8, reg
17045 }
17046
17047 Code generation for unaligned packed loads of double precision data
17048 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
17049 if (x86_sse_unaligned_move_optimal)
17050 movupd mem, reg
17051
17052 if (x86_sse_split_regs == true)
17053 {
17054 movlpd mem, reg
17055 movhpd mem+8, reg
17056 }
17057 else
17058 {
17059 movsd mem, reg
17060 movhpd mem+8, reg
17061 }
17062 */
17063
17064 void
17065 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
17066 {
17067 rtx op0, op1, orig_op0 = NULL_RTX, m;
17068 rtx (*load_unaligned) (rtx, rtx);
17069 rtx (*store_unaligned) (rtx, rtx);
17070
17071 op0 = operands[0];
17072 op1 = operands[1];
17073
17074 if (GET_MODE_SIZE (mode) == 64)
17075 {
17076 switch (GET_MODE_CLASS (mode))
17077 {
17078 case MODE_VECTOR_INT:
17079 case MODE_INT:
17080 if (GET_MODE (op0) != V16SImode)
17081 {
17082 if (!MEM_P (op0))
17083 {
17084 orig_op0 = op0;
17085 op0 = gen_reg_rtx (V16SImode);
17086 }
17087 else
17088 op0 = gen_lowpart (V16SImode, op0);
17089 }
17090 op1 = gen_lowpart (V16SImode, op1);
17091 /* FALLTHRU */
17092
17093 case MODE_VECTOR_FLOAT:
17094 switch (GET_MODE (op0))
17095 {
17096 default:
17097 gcc_unreachable ();
17098 case V16SImode:
17099 load_unaligned = gen_avx512f_loaddquv16si;
17100 store_unaligned = gen_avx512f_storedquv16si;
17101 break;
17102 case V16SFmode:
17103 load_unaligned = gen_avx512f_loadups512;
17104 store_unaligned = gen_avx512f_storeups512;
17105 break;
17106 case V8DFmode:
17107 load_unaligned = gen_avx512f_loadupd512;
17108 store_unaligned = gen_avx512f_storeupd512;
17109 break;
17110 }
17111
17112 if (MEM_P (op1))
17113 emit_insn (load_unaligned (op0, op1));
17114 else if (MEM_P (op0))
17115 emit_insn (store_unaligned (op0, op1));
17116 else
17117 gcc_unreachable ();
17118 if (orig_op0)
17119 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17120 break;
17121
17122 default:
17123 gcc_unreachable ();
17124 }
17125
17126 return;
17127 }
17128
17129 if (TARGET_AVX
17130 && GET_MODE_SIZE (mode) == 32)
17131 {
17132 switch (GET_MODE_CLASS (mode))
17133 {
17134 case MODE_VECTOR_INT:
17135 case MODE_INT:
17136 if (GET_MODE (op0) != V32QImode)
17137 {
17138 if (!MEM_P (op0))
17139 {
17140 orig_op0 = op0;
17141 op0 = gen_reg_rtx (V32QImode);
17142 }
17143 else
17144 op0 = gen_lowpart (V32QImode, op0);
17145 }
17146 op1 = gen_lowpart (V32QImode, op1);
17147 /* FALLTHRU */
17148
17149 case MODE_VECTOR_FLOAT:
17150 ix86_avx256_split_vector_move_misalign (op0, op1);
17151 if (orig_op0)
17152 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17153 break;
17154
17155 default:
17156 gcc_unreachable ();
17157 }
17158
17159 return;
17160 }
17161
17162 if (MEM_P (op1))
17163 {
17164 /* Normal *mov<mode>_internal pattern will handle
17165 unaligned loads just fine if misaligned_operand
17166 is true, and without the UNSPEC it can be combined
17167 with arithmetic instructions. */
17168 if (TARGET_AVX
17169 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
17170 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
17171 && misaligned_operand (op1, GET_MODE (op1)))
17172 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17173 /* ??? If we have typed data, then it would appear that using
17174 movdqu is the only way to get unaligned data loaded with
17175 integer type. */
17176 else if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17177 {
17178 if (GET_MODE (op0) != V16QImode)
17179 {
17180 orig_op0 = op0;
17181 op0 = gen_reg_rtx (V16QImode);
17182 }
17183 op1 = gen_lowpart (V16QImode, op1);
17184 /* We will eventually emit movups based on insn attributes. */
17185 emit_insn (gen_sse2_loaddquv16qi (op0, op1));
17186 if (orig_op0)
17187 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17188 }
17189 else if (TARGET_SSE2 && mode == V2DFmode)
17190 {
17191 rtx zero;
17192
17193 if (TARGET_AVX
17194 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17195 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17196 || optimize_insn_for_size_p ())
17197 {
17198 /* We will eventually emit movups based on insn attributes. */
17199 emit_insn (gen_sse2_loadupd (op0, op1));
17200 return;
17201 }
17202
17203 /* When SSE registers are split into halves, we can avoid
17204 writing to the top half twice. */
17205 if (TARGET_SSE_SPLIT_REGS)
17206 {
17207 emit_clobber (op0);
17208 zero = op0;
17209 }
17210 else
17211 {
17212 /* ??? Not sure about the best option for the Intel chips.
17213 The following would seem to satisfy; the register is
17214 entirely cleared, breaking the dependency chain. We
17215 then store to the upper half, with a dependency depth
17216 of one. A rumor has it that Intel recommends two movsd
17217 followed by an unpacklpd, but this is unconfirmed. And
17218 given that the dependency depth of the unpacklpd would
17219 still be one, I'm not sure why this would be better. */
17220 zero = CONST0_RTX (V2DFmode);
17221 }
17222
17223 m = adjust_address (op1, DFmode, 0);
17224 emit_insn (gen_sse2_loadlpd (op0, zero, m));
17225 m = adjust_address (op1, DFmode, 8);
17226 emit_insn (gen_sse2_loadhpd (op0, op0, m));
17227 }
17228 else
17229 {
17230 rtx t;
17231
17232 if (TARGET_AVX
17233 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17234 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17235 || optimize_insn_for_size_p ())
17236 {
17237 if (GET_MODE (op0) != V4SFmode)
17238 {
17239 orig_op0 = op0;
17240 op0 = gen_reg_rtx (V4SFmode);
17241 }
17242 op1 = gen_lowpart (V4SFmode, op1);
17243 emit_insn (gen_sse_loadups (op0, op1));
17244 if (orig_op0)
17245 emit_move_insn (orig_op0,
17246 gen_lowpart (GET_MODE (orig_op0), op0));
17247 return;
17248 }
17249
17250 if (mode != V4SFmode)
17251 t = gen_reg_rtx (V4SFmode);
17252 else
17253 t = op0;
17254
17255 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
17256 emit_move_insn (t, CONST0_RTX (V4SFmode));
17257 else
17258 emit_clobber (t);
17259
17260 m = adjust_address (op1, V2SFmode, 0);
17261 emit_insn (gen_sse_loadlps (t, t, m));
17262 m = adjust_address (op1, V2SFmode, 8);
17263 emit_insn (gen_sse_loadhps (t, t, m));
17264 if (mode != V4SFmode)
17265 emit_move_insn (op0, gen_lowpart (mode, t));
17266 }
17267 }
17268 else if (MEM_P (op0))
17269 {
17270 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17271 {
17272 op0 = gen_lowpart (V16QImode, op0);
17273 op1 = gen_lowpart (V16QImode, op1);
17274 /* We will eventually emit movups based on insn attributes. */
17275 emit_insn (gen_sse2_storedquv16qi (op0, op1));
17276 }
17277 else if (TARGET_SSE2 && mode == V2DFmode)
17278 {
17279 if (TARGET_AVX
17280 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17281 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17282 || optimize_insn_for_size_p ())
17283 /* We will eventually emit movups based on insn attributes. */
17284 emit_insn (gen_sse2_storeupd (op0, op1));
17285 else
17286 {
17287 m = adjust_address (op0, DFmode, 0);
17288 emit_insn (gen_sse2_storelpd (m, op1));
17289 m = adjust_address (op0, DFmode, 8);
17290 emit_insn (gen_sse2_storehpd (m, op1));
17291 }
17292 }
17293 else
17294 {
17295 if (mode != V4SFmode)
17296 op1 = gen_lowpart (V4SFmode, op1);
17297
17298 if (TARGET_AVX
17299 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17300 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17301 || optimize_insn_for_size_p ())
17302 {
17303 op0 = gen_lowpart (V4SFmode, op0);
17304 emit_insn (gen_sse_storeups (op0, op1));
17305 }
17306 else
17307 {
17308 m = adjust_address (op0, V2SFmode, 0);
17309 emit_insn (gen_sse_storelps (m, op1));
17310 m = adjust_address (op0, V2SFmode, 8);
17311 emit_insn (gen_sse_storehps (m, op1));
17312 }
17313 }
17314 }
17315 else
17316 gcc_unreachable ();
17317 }
17318
17319 /* Helper function of ix86_fixup_binary_operands to canonicalize
17320 operand order. Returns true if the operands should be swapped. */
17321
17322 static bool
17323 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
17324 rtx operands[])
17325 {
17326 rtx dst = operands[0];
17327 rtx src1 = operands[1];
17328 rtx src2 = operands[2];
17329
17330 /* If the operation is not commutative, we can't do anything. */
17331 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
17332 return false;
17333
17334 /* Highest priority is that src1 should match dst. */
17335 if (rtx_equal_p (dst, src1))
17336 return false;
17337 if (rtx_equal_p (dst, src2))
17338 return true;
17339
17340 /* Next highest priority is that immediate constants come second. */
17341 if (immediate_operand (src2, mode))
17342 return false;
17343 if (immediate_operand (src1, mode))
17344 return true;
17345
17346 /* Lowest priority is that memory references should come second. */
17347 if (MEM_P (src2))
17348 return false;
17349 if (MEM_P (src1))
17350 return true;
17351
17352 return false;
17353 }
17354
17355
17356 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
17357 destination to use for the operation. If different from the true
17358 destination in operands[0], a copy operation will be required. */
17359
17360 rtx
17361 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
17362 rtx operands[])
17363 {
17364 rtx dst = operands[0];
17365 rtx src1 = operands[1];
17366 rtx src2 = operands[2];
17367
17368 /* Canonicalize operand order. */
17369 if (ix86_swap_binary_operands_p (code, mode, operands))
17370 {
17371 rtx temp;
17372
17373 /* It is invalid to swap operands of different modes. */
17374 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
17375
17376 temp = src1;
17377 src1 = src2;
17378 src2 = temp;
17379 }
17380
17381 /* The two source operands cannot both be in memory. */
17382 if (MEM_P (src1) && MEM_P (src2))
17383 {
17384 /* Optimization: Only read from memory once. */
17385 if (rtx_equal_p (src1, src2))
17386 {
17387 src2 = force_reg (mode, src2);
17388 src1 = src2;
17389 }
17390 else if (rtx_equal_p (dst, src1))
17391 src2 = force_reg (mode, src2);
17392 else
17393 src1 = force_reg (mode, src1);
17394 }
17395
17396 /* If the destination is memory, and we do not have matching source
17397 operands, do things in registers. */
17398 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
17399 dst = gen_reg_rtx (mode);
17400
17401 /* Source 1 cannot be a constant. */
17402 if (CONSTANT_P (src1))
17403 src1 = force_reg (mode, src1);
17404
17405 /* Source 1 cannot be a non-matching memory. */
17406 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
17407 src1 = force_reg (mode, src1);
17408
17409 /* Improve address combine. */
17410 if (code == PLUS
17411 && GET_MODE_CLASS (mode) == MODE_INT
17412 && MEM_P (src2))
17413 src2 = force_reg (mode, src2);
17414
17415 operands[1] = src1;
17416 operands[2] = src2;
17417 return dst;
17418 }
17419
17420 /* Similarly, but assume that the destination has already been
17421 set up properly. */
17422
17423 void
17424 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
17425 enum machine_mode mode, rtx operands[])
17426 {
17427 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
17428 gcc_assert (dst == operands[0]);
17429 }
17430
17431 /* Attempt to expand a binary operator. Make the expansion closer to the
17432 actual machine, than just general_operand, which will allow 3 separate
17433 memory references (one output, two input) in a single insn. */
17434
17435 void
17436 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
17437 rtx operands[])
17438 {
17439 rtx src1, src2, dst, op, clob;
17440
17441 dst = ix86_fixup_binary_operands (code, mode, operands);
17442 src1 = operands[1];
17443 src2 = operands[2];
17444
17445 /* Emit the instruction. */
17446
17447 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
17448 if (reload_in_progress)
17449 {
17450 /* Reload doesn't know about the flags register, and doesn't know that
17451 it doesn't want to clobber it. We can only do this with PLUS. */
17452 gcc_assert (code == PLUS);
17453 emit_insn (op);
17454 }
17455 else if (reload_completed
17456 && code == PLUS
17457 && !rtx_equal_p (dst, src1))
17458 {
17459 /* This is going to be an LEA; avoid splitting it later. */
17460 emit_insn (op);
17461 }
17462 else
17463 {
17464 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17465 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
17466 }
17467
17468 /* Fix up the destination if needed. */
17469 if (dst != operands[0])
17470 emit_move_insn (operands[0], dst);
17471 }
17472
17473 /* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with
17474 the given OPERANDS. */
17475
17476 void
17477 ix86_expand_vector_logical_operator (enum rtx_code code, enum machine_mode mode,
17478 rtx operands[])
17479 {
17480 rtx op1 = NULL_RTX, op2 = NULL_RTX;
17481 if (GET_CODE (operands[1]) == SUBREG)
17482 {
17483 op1 = operands[1];
17484 op2 = operands[2];
17485 }
17486 else if (GET_CODE (operands[2]) == SUBREG)
17487 {
17488 op1 = operands[2];
17489 op2 = operands[1];
17490 }
17491 /* Optimize (__m128i) d | (__m128i) e and similar code
17492 when d and e are float vectors into a float vector logical
17493 insn. In C/C++, without using intrinsics, there is no other way
17494 to express a vector logical operation on float vectors than
17495 to cast them temporarily to integer vectors. */
17496 if (op1
17497 && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17498 && ((GET_CODE (op2) == SUBREG || GET_CODE (op2) == CONST_VECTOR))
17499 && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1))) == MODE_VECTOR_FLOAT
17500 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1))) == GET_MODE_SIZE (mode)
17501 && SUBREG_BYTE (op1) == 0
17502 && (GET_CODE (op2) == CONST_VECTOR
17503 || (GET_MODE (SUBREG_REG (op1)) == GET_MODE (SUBREG_REG (op2))
17504 && SUBREG_BYTE (op2) == 0))
17505 && can_create_pseudo_p ())
17506 {
17507 rtx dst;
17508 switch (GET_MODE (SUBREG_REG (op1)))
17509 {
17510 case V4SFmode:
17511 case V8SFmode:
17512 case V2DFmode:
17513 case V4DFmode:
17514 dst = gen_reg_rtx (GET_MODE (SUBREG_REG (op1)));
17515 if (GET_CODE (op2) == CONST_VECTOR)
17516 {
17517 op2 = gen_lowpart (GET_MODE (dst), op2);
17518 op2 = force_reg (GET_MODE (dst), op2);
17519 }
17520 else
17521 {
17522 op1 = operands[1];
17523 op2 = SUBREG_REG (operands[2]);
17524 if (!nonimmediate_operand (op2, GET_MODE (dst)))
17525 op2 = force_reg (GET_MODE (dst), op2);
17526 }
17527 op1 = SUBREG_REG (op1);
17528 if (!nonimmediate_operand (op1, GET_MODE (dst)))
17529 op1 = force_reg (GET_MODE (dst), op1);
17530 emit_insn (gen_rtx_SET (VOIDmode, dst,
17531 gen_rtx_fmt_ee (code, GET_MODE (dst),
17532 op1, op2)));
17533 emit_move_insn (operands[0], gen_lowpart (mode, dst));
17534 return;
17535 default:
17536 break;
17537 }
17538 }
17539 if (!nonimmediate_operand (operands[1], mode))
17540 operands[1] = force_reg (mode, operands[1]);
17541 if (!nonimmediate_operand (operands[2], mode))
17542 operands[2] = force_reg (mode, operands[2]);
17543 ix86_fixup_binary_operands_no_copy (code, mode, operands);
17544 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
17545 gen_rtx_fmt_ee (code, mode, operands[1],
17546 operands[2])));
17547 }
17548
17549 /* Return TRUE or FALSE depending on whether the binary operator meets the
17550 appropriate constraints. */
17551
17552 bool
17553 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
17554 rtx operands[3])
17555 {
17556 rtx dst = operands[0];
17557 rtx src1 = operands[1];
17558 rtx src2 = operands[2];
17559
17560 /* The two source operands cannot both be in memory. */
17561 if (MEM_P (src1) && MEM_P (src2))
17562 return false;
17563
17564 /* Canonicalize operand order for commutative operators. */
17565 if (ix86_swap_binary_operands_p (code, mode, operands))
17566 {
17567 rtx temp = src1;
17568 src1 = src2;
17569 src2 = temp;
17570 }
17571
17572 /* If the destination is memory, we must have a matching source operand. */
17573 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
17574 return false;
17575
17576 /* Source 1 cannot be a constant. */
17577 if (CONSTANT_P (src1))
17578 return false;
17579
17580 /* Source 1 cannot be a non-matching memory. */
17581 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
17582 /* Support "andhi/andsi/anddi" as a zero-extending move. */
17583 return (code == AND
17584 && (mode == HImode
17585 || mode == SImode
17586 || (TARGET_64BIT && mode == DImode))
17587 && satisfies_constraint_L (src2));
17588
17589 return true;
17590 }
17591
17592 /* Attempt to expand a unary operator. Make the expansion closer to the
17593 actual machine, than just general_operand, which will allow 2 separate
17594 memory references (one output, one input) in a single insn. */
17595
17596 void
17597 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
17598 rtx operands[])
17599 {
17600 int matching_memory;
17601 rtx src, dst, op, clob;
17602
17603 dst = operands[0];
17604 src = operands[1];
17605
17606 /* If the destination is memory, and we do not have matching source
17607 operands, do things in registers. */
17608 matching_memory = 0;
17609 if (MEM_P (dst))
17610 {
17611 if (rtx_equal_p (dst, src))
17612 matching_memory = 1;
17613 else
17614 dst = gen_reg_rtx (mode);
17615 }
17616
17617 /* When the source operand is memory, the destination must match. */
17618 if (MEM_P (src) && !matching_memory)
17619 src = force_reg (mode, src);
17620
17621 /* Emit the instruction. */
17622
17623 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
17624 if (reload_in_progress || code == NOT)
17625 {
17626 /* Reload doesn't know about the flags register, and doesn't know that
17627 it doesn't want to clobber it. */
17628 gcc_assert (code == NOT);
17629 emit_insn (op);
17630 }
17631 else
17632 {
17633 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17634 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
17635 }
17636
17637 /* Fix up the destination if needed. */
17638 if (dst != operands[0])
17639 emit_move_insn (operands[0], dst);
17640 }
17641
17642 /* Split a 32-bit/64-bit divmod with an 8-bit unsigned divmod if the dividend
17643 and divisor are both within the range [0, 255]. */
17644
17645 void
17646 ix86_split_idivmod (enum machine_mode mode, rtx operands[],
17647 bool signed_p)
17648 {
17649 rtx_code_label *end_label, *qimode_label;
17650 rtx insn, div, mod;
17651 rtx scratch, tmp0, tmp1, tmp2;
17652 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
17653 rtx (*gen_zero_extend) (rtx, rtx);
17654 rtx (*gen_test_ccno_1) (rtx, rtx);
17655
17656 switch (mode)
17657 {
17658 case SImode:
17659 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
17660 gen_test_ccno_1 = gen_testsi_ccno_1;
17661 gen_zero_extend = gen_zero_extendqisi2;
17662 break;
17663 case DImode:
17664 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
17665 gen_test_ccno_1 = gen_testdi_ccno_1;
17666 gen_zero_extend = gen_zero_extendqidi2;
17667 break;
17668 default:
17669 gcc_unreachable ();
17670 }
17671
17672 end_label = gen_label_rtx ();
17673 qimode_label = gen_label_rtx ();
17674
17675 scratch = gen_reg_rtx (mode);
17676
17677 /* Use 8-bit unsigned divmod if the dividend and divisor are within
17678 the range [0, 255]. */
17679 emit_move_insn (scratch, operands[2]);
17680 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
17681 scratch, 1, OPTAB_DIRECT);
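/* Testing the IOR of the dividend and divisor against ~0xff checks
   whether either value has bits set above the low 8 bits; if not, the
   8-bit path below can be used.  */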
17682 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
17683 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
17684 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
17685 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
17686 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
17687 pc_rtx);
17688 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
17689 predict_jump (REG_BR_PROB_BASE * 50 / 100);
17690 JUMP_LABEL (insn) = qimode_label;
17691
17692 /* Generate the original signed/unsigned divmod. */
17693 div = gen_divmod4_1 (operands[0], operands[1],
17694 operands[2], operands[3]);
17695 emit_insn (div);
17696
17697 /* Branch to the end. */
17698 emit_jump_insn (gen_jump (end_label));
17699 emit_barrier ();
17700
17701 /* Generate 8bit unsigned divide. */
17702 emit_label (qimode_label);
17703 /* Don't use operands[0] for result of 8bit divide since not all
17704 registers support QImode ZERO_EXTRACT. */
17705 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
17706 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
17707 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
17708 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
17709
17710 if (signed_p)
17711 {
17712 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
17713 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
17714 }
17715 else
17716 {
17717 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
17718 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
17719 }
17720
17721 /* Extract remainder from AH. */
17722 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
17723 if (REG_P (operands[1]))
17724 insn = emit_move_insn (operands[1], tmp1);
17725 else
17726 {
17727 /* Need a new scratch register since the old one holds the result
17728 of the 8-bit divide. */
17729 scratch = gen_reg_rtx (mode);
17730 emit_move_insn (scratch, tmp1);
17731 insn = emit_move_insn (operands[1], scratch);
17732 }
17733 set_unique_reg_note (insn, REG_EQUAL, mod);
17734
17735 /* Zero extend quotient from AL. */
17736 tmp1 = gen_lowpart (QImode, tmp0);
17737 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
17738 set_unique_reg_note (insn, REG_EQUAL, div);
17739
17740 emit_label (end_label);
17741 }
17742
17743 /* Whether it is OK to emit CFI directives when emitting asm code. */
17744
17745 bool
17746 ix86_emit_cfi ()
17747 {
17748 return dwarf2out_do_cfi_asm ();
17749 }
17750
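/* Constants used by the LEA splitting heuristics below: LEA_MAX_STALL is
   measured in cycles, LEA_SEARCH_THRESHOLD (twice that) in half-cycles,
   and the latter bounds the backward and forward scans over insns.  */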
17751 #define LEA_MAX_STALL (3)
17752 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
17753
17754 /* Increase given DISTANCE in half-cycles according to
17755 dependencies between PREV and NEXT instructions.
17756 Add 1 half-cycle if there is no dependency and
17757 go to the next cycle if there is some dependency. */
17758
17759 static unsigned int
17760 increase_distance (rtx_insn *prev, rtx_insn *next, unsigned int distance)
17761 {
17762 df_ref def, use;
17763
17764 if (!prev || !next)
17765 return distance + (distance & 1) + 2;
17766
17767 if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
17768 return distance + 1;
17769
17770 FOR_EACH_INSN_USE (use, next)
17771 FOR_EACH_INSN_DEF (def, prev)
17772 if (!DF_REF_IS_ARTIFICIAL (def)
17773 && DF_REF_REGNO (use) == DF_REF_REGNO (def))
17774 return distance + (distance & 1) + 2;
17775
17776 return distance + 1;
17777 }
17778
17779 /* Return true if instruction INSN defines register number
17780 REGNO1 or REGNO2. */
17781
17782 static bool
17783 insn_defines_reg (unsigned int regno1, unsigned int regno2,
17784 rtx insn)
17785 {
17786 df_ref def;
17787
17788 FOR_EACH_INSN_DEF (def, insn)
17789 if (DF_REF_REG_DEF_P (def)
17790 && !DF_REF_IS_ARTIFICIAL (def)
17791 && (regno1 == DF_REF_REGNO (def)
17792 || regno2 == DF_REF_REGNO (def)))
17793 return true;
17794
17795 return false;
17796 }
17797
17798 /* Return true if instruction INSN uses register number
17799 REGNO as part of a memory address expression. */
17800
17801 static bool
17802 insn_uses_reg_mem (unsigned int regno, rtx insn)
17803 {
17804 df_ref use;
17805
17806 FOR_EACH_INSN_USE (use, insn)
17807 if (DF_REF_REG_MEM_P (use) && regno == DF_REF_REGNO (use))
17808 return true;
17809
17810 return false;
17811 }
17812
17813 /* Search backward for non-agu definition of register number REGNO1
17814 or register number REGNO2 in basic block starting from instruction
17815 START up to head of basic block or instruction INSN.
17816
17817 Put true into *FOUND if a definition was found and false
17818 otherwise.
17819
17820 The distance in half-cycles between START and the found instruction,
17821 or the head of the BB, is added to DISTANCE and returned. */
17822
17823 static int
17824 distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
17825 rtx_insn *insn, int distance,
17826 rtx_insn *start, bool *found)
17827 {
17828 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
17829 rtx_insn *prev = start;
17830 rtx_insn *next = NULL;
17831
17832 *found = false;
17833
17834 while (prev
17835 && prev != insn
17836 && distance < LEA_SEARCH_THRESHOLD)
17837 {
17838 if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
17839 {
17840 distance = increase_distance (prev, next, distance);
17841 if (insn_defines_reg (regno1, regno2, prev))
17842 {
17843 if (recog_memoized (prev) < 0
17844 || get_attr_type (prev) != TYPE_LEA)
17845 {
17846 *found = true;
17847 return distance;
17848 }
17849 }
17850
17851 next = prev;
17852 }
17853 if (prev == BB_HEAD (bb))
17854 break;
17855
17856 prev = PREV_INSN (prev);
17857 }
17858
17859 return distance;
17860 }
17861
17862 /* Search backward for a non-agu definition of register number REGNO1
17863 or register number REGNO2 in INSN's basic block until we
17864 1. Pass LEA_SEARCH_THRESHOLD instructions, or
17865 2. Reach a neighbouring BB boundary, or
17866 3. Reach an agu definition.
17867 Return the distance between the non-agu definition point and INSN.
17868 If there is no definition point, return -1. */
17869
17870 static int
17871 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
17872 rtx_insn *insn)
17873 {
17874 basic_block bb = BLOCK_FOR_INSN (insn);
17875 int distance = 0;
17876 bool found = false;
17877
17878 if (insn != BB_HEAD (bb))
17879 distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
17880 distance, PREV_INSN (insn),
17881 &found);
17882
17883 if (!found && distance < LEA_SEARCH_THRESHOLD)
17884 {
17885 edge e;
17886 edge_iterator ei;
17887 bool simple_loop = false;
17888
17889 FOR_EACH_EDGE (e, ei, bb->preds)
17890 if (e->src == bb)
17891 {
17892 simple_loop = true;
17893 break;
17894 }
17895
17896 if (simple_loop)
17897 distance = distance_non_agu_define_in_bb (regno1, regno2,
17898 insn, distance,
17899 BB_END (bb), &found);
17900 else
17901 {
17902 int shortest_dist = -1;
17903 bool found_in_bb = false;
17904
17905 FOR_EACH_EDGE (e, ei, bb->preds)
17906 {
17907 int bb_dist
17908 = distance_non_agu_define_in_bb (regno1, regno2,
17909 insn, distance,
17910 BB_END (e->src),
17911 &found_in_bb);
17912 if (found_in_bb)
17913 {
17914 if (shortest_dist < 0)
17915 shortest_dist = bb_dist;
17916 else if (bb_dist > 0)
17917 shortest_dist = MIN (bb_dist, shortest_dist);
17918
17919 found = true;
17920 }
17921 }
17922
17923 distance = shortest_dist;
17924 }
17925 }
17926
17927 /* get_attr_type may modify recog data. We want to make sure
17928 that recog data is valid for instruction INSN, on which
17929 distance_non_agu_define is called. INSN is unchanged here. */
17930 extract_insn_cached (insn);
17931
17932 if (!found)
17933 return -1;
17934
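/* DISTANCE was accumulated in half-cycles; convert it to cycles.  */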
17935 return distance >> 1;
17936 }
17937
17938 /* Return the distance in half-cycles, added to DISTANCE, between INSN
17939 and the next insn that uses register number REGNO in a memory
17940 address. Return -1 if REGNO is set (redefined) first.
17941
17942 Put true into *FOUND if a register usage was found and
17943 false otherwise.
17944 Put true into *REDEFINED if a register redefinition was
17945 found and false otherwise. */
17946
17947 static int
17948 distance_agu_use_in_bb (unsigned int regno,
17949 rtx_insn *insn, int distance, rtx_insn *start,
17950 bool *found, bool *redefined)
17951 {
17952 basic_block bb = NULL;
17953 rtx_insn *next = start;
17954 rtx_insn *prev = NULL;
17955
17956 *found = false;
17957 *redefined = false;
17958
17959 if (start != NULL_RTX)
17960 {
17961 bb = BLOCK_FOR_INSN (start);
17962 if (start != BB_HEAD (bb))
17963 /* If insn and start belong to the same bb, set prev to insn,
17964 so the call to increase_distance will increase the distance
17965 between insns by 1. */
17966 prev = insn;
17967 }
17968
17969 while (next
17970 && next != insn
17971 && distance < LEA_SEARCH_THRESHOLD)
17972 {
17973 if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
17974 {
17975 distance = increase_distance (prev, next, distance);
17976 if (insn_uses_reg_mem (regno, next))
17977 {
17978 /* Return DISTANCE if OP0 is used in memory
17979 address in NEXT. */
17980 *found = true;
17981 return distance;
17982 }
17983
17984 if (insn_defines_reg (regno, INVALID_REGNUM, next))
17985 {
17986 /* Return -1 if OP0 is set in NEXT. */
17987 *redefined = true;
17988 return -1;
17989 }
17990
17991 prev = next;
17992 }
17993
17994 if (next == BB_END (bb))
17995 break;
17996
17997 next = NEXT_INSN (next);
17998 }
17999
18000 return distance;
18001 }
18002
18003 /* Return the distance between INSN and the next insn that uses
18004 register number REGNO0 in a memory address. Return -1 if no such
18005 use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
18006
18007 static int
18008 distance_agu_use (unsigned int regno0, rtx_insn *insn)
18009 {
18010 basic_block bb = BLOCK_FOR_INSN (insn);
18011 int distance = 0;
18012 bool found = false;
18013 bool redefined = false;
18014
18015 if (insn != BB_END (bb))
18016 distance = distance_agu_use_in_bb (regno0, insn, distance,
18017 NEXT_INSN (insn),
18018 &found, &redefined);
18019
18020 if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
18021 {
18022 edge e;
18023 edge_iterator ei;
18024 bool simple_loop = false;
18025
18026 FOR_EACH_EDGE (e, ei, bb->succs)
18027 if (e->dest == bb)
18028 {
18029 simple_loop = true;
18030 break;
18031 }
18032
18033 if (simple_loop)
18034 distance = distance_agu_use_in_bb (regno0, insn,
18035 distance, BB_HEAD (bb),
18036 &found, &redefined);
18037 else
18038 {
18039 int shortest_dist = -1;
18040 bool found_in_bb = false;
18041 bool redefined_in_bb = false;
18042
18043 FOR_EACH_EDGE (e, ei, bb->succs)
18044 {
18045 int bb_dist
18046 = distance_agu_use_in_bb (regno0, insn,
18047 distance, BB_HEAD (e->dest),
18048 &found_in_bb, &redefined_in_bb);
18049 if (found_in_bb)
18050 {
18051 if (shortest_dist < 0)
18052 shortest_dist = bb_dist;
18053 else if (bb_dist > 0)
18054 shortest_dist = MIN (bb_dist, shortest_dist);
18055
18056 found = true;
18057 }
18058 }
18059
18060 distance = shortest_dist;
18061 }
18062 }
18063
18064 if (!found || redefined)
18065 return -1;
18066
18067 return distance >> 1;
18068 }
18069
18070 /* Define this macro to tune LEA priority vs ADD; it takes effect when
18071 there is a dilemma of choosing LEA or ADD.
18072 Negative value: ADD is preferred over LEA
18073 Zero: Neutral
18074 Positive value: LEA is preferred over ADD. */
18075 #define IX86_LEA_PRIORITY 0
18076
18077 /* Return true if using the lea INSN has a performance advantage
18078 over a sequence of instructions. The instruction sequence has
18079 SPLIT_COST cycles higher latency than the lea itself. */
18080
18081 static bool
18082 ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1,
18083 unsigned int regno2, int split_cost, bool has_scale)
18084 {
18085 int dist_define, dist_use;
18086
18087 /* For Silvermont, if a 2-source or 3-source LEA is used for a
18088 non-destructive destination, or because we want the ability
18089 to use SCALE, the use of LEA is justified. */
18090 if (TARGET_SILVERMONT || TARGET_INTEL)
18091 {
18092 if (has_scale)
18093 return true;
18094 if (split_cost < 1)
18095 return false;
18096 if (regno0 == regno1 || regno0 == regno2)
18097 return false;
18098 return true;
18099 }
18100
18101 dist_define = distance_non_agu_define (regno1, regno2, insn);
18102 dist_use = distance_agu_use (regno0, insn);
18103
18104 if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
18105 {
18106 /* If there is no non-AGU operand definition, no AGU
18107 operand usage and the split cost is 0, then both the lea
18108 and non-lea variants have the same priority. Currently
18109 we prefer lea for 64-bit code and non-lea for 32-bit
18110 code. */
18111 if (dist_use < 0 && split_cost == 0)
18112 return TARGET_64BIT || IX86_LEA_PRIORITY;
18113 else
18114 return true;
18115 }
18116
18117 /* With a longer definition distance, lea is preferable.
18118 Here we adjust it to take into account the splitting cost and
18119 lea priority. */
18120 dist_define += split_cost + IX86_LEA_PRIORITY;
18121
18122 /* If there is no use in a memory address then we just check
18123 that the split cost exceeds the AGU stall. */
18124 if (dist_use < 0)
18125 return dist_define > LEA_MAX_STALL;
18126
18127 /* If this insn has both a backward non-agu dependence and a forward
18128 agu dependence, the one with the shorter distance takes effect. */
18129 return dist_define >= dist_use;
18130 }
18131
18132 /* Return true if it is legal for INSN to clobber the flags register
18133 and false otherwise. */
18134
18135 static bool
18136 ix86_ok_to_clobber_flags (rtx_insn *insn)
18137 {
18138 basic_block bb = BLOCK_FOR_INSN (insn);
18139 df_ref use;
18140 bitmap live;
18141
18142 while (insn)
18143 {
18144 if (NONDEBUG_INSN_P (insn))
18145 {
18146 FOR_EACH_INSN_USE (use, insn)
18147 if (DF_REF_REG_USE_P (use) && DF_REF_REGNO (use) == FLAGS_REG)
18148 return false;
18149
18150 if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
18151 return true;
18152 }
18153
18154 if (insn == BB_END (bb))
18155 break;
18156
18157 insn = NEXT_INSN (insn);
18158 }
18159
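/* We scanned to the end of the block without seeing a use or another
   definition of the flags; they may be clobbered only if they are not
   live on exit from the block.  */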
18160 live = df_get_live_out (bb);
18161 return !REGNO_REG_SET_P (live, FLAGS_REG);
18162 }
18163
18164 /* Return true if we need to split op0 = op1 + op2 into a sequence of
18165 move and add to avoid AGU stalls. */
18166
18167 bool
18168 ix86_avoid_lea_for_add (rtx_insn *insn, rtx operands[])
18169 {
18170 unsigned int regno0, regno1, regno2;
18171
18172 /* Check if we need to optimize. */
18173 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18174 return false;
18175
18176 /* Check that it is correct to split here. */
18177 if (!ix86_ok_to_clobber_flags (insn))
18178 return false;
18179
18180 regno0 = true_regnum (operands[0]);
18181 regno1 = true_regnum (operands[1]);
18182 regno2 = true_regnum (operands[2]);
18183
18184 /* We need to split only adds with a non-destructive
18185 destination operand. */
18186 if (regno0 == regno1 || regno0 == regno2)
18187 return false;
18188 else
18189 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
18190 }
18191
18192 /* Return true if we should emit lea instruction instead of mov
18193 instruction. */
18194
18195 bool
18196 ix86_use_lea_for_mov (rtx_insn *insn, rtx operands[])
18197 {
18198 unsigned int regno0, regno1;
18199
18200 /* Check if we need to optimize. */
18201 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18202 return false;
18203
18204 /* Use lea for reg to reg moves only. */
18205 if (!REG_P (operands[0]) || !REG_P (operands[1]))
18206 return false;
18207
18208 regno0 = true_regnum (operands[0]);
18209 regno1 = true_regnum (operands[1]);
18210
18211 return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
18212 }
18213
18214 /* Return true if we need to split lea into a sequence of
18215 instructions to avoid AGU stalls. */
18216
18217 bool
18218 ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[])
18219 {
18220 unsigned int regno0, regno1, regno2;
18221 int split_cost;
18222 struct ix86_address parts;
18223 int ok;
18224
18225 /* Check we need to optimize. */
18226 if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
18227 return false;
18228
18229 /* The "at least two components" test below might not catch simple
18230 move or zero extension insns if parts.base is non-NULL and parts.disp
18231 is const0_rtx as the only components in the address, e.g. if the
18232 register is %rbp or %r13. As this test is much cheaper and moves or
18233 zero extensions are the common case, do this check first. */
18234 if (REG_P (operands[1])
18235 || (SImode_address_operand (operands[1], VOIDmode)
18236 && REG_P (XEXP (operands[1], 0))))
18237 return false;
18238
18239 /* Check if it is OK to split here. */
18240 if (!ix86_ok_to_clobber_flags (insn))
18241 return false;
18242
18243 ok = ix86_decompose_address (operands[1], &parts);
18244 gcc_assert (ok);
18245
18246 /* There should be at least two components in the address. */
18247 if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
18248 + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
18249 return false;
18250
18251 /* We should not split into an add if a non-legitimate pic
18252 operand is used as the displacement. */
18253 if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
18254 return false;
18255
18256 regno0 = true_regnum (operands[0]);
18257 regno1 = INVALID_REGNUM;
18258 regno2 = INVALID_REGNUM;
18259
18260 if (parts.base)
18261 regno1 = true_regnum (parts.base);
18262 if (parts.index)
18263 regno2 = true_regnum (parts.index);
18264
18265 split_cost = 0;
18266
18267 /* Compute how many cycles we will add to the execution time
18268 if we split the lea into a sequence of instructions. */
18269 if (parts.base || parts.index)
18270 {
18271 /* Have to use a mov instruction if the non-destructive
18272 destination form is used. */
18273 if (regno1 != regno0 && regno2 != regno0)
18274 split_cost += 1;
18275
18276 /* Have to add index to base if both exist. */
18277 if (parts.base && parts.index)
18278 split_cost += 1;
18279
18280 /* Have to use shift and adds if scale is 2 or greater. */
18281 if (parts.scale > 1)
18282 {
18283 if (regno0 != regno1)
18284 split_cost += 1;
18285 else if (regno2 == regno0)
18286 split_cost += 4;
18287 else
18288 split_cost += parts.scale;
18289 }
18290
18291 /* Have to use an add instruction with an immediate if
18292 disp is non-zero. */
18293 if (parts.disp && parts.disp != const0_rtx)
18294 split_cost += 1;
18295
18296 /* Subtract the price of lea. */
18297 split_cost -= 1;
18298 }
18299
18300 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
18301 parts.scale > 1);
18302 }
18303
18304 /* Emit the x86 binary operator CODE in mode MODE, where the first operand
18305 matches the destination. The emitted RTX includes a clobber of FLAGS_REG. */
18306
18307 static void
18308 ix86_emit_binop (enum rtx_code code, enum machine_mode mode,
18309 rtx dst, rtx src)
18310 {
18311 rtx op, clob;
18312
18313 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, dst, src));
18314 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18315
18316 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
18317 }
18318
18319 /* Return true if the definition of REGNO1 is nearest to INSN. */
18320
18321 static bool
18322 find_nearest_reg_def (rtx_insn *insn, int regno1, int regno2)
18323 {
18324 rtx_insn *prev = insn;
18325 rtx_insn *start = BB_HEAD (BLOCK_FOR_INSN (insn));
18326
18327 if (insn == start)
18328 return false;
18329 while (prev && prev != start)
18330 {
18331 if (!INSN_P (prev) || !NONDEBUG_INSN_P (prev))
18332 {
18333 prev = PREV_INSN (prev);
18334 continue;
18335 }
18336 if (insn_defines_reg (regno1, INVALID_REGNUM, prev))
18337 return true;
18338 else if (insn_defines_reg (regno2, INVALID_REGNUM, prev))
18339 return false;
18340 prev = PREV_INSN (prev);
18341 }
18342
18343 /* None of the regs is defined in the bb. */
18344 return false;
18345 }
18346
18347 /* Split lea instructions into a sequence of instructions
18348 which are executed on the ALU to avoid AGU stalls.
18349 It is assumed that it is allowed to clobber the flags register
18350 at the lea position. */
18351
18352 void
18353 ix86_split_lea_for_addr (rtx_insn *insn, rtx operands[], enum machine_mode mode)
18354 {
18355 unsigned int regno0, regno1, regno2;
18356 struct ix86_address parts;
18357 rtx target, tmp;
18358 int ok, adds;
18359
18360 ok = ix86_decompose_address (operands[1], &parts);
18361 gcc_assert (ok);
18362
18363 target = gen_lowpart (mode, operands[0]);
18364
18365 regno0 = true_regnum (target);
18366 regno1 = INVALID_REGNUM;
18367 regno2 = INVALID_REGNUM;
18368
18369 if (parts.base)
18370 {
18371 parts.base = gen_lowpart (mode, parts.base);
18372 regno1 = true_regnum (parts.base);
18373 }
18374
18375 if (parts.index)
18376 {
18377 parts.index = gen_lowpart (mode, parts.index);
18378 regno2 = true_regnum (parts.index);
18379 }
18380
18381 if (parts.disp)
18382 parts.disp = gen_lowpart (mode, parts.disp);
18383
18384 if (parts.scale > 1)
18385 {
18386 /* Case r1 = r1 + ... */
18387 if (regno1 == regno0)
18388 {
18389 /* If we have the case r1 = r1 + C * r2 then we
18390 would have to use multiplication, which is very
18391 expensive. Assume the cost model is wrong if we
18392 have such a case here. */
18393 gcc_assert (regno2 != regno0);
18394
18395 for (adds = parts.scale; adds > 0; adds--)
18396 ix86_emit_binop (PLUS, mode, target, parts.index);
18397 }
18398 else
18399 {
18400 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
18401 if (regno0 != regno2)
18402 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
18403
18404 /* Use shift for scaling. */
18405 ix86_emit_binop (ASHIFT, mode, target,
18406 GEN_INT (exact_log2 (parts.scale)));
18407
18408 if (parts.base)
18409 ix86_emit_binop (PLUS, mode, target, parts.base);
18410
18411 if (parts.disp && parts.disp != const0_rtx)
18412 ix86_emit_binop (PLUS, mode, target, parts.disp);
18413 }
18414 }
18415 else if (!parts.base && !parts.index)
18416 {
18417 gcc_assert (parts.disp);
18418 emit_insn (gen_rtx_SET (VOIDmode, target, parts.disp));
18419 }
18420 else
18421 {
18422 if (!parts.base)
18423 {
18424 if (regno0 != regno2)
18425 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
18426 }
18427 else if (!parts.index)
18428 {
18429 if (regno0 != regno1)
18430 emit_insn (gen_rtx_SET (VOIDmode, target, parts.base));
18431 }
18432 else
18433 {
18434 if (regno0 == regno1)
18435 tmp = parts.index;
18436 else if (regno0 == regno2)
18437 tmp = parts.base;
18438 else
18439 {
18440 rtx tmp1;
18441
18442 /* Find better operand for SET instruction, depending
18443 on which definition is farther from the insn. */
18444 if (find_nearest_reg_def (insn, regno1, regno2))
18445 tmp = parts.index, tmp1 = parts.base;
18446 else
18447 tmp = parts.base, tmp1 = parts.index;
18448
18449 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18450
18451 if (parts.disp && parts.disp != const0_rtx)
18452 ix86_emit_binop (PLUS, mode, target, parts.disp);
18453
18454 ix86_emit_binop (PLUS, mode, target, tmp1);
18455 return;
18456 }
18457
18458 ix86_emit_binop (PLUS, mode, target, tmp);
18459 }
18460
18461 if (parts.disp && parts.disp != const0_rtx)
18462 ix86_emit_binop (PLUS, mode, target, parts.disp);
18463 }
18464 }
18465
18466 /* Return true if it is ok to optimize an ADD operation to an LEA
18467 operation to avoid flag register consumption. For most processors,
18468 ADD is faster than LEA. For processors like BONNELL, if the
18469 destination register of the LEA holds an actual address which will be
18470 used soon, LEA is better; otherwise ADD is better. */
18471
18472 bool
18473 ix86_lea_for_add_ok (rtx_insn *insn, rtx operands[])
18474 {
18475 unsigned int regno0 = true_regnum (operands[0]);
18476 unsigned int regno1 = true_regnum (operands[1]);
18477 unsigned int regno2 = true_regnum (operands[2]);
18478
18479 /* If a = b + c (a != b && a != c), we must use the lea form. */
18480 if (regno0 != regno1 && regno0 != regno2)
18481 return true;
18482
18483 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18484 return false;
18485
18486 return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
18487 }
18488
18489 /* Return true if destination reg of SET_BODY is shift count of
18490 USE_BODY. */
18491
18492 static bool
18493 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
18494 {
18495 rtx set_dest;
18496 rtx shift_rtx;
18497 int i;
18498
18499 /* Retrieve destination of SET_BODY. */
18500 switch (GET_CODE (set_body))
18501 {
18502 case SET:
18503 set_dest = SET_DEST (set_body);
18504 if (!set_dest || !REG_P (set_dest))
18505 return false;
18506 break;
18507 case PARALLEL:
18508 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
18509 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
18510 use_body))
18511 return true;
18512 default:
18513 return false;
18514 break;
18515 }
18516
18517 /* Retrieve shift count of USE_BODY. */
18518 switch (GET_CODE (use_body))
18519 {
18520 case SET:
18521 shift_rtx = XEXP (use_body, 1);
18522 break;
18523 case PARALLEL:
18524 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
18525 if (ix86_dep_by_shift_count_body (set_body,
18526 XVECEXP (use_body, 0, i)))
18527 return true;
18528 default:
18529 return false;
18530 break;
18531 }
18532
18533 if (shift_rtx
18534 && (GET_CODE (shift_rtx) == ASHIFT
18535 || GET_CODE (shift_rtx) == LSHIFTRT
18536 || GET_CODE (shift_rtx) == ASHIFTRT
18537 || GET_CODE (shift_rtx) == ROTATE
18538 || GET_CODE (shift_rtx) == ROTATERT))
18539 {
18540 rtx shift_count = XEXP (shift_rtx, 1);
18541
18542 /* Return true if shift count is dest of SET_BODY. */
18543 if (REG_P (shift_count))
18544 {
18545 /* Add this check since it can be invoked before register
18546 allocation in the pre-reload scheduler. */
18547 if (reload_completed
18548 && true_regnum (set_dest) == true_regnum (shift_count))
18549 return true;
18550 else if (REGNO (set_dest) == REGNO (shift_count))
18551 return true;
18552 }
18553 }
18554
18555 return false;
18556 }
18557
18558 /* Return true if destination reg of SET_INSN is shift count of
18559 USE_INSN. */
18560
18561 bool
18562 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
18563 {
18564 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
18565 PATTERN (use_insn));
18566 }
18567
18568 /* Return TRUE or FALSE depending on whether the unary operator meets the
18569 appropriate constraints. */
18570
18571 bool
18572 ix86_unary_operator_ok (enum rtx_code,
18573 enum machine_mode,
18574 rtx operands[2])
18575 {
18576 /* If one of operands is memory, source and destination must match. */
18577 if ((MEM_P (operands[0])
18578 || MEM_P (operands[1]))
18579 && ! rtx_equal_p (operands[0], operands[1]))
18580 return false;
18581 return true;
18582 }
18583
18584 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
18585 are ok, keeping in mind the possible movddup alternative. */
18586
18587 bool
18588 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
18589 {
18590 if (MEM_P (operands[0]))
18591 return rtx_equal_p (operands[0], operands[1 + high]);
18592 if (MEM_P (operands[1]) && MEM_P (operands[2]))
18593 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
18594 return true;
18595 }
18596
18597 /* Post-reload splitter for converting an SFmode or DFmode value in an
18598 SSE register into an unsigned SImode value. */
18599
18600 void
18601 ix86_split_convert_uns_si_sse (rtx operands[])
18602 {
18603 enum machine_mode vecmode;
18604 rtx value, large, zero_or_two31, input, two31, x;
18605
18606 large = operands[1];
18607 zero_or_two31 = operands[2];
18608 input = operands[3];
18609 two31 = operands[4];
18610 vecmode = GET_MODE (large);
18611 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
18612
18613 /* Load up the value into the low element. We must ensure that the other
18614 elements are valid floats -- zero is the easiest such value. */
18615 if (MEM_P (input))
18616 {
18617 if (vecmode == V4SFmode)
18618 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
18619 else
18620 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
18621 }
18622 else
18623 {
18624 input = gen_rtx_REG (vecmode, REGNO (input));
18625 emit_move_insn (value, CONST0_RTX (vecmode));
18626 if (vecmode == V4SFmode)
18627 emit_insn (gen_sse_movss (value, value, input));
18628 else
18629 emit_insn (gen_sse2_movsd (value, value, input));
18630 }
18631
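/* Values below 0x1p31 convert directly with a signed conversion.  For
   larger values, subtract 0x1p31 first and flip the sign bit of the
   integer result back afterwards; the mask computed in LARGE selects
   which lanes need that adjustment.  */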
18632 emit_move_insn (large, two31);
18633 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
18634
18635 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
18636 emit_insn (gen_rtx_SET (VOIDmode, large, x));
18637
18638 x = gen_rtx_AND (vecmode, zero_or_two31, large);
18639 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
18640
18641 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
18642 emit_insn (gen_rtx_SET (VOIDmode, value, x));
18643
18644 large = gen_rtx_REG (V4SImode, REGNO (large));
18645 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
18646
18647 x = gen_rtx_REG (V4SImode, REGNO (value));
18648 if (vecmode == V4SFmode)
18649 emit_insn (gen_fix_truncv4sfv4si2 (x, value));
18650 else
18651 emit_insn (gen_sse2_cvttpd2dq (x, value));
18652 value = x;
18653
18654 emit_insn (gen_xorv4si3 (value, value, large));
18655 }
18656
18657 /* Convert an unsigned DImode value into a DFmode, using only SSE.
18658 Expects the 64-bit DImode to be supplied in a pair of integral
18659 registers. Requires SSE2; will use SSE3 if available. For x86_32,
18660 -mfpmath=sse, !optimize_size only. */
18661
18662 void
18663 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
18664 {
18665 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
18666 rtx int_xmm, fp_xmm;
18667 rtx biases, exponents;
18668 rtx x;
18669
18670 int_xmm = gen_reg_rtx (V4SImode);
18671 if (TARGET_INTER_UNIT_MOVES_TO_VEC)
18672 emit_insn (gen_movdi_to_sse (int_xmm, input));
18673 else if (TARGET_SSE_SPLIT_REGS)
18674 {
18675 emit_clobber (int_xmm);
18676 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
18677 }
18678 else
18679 {
18680 x = gen_reg_rtx (V2DImode);
18681 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
18682 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
18683 }
18684
18685 x = gen_rtx_CONST_VECTOR (V4SImode,
18686 gen_rtvec (4, GEN_INT (0x43300000UL),
18687 GEN_INT (0x45300000UL),
18688 const0_rtx, const0_rtx));
18689 exponents = validize_mem (force_const_mem (V4SImode, x));
18690
18691 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
18692 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
18693
18694 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
18695 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
18696 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
18697 (0x1.0p84 + double(fp_value_hi_xmm)).
18698 Note these exponents differ by 32. */
18699
18700 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
18701
18702 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
18703 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
18704 real_ldexp (&bias_lo_rvt, &dconst1, 52);
18705 real_ldexp (&bias_hi_rvt, &dconst1, 84);
18706 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
18707 x = const_double_from_real_value (bias_hi_rvt, DFmode);
18708 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
18709 biases = validize_mem (force_const_mem (V2DFmode, biases));
18710 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
18711
18712 /* Add the upper and lower DFmode values together. */
18713 if (TARGET_SSE3)
18714 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
18715 else
18716 {
18717 x = copy_to_mode_reg (V2DFmode, fp_xmm);
18718 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
18719 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
18720 }
18721
18722 ix86_expand_vector_extract (false, target, fp_xmm, 0);
18723 }
18724
18725 /* Not used, but eases macroization of patterns. */
18726 void
18727 ix86_expand_convert_uns_sixf_sse (rtx, rtx)
18728 {
18729 gcc_unreachable ();
18730 }
18731
18732 /* Convert an unsigned SImode value into a DFmode. Only currently used
18733 for SSE, but applicable anywhere. */
18734
18735 void
18736 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
18737 {
18738 REAL_VALUE_TYPE TWO31r;
18739 rtx x, fp;
18740
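/* Bias the unsigned input into the signed range by adding -2^31 (with
   wrap-around), convert the now-signed value to DFmode, and then add
   2^31 back as a DFmode constant.  */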
18741 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
18742 NULL, 1, OPTAB_DIRECT);
18743
18744 fp = gen_reg_rtx (DFmode);
18745 emit_insn (gen_floatsidf2 (fp, x));
18746
18747 real_ldexp (&TWO31r, &dconst1, 31);
18748 x = const_double_from_real_value (TWO31r, DFmode);
18749
18750 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
18751 if (x != target)
18752 emit_move_insn (target, x);
18753 }
18754
18755 /* Convert a signed DImode value into a DFmode. Only used for SSE in
18756 32-bit mode; otherwise we have a direct convert instruction. */
18757
18758 void
18759 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
18760 {
18761 REAL_VALUE_TYPE TWO32r;
18762 rtx fp_lo, fp_hi, x;
18763
18764 fp_lo = gen_reg_rtx (DFmode);
18765 fp_hi = gen_reg_rtx (DFmode);
18766
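/* result = (double) (signed) highpart * 2^32 + (double) (unsigned) lowpart.  */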
18767 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
18768
18769 real_ldexp (&TWO32r, &dconst1, 32);
18770 x = const_double_from_real_value (TWO32r, DFmode);
18771 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
18772
18773 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
18774
18775 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
18776 0, OPTAB_DIRECT);
18777 if (x != target)
18778 emit_move_insn (target, x);
18779 }
18780
18781 /* Convert an unsigned SImode value into a SFmode, using only SSE.
18782 For x86_32, -mfpmath=sse, !optimize_size only. */
18783 void
18784 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
18785 {
18786 REAL_VALUE_TYPE ONE16r;
18787 rtx fp_hi, fp_lo, int_hi, int_lo, x;
18788
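/* Split the unsigned input into 16-bit halves, convert each half exactly,
   then recombine as fp_hi * 2^16 + fp_lo.  */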
18789 real_ldexp (&ONE16r, &dconst1, 16);
18790 x = const_double_from_real_value (ONE16r, SFmode);
18791 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT (0xffff),
18792 NULL, 0, OPTAB_DIRECT);
18793 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT (16),
18794 NULL, 0, OPTAB_DIRECT);
18795 fp_hi = gen_reg_rtx (SFmode);
18796 fp_lo = gen_reg_rtx (SFmode);
18797 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
18798 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
18799 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
18800 0, OPTAB_DIRECT);
18801 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
18802 0, OPTAB_DIRECT);
18803 if (!rtx_equal_p (target, fp_hi))
18804 emit_move_insn (target, fp_hi);
18805 }
18806
18807 /* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
18808 a vector of unsigned ints VAL to a vector of floats TARGET. */
18809
18810 void
18811 ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
18812 {
18813 rtx tmp[8];
18814 REAL_VALUE_TYPE TWO16r;
18815 enum machine_mode intmode = GET_MODE (val);
18816 enum machine_mode fltmode = GET_MODE (target);
18817 rtx (*cvt) (rtx, rtx);
18818
18819 if (intmode == V4SImode)
18820 cvt = gen_floatv4siv4sf2;
18821 else
18822 cvt = gen_floatv8siv8sf2;
18823 tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
18824 tmp[0] = force_reg (intmode, tmp[0]);
18825 tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
18826 OPTAB_DIRECT);
18827 tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
18828 NULL_RTX, 1, OPTAB_DIRECT);
18829 tmp[3] = gen_reg_rtx (fltmode);
18830 emit_insn (cvt (tmp[3], tmp[1]));
18831 tmp[4] = gen_reg_rtx (fltmode);
18832 emit_insn (cvt (tmp[4], tmp[2]));
18833 real_ldexp (&TWO16r, &dconst1, 16);
18834 tmp[5] = const_double_from_real_value (TWO16r, SFmode);
18835 tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
18836 tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
18837 OPTAB_DIRECT);
18838 tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
18839 OPTAB_DIRECT);
18840 if (tmp[7] != target)
18841 emit_move_insn (target, tmp[7]);
18842 }
18843
18844 /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
18845 pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
18846 This is done by doing just signed conversion if < 0x1p31, and otherwise by
18847 subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */
18848
18849 rtx
18850 ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
18851 {
18852 REAL_VALUE_TYPE TWO31r;
18853 rtx two31r, tmp[4];
18854 enum machine_mode mode = GET_MODE (val);
18855 enum machine_mode scalarmode = GET_MODE_INNER (mode);
18856 enum machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
18857 rtx (*cmp) (rtx, rtx, rtx, rtx);
18858 int i;
18859
18860 for (i = 0; i < 3; i++)
18861 tmp[i] = gen_reg_rtx (mode);
18862 real_ldexp (&TWO31r, &dconst1, 31);
18863 two31r = const_double_from_real_value (TWO31r, scalarmode);
18864 two31r = ix86_build_const_vector (mode, 1, two31r);
18865 two31r = force_reg (mode, two31r);
18866 switch (mode)
18867 {
18868 case V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
18869 case V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
18870 case V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
18871 case V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
18872 default: gcc_unreachable ();
18873 }
18874 tmp[3] = gen_rtx_LE (mode, two31r, val);
18875 emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
18876 tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
18877 0, OPTAB_DIRECT);
18878 if (intmode == V4SImode || TARGET_AVX2)
18879 *xorp = expand_simple_binop (intmode, ASHIFT,
18880 gen_lowpart (intmode, tmp[0]),
18881 GEN_INT (31), NULL_RTX, 0,
18882 OPTAB_DIRECT);
18883 else
18884 {
18885 rtx two31 = GEN_INT ((unsigned HOST_WIDE_INT) 1 << 31);
18886 two31 = ix86_build_const_vector (intmode, 1, two31);
18887 *xorp = expand_simple_binop (intmode, AND,
18888 gen_lowpart (intmode, tmp[0]),
18889 two31, NULL_RTX, 0,
18890 OPTAB_DIRECT);
18891 }
18892 return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
18893 0, OPTAB_DIRECT);
18894 }
18895
18896 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
18897 then replicate the value for all elements of the vector
18898 register. */
18899
18900 rtx
18901 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
18902 {
18903 int i, n_elt;
18904 rtvec v;
18905 enum machine_mode scalar_mode;
18906
18907 switch (mode)
18908 {
18909 case V64QImode:
18910 case V32QImode:
18911 case V16QImode:
18912 case V32HImode:
18913 case V16HImode:
18914 case V8HImode:
18915 case V16SImode:
18916 case V8SImode:
18917 case V4SImode:
18918 case V8DImode:
18919 case V4DImode:
18920 case V2DImode:
18921 gcc_assert (vect);
18922 case V16SFmode:
18923 case V8SFmode:
18924 case V4SFmode:
18925 case V8DFmode:
18926 case V4DFmode:
18927 case V2DFmode:
18928 n_elt = GET_MODE_NUNITS (mode);
18929 v = rtvec_alloc (n_elt);
18930 scalar_mode = GET_MODE_INNER (mode);
18931
18932 RTVEC_ELT (v, 0) = value;
18933
18934 for (i = 1; i < n_elt; ++i)
18935 RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
18936
18937 return gen_rtx_CONST_VECTOR (mode, v);
18938
18939 default:
18940 gcc_unreachable ();
18941 }
18942 }
18943
18944 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
18945 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
18946 for an SSE register. If VECT is true, then replicate the mask for
18947 all elements of the vector register. If INVERT is true, then create
18948 a mask excluding the sign bit. */
18949
18950 rtx
18951 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
18952 {
18953 enum machine_mode vec_mode, imode;
18954 HOST_WIDE_INT hi, lo;
18955 int shift = 63;
18956 rtx v;
18957 rtx mask;
18958
18959 /* Find the sign bit, sign extended to 2*HWI. */
18960 switch (mode)
18961 {
18962 case V16SImode:
18963 case V16SFmode:
18964 case V8SImode:
18965 case V4SImode:
18966 case V8SFmode:
18967 case V4SFmode:
18968 vec_mode = mode;
18969 mode = GET_MODE_INNER (mode);
18970 imode = SImode;
18971 lo = 0x80000000, hi = lo < 0;
18972 break;
18973
18974 case V8DImode:
18975 case V4DImode:
18976 case V2DImode:
18977 case V8DFmode:
18978 case V4DFmode:
18979 case V2DFmode:
18980 vec_mode = mode;
18981 mode = GET_MODE_INNER (mode);
18982 imode = DImode;
18983 if (HOST_BITS_PER_WIDE_INT >= 64)
18984 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
18985 else
18986 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
18987 break;
18988
18989 case TImode:
18990 case TFmode:
18991 vec_mode = VOIDmode;
18992 if (HOST_BITS_PER_WIDE_INT >= 64)
18993 {
18994 imode = TImode;
18995 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
18996 }
18997 else
18998 {
18999 rtvec vec;
19000
19001 imode = DImode;
19002 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
19003
19004 if (invert)
19005 {
19006 lo = ~lo, hi = ~hi;
19007 v = constm1_rtx;
19008 }
19009 else
19010 v = const0_rtx;
19011
19012 mask = immed_double_const (lo, hi, imode);
19013
19014 vec = gen_rtvec (2, v, mask);
19015 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
19016 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
19017
19018 return v;
19019 }
19020 break;
19021
19022 default:
19023 gcc_unreachable ();
19024 }
19025
19026 if (invert)
19027 lo = ~lo, hi = ~hi;
19028
19029 /* Force this value into the low part of a fp vector constant. */
19030 mask = immed_double_const (lo, hi, imode);
19031 mask = gen_lowpart (mode, mask);
19032
19033 if (vec_mode == VOIDmode)
19034 return force_reg (mode, mask);
19035
19036 v = ix86_build_const_vector (vec_mode, vect, mask);
19037 return force_reg (vec_mode, v);
19038 }
19039
19040 /* Generate code for floating point ABS or NEG. */
19041
19042 void
19043 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
19044 rtx operands[])
19045 {
19046 rtx mask, set, dst, src;
19047 bool use_sse = false;
19048 bool vector_mode = VECTOR_MODE_P (mode);
19049 enum machine_mode vmode = mode;
19050
19051 if (vector_mode)
19052 use_sse = true;
19053 else if (mode == TFmode)
19054 use_sse = true;
19055 else if (TARGET_SSE_MATH)
19056 {
19057 use_sse = SSE_FLOAT_MODE_P (mode);
19058 if (mode == SFmode)
19059 vmode = V4SFmode;
19060 else if (mode == DFmode)
19061 vmode = V2DFmode;
19062 }
19063
19064 /* NEG and ABS performed with SSE use bitwise mask operations.
19065 Create the appropriate mask now. */
19066 if (use_sse)
19067 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
19068 else
19069 mask = NULL_RTX;
19070
19071 dst = operands[0];
19072 src = operands[1];
19073
19074 set = gen_rtx_fmt_e (code, mode, src);
19075 set = gen_rtx_SET (VOIDmode, dst, set);
19076
19077 if (mask)
19078 {
19079 rtx use, clob;
19080 rtvec par;
19081
19082 use = gen_rtx_USE (VOIDmode, mask);
19083 if (vector_mode)
19084 par = gen_rtvec (2, set, use);
19085 else
19086 {
19087 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
19088 par = gen_rtvec (3, set, use, clob);
19089 }
19090 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
19091 }
19092 else
19093 emit_insn (set);
19094 }
19095
19096 /* Expand a copysign operation. Special case operand 0 being a constant. */
19097
19098 void
19099 ix86_expand_copysign (rtx operands[])
19100 {
19101 enum machine_mode mode, vmode;
19102 rtx dest, op0, op1, mask, nmask;
19103
19104 dest = operands[0];
19105 op0 = operands[1];
19106 op1 = operands[2];
19107
19108 mode = GET_MODE (dest);
19109
19110 if (mode == SFmode)
19111 vmode = V4SFmode;
19112 else if (mode == DFmode)
19113 vmode = V2DFmode;
19114 else
19115 vmode = mode;
19116
19117 if (GET_CODE (op0) == CONST_DOUBLE)
19118 {
19119 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
19120
19121 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
19122 op0 = simplify_unary_operation (ABS, mode, op0, mode);
19123
19124 if (mode == SFmode || mode == DFmode)
19125 {
19126 if (op0 == CONST0_RTX (mode))
19127 op0 = CONST0_RTX (vmode);
19128 else
19129 {
19130 rtx v = ix86_build_const_vector (vmode, false, op0);
19131
19132 op0 = force_reg (vmode, v);
19133 }
19134 }
19135 else if (op0 != CONST0_RTX (mode))
19136 op0 = force_reg (mode, op0);
19137
19138 mask = ix86_build_signbit_mask (vmode, 0, 0);
19139
19140 if (mode == SFmode)
19141 copysign_insn = gen_copysignsf3_const;
19142 else if (mode == DFmode)
19143 copysign_insn = gen_copysigndf3_const;
19144 else
19145 copysign_insn = gen_copysigntf3_const;
19146
19147 emit_insn (copysign_insn (dest, op0, op1, mask));
19148 }
19149 else
19150 {
19151 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
19152
19153 nmask = ix86_build_signbit_mask (vmode, 0, 1);
19154 mask = ix86_build_signbit_mask (vmode, 0, 0);
19155
19156 if (mode == SFmode)
19157 copysign_insn = gen_copysignsf3_var;
19158 else if (mode == DFmode)
19159 copysign_insn = gen_copysigndf3_var;
19160 else
19161 copysign_insn = gen_copysigntf3_var;
19162
19163 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
19164 }
19165 }
19166
19167 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
19168 be a constant, and so has already been expanded into a vector constant. */
19169
19170 void
19171 ix86_split_copysign_const (rtx operands[])
19172 {
19173 enum machine_mode mode, vmode;
19174 rtx dest, op0, mask, x;
19175
19176 dest = operands[0];
19177 op0 = operands[1];
19178 mask = operands[3];
19179
19180 mode = GET_MODE (dest);
19181 vmode = GET_MODE (mask);
19182
19183 dest = simplify_gen_subreg (vmode, dest, mode, 0);
19184 x = gen_rtx_AND (vmode, dest, mask);
19185 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19186
19187 if (op0 != CONST0_RTX (vmode))
19188 {
19189 x = gen_rtx_IOR (vmode, dest, op0);
19190 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19191 }
19192 }
19193
19194 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
19195 so we have to do two masks. */
19196
19197 void
19198 ix86_split_copysign_var (rtx operands[])
19199 {
19200 enum machine_mode mode, vmode;
19201 rtx dest, scratch, op0, op1, mask, nmask, x;
19202
19203 dest = operands[0];
19204 scratch = operands[1];
19205 op0 = operands[2];
19206 op1 = operands[3];
19207 nmask = operands[4];
19208 mask = operands[5];
19209
19210 mode = GET_MODE (dest);
19211 vmode = GET_MODE (mask);
19212
19213 if (rtx_equal_p (op0, op1))
19214 {
19215 /* Shouldn't happen often (it's useless, obviously), but when it does
19216 we'd generate incorrect code if we continue below. */
19217 emit_move_insn (dest, op0);
19218 return;
19219 }
19220
19221 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
19222 {
19223 gcc_assert (REGNO (op1) == REGNO (scratch));
19224
19225 x = gen_rtx_AND (vmode, scratch, mask);
19226 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
19227
19228 dest = mask;
19229 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19230 x = gen_rtx_NOT (vmode, dest);
19231 x = gen_rtx_AND (vmode, x, op0);
19232 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19233 }
19234 else
19235 {
19236 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
19237 {
19238 x = gen_rtx_AND (vmode, scratch, mask);
19239 }
19240 else /* alternative 2,4 */
19241 {
19242 gcc_assert (REGNO (mask) == REGNO (scratch));
19243 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
19244 x = gen_rtx_AND (vmode, scratch, op1);
19245 }
19246 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
19247
19248 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
19249 {
19250 dest = simplify_gen_subreg (vmode, op0, mode, 0);
19251 x = gen_rtx_AND (vmode, dest, nmask);
19252 }
19253 else /* alternative 3,4 */
19254 {
19255 gcc_assert (REGNO (nmask) == REGNO (dest));
19256 dest = nmask;
19257 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19258 x = gen_rtx_AND (vmode, dest, op0);
19259 }
19260 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19261 }
19262
19263 x = gen_rtx_IOR (vmode, dest, scratch);
19264 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19265 }
19266
19267 /* Return TRUE or FALSE depending on whether the first SET in INSN
19268 has source and destination with matching CC modes, and that the
19269 CC mode is at least as constrained as REQ_MODE. */
19270
19271 bool
19272 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
19273 {
19274 rtx set;
19275 enum machine_mode set_mode;
19276
19277 set = PATTERN (insn);
19278 if (GET_CODE (set) == PARALLEL)
19279 set = XVECEXP (set, 0, 0);
19280 gcc_assert (GET_CODE (set) == SET);
19281 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
19282
19283 set_mode = GET_MODE (SET_DEST (set));
19284 switch (set_mode)
19285 {
19286 case CCNOmode:
19287 if (req_mode != CCNOmode
19288 && (req_mode != CCmode
19289 || XEXP (SET_SRC (set), 1) != const0_rtx))
19290 return false;
19291 break;
19292 case CCmode:
19293 if (req_mode == CCGCmode)
19294 return false;
19295 /* FALLTHRU */
19296 case CCGCmode:
19297 if (req_mode == CCGOCmode || req_mode == CCNOmode)
19298 return false;
19299 /* FALLTHRU */
19300 case CCGOCmode:
19301 if (req_mode == CCZmode)
19302 return false;
19303 /* FALLTHRU */
19304 case CCZmode:
19305 break;
19306
19307 case CCAmode:
19308 case CCCmode:
19309 case CCOmode:
19310 case CCSmode:
19311 if (set_mode != req_mode)
19312 return false;
19313 break;
19314
19315 default:
19316 gcc_unreachable ();
19317 }
19318
19319 return GET_MODE (SET_SRC (set)) == set_mode;
19320 }
19321
19322 /* Generate insn patterns to do an integer compare of OPERANDS. */
19323
19324 static rtx
19325 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
19326 {
19327 enum machine_mode cmpmode;
19328 rtx tmp, flags;
19329
19330 cmpmode = SELECT_CC_MODE (code, op0, op1);
19331 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
19332
19333 /* This is very simple, but making the interface the same as in the
19334 FP case makes the rest of the code easier. */
19335 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
19336 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
19337
19338 /* Return the test that should be put into the flags user, i.e.
19339 the bcc, scc, or cmov instruction. */
19340 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
19341 }
19342
19343 /* Figure out whether to use ordered or unordered fp comparisons.
19344 Return the appropriate mode to use. */
19345
19346 enum machine_mode
19347 ix86_fp_compare_mode (enum rtx_code)
19348 {
19349 /* ??? In order to make all comparisons reversible, we do all comparisons
19350 non-trapping when compiling for IEEE. Once gcc is able to distinguish
19351 all forms of trapping and nontrapping comparisons, we can make inequality
19352 comparisons trapping again, since it results in better code when using
19353 FCOM based compares. */
19354 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
19355 }
19356
19357 enum machine_mode
19358 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
19359 {
19360 enum machine_mode mode = GET_MODE (op0);
19361
19362 if (SCALAR_FLOAT_MODE_P (mode))
19363 {
19364 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
19365 return ix86_fp_compare_mode (code);
19366 }
19367
19368 switch (code)
19369 {
19370 /* Only zero flag is needed. */
19371 case EQ: /* ZF=0 */
19372 case NE: /* ZF!=0 */
19373 return CCZmode;
19374 /* Codes needing carry flag. */
19375 case GEU: /* CF=0 */
19376 case LTU: /* CF=1 */
19377 /* Detect overflow checks. They need just the carry flag. */
19378 if (GET_CODE (op0) == PLUS
19379 && rtx_equal_p (op1, XEXP (op0, 0)))
19380 return CCCmode;
19381 else
19382 return CCmode;
19383 case GTU: /* CF=0 & ZF=0 */
19384 case LEU: /* CF=1 | ZF=1 */
19385 return CCmode;
19386 /* Codes possibly doable only with sign flag when
19387 comparing against zero. */
19388 case GE: /* SF=OF or SF=0 */
19389 case LT: /* SF<>OF or SF=1 */
19390 if (op1 == const0_rtx)
19391 return CCGOCmode;
19392 else
19393 /* For other cases Carry flag is not required. */
19394 return CCGCmode;
19395 /* Codes doable only with the sign flag when comparing
19396 against zero, but we miss the jump instruction for it,
19397 so we need to use relational tests against the overflow
19398 flag, which thus needs to be zero. */
19399 case GT: /* ZF=0 & SF=OF */
19400 case LE: /* ZF=1 | SF<>OF */
19401 if (op1 == const0_rtx)
19402 return CCNOmode;
19403 else
19404 return CCGCmode;
19405 /* The strcmp pattern does (use flags), and combine may ask us for the
19406 proper mode. */
19407 case USE:
19408 return CCmode;
19409 default:
19410 gcc_unreachable ();
19411 }
19412 }
19413
19414 /* Return the fixed registers used for condition codes. */
19415
19416 static bool
19417 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
19418 {
19419 *p1 = FLAGS_REG;
19420 *p2 = FPSR_REG;
19421 return true;
19422 }
19423
19424 /* If two condition code modes are compatible, return a condition code
19425 mode which is compatible with both. Otherwise, return
19426 VOIDmode. */
19427
19428 static enum machine_mode
19429 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
19430 {
19431 if (m1 == m2)
19432 return m1;
19433
19434 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
19435 return VOIDmode;
19436
19437 if ((m1 == CCGCmode && m2 == CCGOCmode)
19438 || (m1 == CCGOCmode && m2 == CCGCmode))
19439 return CCGCmode;
19440
19441 if (m1 == CCZmode && (m2 == CCGCmode || m2 == CCGOCmode))
19442 return m2;
19443 else if (m2 == CCZmode && (m1 == CCGCmode || m1 == CCGOCmode))
19444 return m1;
19445
19446 switch (m1)
19447 {
19448 default:
19449 gcc_unreachable ();
19450
19451 case CCmode:
19452 case CCGCmode:
19453 case CCGOCmode:
19454 case CCNOmode:
19455 case CCAmode:
19456 case CCCmode:
19457 case CCOmode:
19458 case CCSmode:
19459 case CCZmode:
19460 switch (m2)
19461 {
19462 default:
19463 return VOIDmode;
19464
19465 case CCmode:
19466 case CCGCmode:
19467 case CCGOCmode:
19468 case CCNOmode:
19469 case CCAmode:
19470 case CCCmode:
19471 case CCOmode:
19472 case CCSmode:
19473 case CCZmode:
19474 return CCmode;
19475 }
19476
19477 case CCFPmode:
19478 case CCFPUmode:
19479 /* These are only compatible with themselves, which we already
19480 checked above. */
19481 return VOIDmode;
19482 }
19483 }
19484
19485
19486 /* Return a comparison we can do that is equivalent to
19487 swap_condition (code), apart possibly from orderedness.
19488 But never change orderedness if TARGET_IEEE_FP, returning
19489 UNKNOWN in that case if necessary. */
19490
19491 static enum rtx_code
19492 ix86_fp_swap_condition (enum rtx_code code)
19493 {
19494 switch (code)
19495 {
19496 case GT: /* GTU - CF=0 & ZF=0 */
19497 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
19498 case GE: /* GEU - CF=0 */
19499 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
19500 case UNLT: /* LTU - CF=1 */
19501 return TARGET_IEEE_FP ? UNKNOWN : GT;
19502 case UNLE: /* LEU - CF=1 | ZF=1 */
19503 return TARGET_IEEE_FP ? UNKNOWN : GE;
19504 default:
19505 return swap_condition (code);
19506 }
19507 }
19508
 19509 /* Return the cost of comparison CODE using the best strategy for performance.
 19510 All of the following functions use the number of instructions as the cost metric.
 19511 In the future this should be tweaked to compute bytes for optimize_size and
 19512 to take into account the performance of various instructions on various CPUs. */
19513
19514 static int
19515 ix86_fp_comparison_cost (enum rtx_code code)
19516 {
19517 int arith_cost;
19518
19519 /* The cost of code using bit-twiddling on %ah. */
19520 switch (code)
19521 {
19522 case UNLE:
19523 case UNLT:
19524 case LTGT:
19525 case GT:
19526 case GE:
19527 case UNORDERED:
19528 case ORDERED:
19529 case UNEQ:
19530 arith_cost = 4;
19531 break;
19532 case LT:
19533 case NE:
19534 case EQ:
19535 case UNGE:
19536 arith_cost = TARGET_IEEE_FP ? 5 : 4;
19537 break;
19538 case LE:
19539 case UNGT:
19540 arith_cost = TARGET_IEEE_FP ? 6 : 4;
19541 break;
19542 default:
19543 gcc_unreachable ();
19544 }
19545
19546 switch (ix86_fp_comparison_strategy (code))
19547 {
19548 case IX86_FPCMP_COMI:
19549 return arith_cost > 4 ? 3 : 2;
19550 case IX86_FPCMP_SAHF:
19551 return arith_cost > 4 ? 4 : 3;
19552 default:
19553 return arith_cost;
19554 }
19555 }
19556
 19557 /* Return the strategy to use for floating-point compares. We assume that fcomi
 19558 is always preferable where available, since that is also true when looking at
 19559 size (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
19560
19561 enum ix86_fpcmp_strategy
19562 ix86_fp_comparison_strategy (enum rtx_code)
19563 {
19564 /* Do fcomi/sahf based test when profitable. */
19565
19566 if (TARGET_CMOVE)
19567 return IX86_FPCMP_COMI;
19568
19569 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
19570 return IX86_FPCMP_SAHF;
19571
19572 return IX86_FPCMP_ARITH;
19573 }
19574
19575 /* Swap, force into registers, or otherwise massage the two operands
 19576 to an fp comparison. The operands are updated in place; the new
19577 comparison code is returned. */
19578
19579 static enum rtx_code
19580 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
19581 {
19582 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
19583 rtx op0 = *pop0, op1 = *pop1;
19584 enum machine_mode op_mode = GET_MODE (op0);
19585 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
19586
19587 /* All of the unordered compare instructions only work on registers.
19588 The same is true of the fcomi compare instructions. The XFmode
19589 compare instructions require registers except when comparing
19590 against zero or when converting operand 1 from fixed point to
19591 floating point. */
19592
19593 if (!is_sse
19594 && (fpcmp_mode == CCFPUmode
19595 || (op_mode == XFmode
19596 && ! (standard_80387_constant_p (op0) == 1
19597 || standard_80387_constant_p (op1) == 1)
19598 && GET_CODE (op1) != FLOAT)
19599 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
19600 {
19601 op0 = force_reg (op_mode, op0);
19602 op1 = force_reg (op_mode, op1);
19603 }
19604 else
19605 {
19606 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
19607 things around if they appear profitable, otherwise force op0
19608 into a register. */
19609
19610 if (standard_80387_constant_p (op0) == 0
19611 || (MEM_P (op0)
19612 && ! (standard_80387_constant_p (op1) == 0
19613 || MEM_P (op1))))
19614 {
19615 enum rtx_code new_code = ix86_fp_swap_condition (code);
19616 if (new_code != UNKNOWN)
19617 {
19618 rtx tmp;
19619 tmp = op0, op0 = op1, op1 = tmp;
19620 code = new_code;
19621 }
19622 }
19623
19624 if (!REG_P (op0))
19625 op0 = force_reg (op_mode, op0);
19626
19627 if (CONSTANT_P (op1))
19628 {
19629 int tmp = standard_80387_constant_p (op1);
19630 if (tmp == 0)
19631 op1 = validize_mem (force_const_mem (op_mode, op1));
19632 else if (tmp == 1)
19633 {
19634 if (TARGET_CMOVE)
19635 op1 = force_reg (op_mode, op1);
19636 }
19637 else
19638 op1 = force_reg (op_mode, op1);
19639 }
19640 }
19641
19642 /* Try to rearrange the comparison to make it cheaper. */
19643 if (ix86_fp_comparison_cost (code)
19644 > ix86_fp_comparison_cost (swap_condition (code))
19645 && (REG_P (op1) || can_create_pseudo_p ()))
19646 {
19647 rtx tmp;
19648 tmp = op0, op0 = op1, op1 = tmp;
19649 code = swap_condition (code);
19650 if (!REG_P (op0))
19651 op0 = force_reg (op_mode, op0);
19652 }
19653
19654 *pop0 = op0;
19655 *pop1 = op1;
19656 return code;
19657 }
19658
 19659 /* Convert the comparison codes we use to represent FP comparisons into the
 19660 integer code that will result in a proper branch. Return UNKNOWN if no
 19661 such code is available. */
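 /* For example, after fcomi/fucomi (or fnstsw + sahf) the FP relation is
    encoded in ZF, PF and CF much like an unsigned integer compare: CF is
    set when op0 < op1 or when the operands are unordered, and ZF when
    they are equal or unordered.  Hence GT maps to GTU, UNLT to LTU, and
    so on.  */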
19662
19663 enum rtx_code
19664 ix86_fp_compare_code_to_integer (enum rtx_code code)
19665 {
19666 switch (code)
19667 {
19668 case GT:
19669 return GTU;
19670 case GE:
19671 return GEU;
19672 case ORDERED:
19673 case UNORDERED:
 19674 return code;
 19676 case UNEQ:
 19677 return EQ;
 19679 case UNLT:
 19680 return LTU;
 19682 case UNLE:
 19683 return LEU;
 19685 case LTGT:
 19686 return NE;
19688 default:
19689 return UNKNOWN;
19690 }
19691 }
19692
19693 /* Generate insn patterns to do a floating point compare of OPERANDS. */
19694
19695 static rtx
19696 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
19697 {
19698 enum machine_mode fpcmp_mode, intcmp_mode;
19699 rtx tmp, tmp2;
19700
19701 fpcmp_mode = ix86_fp_compare_mode (code);
19702 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
19703
19704 /* Do fcomi/sahf based test when profitable. */
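   /* With IX86_FPCMP_COMI, fcomi/fucomi writes ZF, PF and CF in the flags
      register directly.  With IX86_FPCMP_SAHF, fnstsw copies the FPU status
      word into %ax and sahf then loads %ah into the flags, which is why an
      extra HImode scratch register is clobbered below.  */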
19705 switch (ix86_fp_comparison_strategy (code))
19706 {
19707 case IX86_FPCMP_COMI:
19708 intcmp_mode = fpcmp_mode;
19709 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
19710 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
19711 tmp);
19712 emit_insn (tmp);
19713 break;
19714
19715 case IX86_FPCMP_SAHF:
19716 intcmp_mode = fpcmp_mode;
19717 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
19718 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
19719 tmp);
19720
19721 if (!scratch)
19722 scratch = gen_reg_rtx (HImode);
19723 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
19724 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
19725 break;
19726
19727 case IX86_FPCMP_ARITH:
 19728 /* Reg-stack pops clobber the FP status register, so we must issue fnstsw first. */
19729 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
19730 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
19731 if (!scratch)
19732 scratch = gen_reg_rtx (HImode);
19733 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
19734
 19735 /* In the unordered case, we have to check C2 for NaNs, which
 19736 does not combine into anything directly usable.
 19737 So do some bit twiddling on the value we've got in AH to come
 19738 up with an appropriate set of condition codes. */
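	 /* After fnstsw, the condition bits sit in %ah as C0 = 0x01,
	    C2 = 0x04 and C3 = 0x40.  fcom leaves all three clear for
	    op0 > op1, sets C0 for op0 < op1, C3 for equality, and all
	    three (0x45) when the operands are unordered; the masks used
	    below (0x45, 0x44, 0x40, 0x05, 0x04, 0x01) test combinations
	    of these bits.  */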
19739
19740 intcmp_mode = CCNOmode;
19741 switch (code)
19742 {
19743 case GT:
19744 case UNGT:
19745 if (code == GT || !TARGET_IEEE_FP)
19746 {
19747 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
19748 code = EQ;
19749 }
19750 else
19751 {
19752 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19753 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
19754 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
19755 intcmp_mode = CCmode;
19756 code = GEU;
19757 }
19758 break;
19759 case LT:
19760 case UNLT:
19761 if (code == LT && TARGET_IEEE_FP)
19762 {
19763 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19764 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
19765 intcmp_mode = CCmode;
19766 code = EQ;
19767 }
19768 else
19769 {
19770 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
19771 code = NE;
19772 }
19773 break;
19774 case GE:
19775 case UNGE:
19776 if (code == GE || !TARGET_IEEE_FP)
19777 {
19778 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
19779 code = EQ;
19780 }
19781 else
19782 {
19783 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19784 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
19785 code = NE;
19786 }
19787 break;
19788 case LE:
19789 case UNLE:
19790 if (code == LE && TARGET_IEEE_FP)
19791 {
19792 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19793 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
19794 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
19795 intcmp_mode = CCmode;
19796 code = LTU;
19797 }
19798 else
19799 {
19800 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
19801 code = NE;
19802 }
19803 break;
19804 case EQ:
19805 case UNEQ:
19806 if (code == EQ && TARGET_IEEE_FP)
19807 {
19808 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19809 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
19810 intcmp_mode = CCmode;
19811 code = EQ;
19812 }
19813 else
19814 {
19815 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
19816 code = NE;
19817 }
19818 break;
19819 case NE:
19820 case LTGT:
19821 if (code == NE && TARGET_IEEE_FP)
19822 {
19823 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19824 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
19825 GEN_INT (0x40)));
19826 code = NE;
19827 }
19828 else
19829 {
19830 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
19831 code = EQ;
19832 }
19833 break;
19834
19835 case UNORDERED:
19836 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
19837 code = NE;
19838 break;
19839 case ORDERED:
19840 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
19841 code = EQ;
19842 break;
19843
19844 default:
19845 gcc_unreachable ();
19846 }
19847 break;
19848
19849 default:
19850 gcc_unreachable();
19851 }
19852
19853 /* Return the test that should be put into the flags user, i.e.
19854 the bcc, scc, or cmov instruction. */
19855 return gen_rtx_fmt_ee (code, VOIDmode,
19856 gen_rtx_REG (intcmp_mode, FLAGS_REG),
19857 const0_rtx);
19858 }
19859
19860 static rtx
19861 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
19862 {
19863 rtx ret;
19864
19865 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
19866 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
19867
19868 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
19869 {
19870 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
19871 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
19872 }
19873 else
19874 ret = ix86_expand_int_compare (code, op0, op1);
19875
19876 return ret;
19877 }
19878
19879 void
19880 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
19881 {
19882 enum machine_mode mode = GET_MODE (op0);
19883 rtx tmp;
19884
19885 switch (mode)
19886 {
19887 case SFmode:
19888 case DFmode:
19889 case XFmode:
19890 case QImode:
19891 case HImode:
19892 case SImode:
19893 simple:
19894 tmp = ix86_expand_compare (code, op0, op1);
19895 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
19896 gen_rtx_LABEL_REF (VOIDmode, label),
19897 pc_rtx);
19898 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
19899 return;
19900
19901 case DImode:
19902 if (TARGET_64BIT)
19903 goto simple;
19904 case TImode:
19905 /* Expand DImode branch into multiple compare+branch. */
19906 {
19907 rtx lo[2], hi[2];
19908 rtx_code_label *label2;
19909 enum rtx_code code1, code2, code3;
19910 enum machine_mode submode;
19911
19912 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
19913 {
19914 tmp = op0, op0 = op1, op1 = tmp;
19915 code = swap_condition (code);
19916 }
19917
19918 split_double_mode (mode, &op0, 1, lo+0, hi+0);
19919 split_double_mode (mode, &op1, 1, lo+1, hi+1);
19920
19921 submode = mode == DImode ? SImode : DImode;
19922
19923 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
19924 avoid two branches. This costs one extra insn, so disable when
19925 optimizing for size. */
19926
19927 if ((code == EQ || code == NE)
19928 && (!optimize_insn_for_size_p ()
19929 || hi[1] == const0_rtx || lo[1] == const0_rtx))
19930 {
19931 rtx xor0, xor1;
19932
19933 xor1 = hi[0];
19934 if (hi[1] != const0_rtx)
19935 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
19936 NULL_RTX, 0, OPTAB_WIDEN);
19937
19938 xor0 = lo[0];
19939 if (lo[1] != const0_rtx)
19940 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
19941 NULL_RTX, 0, OPTAB_WIDEN);
19942
19943 tmp = expand_binop (submode, ior_optab, xor1, xor0,
19944 NULL_RTX, 0, OPTAB_WIDEN);
19945
19946 ix86_expand_branch (code, tmp, const0_rtx, label);
19947 return;
19948 }
19949
 19950 /* Otherwise, if we are doing a less-than or greater-than-or-equal
 19951 comparison, op1 is a constant and the low word is zero, then we can
 19952 just examine the high word. Similarly for a low word of -1 and
 19953 less-than-or-equal or greater-than. */
19954
19955 if (CONST_INT_P (hi[1]))
19956 switch (code)
19957 {
19958 case LT: case LTU: case GE: case GEU:
19959 if (lo[1] == const0_rtx)
19960 {
19961 ix86_expand_branch (code, hi[0], hi[1], label);
19962 return;
19963 }
19964 break;
19965 case LE: case LEU: case GT: case GTU:
19966 if (lo[1] == constm1_rtx)
19967 {
19968 ix86_expand_branch (code, hi[0], hi[1], label);
19969 return;
19970 }
19971 break;
19972 default:
19973 break;
19974 }
19975
19976 /* Otherwise, we need two or three jumps. */
19977
19978 label2 = gen_label_rtx ();
19979
19980 code1 = code;
19981 code2 = swap_condition (code);
19982 code3 = unsigned_condition (code);
19983
19984 switch (code)
19985 {
19986 case LT: case GT: case LTU: case GTU:
19987 break;
19988
19989 case LE: code1 = LT; code2 = GT; break;
19990 case GE: code1 = GT; code2 = LT; break;
19991 case LEU: code1 = LTU; code2 = GTU; break;
19992 case GEU: code1 = GTU; code2 = LTU; break;
19993
19994 case EQ: code1 = UNKNOWN; code2 = NE; break;
19995 case NE: code2 = UNKNOWN; break;
19996
19997 default:
19998 gcc_unreachable ();
19999 }
20000
20001 /*
20002 * a < b =>
20003 * if (hi(a) < hi(b)) goto true;
20004 * if (hi(a) > hi(b)) goto false;
20005 * if (lo(a) < lo(b)) goto true;
20006 * false:
20007 */
20008
20009 if (code1 != UNKNOWN)
20010 ix86_expand_branch (code1, hi[0], hi[1], label);
20011 if (code2 != UNKNOWN)
20012 ix86_expand_branch (code2, hi[0], hi[1], label2);
20013
20014 ix86_expand_branch (code3, lo[0], lo[1], label);
20015
20016 if (code2 != UNKNOWN)
20017 emit_label (label2);
20018 return;
20019 }
20020
20021 default:
20022 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
20023 goto simple;
20024 }
20025 }
20026
20027 /* Split branch based on floating point condition. */
20028 void
20029 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
20030 rtx target1, rtx target2, rtx tmp)
20031 {
20032 rtx condition;
20033 rtx i;
20034
20035 if (target2 != pc_rtx)
20036 {
20037 rtx tmp = target2;
20038 code = reverse_condition_maybe_unordered (code);
20039 target2 = target1;
20040 target1 = tmp;
20041 }
20042
20043 condition = ix86_expand_fp_compare (code, op1, op2,
20044 tmp);
20045
20046 i = emit_jump_insn (gen_rtx_SET
20047 (VOIDmode, pc_rtx,
20048 gen_rtx_IF_THEN_ELSE (VOIDmode,
20049 condition, target1, target2)));
20050 if (split_branch_probability >= 0)
20051 add_int_reg_note (i, REG_BR_PROB, split_branch_probability);
20052 }
20053
20054 void
20055 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
20056 {
20057 rtx ret;
20058
20059 gcc_assert (GET_MODE (dest) == QImode);
20060
20061 ret = ix86_expand_compare (code, op0, op1);
20062 PUT_MODE (ret, QImode);
20063 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
20064 }
20065
 20066 /* Expand a comparison setting or clearing the carry flag. Return true when
 20067 successful and set *POP to the comparison operation. */
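 /* For example, (x == 0) is rewritten as (unsigned) x < 1 and (x >= 0) as
    (unsigned) x < 0x80000000, so that the result of the comparison lands in
    the carry flag and can be consumed directly by sbb/adc.  */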
20068 static bool
20069 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
20070 {
20071 enum machine_mode mode =
20072 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
20073
 20074 /* Do not handle double-mode compares, which go through the special path. */
20075 if (mode == (TARGET_64BIT ? TImode : DImode))
20076 return false;
20077
20078 if (SCALAR_FLOAT_MODE_P (mode))
20079 {
20080 rtx compare_op;
20081 rtx_insn *compare_seq;
20082
20083 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
20084
 20085 /* Shortcut: the following common codes never translate
 20086 into carry-flag compares. */
20087 if (code == EQ || code == NE || code == UNEQ || code == LTGT
20088 || code == ORDERED || code == UNORDERED)
20089 return false;
20090
 20091 /* These comparisons require the zero flag; swap the operands so they no longer do. */
20092 if ((code == GT || code == UNLE || code == LE || code == UNGT)
20093 && !TARGET_IEEE_FP)
20094 {
20095 rtx tmp = op0;
20096 op0 = op1;
20097 op1 = tmp;
20098 code = swap_condition (code);
20099 }
20100
 20101 /* Try to expand the comparison and verify that we end up with a
 20102 carry-flag-based comparison. This fails only when we decide to
 20103 expand the comparison using arithmetic, which is not a common
 20104 scenario. */
20105 start_sequence ();
20106 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
20107 compare_seq = get_insns ();
20108 end_sequence ();
20109
20110 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
20111 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
20112 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
20113 else
20114 code = GET_CODE (compare_op);
20115
20116 if (code != LTU && code != GEU)
20117 return false;
20118
20119 emit_insn (compare_seq);
20120 *pop = compare_op;
20121 return true;
20122 }
20123
20124 if (!INTEGRAL_MODE_P (mode))
20125 return false;
20126
20127 switch (code)
20128 {
20129 case LTU:
20130 case GEU:
20131 break;
20132
20133 /* Convert a==0 into (unsigned)a<1. */
20134 case EQ:
20135 case NE:
20136 if (op1 != const0_rtx)
20137 return false;
20138 op1 = const1_rtx;
20139 code = (code == EQ ? LTU : GEU);
20140 break;
20141
 20142 /* Convert a>b into b<a or a>=b+1. */
20143 case GTU:
20144 case LEU:
20145 if (CONST_INT_P (op1))
20146 {
20147 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
 20148 /* Bail out on overflow. We could still swap the operands, but that
 20149 would force loading the constant into a register. */
20150 if (op1 == const0_rtx
20151 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
20152 return false;
20153 code = (code == GTU ? GEU : LTU);
20154 }
20155 else
20156 {
20157 rtx tmp = op1;
20158 op1 = op0;
20159 op0 = tmp;
20160 code = (code == GTU ? LTU : GEU);
20161 }
20162 break;
20163
20164 /* Convert a>=0 into (unsigned)a<0x80000000. */
20165 case LT:
20166 case GE:
20167 if (mode == DImode || op1 != const0_rtx)
20168 return false;
20169 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20170 code = (code == LT ? GEU : LTU);
20171 break;
20172 case LE:
20173 case GT:
20174 if (mode == DImode || op1 != constm1_rtx)
20175 return false;
20176 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20177 code = (code == LE ? GEU : LTU);
20178 break;
20179
20180 default:
20181 return false;
20182 }
 20183 /* Swapping operands may cause the constant to appear as the first operand. */
20184 if (!nonimmediate_operand (op0, VOIDmode))
20185 {
20186 if (!can_create_pseudo_p ())
20187 return false;
20188 op0 = force_reg (mode, op0);
20189 }
20190 *pop = ix86_expand_compare (code, op0, op1);
20191 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
20192 return true;
20193 }
20194
20195 bool
20196 ix86_expand_int_movcc (rtx operands[])
20197 {
20198 enum rtx_code code = GET_CODE (operands[1]), compare_code;
20199 rtx_insn *compare_seq;
20200 rtx compare_op;
20201 enum machine_mode mode = GET_MODE (operands[0]);
20202 bool sign_bit_compare_p = false;
20203 rtx op0 = XEXP (operands[1], 0);
20204 rtx op1 = XEXP (operands[1], 1);
20205
20206 if (GET_MODE (op0) == TImode
20207 || (GET_MODE (op0) == DImode
20208 && !TARGET_64BIT))
20209 return false;
20210
20211 start_sequence ();
20212 compare_op = ix86_expand_compare (code, op0, op1);
20213 compare_seq = get_insns ();
20214 end_sequence ();
20215
20216 compare_code = GET_CODE (compare_op);
20217
20218 if ((op1 == const0_rtx && (code == GE || code == LT))
20219 || (op1 == constm1_rtx && (code == GT || code == LE)))
20220 sign_bit_compare_p = true;
20221
20222 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
20223 HImode insns, we'd be swallowed in word prefix ops. */
20224
20225 if ((mode != HImode || TARGET_FAST_PREFIX)
20226 && (mode != (TARGET_64BIT ? TImode : DImode))
20227 && CONST_INT_P (operands[2])
20228 && CONST_INT_P (operands[3]))
20229 {
20230 rtx out = operands[0];
20231 HOST_WIDE_INT ct = INTVAL (operands[2]);
20232 HOST_WIDE_INT cf = INTVAL (operands[3]);
20233 HOST_WIDE_INT diff;
20234
20235 diff = ct - cf;
 20236 /* Sign-bit compares are better done using shifts than by using
 20237 sbb. */
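	  /* The sbb idiom: after a compare that leaves its result in the
	     carry flag, "sbb %reg, %reg" computes reg - reg - CF, i.e. 0
	     when CF is clear and -1 (all ones) when CF is set, giving a
	     branch-free mask that the arithmetic below turns into ct or
	     cf.  */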
20238 if (sign_bit_compare_p
20239 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
20240 {
20241 /* Detect overlap between destination and compare sources. */
20242 rtx tmp = out;
20243
20244 if (!sign_bit_compare_p)
20245 {
20246 rtx flags;
20247 bool fpcmp = false;
20248
20249 compare_code = GET_CODE (compare_op);
20250
20251 flags = XEXP (compare_op, 0);
20252
20253 if (GET_MODE (flags) == CCFPmode
20254 || GET_MODE (flags) == CCFPUmode)
20255 {
20256 fpcmp = true;
20257 compare_code
20258 = ix86_fp_compare_code_to_integer (compare_code);
20259 }
20260
 20261 /* To simplify the rest of the code, restrict to the GEU case. */
20262 if (compare_code == LTU)
20263 {
20264 HOST_WIDE_INT tmp = ct;
20265 ct = cf;
20266 cf = tmp;
20267 compare_code = reverse_condition (compare_code);
20268 code = reverse_condition (code);
20269 }
20270 else
20271 {
20272 if (fpcmp)
20273 PUT_CODE (compare_op,
20274 reverse_condition_maybe_unordered
20275 (GET_CODE (compare_op)));
20276 else
20277 PUT_CODE (compare_op,
20278 reverse_condition (GET_CODE (compare_op)));
20279 }
20280 diff = ct - cf;
20281
20282 if (reg_overlap_mentioned_p (out, op0)
20283 || reg_overlap_mentioned_p (out, op1))
20284 tmp = gen_reg_rtx (mode);
20285
20286 if (mode == DImode)
20287 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
20288 else
20289 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
20290 flags, compare_op));
20291 }
20292 else
20293 {
20294 if (code == GT || code == GE)
20295 code = reverse_condition (code);
20296 else
20297 {
20298 HOST_WIDE_INT tmp = ct;
20299 ct = cf;
20300 cf = tmp;
20301 diff = ct - cf;
20302 }
20303 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
20304 }
20305
20306 if (diff == 1)
20307 {
20308 /*
20309 * cmpl op0,op1
20310 * sbbl dest,dest
20311 * [addl dest, ct]
20312 *
20313 * Size 5 - 8.
20314 */
20315 if (ct)
20316 tmp = expand_simple_binop (mode, PLUS,
20317 tmp, GEN_INT (ct),
20318 copy_rtx (tmp), 1, OPTAB_DIRECT);
20319 }
20320 else if (cf == -1)
20321 {
20322 /*
20323 * cmpl op0,op1
20324 * sbbl dest,dest
20325 * orl $ct, dest
20326 *
20327 * Size 8.
20328 */
20329 tmp = expand_simple_binop (mode, IOR,
20330 tmp, GEN_INT (ct),
20331 copy_rtx (tmp), 1, OPTAB_DIRECT);
20332 }
20333 else if (diff == -1 && ct)
20334 {
20335 /*
20336 * cmpl op0,op1
20337 * sbbl dest,dest
20338 * notl dest
20339 * [addl dest, cf]
20340 *
20341 * Size 8 - 11.
20342 */
20343 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20344 if (cf)
20345 tmp = expand_simple_binop (mode, PLUS,
20346 copy_rtx (tmp), GEN_INT (cf),
20347 copy_rtx (tmp), 1, OPTAB_DIRECT);
20348 }
20349 else
20350 {
20351 /*
20352 * cmpl op0,op1
20353 * sbbl dest,dest
20354 * [notl dest]
20355 * andl cf - ct, dest
20356 * [addl dest, ct]
20357 *
20358 * Size 8 - 11.
20359 */
20360
20361 if (cf == 0)
20362 {
20363 cf = ct;
20364 ct = 0;
20365 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20366 }
20367
20368 tmp = expand_simple_binop (mode, AND,
20369 copy_rtx (tmp),
20370 gen_int_mode (cf - ct, mode),
20371 copy_rtx (tmp), 1, OPTAB_DIRECT);
20372 if (ct)
20373 tmp = expand_simple_binop (mode, PLUS,
20374 copy_rtx (tmp), GEN_INT (ct),
20375 copy_rtx (tmp), 1, OPTAB_DIRECT);
20376 }
20377
20378 if (!rtx_equal_p (tmp, out))
20379 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
20380
20381 return true;
20382 }
20383
20384 if (diff < 0)
20385 {
20386 enum machine_mode cmp_mode = GET_MODE (op0);
20387
20388 HOST_WIDE_INT tmp;
20389 tmp = ct, ct = cf, cf = tmp;
20390 diff = -diff;
20391
20392 if (SCALAR_FLOAT_MODE_P (cmp_mode))
20393 {
20394 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
20395
 20396 /* We may be reversing an unordered compare to a normal compare, which
 20397 is not valid in general (we may convert a non-trapping condition
 20398 into a trapping one); however, on i386 we currently emit all
 20399 comparisons unordered. */
20400 compare_code = reverse_condition_maybe_unordered (compare_code);
20401 code = reverse_condition_maybe_unordered (code);
20402 }
20403 else
20404 {
20405 compare_code = reverse_condition (compare_code);
20406 code = reverse_condition (code);
20407 }
20408 }
20409
20410 compare_code = UNKNOWN;
20411 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
20412 && CONST_INT_P (op1))
20413 {
20414 if (op1 == const0_rtx
20415 && (code == LT || code == GE))
20416 compare_code = code;
20417 else if (op1 == constm1_rtx)
20418 {
20419 if (code == LE)
20420 compare_code = LT;
20421 else if (code == GT)
20422 compare_code = GE;
20423 }
20424 }
20425
20426 /* Optimize dest = (op0 < 0) ? -1 : cf. */
20427 if (compare_code != UNKNOWN
20428 && GET_MODE (op0) == GET_MODE (out)
20429 && (cf == -1 || ct == -1))
20430 {
 20431 /* If the lea code below could be used, only optimize
 20432 if it results in a 2-insn sequence. */
20433
20434 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
20435 || diff == 3 || diff == 5 || diff == 9)
20436 || (compare_code == LT && ct == -1)
20437 || (compare_code == GE && cf == -1))
20438 {
20439 /*
20440 * notl op1 (if necessary)
20441 * sarl $31, op1
20442 * orl cf, op1
20443 */
20444 if (ct != -1)
20445 {
20446 cf = ct;
20447 ct = -1;
20448 code = reverse_condition (code);
20449 }
20450
20451 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
20452
20453 out = expand_simple_binop (mode, IOR,
20454 out, GEN_INT (cf),
20455 out, 1, OPTAB_DIRECT);
20456 if (out != operands[0])
20457 emit_move_insn (operands[0], out);
20458
20459 return true;
20460 }
20461 }
20462
20463
20464 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
20465 || diff == 3 || diff == 5 || diff == 9)
20466 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
20467 && (mode != DImode
20468 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
20469 {
20470 /*
20471 * xorl dest,dest
20472 * cmpl op1,op2
20473 * setcc dest
20474 * lea cf(dest*(ct-cf)),dest
20475 *
20476 * Size 14.
20477 *
20478 * This also catches the degenerate setcc-only case.
20479 */
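	  /* For example, (x < y) ? 7 : 4 has diff == 3: setcc leaves 0 or 1
	     in dest, and a single "lea 4(dest,dest,2), dest" scales that by
	     3 and adds the base constant 4.  */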
20480
20481 rtx tmp;
20482 int nops;
20483
20484 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
20485
20486 nops = 0;
 20487 /* On x86_64 the lea instruction operates on Pmode, so we need
 20488 the arithmetic done in the proper mode to match. */
20489 if (diff == 1)
20490 tmp = copy_rtx (out);
20491 else
20492 {
20493 rtx out1;
20494 out1 = copy_rtx (out);
20495 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
20496 nops++;
20497 if (diff & 1)
20498 {
20499 tmp = gen_rtx_PLUS (mode, tmp, out1);
20500 nops++;
20501 }
20502 }
20503 if (cf != 0)
20504 {
20505 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
20506 nops++;
20507 }
20508 if (!rtx_equal_p (tmp, out))
20509 {
20510 if (nops == 1)
20511 out = force_operand (tmp, copy_rtx (out));
20512 else
20513 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
20514 }
20515 if (!rtx_equal_p (out, operands[0]))
20516 emit_move_insn (operands[0], copy_rtx (out));
20517
20518 return true;
20519 }
20520
20521 /*
20522 * General case: Jumpful:
20523 * xorl dest,dest cmpl op1, op2
20524 * cmpl op1, op2 movl ct, dest
20525 * setcc dest jcc 1f
20526 * decl dest movl cf, dest
20527 * andl (cf-ct),dest 1:
20528 * addl ct,dest
20529 *
20530 * Size 20. Size 14.
20531 *
20532 * This is reasonably steep, but branch mispredict costs are
 20533 * high on modern CPUs, so consider failing only if optimizing
20534 * for space.
20535 */
20536
20537 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
20538 && BRANCH_COST (optimize_insn_for_speed_p (),
20539 false) >= 2)
20540 {
20541 if (cf == 0)
20542 {
20543 enum machine_mode cmp_mode = GET_MODE (op0);
20544
20545 cf = ct;
20546 ct = 0;
20547
20548 if (SCALAR_FLOAT_MODE_P (cmp_mode))
20549 {
20550 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
20551
 20552 /* We may be reversing an unordered compare to a normal compare,
 20553 which is not valid in general (we may convert a non-trapping
 20554 condition into a trapping one); however, on i386 we currently
 20555 emit all comparisons unordered. */
20556 code = reverse_condition_maybe_unordered (code);
20557 }
20558 else
20559 {
20560 code = reverse_condition (code);
20561 if (compare_code != UNKNOWN)
20562 compare_code = reverse_condition (compare_code);
20563 }
20564 }
20565
20566 if (compare_code != UNKNOWN)
20567 {
20568 /* notl op1 (if needed)
20569 sarl $31, op1
20570 andl (cf-ct), op1
20571 addl ct, op1
20572
20573 For x < 0 (resp. x <= -1) there will be no notl,
20574 so if possible swap the constants to get rid of the
20575 complement.
20576 True/false will be -1/0 while code below (store flag
20577 followed by decrement) is 0/-1, so the constants need
20578 to be exchanged once more. */
20579
20580 if (compare_code == GE || !cf)
20581 {
20582 code = reverse_condition (code);
20583 compare_code = LT;
20584 }
20585 else
20586 {
20587 HOST_WIDE_INT tmp = cf;
20588 cf = ct;
20589 ct = tmp;
20590 }
20591
20592 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
20593 }
20594 else
20595 {
20596 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
20597
20598 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
20599 constm1_rtx,
20600 copy_rtx (out), 1, OPTAB_DIRECT);
20601 }
20602
20603 out = expand_simple_binop (mode, AND, copy_rtx (out),
20604 gen_int_mode (cf - ct, mode),
20605 copy_rtx (out), 1, OPTAB_DIRECT);
20606 if (ct)
20607 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
20608 copy_rtx (out), 1, OPTAB_DIRECT);
20609 if (!rtx_equal_p (out, operands[0]))
20610 emit_move_insn (operands[0], copy_rtx (out));
20611
20612 return true;
20613 }
20614 }
20615
20616 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
20617 {
20618 /* Try a few things more with specific constants and a variable. */
20619
20620 optab op;
20621 rtx var, orig_out, out, tmp;
20622
20623 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
20624 return false;
20625
 20626 /* If one of the two operands is an interesting constant, load the
 20627 constant arm with the code above and mask in the variable with a logical operation. */
20628
20629 if (CONST_INT_P (operands[2]))
20630 {
20631 var = operands[3];
20632 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
20633 operands[3] = constm1_rtx, op = and_optab;
20634 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
20635 operands[3] = const0_rtx, op = ior_optab;
20636 else
20637 return false;
20638 }
20639 else if (CONST_INT_P (operands[3]))
20640 {
20641 var = operands[2];
20642 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
20643 operands[2] = constm1_rtx, op = and_optab;
 20644 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
20645 operands[2] = const0_rtx, op = ior_optab;
20646 else
20647 return false;
20648 }
20649 else
20650 return false;
20651
20652 orig_out = operands[0];
20653 tmp = gen_reg_rtx (mode);
20654 operands[0] = tmp;
20655
20656 /* Recurse to get the constant loaded. */
20657 if (ix86_expand_int_movcc (operands) == 0)
20658 return false;
20659
20660 /* Mask in the interesting variable. */
20661 out = expand_binop (mode, op, var, tmp, orig_out, 0,
20662 OPTAB_WIDEN);
20663 if (!rtx_equal_p (out, orig_out))
20664 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
20665
20666 return true;
20667 }
20668
20669 /*
20670 * For comparison with above,
20671 *
20672 * movl cf,dest
20673 * movl ct,tmp
20674 * cmpl op1,op2
20675 * cmovcc tmp,dest
20676 *
20677 * Size 15.
20678 */
20679
20680 if (! nonimmediate_operand (operands[2], mode))
20681 operands[2] = force_reg (mode, operands[2]);
20682 if (! nonimmediate_operand (operands[3], mode))
20683 operands[3] = force_reg (mode, operands[3]);
20684
20685 if (! register_operand (operands[2], VOIDmode)
20686 && (mode == QImode
20687 || ! register_operand (operands[3], VOIDmode)))
20688 operands[2] = force_reg (mode, operands[2]);
20689
20690 if (mode == QImode
20691 && ! register_operand (operands[3], VOIDmode))
20692 operands[3] = force_reg (mode, operands[3]);
20693
20694 emit_insn (compare_seq);
20695 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
20696 gen_rtx_IF_THEN_ELSE (mode,
20697 compare_op, operands[2],
20698 operands[3])));
20699 return true;
20700 }
20701
20702 /* Swap, force into registers, or otherwise massage the two operands
20703 to an sse comparison with a mask result. Thus we differ a bit from
20704 ix86_prepare_fp_compare_args which expects to produce a flags result.
20705
20706 The DEST operand exists to help determine whether to commute commutative
20707 operators. The POP0/POP1 operands are updated in place. The new
20708 comparison code is returned, or UNKNOWN if not implementable. */
20709
20710 static enum rtx_code
20711 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
20712 rtx *pop0, rtx *pop1)
20713 {
20714 rtx tmp;
20715
20716 switch (code)
20717 {
20718 case LTGT:
20719 case UNEQ:
20720 /* AVX supports all the needed comparisons. */
20721 if (TARGET_AVX)
20722 break;
20723 /* We have no LTGT as an operator. We could implement it with
20724 NE & ORDERED, but this requires an extra temporary. It's
20725 not clear that it's worth it. */
20726 return UNKNOWN;
20727
20728 case LT:
20729 case LE:
20730 case UNGT:
20731 case UNGE:
20732 /* These are supported directly. */
20733 break;
20734
20735 case EQ:
20736 case NE:
20737 case UNORDERED:
20738 case ORDERED:
 20739 /* AVX has 3-operand comparisons; no need to swap anything. */
20740 if (TARGET_AVX)
20741 break;
20742 /* For commutative operators, try to canonicalize the destination
20743 operand to be first in the comparison - this helps reload to
20744 avoid extra moves. */
20745 if (!dest || !rtx_equal_p (dest, *pop1))
20746 break;
20747 /* FALLTHRU */
20748
20749 case GE:
20750 case GT:
20751 case UNLE:
20752 case UNLT:
20753 /* These are not supported directly before AVX, and furthermore
20754 ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
20755 comparison operands to transform into something that is
20756 supported. */
20757 tmp = *pop0;
20758 *pop0 = *pop1;
20759 *pop1 = tmp;
20760 code = swap_condition (code);
20761 break;
20762
20763 default:
20764 gcc_unreachable ();
20765 }
20766
20767 return code;
20768 }
20769
20770 /* Detect conditional moves that exactly match min/max operational
20771 semantics. Note that this is IEEE safe, as long as we don't
20772 interchange the operands.
20773
20774 Returns FALSE if this conditional move doesn't match a MIN/MAX,
20775 and TRUE if the operation is successful and instructions are emitted. */
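 /* For example, "a < b ? a : b" matches minss/minpd exactly, including the
    asymmetric NaN and signed-zero handling (the second source operand is
    returned in those cases), which is why the operand order must not be
    swapped.  */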
20776
20777 static bool
20778 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
20779 rtx cmp_op1, rtx if_true, rtx if_false)
20780 {
20781 enum machine_mode mode;
20782 bool is_min;
20783 rtx tmp;
20784
20785 if (code == LT)
20786 ;
20787 else if (code == UNGE)
20788 {
20789 tmp = if_true;
20790 if_true = if_false;
20791 if_false = tmp;
20792 }
20793 else
20794 return false;
20795
20796 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
20797 is_min = true;
20798 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
20799 is_min = false;
20800 else
20801 return false;
20802
20803 mode = GET_MODE (dest);
20804
20805 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
20806 but MODE may be a vector mode and thus not appropriate. */
20807 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
20808 {
20809 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
20810 rtvec v;
20811
20812 if_true = force_reg (mode, if_true);
20813 v = gen_rtvec (2, if_true, if_false);
20814 tmp = gen_rtx_UNSPEC (mode, v, u);
20815 }
20816 else
20817 {
20818 code = is_min ? SMIN : SMAX;
20819 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
20820 }
20821
20822 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
20823 return true;
20824 }
20825
20826 /* Expand an sse vector comparison. Return the register with the result. */
20827
20828 static rtx
20829 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
20830 rtx op_true, rtx op_false)
20831 {
20832 enum machine_mode mode = GET_MODE (dest);
20833 enum machine_mode cmp_ops_mode = GET_MODE (cmp_op0);
20834
 20835 /* In the general case the result of the comparison can differ from the operands' type. */
20836 enum machine_mode cmp_mode;
20837
20838 /* In AVX512F the result of comparison is an integer mask. */
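   /* For 512-bit operands the comparison writes a mask register: e.g. a
      V16SImode compare produces a 16-bit (HImode) mask with one bit per
      element, rather than a full-width vector of -1/0 elements.  */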
20839 bool maskcmp = false;
20840 rtx x;
20841
20842 if (GET_MODE_SIZE (cmp_ops_mode) == 64)
20843 {
20844 cmp_mode = mode_for_size (GET_MODE_NUNITS (cmp_ops_mode), MODE_INT, 0);
20845 gcc_assert (cmp_mode != BLKmode);
20846
20847 maskcmp = true;
20848 }
20849 else
20850 cmp_mode = cmp_ops_mode;
20851
20852
20853 cmp_op0 = force_reg (cmp_ops_mode, cmp_op0);
20854 if (!nonimmediate_operand (cmp_op1, cmp_ops_mode))
20855 cmp_op1 = force_reg (cmp_ops_mode, cmp_op1);
20856
20857 if (optimize
20858 || reg_overlap_mentioned_p (dest, op_true)
20859 || reg_overlap_mentioned_p (dest, op_false))
20860 dest = gen_reg_rtx (maskcmp ? cmp_mode : mode);
20861
 20862 /* Compare patterns for integer modes are unspecs in AVX512F only. */
20863 if (maskcmp && (code == GT || code == EQ))
20864 {
20865 rtx (*gen)(rtx, rtx, rtx);
20866
20867 switch (cmp_ops_mode)
20868 {
20869 case V16SImode:
20870 gen = code == GT ? gen_avx512f_gtv16si3 : gen_avx512f_eqv16si3_1;
20871 break;
20872 case V8DImode:
20873 gen = code == GT ? gen_avx512f_gtv8di3 : gen_avx512f_eqv8di3_1;
20874 break;
20875 default:
20876 gen = NULL;
20877 }
20878
20879 if (gen)
20880 {
20881 emit_insn (gen (dest, cmp_op0, cmp_op1));
20882 return dest;
20883 }
20884 }
20885 x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
20886
20887 if (cmp_mode != mode && !maskcmp)
20888 {
20889 x = force_reg (cmp_ops_mode, x);
20890 convert_move (dest, x, false);
20891 }
20892 else
20893 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20894
20895 return dest;
20896 }
20897
20898 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
20899 operations. This is used for both scalar and vector conditional moves. */
20900
20901 static void
20902 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
20903 {
20904 enum machine_mode mode = GET_MODE (dest);
20905 enum machine_mode cmpmode = GET_MODE (cmp);
20906
20907 /* In AVX512F the result of comparison is an integer mask. */
20908 bool maskcmp = (mode != cmpmode && TARGET_AVX512F);
20909
20910 rtx t2, t3, x;
20911
20912 if (vector_all_ones_operand (op_true, mode)
20913 && rtx_equal_p (op_false, CONST0_RTX (mode))
20914 && !maskcmp)
20915 {
20916 emit_insn (gen_rtx_SET (VOIDmode, dest, cmp));
20917 }
20918 else if (op_false == CONST0_RTX (mode)
20919 && !maskcmp)
20920 {
20921 op_true = force_reg (mode, op_true);
20922 x = gen_rtx_AND (mode, cmp, op_true);
20923 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20924 }
20925 else if (op_true == CONST0_RTX (mode)
20926 && !maskcmp)
20927 {
20928 op_false = force_reg (mode, op_false);
20929 x = gen_rtx_NOT (mode, cmp);
20930 x = gen_rtx_AND (mode, x, op_false);
20931 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20932 }
20933 else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode)
20934 && !maskcmp)
20935 {
20936 op_false = force_reg (mode, op_false);
20937 x = gen_rtx_IOR (mode, cmp, op_false);
20938 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20939 }
20940 else if (TARGET_XOP
20941 && !maskcmp)
20942 {
20943 op_true = force_reg (mode, op_true);
20944
20945 if (!nonimmediate_operand (op_false, mode))
20946 op_false = force_reg (mode, op_false);
20947
20948 emit_insn (gen_rtx_SET (mode, dest,
20949 gen_rtx_IF_THEN_ELSE (mode, cmp,
20950 op_true,
20951 op_false)));
20952 }
20953 else
20954 {
20955 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
20956 rtx d = dest;
20957
20958 if (!nonimmediate_operand (op_true, mode))
20959 op_true = force_reg (mode, op_true);
20960
20961 op_false = force_reg (mode, op_false);
20962
20963 switch (mode)
20964 {
20965 case V4SFmode:
20966 if (TARGET_SSE4_1)
20967 gen = gen_sse4_1_blendvps;
20968 break;
20969 case V2DFmode:
20970 if (TARGET_SSE4_1)
20971 gen = gen_sse4_1_blendvpd;
20972 break;
20973 case V16QImode:
20974 case V8HImode:
20975 case V4SImode:
20976 case V2DImode:
20977 if (TARGET_SSE4_1)
20978 {
20979 gen = gen_sse4_1_pblendvb;
20980 if (mode != V16QImode)
20981 d = gen_reg_rtx (V16QImode);
20982 op_false = gen_lowpart (V16QImode, op_false);
20983 op_true = gen_lowpart (V16QImode, op_true);
20984 cmp = gen_lowpart (V16QImode, cmp);
20985 }
20986 break;
20987 case V8SFmode:
20988 if (TARGET_AVX)
20989 gen = gen_avx_blendvps256;
20990 break;
20991 case V4DFmode:
20992 if (TARGET_AVX)
20993 gen = gen_avx_blendvpd256;
20994 break;
20995 case V32QImode:
20996 case V16HImode:
20997 case V8SImode:
20998 case V4DImode:
20999 if (TARGET_AVX2)
21000 {
21001 gen = gen_avx2_pblendvb;
21002 if (mode != V32QImode)
21003 d = gen_reg_rtx (V32QImode);
21004 op_false = gen_lowpart (V32QImode, op_false);
21005 op_true = gen_lowpart (V32QImode, op_true);
21006 cmp = gen_lowpart (V32QImode, cmp);
21007 }
21008 break;
21009
21010 case V16SImode:
21011 gen = gen_avx512f_blendmv16si;
21012 break;
21013 case V8DImode:
21014 gen = gen_avx512f_blendmv8di;
21015 break;
21016 case V8DFmode:
21017 gen = gen_avx512f_blendmv8df;
21018 break;
21019 case V16SFmode:
21020 gen = gen_avx512f_blendmv16sf;
21021 break;
21022
21023 default:
21024 break;
21025 }
21026
21027 if (gen != NULL)
21028 {
21029 emit_insn (gen (d, op_false, op_true, cmp));
21030 if (d != dest)
21031 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
21032 }
21033 else
21034 {
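	  /* Without a suitable blend instruction, open-code the select as
	     dest = (cmp & op_true) | (~cmp & op_false), relying on the
	     comparison result being an all-ones or all-zeros mask in each
	     element.  */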
21035 op_true = force_reg (mode, op_true);
21036
21037 t2 = gen_reg_rtx (mode);
21038 if (optimize)
21039 t3 = gen_reg_rtx (mode);
21040 else
21041 t3 = dest;
21042
21043 x = gen_rtx_AND (mode, op_true, cmp);
21044 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
21045
21046 x = gen_rtx_NOT (mode, cmp);
21047 x = gen_rtx_AND (mode, x, op_false);
21048 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
21049
21050 x = gen_rtx_IOR (mode, t3, t2);
21051 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21052 }
21053 }
21054 }
21055
21056 /* Expand a floating-point conditional move. Return true if successful. */
21057
21058 bool
21059 ix86_expand_fp_movcc (rtx operands[])
21060 {
21061 enum machine_mode mode = GET_MODE (operands[0]);
21062 enum rtx_code code = GET_CODE (operands[1]);
21063 rtx tmp, compare_op;
21064 rtx op0 = XEXP (operands[1], 0);
21065 rtx op1 = XEXP (operands[1], 1);
21066
21067 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
21068 {
21069 enum machine_mode cmode;
21070
 21071 /* Since we have no cmove for SSE registers, don't force bad register
 21072 allocation just to gain access to it. Deny movcc when the
 21073 comparison mode doesn't match the move mode. */
21074 cmode = GET_MODE (op0);
21075 if (cmode == VOIDmode)
21076 cmode = GET_MODE (op1);
21077 if (cmode != mode)
21078 return false;
21079
21080 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
21081 if (code == UNKNOWN)
21082 return false;
21083
21084 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
21085 operands[2], operands[3]))
21086 return true;
21087
21088 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
21089 operands[2], operands[3]);
21090 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
21091 return true;
21092 }
21093
21094 if (GET_MODE (op0) == TImode
21095 || (GET_MODE (op0) == DImode
21096 && !TARGET_64BIT))
21097 return false;
21098
21099 /* The floating point conditional move instructions don't directly
21100 support conditions resulting from a signed integer comparison. */
21101
21102 compare_op = ix86_expand_compare (code, op0, op1);
21103 if (!fcmov_comparison_operator (compare_op, VOIDmode))
21104 {
21105 tmp = gen_reg_rtx (QImode);
21106 ix86_expand_setcc (tmp, code, op0, op1);
21107
21108 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
21109 }
21110
21111 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
21112 gen_rtx_IF_THEN_ELSE (mode, compare_op,
21113 operands[2], operands[3])));
21114
21115 return true;
21116 }
21117
21118 /* Expand a floating-point vector conditional move; a vcond operation
21119 rather than a movcc operation. */
21120
21121 bool
21122 ix86_expand_fp_vcond (rtx operands[])
21123 {
21124 enum rtx_code code = GET_CODE (operands[3]);
21125 rtx cmp;
21126
21127 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
21128 &operands[4], &operands[5]);
21129 if (code == UNKNOWN)
21130 {
21131 rtx temp;
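      /* LTGT and UNEQ have no direct SSE comparison before AVX, so build
	 them from two comparisons: LTGT as (ORDERED and NE) and UNEQ as
	 (UNORDERED or EQ), combined with a vector AND/IOR below.  */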
21132 switch (GET_CODE (operands[3]))
21133 {
21134 case LTGT:
21135 temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
21136 operands[5], operands[0], operands[0]);
21137 cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
21138 operands[5], operands[1], operands[2]);
21139 code = AND;
21140 break;
21141 case UNEQ:
21142 temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
21143 operands[5], operands[0], operands[0]);
21144 cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
21145 operands[5], operands[1], operands[2]);
21146 code = IOR;
21147 break;
21148 default:
21149 gcc_unreachable ();
21150 }
21151 cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
21152 OPTAB_DIRECT);
21153 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21154 return true;
21155 }
21156
21157 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
21158 operands[5], operands[1], operands[2]))
21159 return true;
21160
21161 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
21162 operands[1], operands[2]);
21163 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21164 return true;
21165 }
21166
21167 /* Expand a signed/unsigned integral vector conditional move. */
21168
21169 bool
21170 ix86_expand_int_vcond (rtx operands[])
21171 {
21172 enum machine_mode data_mode = GET_MODE (operands[0]);
21173 enum machine_mode mode = GET_MODE (operands[4]);
21174 enum rtx_code code = GET_CODE (operands[3]);
21175 bool negate = false;
21176 rtx x, cop0, cop1;
21177
21178 cop0 = operands[4];
21179 cop1 = operands[5];
21180
21181 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
21182 and x < 0 ? 1 : 0 into (unsigned) x >> 31. */
21183 if ((code == LT || code == GE)
21184 && data_mode == mode
21185 && cop1 == CONST0_RTX (mode)
21186 && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
21187 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) > 1
21188 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) <= 8
21189 && (GET_MODE_SIZE (data_mode) == 16
21190 || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
21191 {
21192 rtx negop = operands[2 - (code == LT)];
21193 int shift = GET_MODE_BITSIZE (GET_MODE_INNER (data_mode)) - 1;
21194 if (negop == CONST1_RTX (data_mode))
21195 {
21196 rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
21197 operands[0], 1, OPTAB_DIRECT);
21198 if (res != operands[0])
21199 emit_move_insn (operands[0], res);
21200 return true;
21201 }
21202 else if (GET_MODE_INNER (data_mode) != DImode
21203 && vector_all_ones_operand (negop, data_mode))
21204 {
21205 rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
21206 operands[0], 0, OPTAB_DIRECT);
21207 if (res != operands[0])
21208 emit_move_insn (operands[0], res);
21209 return true;
21210 }
21211 }
21212
21213 if (!nonimmediate_operand (cop1, mode))
21214 cop1 = force_reg (mode, cop1);
21215 if (!general_operand (operands[1], data_mode))
21216 operands[1] = force_reg (data_mode, operands[1]);
21217 if (!general_operand (operands[2], data_mode))
21218 operands[2] = force_reg (data_mode, operands[2]);
21219
21220 /* XOP supports all of the comparisons on all 128-bit vector int types. */
21221 if (TARGET_XOP
21222 && (mode == V16QImode || mode == V8HImode
21223 || mode == V4SImode || mode == V2DImode))
21224 ;
21225 else
21226 {
21227 /* Canonicalize the comparison to EQ, GT, GTU. */
21228 switch (code)
21229 {
21230 case EQ:
21231 case GT:
21232 case GTU:
21233 break;
21234
21235 case NE:
21236 case LE:
21237 case LEU:
21238 code = reverse_condition (code);
21239 negate = true;
21240 break;
21241
21242 case GE:
21243 case GEU:
21244 code = reverse_condition (code);
21245 negate = true;
21246 /* FALLTHRU */
21247
21248 case LT:
21249 case LTU:
21250 code = swap_condition (code);
21251 x = cop0, cop0 = cop1, cop1 = x;
21252 break;
21253
21254 default:
21255 gcc_unreachable ();
21256 }
21257
21258 /* Only SSE4.1/SSE4.2 supports V2DImode. */
21259 if (mode == V2DImode)
21260 {
21261 switch (code)
21262 {
21263 case EQ:
21264 /* SSE4.1 supports EQ. */
21265 if (!TARGET_SSE4_1)
21266 return false;
21267 break;
21268
21269 case GT:
21270 case GTU:
21271 /* SSE4.2 supports GT/GTU. */
21272 if (!TARGET_SSE4_2)
21273 return false;
21274 break;
21275
21276 default:
21277 gcc_unreachable ();
21278 }
21279 }
21280
21281 /* Unsigned parallel compare is not supported by the hardware.
21282 Play some tricks to turn this into a signed comparison
21283 against 0. */
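	  /* For example, for unsigned V4SImode x > y we subtract 0x80000000
	     from both operands (equivalently, flip their sign bits) and use
	     the signed pcmpgtd, since x >u y iff
	     (x ^ 0x80000000) >s (y ^ 0x80000000).  */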
21284 if (code == GTU)
21285 {
21286 cop0 = force_reg (mode, cop0);
21287
21288 switch (mode)
21289 {
21290 case V16SImode:
21291 case V8DImode:
21292 case V8SImode:
21293 case V4DImode:
21294 case V4SImode:
21295 case V2DImode:
21296 {
21297 rtx t1, t2, mask;
21298 rtx (*gen_sub3) (rtx, rtx, rtx);
21299
21300 switch (mode)
21301 {
21302 case V16SImode: gen_sub3 = gen_subv16si3; break;
21303 case V8DImode: gen_sub3 = gen_subv8di3; break;
21304 case V8SImode: gen_sub3 = gen_subv8si3; break;
21305 case V4DImode: gen_sub3 = gen_subv4di3; break;
21306 case V4SImode: gen_sub3 = gen_subv4si3; break;
21307 case V2DImode: gen_sub3 = gen_subv2di3; break;
21308 default:
21309 gcc_unreachable ();
21310 }
21311 /* Subtract (-(INT MAX) - 1) from both operands to make
21312 them signed. */
21313 mask = ix86_build_signbit_mask (mode, true, false);
21314 t1 = gen_reg_rtx (mode);
21315 emit_insn (gen_sub3 (t1, cop0, mask));
21316
21317 t2 = gen_reg_rtx (mode);
21318 emit_insn (gen_sub3 (t2, cop1, mask));
21319
21320 cop0 = t1;
21321 cop1 = t2;
21322 code = GT;
21323 }
21324 break;
21325
21326 case V32QImode:
21327 case V16HImode:
21328 case V16QImode:
21329 case V8HImode:
21330 /* Perform a parallel unsigned saturating subtraction. */
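	      /* x >u y exactly when the saturating difference x -us y is
		 nonzero, so compare that difference for equality against
		 zero and invert the selection (the negate flag) afterwards.  */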
21331 x = gen_reg_rtx (mode);
21332 emit_insn (gen_rtx_SET (VOIDmode, x,
21333 gen_rtx_US_MINUS (mode, cop0, cop1)));
21334
21335 cop0 = x;
21336 cop1 = CONST0_RTX (mode);
21337 code = EQ;
21338 negate = !negate;
21339 break;
21340
21341 default:
21342 gcc_unreachable ();
21343 }
21344 }
21345 }
21346
21347 /* Allow the comparison to be done in one mode, but the movcc to
21348 happen in another mode. */
21349 if (data_mode == mode)
21350 {
21351 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
21352 operands[1+negate], operands[2-negate]);
21353 }
21354 else
21355 {
21356 gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
21357 x = ix86_expand_sse_cmp (gen_reg_rtx (mode), code, cop0, cop1,
21358 operands[1+negate], operands[2-negate]);
21359 if (GET_MODE (x) == mode)
21360 x = gen_lowpart (data_mode, x);
21361 }
21362
21363 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
21364 operands[2-negate]);
21365 return true;
21366 }
21367
21368 static bool
21369 ix86_expand_vec_perm_vpermi2 (rtx target, rtx op0, rtx mask, rtx op1)
21370 {
21371 enum machine_mode mode = GET_MODE (op0);
21372 switch (mode)
21373 {
21374 case V16SImode:
21375 emit_insn (gen_avx512f_vpermi2varv16si3 (target, op0,
21376 force_reg (V16SImode, mask),
21377 op1));
21378 return true;
21379 case V16SFmode:
21380 emit_insn (gen_avx512f_vpermi2varv16sf3 (target, op0,
21381 force_reg (V16SImode, mask),
21382 op1));
21383 return true;
21384 case V8DImode:
21385 emit_insn (gen_avx512f_vpermi2varv8di3 (target, op0,
21386 force_reg (V8DImode, mask), op1));
21387 return true;
21388 case V8DFmode:
21389 emit_insn (gen_avx512f_vpermi2varv8df3 (target, op0,
21390 force_reg (V8DImode, mask), op1));
21391 return true;
21392 default:
21393 return false;
21394 }
21395 }
21396
21397 /* Expand a variable vector permutation. */
21398
21399 void
21400 ix86_expand_vec_perm (rtx operands[])
21401 {
21402 rtx target = operands[0];
21403 rtx op0 = operands[1];
21404 rtx op1 = operands[2];
21405 rtx mask = operands[3];
21406 rtx t1, t2, t3, t4, t5, t6, t7, t8, vt, vt2, vec[32];
21407 enum machine_mode mode = GET_MODE (op0);
21408 enum machine_mode maskmode = GET_MODE (mask);
21409 int w, e, i;
21410 bool one_operand_shuffle = rtx_equal_p (op0, op1);
21411
21412 /* Number of elements in the vector. */
21413 w = GET_MODE_NUNITS (mode);
21414 e = GET_MODE_UNIT_SIZE (mode);
21415 gcc_assert (w <= 64);
21416
21417 if (ix86_expand_vec_perm_vpermi2 (target, op0, mask, op1))
21418 return;
21419
21420 if (TARGET_AVX2)
21421 {
21422 if (mode == V4DImode || mode == V4DFmode || mode == V16HImode)
21423 {
 21424 /* Unfortunately, the VPERMQ and VPERMPD instructions only support
 21425 a constant shuffle operand. With a tiny bit of effort we can
 21426 use VPERMD instead. A re-interpretation stall for V4DFmode is
 21427 unfortunate but there's no avoiding it.
 21428 Similarly, for V16HImode we don't have instructions for variable
 21429 shuffling, while for V32QImode we can, after preparing suitable
 21430 masks, use vpshufb; vpshufb; vpermq; vpor. */
21431
21432 if (mode == V16HImode)
21433 {
21434 maskmode = mode = V32QImode;
21435 w = 32;
21436 e = 1;
21437 }
21438 else
21439 {
21440 maskmode = mode = V8SImode;
21441 w = 8;
21442 e = 4;
21443 }
21444 t1 = gen_reg_rtx (maskmode);
21445
21446 /* Replicate the low bits of the V4DImode mask into V8SImode:
21447 mask = { A B C D }
21448 t1 = { A A B B C C D D }. */
21449 for (i = 0; i < w / 2; ++i)
21450 vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
21451 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21452 vt = force_reg (maskmode, vt);
21453 mask = gen_lowpart (maskmode, mask);
21454 if (maskmode == V8SImode)
21455 emit_insn (gen_avx2_permvarv8si (t1, mask, vt));
21456 else
21457 emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));
21458
 21459 /* Multiply the shuffle indices by two. */
21460 t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
21461 OPTAB_DIRECT);
21462
 21463 /* Add one to the odd shuffle indices:
21464 t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
21465 for (i = 0; i < w / 2; ++i)
21466 {
21467 vec[i * 2] = const0_rtx;
21468 vec[i * 2 + 1] = const1_rtx;
21469 }
21470 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21471 vt = validize_mem (force_const_mem (maskmode, vt));
21472 t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
21473 OPTAB_DIRECT);
21474
21475 /* Continue as if V8SImode (resp. V32QImode) was used initially. */
21476 operands[3] = mask = t1;
21477 target = gen_reg_rtx (mode);
21478 op0 = gen_lowpart (mode, op0);
21479 op1 = gen_lowpart (mode, op1);
21480 }
21481
21482 switch (mode)
21483 {
21484 case V8SImode:
21485 /* The VPERMD and VPERMPS instructions already properly ignore
21486 the high bits of the shuffle elements. No need for us to
21487 perform an AND ourselves. */
21488 if (one_operand_shuffle)
21489 {
21490 emit_insn (gen_avx2_permvarv8si (target, op0, mask));
21491 if (target != operands[0])
21492 emit_move_insn (operands[0],
21493 gen_lowpart (GET_MODE (operands[0]), target));
21494 }
21495 else
21496 {
21497 t1 = gen_reg_rtx (V8SImode);
21498 t2 = gen_reg_rtx (V8SImode);
21499 emit_insn (gen_avx2_permvarv8si (t1, op0, mask));
21500 emit_insn (gen_avx2_permvarv8si (t2, op1, mask));
21501 goto merge_two;
21502 }
21503 return;
21504
21505 case V8SFmode:
21506 mask = gen_lowpart (V8SImode, mask);
21507 if (one_operand_shuffle)
21508 emit_insn (gen_avx2_permvarv8sf (target, op0, mask));
21509 else
21510 {
21511 t1 = gen_reg_rtx (V8SFmode);
21512 t2 = gen_reg_rtx (V8SFmode);
21513 emit_insn (gen_avx2_permvarv8sf (t1, op0, mask));
21514 emit_insn (gen_avx2_permvarv8sf (t2, op1, mask));
21515 goto merge_two;
21516 }
21517 return;
21518
21519 case V4SImode:
21520 /* By combining the two 128-bit input vectors into one 256-bit
21521 input vector, we can use VPERMD and VPERMPS for the full
21522 two-operand shuffle. */
21523 t1 = gen_reg_rtx (V8SImode);
21524 t2 = gen_reg_rtx (V8SImode);
21525 emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
21526 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
21527 emit_insn (gen_avx2_permvarv8si (t1, t1, t2));
21528 emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
21529 return;
21530
21531 case V4SFmode:
21532 t1 = gen_reg_rtx (V8SFmode);
21533 t2 = gen_reg_rtx (V8SImode);
21534 mask = gen_lowpart (V4SImode, mask);
21535 emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
21536 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
21537 emit_insn (gen_avx2_permvarv8sf (t1, t1, t2));
21538 emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
21539 return;
21540
21541 case V32QImode:
21542 t1 = gen_reg_rtx (V32QImode);
21543 t2 = gen_reg_rtx (V32QImode);
21544 t3 = gen_reg_rtx (V32QImode);
21545 vt2 = GEN_INT (-128);
21546 for (i = 0; i < 32; i++)
21547 vec[i] = vt2;
21548 vt = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
21549 vt = force_reg (V32QImode, vt);
21550 for (i = 0; i < 32; i++)
21551 vec[i] = i < 16 ? vt2 : const0_rtx;
21552 vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
21553 vt2 = force_reg (V32QImode, vt2);
21554 /* From mask create two adjusted masks, which contain the same
21555 bits as mask in the low 7 bits of each vector element.
21556 The first mask will have the most significant bit clear
21557 if it requests an element from the same 128-bit lane
21558 and the MSB set if it requests an element from the other 128-bit lane.
21559 The second mask will have the opposite values of the MSB,
21560 and additionally will have its 128-bit lanes swapped.
21561 E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
21562 t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
21563 t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
21564 stands for the other 12 bytes. */
21565 /* The bit that tells whether an element comes from the same lane or the
21566 other lane is bit 4, so shift it up by 3 to the MSB position. */
21567 t5 = gen_reg_rtx (V4DImode);
21568 emit_insn (gen_ashlv4di3 (t5, gen_lowpart (V4DImode, mask),
21569 GEN_INT (3)));
21570 /* Clear MSB bits from the mask just in case it had them set. */
21571 emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
21572 /* After this t1 will have MSB set for elements from other lane. */
21573 emit_insn (gen_xorv32qi3 (t1, gen_lowpart (V32QImode, t5), vt2));
21574 /* Clear bits other than MSB. */
21575 emit_insn (gen_andv32qi3 (t1, t1, vt));
21576 /* Or in the lower bits from mask into t3. */
21577 emit_insn (gen_iorv32qi3 (t3, t1, t2));
21578 /* And invert MSB bits in t1, so MSB is set for elements from the same
21579 lane. */
21580 emit_insn (gen_xorv32qi3 (t1, t1, vt));
21581 /* Swap 128-bit lanes in t3. */
21582 t6 = gen_reg_rtx (V4DImode);
21583 emit_insn (gen_avx2_permv4di_1 (t6, gen_lowpart (V4DImode, t3),
21584 const2_rtx, GEN_INT (3),
21585 const0_rtx, const1_rtx));
21586 /* And or in the lower bits from mask into t1. */
21587 emit_insn (gen_iorv32qi3 (t1, t1, t2));
21588 if (one_operand_shuffle)
21589 {
21590 /* Each of these shuffles will put 0s in places where an
21591 element from the other 128-bit lane is needed; otherwise it
21592 will shuffle in the requested value. */
21593 emit_insn (gen_avx2_pshufbv32qi3 (t3, op0,
21594 gen_lowpart (V32QImode, t6)));
21595 emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
21596 /* For t3 the 128-bit lanes are swapped again. */
21597 t7 = gen_reg_rtx (V4DImode);
21598 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t3),
21599 const2_rtx, GEN_INT (3),
21600 const0_rtx, const1_rtx));
21601 /* And ORing both together yields the result. */
21602 emit_insn (gen_iorv32qi3 (target, t1,
21603 gen_lowpart (V32QImode, t7)));
21604 if (target != operands[0])
21605 emit_move_insn (operands[0],
21606 gen_lowpart (GET_MODE (operands[0]), target));
21607 return;
21608 }
21609
21610 t4 = gen_reg_rtx (V32QImode);
21611 /* Similar to the above one_operand_shuffle code,
21612 just repeated once for each operand; the code at merge_two:
21613 will merge the two results together. */
21614 emit_insn (gen_avx2_pshufbv32qi3 (t4, op0,
21615 gen_lowpart (V32QImode, t6)));
21616 emit_insn (gen_avx2_pshufbv32qi3 (t3, op1,
21617 gen_lowpart (V32QImode, t6)));
21618 emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
21619 emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
21620 t7 = gen_reg_rtx (V4DImode);
21621 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t4),
21622 const2_rtx, GEN_INT (3),
21623 const0_rtx, const1_rtx));
21624 t8 = gen_reg_rtx (V4DImode);
21625 emit_insn (gen_avx2_permv4di_1 (t8, gen_lowpart (V4DImode, t3),
21626 const2_rtx, GEN_INT (3),
21627 const0_rtx, const1_rtx));
21628 emit_insn (gen_iorv32qi3 (t4, t2, gen_lowpart (V32QImode, t7)));
21629 emit_insn (gen_iorv32qi3 (t3, t1, gen_lowpart (V32QImode, t8)));
21630 t1 = t4;
21631 t2 = t3;
21632 goto merge_two;
21633
21634 default:
21635 gcc_assert (GET_MODE_SIZE (mode) <= 16);
21636 break;
21637 }
21638 }
21639
21640 if (TARGET_XOP)
21641 {
21642 /* The XOP VPPERM insn supports three inputs. By ignoring the
21643 one_operand_shuffle special case, we avoid creating another
21644 set of constant vectors in memory. */
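/* Note (editor's clarification): VPPERM can select bytes from either of its
   two data inputs, so element indices may range over both operands; that is
   why the index mask below is 2*w - 1 rather than w - 1.  */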
21645 one_operand_shuffle = false;
21646
21647 /* mask = mask & {2*w-1, ...} */
21648 vt = GEN_INT (2*w - 1);
21649 }
21650 else
21651 {
21652 /* mask = mask & {w-1, ...} */
21653 vt = GEN_INT (w - 1);
21654 }
21655
21656 for (i = 0; i < w; i++)
21657 vec[i] = vt;
21658 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21659 mask = expand_simple_binop (maskmode, AND, mask, vt,
21660 NULL_RTX, 0, OPTAB_DIRECT);
21661
21662 /* For non-QImode operations, convert the word permutation control
21663 into a byte permutation control. */
21664 if (mode != V16QImode)
21665 {
21666 mask = expand_simple_binop (maskmode, ASHIFT, mask,
21667 GEN_INT (exact_log2 (e)),
21668 NULL_RTX, 0, OPTAB_DIRECT);
21669
21670 /* Convert mask to vector of chars. */
21671 mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));
21672
21673 /* Replicate each of the input bytes into byte positions:
21674 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
21675 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
21676 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
21677 for (i = 0; i < 16; ++i)
21678 vec[i] = GEN_INT (i/e * e);
21679 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
21680 vt = validize_mem (force_const_mem (V16QImode, vt));
21681 if (TARGET_XOP)
21682 emit_insn (gen_xop_pperm (mask, mask, mask, vt));
21683 else
21684 emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));
21685
21686 /* Convert it into the byte positions by doing
21687 mask = mask + {0,1,..,e-1, 0,1,..,e-1, ...} where e = 16/w. */
21688 for (i = 0; i < 16; ++i)
21689 vec[i] = GEN_INT (i % e);
21690 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
21691 vt = validize_mem (force_const_mem (V16QImode, vt));
21692 emit_insn (gen_addv16qi3 (mask, mask, vt));
21693 }
21694
21695 /* The actual shuffle operations all operate on V16QImode. */
21696 op0 = gen_lowpart (V16QImode, op0);
21697 op1 = gen_lowpart (V16QImode, op1);
21698
21699 if (TARGET_XOP)
21700 {
21701 if (GET_MODE (target) != V16QImode)
21702 target = gen_reg_rtx (V16QImode);
21703 emit_insn (gen_xop_pperm (target, op0, op1, mask));
21704 if (target != operands[0])
21705 emit_move_insn (operands[0],
21706 gen_lowpart (GET_MODE (operands[0]), target));
21707 }
21708 else if (one_operand_shuffle)
21709 {
21710 if (GET_MODE (target) != V16QImode)
21711 target = gen_reg_rtx (V16QImode);
21712 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
21713 if (target != operands[0])
21714 emit_move_insn (operands[0],
21715 gen_lowpart (GET_MODE (operands[0]), target));
21716 }
21717 else
21718 {
21719 rtx xops[6];
21720 bool ok;
21721
21722 /* Shuffle the two input vectors independently. */
21723 t1 = gen_reg_rtx (V16QImode);
21724 t2 = gen_reg_rtx (V16QImode);
21725 emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
21726 emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));
21727
21728 merge_two:
21729 /* Then merge them together. The key is whether any given control
21730 element contained a bit set that indicates the second word. */
21731 mask = operands[3];
21732 vt = GEN_INT (w);
21733 if (maskmode == V2DImode && !TARGET_SSE4_1)
21734 {
21735 /* Without SSE4.1, we don't have V2DImode EQ. Perform one
21736 more shuffle to convert the V2DI input mask into a V4SI
21737 input mask. At that point the masking that expand_int_vcond
21738 performs will work as desired. */
21739 rtx t3 = gen_reg_rtx (V4SImode);
21740 emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
21741 const0_rtx, const0_rtx,
21742 const2_rtx, const2_rtx));
21743 mask = t3;
21744 maskmode = V4SImode;
21745 e = w = 4;
21746 }
21747
21748 for (i = 0; i < w; i++)
21749 vec[i] = vt;
21750 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21751 vt = force_reg (maskmode, vt);
21752 mask = expand_simple_binop (maskmode, AND, mask, vt,
21753 NULL_RTX, 0, OPTAB_DIRECT);
21754
21755 if (GET_MODE (target) != mode)
21756 target = gen_reg_rtx (mode);
21757 xops[0] = target;
21758 xops[1] = gen_lowpart (mode, t2);
21759 xops[2] = gen_lowpart (mode, t1);
21760 xops[3] = gen_rtx_EQ (maskmode, mask, vt);
21761 xops[4] = mask;
21762 xops[5] = vt;
21763 ok = ix86_expand_int_vcond (xops);
21764 gcc_assert (ok);
21765 if (target != operands[0])
21766 emit_move_insn (operands[0],
21767 gen_lowpart (GET_MODE (operands[0]), target));
21768 }
21769 }
21770
21771 /* Unpack SRC into DEST as the next wider integer vector type. UNSIGNED_P is
21772 true if we should do zero extension, else sign extension. HIGH_P is
21773 true if we want the N/2 high elements, else the low elements. */
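/* For example (editor's illustration): with a V8HImode SRC, HIGH_P false and
   UNSIGNED_P true, DEST receives the four low halfwords of SRC zero-extended
   into a V4SImode vector.  */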
21774
21775 void
21776 ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
21777 {
21778 enum machine_mode imode = GET_MODE (src);
21779 rtx tmp;
21780
21781 if (TARGET_SSE4_1)
21782 {
21783 rtx (*unpack)(rtx, rtx);
21784 rtx (*extract)(rtx, rtx) = NULL;
21785 enum machine_mode halfmode = BLKmode;
21786
21787 switch (imode)
21788 {
21789 case V32QImode:
21790 if (unsigned_p)
21791 unpack = gen_avx2_zero_extendv16qiv16hi2;
21792 else
21793 unpack = gen_avx2_sign_extendv16qiv16hi2;
21794 halfmode = V16QImode;
21795 extract
21796 = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
21797 break;
21798 case V32HImode:
21799 if (unsigned_p)
21800 unpack = gen_avx512f_zero_extendv16hiv16si2;
21801 else
21802 unpack = gen_avx512f_sign_extendv16hiv16si2;
21803 halfmode = V16HImode;
21804 extract
21805 = high_p ? gen_vec_extract_hi_v32hi : gen_vec_extract_lo_v32hi;
21806 break;
21807 case V16HImode:
21808 if (unsigned_p)
21809 unpack = gen_avx2_zero_extendv8hiv8si2;
21810 else
21811 unpack = gen_avx2_sign_extendv8hiv8si2;
21812 halfmode = V8HImode;
21813 extract
21814 = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
21815 break;
21816 case V16SImode:
21817 if (unsigned_p)
21818 unpack = gen_avx512f_zero_extendv8siv8di2;
21819 else
21820 unpack = gen_avx512f_sign_extendv8siv8di2;
21821 halfmode = V8SImode;
21822 extract
21823 = high_p ? gen_vec_extract_hi_v16si : gen_vec_extract_lo_v16si;
21824 break;
21825 case V8SImode:
21826 if (unsigned_p)
21827 unpack = gen_avx2_zero_extendv4siv4di2;
21828 else
21829 unpack = gen_avx2_sign_extendv4siv4di2;
21830 halfmode = V4SImode;
21831 extract
21832 = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
21833 break;
21834 case V16QImode:
21835 if (unsigned_p)
21836 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
21837 else
21838 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
21839 break;
21840 case V8HImode:
21841 if (unsigned_p)
21842 unpack = gen_sse4_1_zero_extendv4hiv4si2;
21843 else
21844 unpack = gen_sse4_1_sign_extendv4hiv4si2;
21845 break;
21846 case V4SImode:
21847 if (unsigned_p)
21848 unpack = gen_sse4_1_zero_extendv2siv2di2;
21849 else
21850 unpack = gen_sse4_1_sign_extendv2siv2di2;
21851 break;
21852 default:
21853 gcc_unreachable ();
21854 }
21855
21856 if (GET_MODE_SIZE (imode) >= 32)
21857 {
21858 tmp = gen_reg_rtx (halfmode);
21859 emit_insn (extract (tmp, src));
21860 }
21861 else if (high_p)
21862 {
21863 /* Shift higher 8 bytes to lower 8 bytes. */
21864 tmp = gen_reg_rtx (V1TImode);
21865 emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, src),
21866 GEN_INT (64)));
21867 tmp = gen_lowpart (imode, tmp);
21868 }
21869 else
21870 tmp = src;
21871
21872 emit_insn (unpack (dest, tmp));
21873 }
21874 else
21875 {
21876 rtx (*unpack)(rtx, rtx, rtx);
21877
21878 switch (imode)
21879 {
21880 case V16QImode:
21881 if (high_p)
21882 unpack = gen_vec_interleave_highv16qi;
21883 else
21884 unpack = gen_vec_interleave_lowv16qi;
21885 break;
21886 case V8HImode:
21887 if (high_p)
21888 unpack = gen_vec_interleave_highv8hi;
21889 else
21890 unpack = gen_vec_interleave_lowv8hi;
21891 break;
21892 case V4SImode:
21893 if (high_p)
21894 unpack = gen_vec_interleave_highv4si;
21895 else
21896 unpack = gen_vec_interleave_lowv4si;
21897 break;
21898 default:
21899 gcc_unreachable ();
21900 }
21901
21902 if (unsigned_p)
21903 tmp = force_reg (imode, CONST0_RTX (imode));
21904 else
21905 tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
21906 src, pc_rtx, pc_rtx);
21907
21908 rtx tmp2 = gen_reg_rtx (imode);
21909 emit_insn (unpack (tmp2, src, tmp));
21910 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), tmp2));
21911 }
21912 }
21913
21914 /* Expand conditional increment or decrement using adc/sbb instructions.
21915 The default case using setcc followed by the conditional move can be
21916 done by generic code. */
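/* Illustrative example (assumed by the editor, not taken from the sources):
   for an unsigned comparison, r = (a < b) ? r + 1 : r can be emitted as
   cmp a, b (setting the carry flag when a < b) followed by adc $0, r,
   avoiding a setcc/cmov pair.  */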
21917 bool
21918 ix86_expand_int_addcc (rtx operands[])
21919 {
21920 enum rtx_code code = GET_CODE (operands[1]);
21921 rtx flags;
21922 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
21923 rtx compare_op;
21924 rtx val = const0_rtx;
21925 bool fpcmp = false;
21926 enum machine_mode mode;
21927 rtx op0 = XEXP (operands[1], 0);
21928 rtx op1 = XEXP (operands[1], 1);
21929
21930 if (operands[3] != const1_rtx
21931 && operands[3] != constm1_rtx)
21932 return false;
21933 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
21934 return false;
21935 code = GET_CODE (compare_op);
21936
21937 flags = XEXP (compare_op, 0);
21938
21939 if (GET_MODE (flags) == CCFPmode
21940 || GET_MODE (flags) == CCFPUmode)
21941 {
21942 fpcmp = true;
21943 code = ix86_fp_compare_code_to_integer (code);
21944 }
21945
21946 if (code != LTU)
21947 {
21948 val = constm1_rtx;
21949 if (fpcmp)
21950 PUT_CODE (compare_op,
21951 reverse_condition_maybe_unordered
21952 (GET_CODE (compare_op)));
21953 else
21954 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
21955 }
21956
21957 mode = GET_MODE (operands[0]);
21958
21959 /* Construct either adc or sbb insn. */
21960 if ((code == LTU) == (operands[3] == constm1_rtx))
21961 {
21962 switch (mode)
21963 {
21964 case QImode:
21965 insn = gen_subqi3_carry;
21966 break;
21967 case HImode:
21968 insn = gen_subhi3_carry;
21969 break;
21970 case SImode:
21971 insn = gen_subsi3_carry;
21972 break;
21973 case DImode:
21974 insn = gen_subdi3_carry;
21975 break;
21976 default:
21977 gcc_unreachable ();
21978 }
21979 }
21980 else
21981 {
21982 switch (mode)
21983 {
21984 case QImode:
21985 insn = gen_addqi3_carry;
21986 break;
21987 case HImode:
21988 insn = gen_addhi3_carry;
21989 break;
21990 case SImode:
21991 insn = gen_addsi3_carry;
21992 break;
21993 case DImode:
21994 insn = gen_adddi3_carry;
21995 break;
21996 default:
21997 gcc_unreachable ();
21998 }
21999 }
22000 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
22001
22002 return true;
22003 }
22004
22005
22006 /* Split OPERAND into half-mode parts stored in PARTS. Similar to split_double_mode,
22007 but works for floating point parameters and non-offsettable memories.
22008 For pushes, it returns just stack offsets; the values will be saved
22009 in the right order. At most four parts are generated. */
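/* For example (editor's illustration): on a 32-bit target an XFmode operand
   is returned as three SImode parts and a DImode operand as two, while a
   TFmode operand yields four parts.  */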
22010
22011 static int
22012 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
22013 {
22014 int size;
22015
22016 if (!TARGET_64BIT)
22017 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
22018 else
22019 size = (GET_MODE_SIZE (mode) + 4) / 8;
22020
22021 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
22022 gcc_assert (size >= 2 && size <= 4);
22023
22024 /* Optimize constant pool references to immediates. This is used by fp
22025 moves, which force all constants to memory to allow combining. */
22026 if (MEM_P (operand) && MEM_READONLY_P (operand))
22027 {
22028 rtx tmp = maybe_get_pool_constant (operand);
22029 if (tmp)
22030 operand = tmp;
22031 }
22032
22033 if (MEM_P (operand) && !offsettable_memref_p (operand))
22034 {
22035 /* The only non-offsettable memories we handle are pushes. */
22036 int ok = push_operand (operand, VOIDmode);
22037
22038 gcc_assert (ok);
22039
22040 operand = copy_rtx (operand);
22041 PUT_MODE (operand, word_mode);
22042 parts[0] = parts[1] = parts[2] = parts[3] = operand;
22043 return size;
22044 }
22045
22046 if (GET_CODE (operand) == CONST_VECTOR)
22047 {
22048 enum machine_mode imode = int_mode_for_mode (mode);
22049 /* Caution: if we looked through a constant pool memory above,
22050 the operand may actually have a different mode now. That's
22051 ok, since we want to pun this all the way back to an integer. */
22052 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
22053 gcc_assert (operand != NULL);
22054 mode = imode;
22055 }
22056
22057 if (!TARGET_64BIT)
22058 {
22059 if (mode == DImode)
22060 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22061 else
22062 {
22063 int i;
22064
22065 if (REG_P (operand))
22066 {
22067 gcc_assert (reload_completed);
22068 for (i = 0; i < size; i++)
22069 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
22070 }
22071 else if (offsettable_memref_p (operand))
22072 {
22073 operand = adjust_address (operand, SImode, 0);
22074 parts[0] = operand;
22075 for (i = 1; i < size; i++)
22076 parts[i] = adjust_address (operand, SImode, 4 * i);
22077 }
22078 else if (GET_CODE (operand) == CONST_DOUBLE)
22079 {
22080 REAL_VALUE_TYPE r;
22081 long l[4];
22082
22083 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22084 switch (mode)
22085 {
22086 case TFmode:
22087 real_to_target (l, &r, mode);
22088 parts[3] = gen_int_mode (l[3], SImode);
22089 parts[2] = gen_int_mode (l[2], SImode);
22090 break;
22091 case XFmode:
22092 /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since
22093 long double may not be 80-bit. */
22094 real_to_target (l, &r, mode);
22095 parts[2] = gen_int_mode (l[2], SImode);
22096 break;
22097 case DFmode:
22098 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
22099 break;
22100 default:
22101 gcc_unreachable ();
22102 }
22103 parts[1] = gen_int_mode (l[1], SImode);
22104 parts[0] = gen_int_mode (l[0], SImode);
22105 }
22106 else
22107 gcc_unreachable ();
22108 }
22109 }
22110 else
22111 {
22112 if (mode == TImode)
22113 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22114 if (mode == XFmode || mode == TFmode)
22115 {
22116 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
22117 if (REG_P (operand))
22118 {
22119 gcc_assert (reload_completed);
22120 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
22121 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
22122 }
22123 else if (offsettable_memref_p (operand))
22124 {
22125 operand = adjust_address (operand, DImode, 0);
22126 parts[0] = operand;
22127 parts[1] = adjust_address (operand, upper_mode, 8);
22128 }
22129 else if (GET_CODE (operand) == CONST_DOUBLE)
22130 {
22131 REAL_VALUE_TYPE r;
22132 long l[4];
22133
22134 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22135 real_to_target (l, &r, mode);
22136
22137 /* Do not use shift by 32 to avoid warning on 32bit systems. */
22138 if (HOST_BITS_PER_WIDE_INT >= 64)
22139 parts[0]
22140 = gen_int_mode
22141 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
22142 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
22143 DImode);
22144 else
22145 parts[0] = immed_double_const (l[0], l[1], DImode);
22146
22147 if (upper_mode == SImode)
22148 parts[1] = gen_int_mode (l[2], SImode);
22149 else if (HOST_BITS_PER_WIDE_INT >= 64)
22150 parts[1]
22151 = gen_int_mode
22152 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
22153 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
22154 DImode);
22155 else
22156 parts[1] = immed_double_const (l[2], l[3], DImode);
22157 }
22158 else
22159 gcc_unreachable ();
22160 }
22161 }
22162
22163 return size;
22164 }
22165
22166 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
22167 All required insns are emitted here; no further moves are needed.
22168 Operands 2-5 are filled with the destination parts in the correct
22169 order and operands 6-9 with the corresponding source parts. */
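/* For example (editor's illustration): a DImode move on a 32-bit target is
   split into two SImode moves, emitted in an order chosen so that a
   destination register that is also used in the source address is
   overwritten last.  */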
22170
22171 void
22172 ix86_split_long_move (rtx operands[])
22173 {
22174 rtx part[2][4];
22175 int nparts, i, j;
22176 int push = 0;
22177 int collisions = 0;
22178 enum machine_mode mode = GET_MODE (operands[0]);
22179 bool collisionparts[4];
22180
22181 /* The DFmode expanders may ask us to move a double.
22182 For a 64-bit target this is a single move. By hiding that fact
22183 here we simplify the i386.md splitters. */
22184 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
22185 {
22186 /* Optimize constant pool references to immediates. This is used by
22187 fp moves, which force all constants to memory to allow combining. */
22188
22189 if (MEM_P (operands[1])
22190 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
22191 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
22192 operands[1] = get_pool_constant (XEXP (operands[1], 0));
22193 if (push_operand (operands[0], VOIDmode))
22194 {
22195 operands[0] = copy_rtx (operands[0]);
22196 PUT_MODE (operands[0], word_mode);
22197 }
22198 else
22199 operands[0] = gen_lowpart (DImode, operands[0]);
22200 operands[1] = gen_lowpart (DImode, operands[1]);
22201 emit_move_insn (operands[0], operands[1]);
22202 return;
22203 }
22204
22205 /* The only non-offsettable memory we handle is push. */
22206 if (push_operand (operands[0], VOIDmode))
22207 push = 1;
22208 else
22209 gcc_assert (!MEM_P (operands[0])
22210 || offsettable_memref_p (operands[0]));
22211
22212 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
22213 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
22214
22215 /* When emitting a push, take care of source operands on the stack. */
22216 if (push && MEM_P (operands[1])
22217 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
22218 {
22219 rtx src_base = XEXP (part[1][nparts - 1], 0);
22220
22221 /* Compensate for the stack decrement by 4. */
22222 if (!TARGET_64BIT && nparts == 3
22223 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
22224 src_base = plus_constant (Pmode, src_base, 4);
22225
22226 /* src_base refers to the stack pointer and is
22227 automatically decreased by the emitted push. */
22228 for (i = 0; i < nparts; i++)
22229 part[1][i] = change_address (part[1][i],
22230 GET_MODE (part[1][i]), src_base);
22231 }
22232
22233 /* We need to do the copy in the right order in case an address register
22234 of the source overlaps the destination. */
22235 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
22236 {
22237 rtx tmp;
22238
22239 for (i = 0; i < nparts; i++)
22240 {
22241 collisionparts[i]
22242 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
22243 if (collisionparts[i])
22244 collisions++;
22245 }
22246
22247 /* Collision in the middle part can be handled by reordering. */
22248 if (collisions == 1 && nparts == 3 && collisionparts [1])
22249 {
22250 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
22251 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
22252 }
22253 else if (collisions == 1
22254 && nparts == 4
22255 && (collisionparts [1] || collisionparts [2]))
22256 {
22257 if (collisionparts [1])
22258 {
22259 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
22260 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
22261 }
22262 else
22263 {
22264 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
22265 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
22266 }
22267 }
22268
22269 /* If there are more collisions, we can't handle them by reordering.
22270 Do an lea to the last part and use only one colliding move. */
22271 else if (collisions > 1)
22272 {
22273 rtx base;
22274
22275 collisions = 1;
22276
22277 base = part[0][nparts - 1];
22278
22279 /* Handle the case when the last part isn't valid for lea.
22280 Happens in 64-bit mode storing the 12-byte XFmode. */
22281 if (GET_MODE (base) != Pmode)
22282 base = gen_rtx_REG (Pmode, REGNO (base));
22283
22284 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
22285 part[1][0] = replace_equiv_address (part[1][0], base);
22286 for (i = 1; i < nparts; i++)
22287 {
22288 tmp = plus_constant (Pmode, base, UNITS_PER_WORD * i);
22289 part[1][i] = replace_equiv_address (part[1][i], tmp);
22290 }
22291 }
22292 }
22293
22294 if (push)
22295 {
22296 if (!TARGET_64BIT)
22297 {
22298 if (nparts == 3)
22299 {
22300 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
22301 emit_insn (ix86_gen_add3 (stack_pointer_rtx,
22302 stack_pointer_rtx, GEN_INT (-4)));
22303 emit_move_insn (part[0][2], part[1][2]);
22304 }
22305 else if (nparts == 4)
22306 {
22307 emit_move_insn (part[0][3], part[1][3]);
22308 emit_move_insn (part[0][2], part[1][2]);
22309 }
22310 }
22311 else
22312 {
22313 /* In 64-bit mode we don't have a 32-bit push available. If this is a
22314 register, that is OK - we just use the larger counterpart. We also
22315 retype memory - these cases come from the attempt to avoid a REX prefix
22316 on the move of the second half of a TFmode value. */
22317 if (GET_MODE (part[1][1]) == SImode)
22318 {
22319 switch (GET_CODE (part[1][1]))
22320 {
22321 case MEM:
22322 part[1][1] = adjust_address (part[1][1], DImode, 0);
22323 break;
22324
22325 case REG:
22326 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
22327 break;
22328
22329 default:
22330 gcc_unreachable ();
22331 }
22332
22333 if (GET_MODE (part[1][0]) == SImode)
22334 part[1][0] = part[1][1];
22335 }
22336 }
22337 emit_move_insn (part[0][1], part[1][1]);
22338 emit_move_insn (part[0][0], part[1][0]);
22339 return;
22340 }
22341
22342 /* Choose the correct order so as not to overwrite the source before it is copied. */
22343 if ((REG_P (part[0][0])
22344 && REG_P (part[1][1])
22345 && (REGNO (part[0][0]) == REGNO (part[1][1])
22346 || (nparts == 3
22347 && REGNO (part[0][0]) == REGNO (part[1][2]))
22348 || (nparts == 4
22349 && REGNO (part[0][0]) == REGNO (part[1][3]))))
22350 || (collisions > 0
22351 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
22352 {
22353 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
22354 {
22355 operands[2 + i] = part[0][j];
22356 operands[6 + i] = part[1][j];
22357 }
22358 }
22359 else
22360 {
22361 for (i = 0; i < nparts; i++)
22362 {
22363 operands[2 + i] = part[0][i];
22364 operands[6 + i] = part[1][i];
22365 }
22366 }
22367
22368 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
22369 if (optimize_insn_for_size_p ())
22370 {
22371 for (j = 0; j < nparts - 1; j++)
22372 if (CONST_INT_P (operands[6 + j])
22373 && operands[6 + j] != const0_rtx
22374 && REG_P (operands[2 + j]))
22375 for (i = j; i < nparts - 1; i++)
22376 if (CONST_INT_P (operands[7 + i])
22377 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
22378 operands[7 + i] = operands[2 + j];
22379 }
22380
22381 for (i = 0; i < nparts; i++)
22382 emit_move_insn (operands[2 + i], operands[6 + i]);
22383
22384 return;
22385 }
22386
22387 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
22388 left shift by a constant, either using a single shift or
22389 a sequence of add instructions. */
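/* For example (editor's illustration): a left shift by 1 is always emitted
   as a single add of the operand to itself, and a shift by 2 becomes two
   adds when two adds are no more costly than a constant shift and we are
   not optimizing for size.  */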
22390
22391 static void
22392 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
22393 {
22394 rtx (*insn)(rtx, rtx, rtx);
22395
22396 if (count == 1
22397 || (count * ix86_cost->add <= ix86_cost->shift_const
22398 && !optimize_insn_for_size_p ()))
22399 {
22400 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
22401 while (count-- > 0)
22402 emit_insn (insn (operand, operand, operand));
22403 }
22404 else
22405 {
22406 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
22407 emit_insn (insn (operand, operand, GEN_INT (count)));
22408 }
22409 }
22410
22411 void
22412 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
22413 {
22414 rtx (*gen_ashl3)(rtx, rtx, rtx);
22415 rtx (*gen_shld)(rtx, rtx, rtx);
22416 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22417
22418 rtx low[2], high[2];
22419 int count;
22420
22421 if (CONST_INT_P (operands[2]))
22422 {
22423 split_double_mode (mode, operands, 2, low, high);
22424 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22425
22426 if (count >= half_width)
22427 {
22428 emit_move_insn (high[0], low[1]);
22429 emit_move_insn (low[0], const0_rtx);
22430
22431 if (count > half_width)
22432 ix86_expand_ashl_const (high[0], count - half_width, mode);
22433 }
22434 else
22435 {
22436 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
22437
22438 if (!rtx_equal_p (operands[0], operands[1]))
22439 emit_move_insn (operands[0], operands[1]);
22440
22441 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
22442 ix86_expand_ashl_const (low[0], count, mode);
22443 }
22444 return;
22445 }
22446
22447 split_double_mode (mode, operands, 1, low, high);
22448
22449 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
22450
22451 if (operands[1] == const1_rtx)
22452 {
22453 /* Assuming we've chosen QImode-capable registers, 1 << N
22454 can be done with two 32/64-bit shifts, no branches, no cmoves. */
22455 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
22456 {
22457 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
22458
22459 ix86_expand_clear (low[0]);
22460 ix86_expand_clear (high[0]);
22461 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
22462
22463 d = gen_lowpart (QImode, low[0]);
22464 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
22465 s = gen_rtx_EQ (QImode, flags, const0_rtx);
22466 emit_insn (gen_rtx_SET (VOIDmode, d, s));
22467
22468 d = gen_lowpart (QImode, high[0]);
22469 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
22470 s = gen_rtx_NE (QImode, flags, const0_rtx);
22471 emit_insn (gen_rtx_SET (VOIDmode, d, s));
22472 }
22473
22474 /* Otherwise, we can get the same results by manually performing
22475 a bit extract operation on bit 5/6, and then performing the two
22476 shifts. The two methods of getting 0/1 into low/high are exactly
22477 the same size. Avoiding the shift in the bit extract case helps
22478 Pentium 4 a bit; no one else seems to care much either way. */
22479 else
22480 {
22481 enum machine_mode half_mode;
22482 rtx (*gen_lshr3)(rtx, rtx, rtx);
22483 rtx (*gen_and3)(rtx, rtx, rtx);
22484 rtx (*gen_xor3)(rtx, rtx, rtx);
22485 HOST_WIDE_INT bits;
22486 rtx x;
22487
22488 if (mode == DImode)
22489 {
22490 half_mode = SImode;
22491 gen_lshr3 = gen_lshrsi3;
22492 gen_and3 = gen_andsi3;
22493 gen_xor3 = gen_xorsi3;
22494 bits = 5;
22495 }
22496 else
22497 {
22498 half_mode = DImode;
22499 gen_lshr3 = gen_lshrdi3;
22500 gen_and3 = gen_anddi3;
22501 gen_xor3 = gen_xordi3;
22502 bits = 6;
22503 }
22504
22505 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
22506 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
22507 else
22508 x = gen_lowpart (half_mode, operands[2]);
22509 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
22510
22511 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
22512 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
22513 emit_move_insn (low[0], high[0]);
22514 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
22515 }
22516
22517 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
22518 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
22519 return;
22520 }
22521
22522 if (operands[1] == constm1_rtx)
22523 {
22524 /* For -1 << N, we can avoid the shld instruction, because we
22525 know that we're shifting 0...31/63 ones into a -1. */
22526 emit_move_insn (low[0], constm1_rtx);
22527 if (optimize_insn_for_size_p ())
22528 emit_move_insn (high[0], low[0]);
22529 else
22530 emit_move_insn (high[0], constm1_rtx);
22531 }
22532 else
22533 {
22534 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
22535
22536 if (!rtx_equal_p (operands[0], operands[1]))
22537 emit_move_insn (operands[0], operands[1]);
22538
22539 split_double_mode (mode, operands, 1, low, high);
22540 emit_insn (gen_shld (high[0], low[0], operands[2]));
22541 }
22542
22543 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
22544
22545 if (TARGET_CMOVE && scratch)
22546 {
22547 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
22548 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
22549
22550 ix86_expand_clear (scratch);
22551 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
22552 }
22553 else
22554 {
22555 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
22556 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
22557
22558 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
22559 }
22560 }
22561
22562 void
22563 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
22564 {
22565 rtx (*gen_ashr3)(rtx, rtx, rtx)
22566 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
22567 rtx (*gen_shrd)(rtx, rtx, rtx);
22568 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22569
22570 rtx low[2], high[2];
22571 int count;
22572
22573 if (CONST_INT_P (operands[2]))
22574 {
22575 split_double_mode (mode, operands, 2, low, high);
22576 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22577
22578 if (count == GET_MODE_BITSIZE (mode) - 1)
22579 {
22580 emit_move_insn (high[0], high[1]);
22581 emit_insn (gen_ashr3 (high[0], high[0],
22582 GEN_INT (half_width - 1)));
22583 emit_move_insn (low[0], high[0]);
22584
22585 }
22586 else if (count >= half_width)
22587 {
22588 emit_move_insn (low[0], high[1]);
22589 emit_move_insn (high[0], low[0]);
22590 emit_insn (gen_ashr3 (high[0], high[0],
22591 GEN_INT (half_width - 1)));
22592
22593 if (count > half_width)
22594 emit_insn (gen_ashr3 (low[0], low[0],
22595 GEN_INT (count - half_width)));
22596 }
22597 else
22598 {
22599 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22600
22601 if (!rtx_equal_p (operands[0], operands[1]))
22602 emit_move_insn (operands[0], operands[1]);
22603
22604 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
22605 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
22606 }
22607 }
22608 else
22609 {
22610 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22611
22612 if (!rtx_equal_p (operands[0], operands[1]))
22613 emit_move_insn (operands[0], operands[1]);
22614
22615 split_double_mode (mode, operands, 1, low, high);
22616
22617 emit_insn (gen_shrd (low[0], high[0], operands[2]));
22618 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
22619
22620 if (TARGET_CMOVE && scratch)
22621 {
22622 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
22623 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
22624
22625 emit_move_insn (scratch, high[0]);
22626 emit_insn (gen_ashr3 (scratch, scratch,
22627 GEN_INT (half_width - 1)));
22628 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
22629 scratch));
22630 }
22631 else
22632 {
22633 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
22634 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
22635
22636 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
22637 }
22638 }
22639 }
22640
22641 void
22642 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
22643 {
22644 rtx (*gen_lshr3)(rtx, rtx, rtx)
22645 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
22646 rtx (*gen_shrd)(rtx, rtx, rtx);
22647 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22648
22649 rtx low[2], high[2];
22650 int count;
22651
22652 if (CONST_INT_P (operands[2]))
22653 {
22654 split_double_mode (mode, operands, 2, low, high);
22655 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22656
22657 if (count >= half_width)
22658 {
22659 emit_move_insn (low[0], high[1]);
22660 ix86_expand_clear (high[0]);
22661
22662 if (count > half_width)
22663 emit_insn (gen_lshr3 (low[0], low[0],
22664 GEN_INT (count - half_width)));
22665 }
22666 else
22667 {
22668 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22669
22670 if (!rtx_equal_p (operands[0], operands[1]))
22671 emit_move_insn (operands[0], operands[1]);
22672
22673 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
22674 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
22675 }
22676 }
22677 else
22678 {
22679 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22680
22681 if (!rtx_equal_p (operands[0], operands[1]))
22682 emit_move_insn (operands[0], operands[1]);
22683
22684 split_double_mode (mode, operands, 1, low, high);
22685
22686 emit_insn (gen_shrd (low[0], high[0], operands[2]));
22687 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
22688
22689 if (TARGET_CMOVE && scratch)
22690 {
22691 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
22692 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
22693
22694 ix86_expand_clear (scratch);
22695 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
22696 scratch));
22697 }
22698 else
22699 {
22700 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
22701 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
22702
22703 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
22704 }
22705 }
22706 }
22707
22708 /* Predict the just-emitted jump instruction to be taken with probability PROB. */
22709 static void
22710 predict_jump (int prob)
22711 {
22712 rtx insn = get_last_insn ();
22713 gcc_assert (JUMP_P (insn));
22714 add_int_reg_note (insn, REG_BR_PROB, prob);
22715 }
22716
22717 /* Helper function for the string operations below. Test VARIABLE for whether
22718 it is aligned to VALUE bytes. If so, jump to the returned label. */
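/* For example (editor's illustration): ix86_expand_aligntest (count, 4, true)
   emits roughly "and $4, tmp; je label", so the fall-through code handles the
   case where bit 2 of COUNT is set and the returned label skips it.  */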
22719 static rtx_code_label *
22720 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
22721 {
22722 rtx_code_label *label = gen_label_rtx ();
22723 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
22724 if (GET_MODE (variable) == DImode)
22725 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
22726 else
22727 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
22728 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
22729 1, label);
22730 if (epilogue)
22731 predict_jump (REG_BR_PROB_BASE * 50 / 100);
22732 else
22733 predict_jump (REG_BR_PROB_BASE * 90 / 100);
22734 return label;
22735 }
22736
22737 /* Decrease COUNTREG by VALUE. */
22738 static void
22739 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
22740 {
22741 rtx (*gen_add)(rtx, rtx, rtx)
22742 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
22743
22744 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
22745 }
22746
22747 /* Zero extend possibly SImode EXP to Pmode register. */
22748 rtx
22749 ix86_zero_extend_to_Pmode (rtx exp)
22750 {
22751 return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
22752 }
22753
22754 /* Divide COUNTREG by SCALE. */
22755 static rtx
22756 scale_counter (rtx countreg, int scale)
22757 {
22758 rtx sc;
22759
22760 if (scale == 1)
22761 return countreg;
22762 if (CONST_INT_P (countreg))
22763 return GEN_INT (INTVAL (countreg) / scale);
22764 gcc_assert (REG_P (countreg));
22765
22766 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
22767 GEN_INT (exact_log2 (scale)),
22768 NULL, 1, OPTAB_DIRECT);
22769 return sc;
22770 }
22771
22772 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
22773 DImode for constant loop counts. */
22774
22775 static enum machine_mode
22776 counter_mode (rtx count_exp)
22777 {
22778 if (GET_MODE (count_exp) != VOIDmode)
22779 return GET_MODE (count_exp);
22780 if (!CONST_INT_P (count_exp))
22781 return Pmode;
22782 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
22783 return DImode;
22784 return SImode;
22785 }
22786
22787 /* Copy the address to a Pmode register. This is used for x32 to
22788 truncate DImode TLS address to a SImode register. */
22789
22790 static rtx
22791 ix86_copy_addr_to_reg (rtx addr)
22792 {
22793 if (GET_MODE (addr) == Pmode || GET_MODE (addr) == VOIDmode)
22794 return copy_addr_to_reg (addr);
22795 else
22796 {
22797 gcc_assert (GET_MODE (addr) == DImode && Pmode == SImode);
22798 return gen_rtx_SUBREG (SImode, copy_to_mode_reg (DImode, addr), 0);
22799 }
22800 }
22801
22802 /* When ISSETMEM is FALSE, output a simple loop to copy memory from the pointer
22803 SRCPTR to DESTPTR in chunks of MODE, unrolled UNROLL times; the overall size
22804 is COUNT bytes. When ISSETMEM is TRUE, output the equivalent loop to fill
22805 memory with VALUE (assumed to be in MODE).
22806
22807 The size is rounded down to a whole number of chunks moved at once.
22808 SRCMEM and DESTMEM provide the MEM rtx to feed proper aliasing info. */
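/* For example (editor's illustration): with MODE == SImode and UNROLL == 4
   the loop body copies 16 bytes per iteration, so for a COUNT of 70 the loop
   copies 64 bytes and the remaining 6 bytes are left to the epilogue code.  */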
22809
22810
22811 static void
22812 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
22813 rtx destptr, rtx srcptr, rtx value,
22814 rtx count, enum machine_mode mode, int unroll,
22815 int expected_size, bool issetmem)
22816 {
22817 rtx_code_label *out_label, *top_label;
22818 rtx iter, tmp;
22819 enum machine_mode iter_mode = counter_mode (count);
22820 int piece_size_n = GET_MODE_SIZE (mode) * unroll;
22821 rtx piece_size = GEN_INT (piece_size_n);
22822 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
22823 rtx size;
22824 int i;
22825
22826 top_label = gen_label_rtx ();
22827 out_label = gen_label_rtx ();
22828 iter = gen_reg_rtx (iter_mode);
22829
22830 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
22831 NULL, 1, OPTAB_DIRECT);
22832 /* Those two should combine. */
22833 if (piece_size == const1_rtx)
22834 {
22835 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
22836 true, out_label);
22837 predict_jump (REG_BR_PROB_BASE * 10 / 100);
22838 }
22839 emit_move_insn (iter, const0_rtx);
22840
22841 emit_label (top_label);
22842
22843 tmp = convert_modes (Pmode, iter_mode, iter, true);
22844
22845 /* This assert could be relaxed - in that case we would need to compute
22846 the smallest power of two containing PIECE_SIZE_N and pass it to
22847 offset_address. */
22848 gcc_assert ((piece_size_n & (piece_size_n - 1)) == 0);
22849 destmem = offset_address (destmem, tmp, piece_size_n);
22850 destmem = adjust_address (destmem, mode, 0);
22851
22852 if (!issetmem)
22853 {
22854 srcmem = offset_address (srcmem, copy_rtx (tmp), piece_size_n);
22855 srcmem = adjust_address (srcmem, mode, 0);
22856
22857 /* When unrolling for chips that reorder memory reads and writes,
22858 we can save registers by using a single temporary.
22859 Also, using 4 temporaries is overkill in 32-bit mode. */
22860 if (!TARGET_64BIT && 0)
22861 {
22862 for (i = 0; i < unroll; i++)
22863 {
22864 if (i)
22865 {
22866 destmem =
22867 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
22868 srcmem =
22869 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
22870 }
22871 emit_move_insn (destmem, srcmem);
22872 }
22873 }
22874 else
22875 {
22876 rtx tmpreg[4];
22877 gcc_assert (unroll <= 4);
22878 for (i = 0; i < unroll; i++)
22879 {
22880 tmpreg[i] = gen_reg_rtx (mode);
22881 if (i)
22882 {
22883 srcmem =
22884 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
22885 }
22886 emit_move_insn (tmpreg[i], srcmem);
22887 }
22888 for (i = 0; i < unroll; i++)
22889 {
22890 if (i)
22891 {
22892 destmem =
22893 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
22894 }
22895 emit_move_insn (destmem, tmpreg[i]);
22896 }
22897 }
22898 }
22899 else
22900 for (i = 0; i < unroll; i++)
22901 {
22902 if (i)
22903 destmem =
22904 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
22905 emit_move_insn (destmem, value);
22906 }
22907
22908 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
22909 true, OPTAB_LIB_WIDEN);
22910 if (tmp != iter)
22911 emit_move_insn (iter, tmp);
22912
22913 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
22914 true, top_label);
22915 if (expected_size != -1)
22916 {
22917 expected_size /= GET_MODE_SIZE (mode) * unroll;
22918 if (expected_size == 0)
22919 predict_jump (0);
22920 else if (expected_size > REG_BR_PROB_BASE)
22921 predict_jump (REG_BR_PROB_BASE - 1);
22922 else
22923 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
22924 }
22925 else
22926 predict_jump (REG_BR_PROB_BASE * 80 / 100);
22927 iter = ix86_zero_extend_to_Pmode (iter);
22928 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
22929 true, OPTAB_LIB_WIDEN);
22930 if (tmp != destptr)
22931 emit_move_insn (destptr, tmp);
22932 if (!issetmem)
22933 {
22934 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
22935 true, OPTAB_LIB_WIDEN);
22936 if (tmp != srcptr)
22937 emit_move_insn (srcptr, tmp);
22938 }
22939 emit_label (out_label);
22940 }
22941
22942 /* Output "rep; mov" or "rep; stos" instruction depending on ISSETMEM argument.
22943 When ISSETMEM is true, arguments SRCMEM and SRCPTR are ignored.
22944 When ISSETMEM is false, arguments VALUE and ORIG_VALUE are ignored.
22945 For setmem case, VALUE is a promoted to a wider size ORIG_VALUE.
22946 ORIG_VALUE is the original value passed to memset to fill the memory with.
22947 Other arguments have same meaning as for previous function. */
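/* For example (editor's illustration): a constant-size memset of zero whose
   length is a multiple of 4 has MODE widened from QImode to SImode below, so
   a "rep stosl" storing 4 bytes per iteration is emitted instead of a
   byte-wise "rep stosb".  */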
22948
22949 static void
22950 expand_set_or_movmem_via_rep (rtx destmem, rtx srcmem,
22951 rtx destptr, rtx srcptr, rtx value, rtx orig_value,
22952 rtx count,
22953 enum machine_mode mode, bool issetmem)
22954 {
22955 rtx destexp;
22956 rtx srcexp;
22957 rtx countreg;
22958 HOST_WIDE_INT rounded_count;
22959
22960 /* If possible, it is shorter to use rep movs.
22961 TODO: Maybe it is better to move this logic to decide_alg. */
22962 if (mode == QImode && CONST_INT_P (count) && !(INTVAL (count) & 3)
22963 && (!issetmem || orig_value == const0_rtx))
22964 mode = SImode;
22965
22966 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
22967 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
22968
22969 countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
22970 GET_MODE_SIZE (mode)));
22971 if (mode != QImode)
22972 {
22973 destexp = gen_rtx_ASHIFT (Pmode, countreg,
22974 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
22975 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
22976 }
22977 else
22978 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
22979 if ((!issetmem || orig_value == const0_rtx) && CONST_INT_P (count))
22980 {
22981 rounded_count = (INTVAL (count)
22982 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
22983 destmem = shallow_copy_rtx (destmem);
22984 set_mem_size (destmem, rounded_count);
22985 }
22986 else if (MEM_SIZE_KNOWN_P (destmem))
22987 clear_mem_size (destmem);
22988
22989 if (issetmem)
22990 {
22991 value = force_reg (mode, gen_lowpart (mode, value));
22992 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
22993 }
22994 else
22995 {
22996 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
22997 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
22998 if (mode != QImode)
22999 {
23000 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
23001 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
23002 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
23003 }
23004 else
23005 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
23006 if (CONST_INT_P (count))
23007 {
23008 rounded_count = (INTVAL (count)
23009 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
23010 srcmem = shallow_copy_rtx (srcmem);
23011 set_mem_size (srcmem, rounded_count);
23012 }
23013 else
23014 {
23015 if (MEM_SIZE_KNOWN_P (srcmem))
23016 clear_mem_size (srcmem);
23017 }
23018 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
23019 destexp, srcexp));
23020 }
23021 }
23022
23023 /* This function emits moves to copy SIZE_TO_MOVE bytes from SRCMEM to
23024 DESTMEM.
23025 SRCMEM is passed by pointer so it can be updated on return.
23026 The return value is the updated DESTMEM. */
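/* For example (editor's illustration): with SIZE_TO_MOVE == 16 and a
   supported 16-byte vector move, a single load/store pair through a
   temporary register is emitted; otherwise the piece size is halved until a
   supported move mode is found.  */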
23027 static rtx
23028 emit_memmov (rtx destmem, rtx *srcmem, rtx destptr, rtx srcptr,
23029 HOST_WIDE_INT size_to_move)
23030 {
23031 rtx dst = destmem, src = *srcmem, adjust, tempreg;
23032 enum insn_code code;
23033 enum machine_mode move_mode;
23034 int piece_size, i;
23035
23036 /* Find the widest mode in which we could perform moves.
23037 Start with the biggest power of 2 not exceeding SIZE_TO_MOVE and halve
23038 it until a move of that size is supported. */
23039 piece_size = 1 << floor_log2 (size_to_move);
23040 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23041 code = optab_handler (mov_optab, move_mode);
23042 while (code == CODE_FOR_nothing && piece_size > 1)
23043 {
23044 piece_size >>= 1;
23045 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23046 code = optab_handler (mov_optab, move_mode);
23047 }
23048
23049 /* Find the corresponding vector mode with the same size as MOVE_MODE.
23050 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
23051 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
23052 {
23053 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
23054 move_mode = mode_for_vector (word_mode, nunits);
23055 code = optab_handler (mov_optab, move_mode);
23056 if (code == CODE_FOR_nothing)
23057 {
23058 move_mode = word_mode;
23059 piece_size = GET_MODE_SIZE (move_mode);
23060 code = optab_handler (mov_optab, move_mode);
23061 }
23062 }
23063 gcc_assert (code != CODE_FOR_nothing);
23064
23065 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23066 src = adjust_automodify_address_nv (src, move_mode, srcptr, 0);
23067
23068 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZE moves. */
23069 gcc_assert (size_to_move % piece_size == 0);
23070 adjust = GEN_INT (piece_size);
23071 for (i = 0; i < size_to_move; i += piece_size)
23072 {
23073 /* We move from memory to memory, so we'll need to do it via
23074 a temporary register. */
23075 tempreg = gen_reg_rtx (move_mode);
23076 emit_insn (GEN_FCN (code) (tempreg, src));
23077 emit_insn (GEN_FCN (code) (dst, tempreg));
23078
23079 emit_move_insn (destptr,
23080 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23081 emit_move_insn (srcptr,
23082 gen_rtx_PLUS (Pmode, copy_rtx (srcptr), adjust));
23083
23084 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23085 piece_size);
23086 src = adjust_automodify_address_nv (src, move_mode, srcptr,
23087 piece_size);
23088 }
23089
23090 /* Update DST and SRC rtx. */
23091 *srcmem = src;
23092 return dst;
23093 }
23094
23095 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
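/* For example (editor's illustration): with a constant COUNT whose remainder
   modulo MAX_SIZE is 7 and MAX_SIZE == 8, the loop below emits a 4-byte,
   a 2-byte and a 1-byte move.  */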
23096 static void
23097 expand_movmem_epilogue (rtx destmem, rtx srcmem,
23098 rtx destptr, rtx srcptr, rtx count, int max_size)
23099 {
23100 rtx src, dest;
23101 if (CONST_INT_P (count))
23102 {
23103 HOST_WIDE_INT countval = INTVAL (count);
23104 HOST_WIDE_INT epilogue_size = countval % max_size;
23105 int i;
23106
23107 /* For now MAX_SIZE should be a power of 2. This assert could be
23108 relaxed, but it would require a somewhat more complicated epilogue
23109 expansion. */
23110 gcc_assert ((max_size & (max_size - 1)) == 0);
23111 for (i = max_size; i >= 1; i >>= 1)
23112 {
23113 if (epilogue_size & i)
23114 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
23115 }
23116 return;
23117 }
23118 if (max_size > 8)
23119 {
23120 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
23121 count, 1, OPTAB_DIRECT);
23122 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
23123 count, QImode, 1, 4, false);
23124 return;
23125 }
23126
23127 /* When single stringop instructions are available, we can cheaply increase
23128 the dest and src pointers. Otherwise we save code size by maintaining an
23129 offset (zero is readily available from the preceding rep operation) and
23130 using x86 addressing modes. */
23131 if (TARGET_SINGLE_STRINGOP)
23132 {
23133 if (max_size > 4)
23134 {
23135 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
23136 src = change_address (srcmem, SImode, srcptr);
23137 dest = change_address (destmem, SImode, destptr);
23138 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23139 emit_label (label);
23140 LABEL_NUSES (label) = 1;
23141 }
23142 if (max_size > 2)
23143 {
23144 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
23145 src = change_address (srcmem, HImode, srcptr);
23146 dest = change_address (destmem, HImode, destptr);
23147 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23148 emit_label (label);
23149 LABEL_NUSES (label) = 1;
23150 }
23151 if (max_size > 1)
23152 {
23153 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
23154 src = change_address (srcmem, QImode, srcptr);
23155 dest = change_address (destmem, QImode, destptr);
23156 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23157 emit_label (label);
23158 LABEL_NUSES (label) = 1;
23159 }
23160 }
23161 else
23162 {
23163 rtx offset = force_reg (Pmode, const0_rtx);
23164 rtx tmp;
23165
23166 if (max_size > 4)
23167 {
23168 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
23169 src = change_address (srcmem, SImode, srcptr);
23170 dest = change_address (destmem, SImode, destptr);
23171 emit_move_insn (dest, src);
23172 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
23173 true, OPTAB_LIB_WIDEN);
23174 if (tmp != offset)
23175 emit_move_insn (offset, tmp);
23176 emit_label (label);
23177 LABEL_NUSES (label) = 1;
23178 }
23179 if (max_size > 2)
23180 {
23181 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
23182 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23183 src = change_address (srcmem, HImode, tmp);
23184 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23185 dest = change_address (destmem, HImode, tmp);
23186 emit_move_insn (dest, src);
23187 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
23188 true, OPTAB_LIB_WIDEN);
23189 if (tmp != offset)
23190 emit_move_insn (offset, tmp);
23191 emit_label (label);
23192 LABEL_NUSES (label) = 1;
23193 }
23194 if (max_size > 1)
23195 {
23196 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
23197 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23198 src = change_address (srcmem, QImode, tmp);
23199 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23200 dest = change_address (destmem, QImode, tmp);
23201 emit_move_insn (dest, src);
23202 emit_label (label);
23203 LABEL_NUSES (label) = 1;
23204 }
23205 }
23206 }
23207
23208 /* This function emits stores to fill SIZE_TO_MOVE bytes starting at DESTMEM
23209 with the value PROMOTED_VAL.
23210 DESTPTR is advanced as the stores are emitted.
23211 The return value is the updated DESTMEM. */
23212 static rtx
23213 emit_memset (rtx destmem, rtx destptr, rtx promoted_val,
23214 HOST_WIDE_INT size_to_move)
23215 {
23216 rtx dst = destmem, adjust;
23217 enum insn_code code;
23218 enum machine_mode move_mode;
23219 int piece_size, i;
23220
23221 /* Find the widest mode in which we could perform the stores.
23222 Start from the mode of PROMOTED_VAL and shrink it if SIZE_TO_MOVE
23223 is smaller than that mode's size. */
23224 move_mode = GET_MODE (promoted_val);
23225 if (move_mode == VOIDmode)
23226 move_mode = QImode;
23227 if (size_to_move < GET_MODE_SIZE (move_mode))
23228 {
23229 move_mode = mode_for_size (size_to_move * BITS_PER_UNIT, MODE_INT, 0);
23230 promoted_val = gen_lowpart (move_mode, promoted_val);
23231 }
23232 piece_size = GET_MODE_SIZE (move_mode);
23233 code = optab_handler (mov_optab, move_mode);
23234 gcc_assert (code != CODE_FOR_nothing && promoted_val != NULL_RTX);
23235
23236 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23237
23238 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZE moves. */
23239 gcc_assert (size_to_move % piece_size == 0);
23240 adjust = GEN_INT (piece_size);
23241 for (i = 0; i < size_to_move; i += piece_size)
23242 {
23243 if (piece_size <= GET_MODE_SIZE (word_mode))
23244 {
23245 emit_insn (gen_strset (destptr, dst, promoted_val));
23246 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23247 piece_size);
23248 continue;
23249 }
23250
23251 emit_insn (GEN_FCN (code) (dst, promoted_val));
23252
23253 emit_move_insn (destptr,
23254 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23255
23256 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23257 piece_size);
23258 }
23259
23260 /* Update DST rtx. */
23261 return dst;
23262 }
23263 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
23264 static void
23265 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
23266 rtx count, int max_size)
23267 {
23268 count =
23269 expand_simple_binop (counter_mode (count), AND, count,
23270 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
23271 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
23272 gen_lowpart (QImode, value), count, QImode,
23273 1, max_size / 2, true);
23274 }
23275
23276 /* Output code to set at most count & (max_size - 1) bytes starting at DESTMEM. */
23277 static void
23278 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx vec_value,
23279 rtx count, int max_size)
23280 {
23281 rtx dest;
23282
23283 if (CONST_INT_P (count))
23284 {
23285 HOST_WIDE_INT countval = INTVAL (count);
23286 HOST_WIDE_INT epilogue_size = countval % max_size;
23287 int i;
23288
23289 /* For now MAX_SIZE should be a power of 2. This assert could be
23290 relaxed, but it would require a somewhat more complicated epilogue
23291 expansion. */
23292 gcc_assert ((max_size & (max_size - 1)) == 0);
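      /* For intuition (example values only): with MAX_SIZE == 16 and
	 COUNTVAL % 16 == 7, the loop below emits one 4-byte, one 2-byte and
	 one 1-byte store, walking the set bits of the epilogue size.  */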
23293 for (i = max_size; i >= 1; i >>= 1)
23294 {
23295 if (epilogue_size & i)
23296 {
23297 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
23298 destmem = emit_memset (destmem, destptr, vec_value, i);
23299 else
23300 destmem = emit_memset (destmem, destptr, value, i);
23301 }
23302 }
23303 return;
23304 }
23305 if (max_size > 32)
23306 {
23307 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
23308 return;
23309 }
23310 if (max_size > 16)
23311 {
23312 rtx_code_label *label = ix86_expand_aligntest (count, 16, true);
23313 if (TARGET_64BIT)
23314 {
23315 dest = change_address (destmem, DImode, destptr);
23316 emit_insn (gen_strset (destptr, dest, value));
23317 dest = adjust_automodify_address_nv (dest, DImode, destptr, 8);
23318 emit_insn (gen_strset (destptr, dest, value));
23319 }
23320 else
23321 {
23322 dest = change_address (destmem, SImode, destptr);
23323 emit_insn (gen_strset (destptr, dest, value));
23324 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
23325 emit_insn (gen_strset (destptr, dest, value));
23326 dest = adjust_automodify_address_nv (dest, SImode, destptr, 8);
23327 emit_insn (gen_strset (destptr, dest, value));
23328 dest = adjust_automodify_address_nv (dest, SImode, destptr, 12);
23329 emit_insn (gen_strset (destptr, dest, value));
23330 }
23331 emit_label (label);
23332 LABEL_NUSES (label) = 1;
23333 }
23334 if (max_size > 8)
23335 {
23336 rtx_code_label *label = ix86_expand_aligntest (count, 8, true);
23337 if (TARGET_64BIT)
23338 {
23339 dest = change_address (destmem, DImode, destptr);
23340 emit_insn (gen_strset (destptr, dest, value));
23341 }
23342 else
23343 {
23344 dest = change_address (destmem, SImode, destptr);
23345 emit_insn (gen_strset (destptr, dest, value));
23346 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
23347 emit_insn (gen_strset (destptr, dest, value));
23348 }
23349 emit_label (label);
23350 LABEL_NUSES (label) = 1;
23351 }
23352 if (max_size > 4)
23353 {
23354 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
23355 dest = change_address (destmem, SImode, destptr);
23356 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
23357 emit_label (label);
23358 LABEL_NUSES (label) = 1;
23359 }
23360 if (max_size > 2)
23361 {
23362 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
23363 dest = change_address (destmem, HImode, destptr);
23364 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
23365 emit_label (label);
23366 LABEL_NUSES (label) = 1;
23367 }
23368 if (max_size > 1)
23369 {
23370 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
23371 dest = change_address (destmem, QImode, destptr);
23372 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
23373 emit_label (label);
23374 LABEL_NUSES (label) = 1;
23375 }
23376 }
23377
23378 /* Depending on ISSETMEM, copy enough bytes from SRCMEM to DESTMEM, or store
23379 enough bytes into DESTMEM, to align it to DESIRED_ALIGNMENT. The original
23380 alignment is ALIGN. Depending on ISSETMEM, either the arguments SRCMEM/SRCPTR
23381 or VALUE/VEC_VALUE are ignored.
23382 Return value is the updated DESTMEM. */
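/* Example for intuition: with ALIGN == 1 and DESIRED_ALIGNMENT == 8 the loop
   below emits three conditional blocks that copy (or store) 1, 2 and 4 bytes
   respectively, each guarded by a test of the corresponding low bit of
   DESTPTR, so that afterwards DESTPTR is 8-byte aligned.  */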
23383 static rtx
23384 expand_set_or_movmem_prologue (rtx destmem, rtx srcmem,
23385 rtx destptr, rtx srcptr, rtx value,
23386 rtx vec_value, rtx count, int align,
23387 int desired_alignment, bool issetmem)
23388 {
23389 int i;
23390 for (i = 1; i < desired_alignment; i <<= 1)
23391 {
23392 if (align <= i)
23393 {
23394 rtx_code_label *label = ix86_expand_aligntest (destptr, i, false);
23395 if (issetmem)
23396 {
23397 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
23398 destmem = emit_memset (destmem, destptr, vec_value, i);
23399 else
23400 destmem = emit_memset (destmem, destptr, value, i);
23401 }
23402 else
23403 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
23404 ix86_adjust_counter (count, i);
23405 emit_label (label);
23406 LABEL_NUSES (label) = 1;
23407 set_mem_align (destmem, i * 2 * BITS_PER_UNIT);
23408 }
23409 }
23410 return destmem;
23411 }
23412
23413 /* Test if COUNT & SIZE is nonzero and if so, expand a movmem
23414 or setmem sequence that is valid for SIZE..2*SIZE-1 bytes
23415 and jump to DONE_LABEL. */
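/* For intuition only, a rough C sketch of the overlapping-move idea used
   below, assuming SIZE == 8 for concreteness (names are illustrative; the
   real code emits RTL, also handles the setmem case and vector modes, and
   is guarded by the COUNT & SIZE test mentioned above):

     #include <stdint.h>
     #include <string.h>

     static void
     copy_8_to_15 (char *d, const char *s, size_t n)   // 8 <= n <= 15
     {
       uint64_t head, tail;
       memcpy (&head, s, 8);            // first 8 bytes
       memcpy (&tail, s + n - 8, 8);    // last 8 bytes, may overlap head
       memcpy (d, &head, 8);
       memcpy (d + n - 8, &tail, 8);
     }
*/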
23416 static void
23417 expand_small_movmem_or_setmem (rtx destmem, rtx srcmem,
23418 rtx destptr, rtx srcptr,
23419 rtx value, rtx vec_value,
23420 rtx count, int size,
23421 rtx done_label, bool issetmem)
23422 {
23423 rtx_code_label *label = ix86_expand_aligntest (count, size, false);
23424 enum machine_mode mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 1);
23425 rtx modesize;
23426 int n;
23427
23428 /* If we do not have a vector value to copy, we must reduce the size. */
23429 if (issetmem)
23430 {
23431 if (!vec_value)
23432 {
23433 if (GET_MODE (value) == VOIDmode && size > 8)
23434 mode = Pmode;
23435 else if (GET_MODE_SIZE (mode) > GET_MODE_SIZE (GET_MODE (value)))
23436 mode = GET_MODE (value);
23437 }
23438 else
23439 mode = GET_MODE (vec_value), value = vec_value;
23440 }
23441 else
23442 {
23443 /* Choose appropriate vector mode. */
23444 if (size >= 32)
23445 mode = TARGET_AVX ? V32QImode : TARGET_SSE ? V16QImode : DImode;
23446 else if (size >= 16)
23447 mode = TARGET_SSE ? V16QImode : DImode;
23448 srcmem = change_address (srcmem, mode, srcptr);
23449 }
23450 destmem = change_address (destmem, mode, destptr);
23451 modesize = GEN_INT (GET_MODE_SIZE (mode));
23452 gcc_assert (GET_MODE_SIZE (mode) <= size);
23453 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
23454 {
23455 if (issetmem)
23456 emit_move_insn (destmem, gen_lowpart (mode, value));
23457 else
23458 {
23459 emit_move_insn (destmem, srcmem);
23460 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
23461 }
23462 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
23463 }
23464
23465 destmem = offset_address (destmem, count, 1);
23466 destmem = offset_address (destmem, GEN_INT (-2 * size),
23467 GET_MODE_SIZE (mode));
23468 if (!issetmem)
23469 {
23470 srcmem = offset_address (srcmem, count, 1);
23471 srcmem = offset_address (srcmem, GEN_INT (-2 * size),
23472 GET_MODE_SIZE (mode));
23473 }
23474 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
23475 {
23476 if (issetmem)
23477 emit_move_insn (destmem, gen_lowpart (mode, value));
23478 else
23479 {
23480 emit_move_insn (destmem, srcmem);
23481 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
23482 }
23483 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
23484 }
23485 emit_jump_insn (gen_jump (done_label));
23486 emit_barrier ();
23487
23488 emit_label (label);
23489 LABEL_NUSES (label) = 1;
23490 }
23491
23492 /* Handle a small memcpy (up to SIZE, which is supposed to be a small power of 2)
23493 and get ready for the main memcpy loop by copying the initial DESIRED_ALIGN-ALIGN
23494 bytes and the last SIZE bytes, adjusting DESTPTR/SRCPTR/COUNT so that we can
23495 proceed with a loop copying SIZE bytes at once. Do moves in MODE.
23496 DONE_LABEL is a label after the whole copying sequence. The label is created
23497 on demand if *DONE_LABEL is NULL.
23498 MIN_SIZE is the minimal size of the copied block. This value gets adjusted for
23499 the new bounds after the initial copies.
23500
23501 DESTMEM/SRCMEM are memory expressions pointing to the copied block,
23502 DESTPTR/SRCPTR are pointers to the block. DYNAMIC_CHECK indicates whether
23503 we will dispatch to a library call for large blocks.
23504
23505 In pseudocode we do:
23506
23507 if (COUNT < SIZE)
23508 {
23509 Assume that SIZE is 4. Bigger sizes are handled analogously
23510 if (COUNT & 4)
23511 {
23512 copy 4 bytes from SRCPTR to DESTPTR
23513 copy 4 bytes from SRCPTR + COUNT - 4 to DESTPTR + COUNT - 4
23514 goto done_label
23515 }
23516 if (!COUNT)
23517 goto done_label;
23518 copy 1 byte from SRCPTR to DESTPTR
23519 if (COUNT & 2)
23520 {
23521 copy 2 bytes from SRCPTR to DESTPTR
23522 copy 2 bytes from SRCPTR + COUNT - 2 to DESTPTR + COUNT - 2
23523 }
23524 }
23525 else
23526 {
23527 copy at least DESIRED_ALIGN-ALIGN bytes from SRCPTR to DESTPTR
23528 copy SIZE bytes from SRCPTR + COUNT - SIZE to DESTPTR + COUNT -SIZE
23529
23530 OLD_DESTPTR = DESTPTR;
23531 Align DESTPTR up to DESIRED_ALIGN
23532 SRCPTR += DESTPTR - OLD_DESTPTR
23533 COUNT -= DESTPTR - OLD_DESTPTR
23534 if (DYNAMIC_CHECK)
23535 Round COUNT down to multiple of SIZE
23536 << optional caller supplied zero size guard is here >>
23537 << optional caller supplied dynamic check is here >>
23538 << caller supplied main copy loop is here >>
23539 }
23540 done_label:
23541 */
23542 static void
23543 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves (rtx destmem, rtx srcmem,
23544 rtx *destptr, rtx *srcptr,
23545 enum machine_mode mode,
23546 rtx value, rtx vec_value,
23547 rtx *count,
23548 rtx_code_label **done_label,
23549 int size,
23550 int desired_align,
23551 int align,
23552 unsigned HOST_WIDE_INT *min_size,
23553 bool dynamic_check,
23554 bool issetmem)
23555 {
23556 rtx_code_label *loop_label = NULL, *label;
23557 int n;
23558 rtx modesize;
23559 int prolog_size = 0;
23560 rtx mode_value;
23561
23562 /* Choose the proper value to copy. */
23563 if (issetmem && VECTOR_MODE_P (mode))
23564 mode_value = vec_value;
23565 else
23566 mode_value = value;
23567 gcc_assert (GET_MODE_SIZE (mode) <= size);
23568
23569 /* See if block is big or small, handle small blocks. */
23570 if (!CONST_INT_P (*count) && *min_size < (unsigned HOST_WIDE_INT)size)
23571 {
23572 int size2 = size;
23573 loop_label = gen_label_rtx ();
23574
23575 if (!*done_label)
23576 *done_label = gen_label_rtx ();
23577
23578 emit_cmp_and_jump_insns (*count, GEN_INT (size2), GE, 0, GET_MODE (*count),
23579 1, loop_label);
23580 size2 >>= 1;
23581
23582 /* Handle sizes > 3. */
23583 for (;size2 > 2; size2 >>= 1)
23584 expand_small_movmem_or_setmem (destmem, srcmem,
23585 *destptr, *srcptr,
23586 value, vec_value,
23587 *count,
23588 size2, *done_label, issetmem);
23589 /* Nothing to copy? Jump to DONE_LABEL if so. */
23590 emit_cmp_and_jump_insns (*count, const0_rtx, EQ, 0, GET_MODE (*count),
23591 1, *done_label);
23592
23593 /* Do a byte copy. */
23594 destmem = change_address (destmem, QImode, *destptr);
23595 if (issetmem)
23596 emit_move_insn (destmem, gen_lowpart (QImode, value));
23597 else
23598 {
23599 srcmem = change_address (srcmem, QImode, *srcptr);
23600 emit_move_insn (destmem, srcmem);
23601 }
23602
23603 /* Handle sizes 2 and 3. */
23604 label = ix86_expand_aligntest (*count, 2, false);
23605 destmem = change_address (destmem, HImode, *destptr);
23606 destmem = offset_address (destmem, *count, 1);
23607 destmem = offset_address (destmem, GEN_INT (-2), 2);
23608 if (issetmem)
23609 emit_move_insn (destmem, gen_lowpart (HImode, value));
23610 else
23611 {
23612 srcmem = change_address (srcmem, HImode, *srcptr);
23613 srcmem = offset_address (srcmem, *count, 1);
23614 srcmem = offset_address (srcmem, GEN_INT (-2), 2);
23615 emit_move_insn (destmem, srcmem);
23616 }
23617
23618 emit_label (label);
23619 LABEL_NUSES (label) = 1;
23620 emit_jump_insn (gen_jump (*done_label));
23621 emit_barrier ();
23622 }
23623 else
23624 gcc_assert (*min_size >= (unsigned HOST_WIDE_INT)size
23625 || UINTVAL (*count) >= (unsigned HOST_WIDE_INT)size);
23626
23627 /* Start memcpy for COUNT >= SIZE. */
23628 if (loop_label)
23629 {
23630 emit_label (loop_label);
23631 LABEL_NUSES (loop_label) = 1;
23632 }
23633
23634 /* Copy the first DESIRED_ALIGN - ALIGN bytes (rounded up to whole moves in MODE). */
23635 if (!issetmem)
23636 srcmem = change_address (srcmem, mode, *srcptr);
23637 destmem = change_address (destmem, mode, *destptr);
23638 modesize = GEN_INT (GET_MODE_SIZE (mode));
23639 for (n = 0; prolog_size < desired_align - align; n++)
23640 {
23641 if (issetmem)
23642 emit_move_insn (destmem, mode_value);
23643 else
23644 {
23645 emit_move_insn (destmem, srcmem);
23646 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
23647 }
23648 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
23649 prolog_size += GET_MODE_SIZE (mode);
23650 }
23651
23652
23653 /* Copy last SIZE bytes. */
23654 destmem = offset_address (destmem, *count, 1);
23655 destmem = offset_address (destmem,
23656 GEN_INT (-size - prolog_size),
23657 1);
23658 if (issetmem)
23659 emit_move_insn (destmem, mode_value);
23660 else
23661 {
23662 srcmem = offset_address (srcmem, *count, 1);
23663 srcmem = offset_address (srcmem,
23664 GEN_INT (-size - prolog_size),
23665 1);
23666 emit_move_insn (destmem, srcmem);
23667 }
23668 for (n = 1; n * GET_MODE_SIZE (mode) < size; n++)
23669 {
23670 destmem = offset_address (destmem, modesize, 1);
23671 if (issetmem)
23672 emit_move_insn (destmem, mode_value);
23673 else
23674 {
23675 srcmem = offset_address (srcmem, modesize, 1);
23676 emit_move_insn (destmem, srcmem);
23677 }
23678 }
23679
23680 /* Align destination. */
23681 if (desired_align > 1 && desired_align > align)
23682 {
23683 rtx saveddest = *destptr;
23684
23685 gcc_assert (desired_align <= size);
23686 /* Align DESTPTR up and place it in a new register. */
23687 *destptr = expand_simple_binop (GET_MODE (*destptr), PLUS, *destptr,
23688 GEN_INT (prolog_size),
23689 NULL_RTX, 1, OPTAB_DIRECT);
23690 *destptr = expand_simple_binop (GET_MODE (*destptr), AND, *destptr,
23691 GEN_INT (-desired_align),
23692 *destptr, 1, OPTAB_DIRECT);
23693 /* See how many bytes we skipped. */
23694 saveddest = expand_simple_binop (GET_MODE (*destptr), MINUS, saveddest,
23695 *destptr,
23696 saveddest, 1, OPTAB_DIRECT);
23697 /* Adjust srcptr and count. */
23698 if (!issetmem)
23699 *srcptr = expand_simple_binop (GET_MODE (*srcptr), MINUS, *srcptr, saveddest,
23700 *srcptr, 1, OPTAB_DIRECT);
23701 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
23702 saveddest, *count, 1, OPTAB_DIRECT);
23703 /* We copied at most size + prolog_size. */
23704 if (*min_size > (unsigned HOST_WIDE_INT)(size + prolog_size))
23705 *min_size = (*min_size - size) & ~(unsigned HOST_WIDE_INT)(size - 1);
23706 else
23707 *min_size = 0;
23708
23709 /* Our loops always round down the block size, but for dispatch to a library
23710 call we need the precise value. */
23711 if (dynamic_check)
23712 *count = expand_simple_binop (GET_MODE (*count), AND, *count,
23713 GEN_INT (-size), *count, 1, OPTAB_DIRECT);
23714 }
23715 else
23716 {
23717 gcc_assert (prolog_size == 0);
23718 /* Decrease COUNT, so we won't end up copying the last word twice. */
23719 if (!CONST_INT_P (*count))
23720 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
23721 constm1_rtx, *count, 1, OPTAB_DIRECT);
23722 else
23723 *count = GEN_INT ((UINTVAL (*count) - 1) & ~(unsigned HOST_WIDE_INT)(size - 1));
23724 if (*min_size)
23725 *min_size = (*min_size - 1) & ~(unsigned HOST_WIDE_INT)(size - 1);
23726 }
23727 }
23728
23729
23730 /* This function is like the previous one, except here we know how many bytes
23731 need to be copied. That allows us to update alignment not only of DST, which
23732 is returned, but also of SRC, which is passed as a pointer for that
23733 reason. */
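/* Example (for intuition only): with ALIGN_BYTES == 5 and DESIRED_ALIGN == 8,
   the loop below emits a 1-byte and then a 4-byte copy (or store), i.e. it
   walks the set bits of ALIGN_BYTES from the least significant bit upwards.  */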
23734 static rtx
23735 expand_set_or_movmem_constant_prologue (rtx dst, rtx *srcp, rtx destreg,
23736 rtx srcreg, rtx value, rtx vec_value,
23737 int desired_align, int align_bytes,
23738 bool issetmem)
23739 {
23740 rtx src = NULL;
23741 rtx orig_dst = dst;
23742 rtx orig_src = NULL;
23743 int piece_size = 1;
23744 int copied_bytes = 0;
23745
23746 if (!issetmem)
23747 {
23748 gcc_assert (srcp != NULL);
23749 src = *srcp;
23750 orig_src = src;
23751 }
23752
23753 for (piece_size = 1;
23754 piece_size <= desired_align && copied_bytes < align_bytes;
23755 piece_size <<= 1)
23756 {
23757 if (align_bytes & piece_size)
23758 {
23759 if (issetmem)
23760 {
23761 if (vec_value && piece_size > GET_MODE_SIZE (GET_MODE (value)))
23762 dst = emit_memset (dst, destreg, vec_value, piece_size);
23763 else
23764 dst = emit_memset (dst, destreg, value, piece_size);
23765 }
23766 else
23767 dst = emit_memmov (dst, &src, destreg, srcreg, piece_size);
23768 copied_bytes += piece_size;
23769 }
23770 }
23771 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
23772 set_mem_align (dst, desired_align * BITS_PER_UNIT);
23773 if (MEM_SIZE_KNOWN_P (orig_dst))
23774 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
23775
23776 if (!issetmem)
23777 {
23778 int src_align_bytes = get_mem_align_offset (src, desired_align
23779 * BITS_PER_UNIT);
23780 if (src_align_bytes >= 0)
23781 src_align_bytes = desired_align - src_align_bytes;
23782 if (src_align_bytes >= 0)
23783 {
23784 unsigned int src_align;
23785 for (src_align = desired_align; src_align >= 2; src_align >>= 1)
23786 {
23787 if ((src_align_bytes & (src_align - 1))
23788 == (align_bytes & (src_align - 1)))
23789 break;
23790 }
23791 if (src_align > (unsigned int) desired_align)
23792 src_align = desired_align;
23793 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
23794 set_mem_align (src, src_align * BITS_PER_UNIT);
23795 }
23796 if (MEM_SIZE_KNOWN_P (orig_src))
23797 set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
23798 *srcp = src;
23799 }
23800
23801 return dst;
23802 }
23803
23804 /* Return true if ALG can be used in current context.
23805 Assume we expand memset if MEMSET is true. */
23806 static bool
23807 alg_usable_p (enum stringop_alg alg, bool memset)
23808 {
23809 if (alg == no_stringop)
23810 return false;
23811 if (alg == vector_loop)
23812 return TARGET_SSE || TARGET_AVX;
23813 /* Algorithms using the rep prefix want at least edi and ecx;
23814 additionally, memset wants eax and memcpy wants esi. Don't
23815 consider such algorithms if the user has appropriated those
23816 registers for their own purposes. */
23817 if (alg == rep_prefix_1_byte
23818 || alg == rep_prefix_4_byte
23819 || alg == rep_prefix_8_byte)
23820 return !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
23821 || (memset ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
23822 return true;
23823 }
23824
23825 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
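/* For intuition, with made-up numbers: if the active cost table were
     {{256, loop, false}, {8192, rep_prefix_4_byte, false}, {-1, libcall, false}}
   and EXPECTED_SIZE == 1000, the size loop below would pick rep_prefix_4_byte,
   since 1000 <= 8192 and that entry is usable; sizes above 8192 would fall
   through to the libcall entry.  */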
23826 static enum stringop_alg
23827 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
23828 unsigned HOST_WIDE_INT min_size, unsigned HOST_WIDE_INT max_size,
23829 bool memset, bool zero_memset, int *dynamic_check, bool *noalign)
23830 {
23831 const struct stringop_algs * algs;
23832 bool optimize_for_speed;
23833 int max = 0;
23834 const struct processor_costs *cost;
23835 int i;
23836 bool any_alg_usable_p = false;
23837
23838 *noalign = false;
23839 *dynamic_check = -1;
23840
23841 /* Even if the string operation call is cold, we still might spend a lot
23842 of time processing large blocks. */
23843 if (optimize_function_for_size_p (cfun)
23844 || (optimize_insn_for_size_p ()
23845 && (max_size < 256
23846 || (expected_size != -1 && expected_size < 256))))
23847 optimize_for_speed = false;
23848 else
23849 optimize_for_speed = true;
23850
23851 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
23852 if (memset)
23853 algs = &cost->memset[TARGET_64BIT != 0];
23854 else
23855 algs = &cost->memcpy[TARGET_64BIT != 0];
23856
23857 /* Find the maximal size covered by some usable non-libcall algorithm. */
23858 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
23859 {
23860 enum stringop_alg candidate = algs->size[i].alg;
23861 bool usable = alg_usable_p (candidate, memset);
23862 any_alg_usable_p |= usable;
23863
23864 if (candidate != libcall && candidate && usable)
23865 max = algs->size[i].max;
23866 }
23867
23868 /* If the expected size is not known but the max size is small enough
23869 that the inline version is a win, set the expected size into
23870 the range. */
23871 if (((max > 1 && (unsigned HOST_WIDE_INT) max >= max_size) || max == -1)
23872 && expected_size == -1)
23873 expected_size = min_size / 2 + max_size / 2;
23874
23875 /* If the user specified the algorithm, honor it if possible. */
23876 if (ix86_stringop_alg != no_stringop
23877 && alg_usable_p (ix86_stringop_alg, memset))
23878 return ix86_stringop_alg;
23879 /* rep; movq or rep; movl is the smallest variant. */
23880 else if (!optimize_for_speed)
23881 {
23882 *noalign = true;
23883 if (!count || (count & 3) || (memset && !zero_memset))
23884 return alg_usable_p (rep_prefix_1_byte, memset)
23885 ? rep_prefix_1_byte : loop_1_byte;
23886 else
23887 return alg_usable_p (rep_prefix_4_byte, memset)
23888 ? rep_prefix_4_byte : loop;
23889 }
23890 /* Very tiny blocks are best handled via the loop; REP is expensive to
23891 set up. */
23892 else if (expected_size != -1 && expected_size < 4)
23893 return loop_1_byte;
23894 else if (expected_size != -1)
23895 {
23896 enum stringop_alg alg = libcall;
23897 bool alg_noalign = false;
23898 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
23899 {
23900 /* We get here if the algorithms that were not libcall-based
23901 were rep-prefix based and we are unable to use rep prefixes
23902 based on global register usage. Break out of the loop and
23903 use the heuristic below. */
23904 if (algs->size[i].max == 0)
23905 break;
23906 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
23907 {
23908 enum stringop_alg candidate = algs->size[i].alg;
23909
23910 if (candidate != libcall && alg_usable_p (candidate, memset))
23911 {
23912 alg = candidate;
23913 alg_noalign = algs->size[i].noalign;
23914 }
23915 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
23916 last non-libcall inline algorithm. */
23917 if (TARGET_INLINE_ALL_STRINGOPS)
23918 {
23919 /* When the current size is best to be copied by a libcall,
23920 but we are still forced to inline, run the heuristic below
23921 that will pick code for medium sized blocks. */
23922 if (alg != libcall)
23923 {
23924 *noalign = alg_noalign;
23925 return alg;
23926 }
23927 break;
23928 }
23929 else if (alg_usable_p (candidate, memset))
23930 {
23931 *noalign = algs->size[i].noalign;
23932 return candidate;
23933 }
23934 }
23935 }
23936 }
23937 /* When asked to inline the call anyway, try to pick a meaningful choice.
23938 We look for the maximal size of block that is faster to copy by hand and
23939 take blocks of at most that size, guessing that the average size will
23940 be roughly half of the block.
23941
23942 If this turns out to be bad, we might simply specify the preferred
23943 choice in ix86_costs. */
23944 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
23945 && (algs->unknown_size == libcall
23946 || !alg_usable_p (algs->unknown_size, memset)))
23947 {
23948 enum stringop_alg alg;
23949
23950 /* If there aren't any usable algorithms, then recursing on
23951 smaller sizes isn't going to find anything. Just return the
23952 simple byte-at-a-time copy loop. */
23953 if (!any_alg_usable_p)
23954 {
23955 /* Pick something reasonable. */
23956 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
23957 *dynamic_check = 128;
23958 return loop_1_byte;
23959 }
23960 if (max <= 0)
23961 max = 4096;
23962 alg = decide_alg (count, max / 2, min_size, max_size, memset,
23963 zero_memset, dynamic_check, noalign);
23964 gcc_assert (*dynamic_check == -1);
23965 gcc_assert (alg != libcall);
23966 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
23967 *dynamic_check = max;
23968 return alg;
23969 }
23970 return (alg_usable_p (algs->unknown_size, memset)
23971 ? algs->unknown_size : libcall);
23972 }
23973
23974 /* Decide on alignment. We know that the operand is already aligned to ALIGN
23975 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
23976 static int
23977 decide_alignment (int align,
23978 enum stringop_alg alg,
23979 int expected_size,
23980 enum machine_mode move_mode)
23981 {
23982 int desired_align = 0;
23983
23984 gcc_assert (alg != no_stringop);
23985
23986 if (alg == libcall)
23987 return 0;
23988 if (move_mode == VOIDmode)
23989 return 0;
23990
23991 desired_align = GET_MODE_SIZE (move_mode);
23992 /* PentiumPro has special logic that triggers for 8-byte-aligned blocks,
23993 copying a whole cache line at once. */
23994 if (TARGET_PENTIUMPRO
23995 && (alg == rep_prefix_4_byte || alg == rep_prefix_1_byte))
23996 desired_align = 8;
23997
23998 if (optimize_size)
23999 desired_align = 1;
24000 if (desired_align < align)
24001 desired_align = align;
24002 if (expected_size != -1 && expected_size < 4)
24003 desired_align = align;
24004
24005 return desired_align;
24006 }
24007
24008
24009 /* Helper function for memset. For a QImode value 0xXY produce
24010 0xXYXYXYXY of the width specified by MODE. This is essentially
24011 a * 0x01010101, but we can do slightly better than
24012 synth_mult by unwinding the sequence by hand on CPUs with
24013 slow multiply. */
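/* Worked example of the constant case below (illustrative arithmetic only,
   not the emitted RTL): for VAL == 0xab and MODE == SImode,
     v = 0xab;  v |= v << 8;   ->  0xabab
                v |= v << 16;  ->  0xabababab
   and for DImode one further step, v |= (v << 16) << 16, yields
   0xabababababababab.  */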
24014 static rtx
24015 promote_duplicated_reg (enum machine_mode mode, rtx val)
24016 {
24017 enum machine_mode valmode = GET_MODE (val);
24018 rtx tmp;
24019 int nops = mode == DImode ? 3 : 2;
24020
24021 gcc_assert (mode == SImode || mode == DImode || val == const0_rtx);
24022 if (val == const0_rtx)
24023 return copy_to_mode_reg (mode, CONST0_RTX (mode));
24024 if (CONST_INT_P (val))
24025 {
24026 HOST_WIDE_INT v = INTVAL (val) & 255;
24027
24028 v |= v << 8;
24029 v |= v << 16;
24030 if (mode == DImode)
24031 v |= (v << 16) << 16;
24032 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
24033 }
24034
24035 if (valmode == VOIDmode)
24036 valmode = QImode;
24037 if (valmode != QImode)
24038 val = gen_lowpart (QImode, val);
24039 if (mode == QImode)
24040 return val;
24041 if (!TARGET_PARTIAL_REG_STALL)
24042 nops--;
24043 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
24044 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
24045 <= (ix86_cost->shift_const + ix86_cost->add) * nops
24046 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
24047 {
24048 rtx reg = convert_modes (mode, QImode, val, true);
24049 tmp = promote_duplicated_reg (mode, const1_rtx);
24050 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
24051 OPTAB_DIRECT);
24052 }
24053 else
24054 {
24055 rtx reg = convert_modes (mode, QImode, val, true);
24056
24057 if (!TARGET_PARTIAL_REG_STALL)
24058 if (mode == SImode)
24059 emit_insn (gen_movsi_insv_1 (reg, reg));
24060 else
24061 emit_insn (gen_movdi_insv_1 (reg, reg));
24062 else
24063 {
24064 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
24065 NULL, 1, OPTAB_DIRECT);
24066 reg =
24067 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24068 }
24069 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
24070 NULL, 1, OPTAB_DIRECT);
24071 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24072 if (mode == SImode)
24073 return reg;
24074 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
24075 NULL, 1, OPTAB_DIRECT);
24076 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24077 return reg;
24078 }
24079 }
24080
24081 /* Duplicate value VAL using promote_duplicated_reg into the maximal size that
24082 will be needed by the main loop copying SIZE_NEEDED chunks and by the prologue
24083 getting the alignment from ALIGN to DESIRED_ALIGN. */
24084 static rtx
24085 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align,
24086 int align)
24087 {
24088 rtx promoted_val;
24089
24090 if (TARGET_64BIT
24091 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
24092 promoted_val = promote_duplicated_reg (DImode, val);
24093 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
24094 promoted_val = promote_duplicated_reg (SImode, val);
24095 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
24096 promoted_val = promote_duplicated_reg (HImode, val);
24097 else
24098 promoted_val = val;
24099
24100 return promoted_val;
24101 }
24102
24103 /* Expand string move (memcpy) or store (memset) operation. Use i386 string
24104 operations when profitable. The code depends upon architecture, block size
24105 and alignment, but always has one of the following overall structures:
24106
24107 Aligned move sequence:
24108
24109 1) Prologue guard: Conditional that jumps up to epilogues for small
24110 blocks that can be handled by the epilogue alone. This is faster
24111 but also needed for correctness, since the prologue assumes the block
24112 is larger than the desired alignment.
24113
24114 Optional dynamic check for size and libcall for large
24115 blocks is emitted here too, with -minline-stringops-dynamically.
24116
24117 2) Prologue: copy first few bytes in order to get destination
24118 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
24119 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
24120 copied. We emit either a jump tree on power of two sized
24121 blocks, or a byte loop.
24122
24123 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24124 with specified algorithm.
24125
24126 4) Epilogue: code copying tail of the block that is too small to be
24127 handled by main body (or up to size guarded by prologue guard).
24128
24129 Misaligned move sequence
24130
24131 1) Misaligned move prologue/epilogue containing:
24132 a) Prologue handling small memory blocks and jumping to done_label
24133 (skipped if blocks are known to be large enough)
24134 b) Single possibly misaligned move copying the first DESIRED_ALIGN-ALIGN
24135 bytes if alignment is needed
24136 (skipped if alignment is not needed)
24137 c) Copy of last SIZE_NEEDED bytes by possibly misaligned moves
24138
24139 2) Zero size guard dispatching to done_label, if needed
24140
24141 3) Dispatch to a library call, if needed,
24142
24143 4) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24144 with the specified algorithm. */
24145 bool
24146 ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
24147 rtx align_exp, rtx expected_align_exp,
24148 rtx expected_size_exp, rtx min_size_exp,
24149 rtx max_size_exp, rtx probable_max_size_exp,
24150 bool issetmem)
24151 {
24152 rtx destreg;
24153 rtx srcreg = NULL;
24154 rtx_code_label *label = NULL;
24155 rtx tmp;
24156 rtx_code_label *jump_around_label = NULL;
24157 HOST_WIDE_INT align = 1;
24158 unsigned HOST_WIDE_INT count = 0;
24159 HOST_WIDE_INT expected_size = -1;
24160 int size_needed = 0, epilogue_size_needed;
24161 int desired_align = 0, align_bytes = 0;
24162 enum stringop_alg alg;
24163 rtx promoted_val = NULL;
24164 rtx vec_promoted_val = NULL;
24165 bool force_loopy_epilogue = false;
24166 int dynamic_check;
24167 bool need_zero_guard = false;
24168 bool noalign;
24169 enum machine_mode move_mode = VOIDmode;
24170 int unroll_factor = 1;
24171 /* TODO: Once value ranges are available, fill in proper data. */
24172 unsigned HOST_WIDE_INT min_size = 0;
24173 unsigned HOST_WIDE_INT max_size = -1;
24174 unsigned HOST_WIDE_INT probable_max_size = -1;
24175 bool misaligned_prologue_used = false;
24176
24177 if (CONST_INT_P (align_exp))
24178 align = INTVAL (align_exp);
24179 /* i386 can do misaligned access at a reasonable increase in cost. */
24180 if (CONST_INT_P (expected_align_exp)
24181 && INTVAL (expected_align_exp) > align)
24182 align = INTVAL (expected_align_exp);
24183 /* ALIGN is the minimum of destination and source alignment, but we care here
24184 just about destination alignment. */
24185 else if (!issetmem
24186 && MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
24187 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
24188
24189 if (CONST_INT_P (count_exp))
24190 {
24191 min_size = max_size = probable_max_size = count = expected_size
24192 = INTVAL (count_exp);
24193 /* When COUNT is 0, there is nothing to do. */
24194 if (!count)
24195 return true;
24196 }
24197 else
24198 {
24199 if (min_size_exp)
24200 min_size = INTVAL (min_size_exp);
24201 if (max_size_exp)
24202 max_size = INTVAL (max_size_exp);
24203 if (probable_max_size_exp)
24204 probable_max_size = INTVAL (probable_max_size_exp);
24205 if (CONST_INT_P (expected_size_exp))
24206 expected_size = INTVAL (expected_size_exp);
24207 }
24208
24209 /* Make sure we don't need to care about overflow later on. */
24210 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
24211 return false;
24212
24213 /* Step 0: Decide on preferred algorithm, desired alignment and
24214 size of chunks to be copied by main loop. */
24215 alg = decide_alg (count, expected_size, min_size, probable_max_size,
24216 issetmem,
24217 issetmem && val_exp == const0_rtx,
24218 &dynamic_check, &noalign);
24219 if (alg == libcall)
24220 return false;
24221 gcc_assert (alg != no_stringop);
24222
24223 /* For now the vector version of memset is generated only for memory zeroing, as
24224 creating the promoted vector value is very cheap in this case. */
24225 if (issetmem && alg == vector_loop && val_exp != const0_rtx)
24226 alg = unrolled_loop;
24227
24228 if (!count)
24229 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
24230 destreg = ix86_copy_addr_to_reg (XEXP (dst, 0));
24231 if (!issetmem)
24232 srcreg = ix86_copy_addr_to_reg (XEXP (src, 0));
24233
24234 unroll_factor = 1;
24235 move_mode = word_mode;
24236 switch (alg)
24237 {
24238 case libcall:
24239 case no_stringop:
24240 case last_alg:
24241 gcc_unreachable ();
24242 case loop_1_byte:
24243 need_zero_guard = true;
24244 move_mode = QImode;
24245 break;
24246 case loop:
24247 need_zero_guard = true;
24248 break;
24249 case unrolled_loop:
24250 need_zero_guard = true;
24251 unroll_factor = (TARGET_64BIT ? 4 : 2);
24252 break;
24253 case vector_loop:
24254 need_zero_guard = true;
24255 unroll_factor = 4;
24256 /* Find the widest supported mode. */
24257 move_mode = word_mode;
24258 while (optab_handler (mov_optab, GET_MODE_WIDER_MODE (move_mode))
24259 != CODE_FOR_nothing)
24260 move_mode = GET_MODE_WIDER_MODE (move_mode);
24261
24262 /* Find the corresponding vector mode with the same size as MOVE_MODE.
24263 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
24264 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
24265 {
24266 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
24267 move_mode = mode_for_vector (word_mode, nunits);
24268 if (optab_handler (mov_optab, move_mode) == CODE_FOR_nothing)
24269 move_mode = word_mode;
24270 }
24271 gcc_assert (optab_handler (mov_optab, move_mode) != CODE_FOR_nothing);
24272 break;
24273 case rep_prefix_8_byte:
24274 move_mode = DImode;
24275 break;
24276 case rep_prefix_4_byte:
24277 move_mode = SImode;
24278 break;
24279 case rep_prefix_1_byte:
24280 move_mode = QImode;
24281 break;
24282 }
24283 size_needed = GET_MODE_SIZE (move_mode) * unroll_factor;
24284 epilogue_size_needed = size_needed;
24285
24286 desired_align = decide_alignment (align, alg, expected_size, move_mode);
24287 if (!TARGET_ALIGN_STRINGOPS || noalign)
24288 align = desired_align;
24289
24290 /* Step 1: Prologue guard. */
24291
24292 /* Alignment code needs count to be in register. */
24293 if (CONST_INT_P (count_exp) && desired_align > align)
24294 {
24295 if (INTVAL (count_exp) > desired_align
24296 && INTVAL (count_exp) > size_needed)
24297 {
24298 align_bytes
24299 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
24300 if (align_bytes <= 0)
24301 align_bytes = 0;
24302 else
24303 align_bytes = desired_align - align_bytes;
24304 }
24305 if (align_bytes == 0)
24306 count_exp = force_reg (counter_mode (count_exp), count_exp);
24307 }
24308 gcc_assert (desired_align >= 1 && align >= 1);
24309
24310 /* Misaligned move sequences handle both prologue and epilogue at once.
24311 Default code generation results in smaller code for large alignments
24312 and also avoids redundant work when sizes are known precisely. */
24313 misaligned_prologue_used
24314 = (TARGET_MISALIGNED_MOVE_STRING_PRO_EPILOGUES
24315 && MAX (desired_align, epilogue_size_needed) <= 32
24316 && desired_align <= epilogue_size_needed
24317 && ((desired_align > align && !align_bytes)
24318 || (!count && epilogue_size_needed > 1)));
24319
24320 /* Do the cheap promotion to allow better CSE across the
24321 main loop and epilogue (i.e. one load of the big constant in
24322 front of all the code).
24323 For now the misaligned move sequences do not have a fast path
24324 without broadcasting. */
24325 if (issetmem && ((CONST_INT_P (val_exp) || misaligned_prologue_used)))
24326 {
24327 if (alg == vector_loop)
24328 {
24329 gcc_assert (val_exp == const0_rtx);
24330 vec_promoted_val = promote_duplicated_reg (move_mode, val_exp);
24331 promoted_val = promote_duplicated_reg_to_size (val_exp,
24332 GET_MODE_SIZE (word_mode),
24333 desired_align, align);
24334 }
24335 else
24336 {
24337 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
24338 desired_align, align);
24339 }
24340 }
24341 /* Misaligned move sequences handle both prologues and epilogues at once.
24342 Default code generation results in smaller code for large alignments and
24343 also avoids redundant work when sizes are known precisely. */
24344 if (misaligned_prologue_used)
24345 {
24346 /* The misaligned move prologue has handled small blocks by itself. */
24347 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves
24348 (dst, src, &destreg, &srcreg,
24349 move_mode, promoted_val, vec_promoted_val,
24350 &count_exp,
24351 &jump_around_label,
24352 desired_align < align
24353 ? MAX (desired_align, epilogue_size_needed) : epilogue_size_needed,
24354 desired_align, align, &min_size, dynamic_check, issetmem);
24355 if (!issetmem)
24356 src = change_address (src, BLKmode, srcreg);
24357 dst = change_address (dst, BLKmode, destreg);
24358 set_mem_align (dst, desired_align * BITS_PER_UNIT);
24359 epilogue_size_needed = 0;
24360 if (need_zero_guard && !min_size)
24361 {
24362 /* It is possible that we copied enough so the main loop will not
24363 execute. */
24364 gcc_assert (size_needed > 1);
24365 if (jump_around_label == NULL_RTX)
24366 jump_around_label = gen_label_rtx ();
24367 emit_cmp_and_jump_insns (count_exp,
24368 GEN_INT (size_needed),
24369 LTU, 0, counter_mode (count_exp), 1, jump_around_label);
24370 if (expected_size == -1
24371 || expected_size < (desired_align - align) / 2 + size_needed)
24372 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24373 else
24374 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24375 }
24376 }
24377 /* Ensure that alignment prologue won't copy past end of block. */
24378 else if (size_needed > 1 || (desired_align > 1 && desired_align > align))
24379 {
24380 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
24381 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
24382 Make sure it is power of 2. */
24383 epilogue_size_needed = 1 << (floor_log2 (epilogue_size_needed) + 1);
24384
24385 /* To improve performance of small blocks, we jump around the VAL
24386 promoting code. This means that if the promoted VAL is not constant,
24387 we might not use it in the epilogue and have to use the byte
24388 loop variant. */
24389 if (issetmem && epilogue_size_needed > 2 && !promoted_val)
24390 force_loopy_epilogue = true;
24391 if ((count && count < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24392 || max_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24393 {
24394 /* If main algorithm works on QImode, no epilogue is needed.
24395 For small sizes just don't align anything. */
24396 if (size_needed == 1)
24397 desired_align = align;
24398 else
24399 goto epilogue;
24400 }
24401 else if (!count
24402 && min_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24403 {
24404 label = gen_label_rtx ();
24405 emit_cmp_and_jump_insns (count_exp,
24406 GEN_INT (epilogue_size_needed),
24407 LTU, 0, counter_mode (count_exp), 1, label);
24408 if (expected_size == -1 || expected_size < epilogue_size_needed)
24409 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24410 else
24411 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24412 }
24413 }
24414
24415 /* Emit code to decide on runtime whether library call or inline should be
24416 used. */
24417 if (dynamic_check != -1)
24418 {
24419 if (!issetmem && CONST_INT_P (count_exp))
24420 {
24421 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
24422 {
24423 emit_block_move_via_libcall (dst, src, count_exp, false);
24424 count_exp = const0_rtx;
24425 goto epilogue;
24426 }
24427 }
24428 else
24429 {
24430 rtx_code_label *hot_label = gen_label_rtx ();
24431 if (jump_around_label == NULL_RTX)
24432 jump_around_label = gen_label_rtx ();
24433 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
24434 LEU, 0, counter_mode (count_exp),
24435 1, hot_label);
24436 predict_jump (REG_BR_PROB_BASE * 90 / 100);
24437 if (issetmem)
24438 set_storage_via_libcall (dst, count_exp, val_exp, false);
24439 else
24440 emit_block_move_via_libcall (dst, src, count_exp, false);
24441 emit_jump (jump_around_label);
24442 emit_label (hot_label);
24443 }
24444 }
24445
24446 /* Step 2: Alignment prologue. */
24447 /* Do the expensive promotion once we branched off the small blocks. */
24448 if (issetmem && !promoted_val)
24449 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
24450 desired_align, align);
24451
24452 if (desired_align > align && !misaligned_prologue_used)
24453 {
24454 if (align_bytes == 0)
24455 {
24456 /* Except for the first move in the prologue, we no longer know
24457 the constant offset in the aliasing info. It does not seem worth
24458 the pain to maintain it for the first move, so throw away
24459 the info early. */
24460 dst = change_address (dst, BLKmode, destreg);
24461 if (!issetmem)
24462 src = change_address (src, BLKmode, srcreg);
24463 dst = expand_set_or_movmem_prologue (dst, src, destreg, srcreg,
24464 promoted_val, vec_promoted_val,
24465 count_exp, align, desired_align,
24466 issetmem);
24467 /* At most desired_align - align bytes are copied. */
24468 if (min_size < (unsigned)(desired_align - align))
24469 min_size = 0;
24470 else
24471 min_size -= desired_align - align;
24472 }
24473 else
24474 {
24475 /* If we know how many bytes need to be stored before dst is
24476 sufficiently aligned, maintain aliasing info accurately. */
24477 dst = expand_set_or_movmem_constant_prologue (dst, &src, destreg,
24478 srcreg,
24479 promoted_val,
24480 vec_promoted_val,
24481 desired_align,
24482 align_bytes,
24483 issetmem);
24484
24485 count_exp = plus_constant (counter_mode (count_exp),
24486 count_exp, -align_bytes);
24487 count -= align_bytes;
24488 min_size -= align_bytes;
24489 max_size -= align_bytes;
24490 }
24491 if (need_zero_guard
24492 && !min_size
24493 && (count < (unsigned HOST_WIDE_INT) size_needed
24494 || (align_bytes == 0
24495 && count < ((unsigned HOST_WIDE_INT) size_needed
24496 + desired_align - align))))
24497 {
24498 /* It is possible that we copied enough so the main loop will not
24499 execute. */
24500 gcc_assert (size_needed > 1);
24501 if (label == NULL_RTX)
24502 label = gen_label_rtx ();
24503 emit_cmp_and_jump_insns (count_exp,
24504 GEN_INT (size_needed),
24505 LTU, 0, counter_mode (count_exp), 1, label);
24506 if (expected_size == -1
24507 || expected_size < (desired_align - align) / 2 + size_needed)
24508 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24509 else
24510 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24511 }
24512 }
24513 if (label && size_needed == 1)
24514 {
24515 emit_label (label);
24516 LABEL_NUSES (label) = 1;
24517 label = NULL;
24518 epilogue_size_needed = 1;
24519 if (issetmem)
24520 promoted_val = val_exp;
24521 }
24522 else if (label == NULL_RTX && !misaligned_prologue_used)
24523 epilogue_size_needed = size_needed;
24524
24525 /* Step 3: Main loop. */
24526
24527 switch (alg)
24528 {
24529 case libcall:
24530 case no_stringop:
24531 case last_alg:
24532 gcc_unreachable ();
24533 case loop_1_byte:
24534 case loop:
24535 case unrolled_loop:
24536 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, promoted_val,
24537 count_exp, move_mode, unroll_factor,
24538 expected_size, issetmem);
24539 break;
24540 case vector_loop:
24541 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg,
24542 vec_promoted_val, count_exp, move_mode,
24543 unroll_factor, expected_size, issetmem);
24544 break;
24545 case rep_prefix_8_byte:
24546 case rep_prefix_4_byte:
24547 case rep_prefix_1_byte:
24548 expand_set_or_movmem_via_rep (dst, src, destreg, srcreg, promoted_val,
24549 val_exp, count_exp, move_mode, issetmem);
24550 break;
24551 }
24552 /* Properly adjust the offset of the src and dest memory for aliasing. */
24553 if (CONST_INT_P (count_exp))
24554 {
24555 if (!issetmem)
24556 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
24557 (count / size_needed) * size_needed);
24558 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
24559 (count / size_needed) * size_needed);
24560 }
24561 else
24562 {
24563 if (!issetmem)
24564 src = change_address (src, BLKmode, srcreg);
24565 dst = change_address (dst, BLKmode, destreg);
24566 }
24567
24568 /* Step 4: Epilogue to copy the remaining bytes. */
24569 epilogue:
24570 if (label)
24571 {
24572 /* When the main loop is done, COUNT_EXP might hold the original count,
24573 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
24574 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
24575 bytes. Compensate if needed. */
24576
24577 if (size_needed < epilogue_size_needed)
24578 {
24579 tmp =
24580 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
24581 GEN_INT (size_needed - 1), count_exp, 1,
24582 OPTAB_DIRECT);
24583 if (tmp != count_exp)
24584 emit_move_insn (count_exp, tmp);
24585 }
24586 emit_label (label);
24587 LABEL_NUSES (label) = 1;
24588 }
24589
24590 if (count_exp != const0_rtx && epilogue_size_needed > 1)
24591 {
24592 if (force_loopy_epilogue)
24593 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
24594 epilogue_size_needed);
24595 else
24596 {
24597 if (issetmem)
24598 expand_setmem_epilogue (dst, destreg, promoted_val,
24599 vec_promoted_val, count_exp,
24600 epilogue_size_needed);
24601 else
24602 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
24603 epilogue_size_needed);
24604 }
24605 }
24606 if (jump_around_label)
24607 emit_label (jump_around_label);
24608 return true;
24609 }
24610
24611
24612 /* Expand the appropriate insns for doing strlen if not just doing
24613 repnz; scasb
24614
24615 out = result, initialized with the start address
24616 align_rtx = alignment of the address.
24617 scratch = scratch register, initialized with the start address when
24618 not aligned, otherwise undefined
24619
24620 This is just the body. It needs the initializations mentioned above and
24621 some address computing at the end. These things are done in i386.md. */
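/* For intuition only, a rough C sketch of the aligned scanning loop emitted
   below (names are illustrative; the real code also handles the leading
   unaligned bytes and fixes up the final byte position without branches):

     const unsigned int *p = (const unsigned int *) s;   // s 4-byte aligned
     unsigned int w;
     do
       w = *p++;
     while (!((w - 0x01010101U) & ~w & 0x80808080U));     // no zero byte yet
*/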
24622
24623 static void
24624 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
24625 {
24626 int align;
24627 rtx tmp;
24628 rtx_code_label *align_2_label = NULL;
24629 rtx_code_label *align_3_label = NULL;
24630 rtx_code_label *align_4_label = gen_label_rtx ();
24631 rtx_code_label *end_0_label = gen_label_rtx ();
24632 rtx mem;
24633 rtx tmpreg = gen_reg_rtx (SImode);
24634 rtx scratch = gen_reg_rtx (SImode);
24635 rtx cmp;
24636
24637 align = 0;
24638 if (CONST_INT_P (align_rtx))
24639 align = INTVAL (align_rtx);
24640
24641 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
24642
24643 /* Is there a known alignment and is it less than 4? */
24644 if (align < 4)
24645 {
24646 rtx scratch1 = gen_reg_rtx (Pmode);
24647 emit_move_insn (scratch1, out);
24648 /* Is there a known alignment and is it not 2? */
24649 if (align != 2)
24650 {
24651 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
24652 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
24653
24654 /* Leave just the 3 lower bits. */
24655 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
24656 NULL_RTX, 0, OPTAB_WIDEN);
24657
24658 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
24659 Pmode, 1, align_4_label);
24660 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
24661 Pmode, 1, align_2_label);
24662 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
24663 Pmode, 1, align_3_label);
24664 }
24665 else
24666 {
24667 /* Since the alignment is 2, we have to check 2 or 0 bytes;
24668 check whether it is aligned to 4 bytes. */
24669
24670 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
24671 NULL_RTX, 0, OPTAB_WIDEN);
24672
24673 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
24674 Pmode, 1, align_4_label);
24675 }
24676
24677 mem = change_address (src, QImode, out);
24678
24679 /* Now compare the bytes. */
24680
24681 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
24682 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
24683 QImode, 1, end_0_label);
24684
24685 /* Increment the address. */
24686 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
24687
24688 /* Not needed with an alignment of 2 */
24689 if (align != 2)
24690 {
24691 emit_label (align_2_label);
24692
24693 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
24694 end_0_label);
24695
24696 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
24697
24698 emit_label (align_3_label);
24699 }
24700
24701 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
24702 end_0_label);
24703
24704 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
24705 }
24706
24707 /* Generate the loop to check 4 bytes at a time. It is not a good idea to
24708 align this loop: it only makes the code bigger and does not
24709 speed it up. */
24710 emit_label (align_4_label);
24711
24712 mem = change_address (src, SImode, out);
24713 emit_move_insn (scratch, mem);
24714 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
24715
24716 /* This formula yields a nonzero result iff one of the bytes is zero.
24717 This saves three branches inside the loop and many cycles. */
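  /* Worked example of the zero-byte test computed below (illustrative values
     only): for a loaded word V, the code computes
       (V - 0x01010101) & ~V & 0x80808080.
     With V == 0x00414243 the result is 0x80000000 (the high byte of V is
     zero), while with V == 0x41424344 the result is 0.  */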
24718
24719 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
24720 emit_insn (gen_one_cmplsi2 (scratch, scratch));
24721 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
24722 emit_insn (gen_andsi3 (tmpreg, tmpreg,
24723 gen_int_mode (0x80808080, SImode)));
24724 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
24725 align_4_label);
24726
24727 if (TARGET_CMOVE)
24728 {
24729 rtx reg = gen_reg_rtx (SImode);
24730 rtx reg2 = gen_reg_rtx (Pmode);
24731 emit_move_insn (reg, tmpreg);
24732 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
24733
24734 /* If zero is not in the first two bytes, move two bytes forward. */
24735 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
24736 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24737 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
24738 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
24739 gen_rtx_IF_THEN_ELSE (SImode, tmp,
24740 reg,
24741 tmpreg)));
24742 /* Emit lea manually to avoid clobbering of flags. */
24743 emit_insn (gen_rtx_SET (SImode, reg2,
24744 gen_rtx_PLUS (Pmode, out, const2_rtx)));
24745
24746 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24747 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
24748 emit_insn (gen_rtx_SET (VOIDmode, out,
24749 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
24750 reg2,
24751 out)));
24752 }
24753 else
24754 {
24755 rtx_code_label *end_2_label = gen_label_rtx ();
24756 /* Is zero in the first two bytes? */
24757
24758 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
24759 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24760 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
24761 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
24762 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
24763 pc_rtx);
24764 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
24765 JUMP_LABEL (tmp) = end_2_label;
24766
24767 /* Not in the first two. Move two bytes forward. */
24768 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
24769 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
24770
24771 emit_label (end_2_label);
24772
24773 }
24774
24775 /* Avoid branch in fixing the byte. */
24776 tmpreg = gen_lowpart (QImode, tmpreg);
24777 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
24778 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
24779 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
24780 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
24781
24782 emit_label (end_0_label);
24783 }
24784
24785 /* Expand strlen. */
24786
24787 bool
24788 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
24789 {
24790 rtx addr, scratch1, scratch2, scratch3, scratch4;
24791
24792 /* The generic case of the strlen expander is long. Avoid expanding
24793 it unless TARGET_INLINE_ALL_STRINGOPS. */
24794
24795 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
24796 && !TARGET_INLINE_ALL_STRINGOPS
24797 && !optimize_insn_for_size_p ()
24798 && (!CONST_INT_P (align) || INTVAL (align) < 4))
24799 return false;
24800
24801 addr = force_reg (Pmode, XEXP (src, 0));
24802 scratch1 = gen_reg_rtx (Pmode);
24803
24804 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
24805 && !optimize_insn_for_size_p ())
24806 {
24807 /* Well, it seems that some optimizer does not combine a call like
24808 foo(strlen(bar), strlen(bar));
24809 when the move and the subtraction are done here. It does calculate
24810 the length just once when these instructions are done inside of
24811 output_strlen_unroll(). But since &bar[strlen(bar)] is
24812 often used and this uses one fewer register for the lifetime of
24813 output_strlen_unroll(), this is better. */
24814
24815 emit_move_insn (out, addr);
24816
24817 ix86_expand_strlensi_unroll_1 (out, src, align);
24818
24819 /* strlensi_unroll_1 returns the address of the zero at the end of
24820 the string, like memchr(), so compute the length by subtracting
24821 the start address. */
24822 emit_insn (ix86_gen_sub3 (out, out, addr));
24823 }
24824 else
24825 {
24826 rtx unspec;
24827
24828 /* Can't use this if the user has appropriated eax, ecx, or edi. */
24829 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
24830 return false;
24831
24832 scratch2 = gen_reg_rtx (Pmode);
24833 scratch3 = gen_reg_rtx (Pmode);
24834 scratch4 = force_reg (Pmode, constm1_rtx);
24835
24836 emit_move_insn (scratch3, addr);
24837 eoschar = force_reg (QImode, eoschar);
24838
24839 src = replace_equiv_address_nv (src, scratch3);
24840
24841 /* If .md starts supporting :P, this can be done in .md. */
24842 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
24843 scratch4), UNSPEC_SCAS);
24844 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
24845 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
24846 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
24847 }
24848 return true;
24849 }
24850
24851 /* For a given symbol (function) construct code to compute the address of its
24852 PLT entry in the large x86-64 PIC model. */
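/* A sketch of the resulting code (register names are illustrative):
     movabs $symbol@PLTOFF, %tmp
     add    %pic_reg, %tmp
   where %pic_reg is pic_offset_table_rtx holding the GOT base.  */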
24853 static rtx
24854 construct_plt_address (rtx symbol)
24855 {
24856 rtx tmp, unspec;
24857
24858 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
24859 gcc_assert (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF);
24860 gcc_assert (Pmode == DImode);
24861
24862 tmp = gen_reg_rtx (Pmode);
24863 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
24864
24865 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
24866 emit_insn (ix86_gen_add3 (tmp, tmp, pic_offset_table_rtx));
24867 return tmp;
24868 }
24869
24870 rtx
24871 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
24872 rtx callarg2,
24873 rtx pop, bool sibcall)
24874 {
24875 rtx vec[3];
24876 rtx use = NULL, call;
24877 unsigned int vec_len = 0;
24878
24879 if (pop == const0_rtx)
24880 pop = NULL;
24881 gcc_assert (!TARGET_64BIT || !pop);
24882
24883 if (TARGET_MACHO && !TARGET_64BIT)
24884 {
24885 #if TARGET_MACHO
24886 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
24887 fnaddr = machopic_indirect_call_target (fnaddr);
24888 #endif
24889 }
24890 else
24891 {
24892 /* Static functions and indirect calls don't need the pic register. */
24893 if (flag_pic
24894 && (!TARGET_64BIT
24895 || (ix86_cmodel == CM_LARGE_PIC
24896 && DEFAULT_ABI != MS_ABI))
24897 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
24898 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
24899 use_reg (&use, pic_offset_table_rtx);
24900 }
24901
24902 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
24903 {
24904 rtx al = gen_rtx_REG (QImode, AX_REG);
24905 emit_move_insn (al, callarg2);
24906 use_reg (&use, al);
24907 }
24908
24909 if (ix86_cmodel == CM_LARGE_PIC
24910 && !TARGET_PECOFF
24911 && MEM_P (fnaddr)
24912 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
24913 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
24914 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
24915 else if (sibcall
24916 ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
24917 : !call_insn_operand (XEXP (fnaddr, 0), word_mode))
24918 {
24919 fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1);
24920 fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr));
24921 }
24922
24923 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
24924 if (retval)
24925 call = gen_rtx_SET (VOIDmode, retval, call);
24926 vec[vec_len++] = call;
24927
24928 if (pop)
24929 {
24930 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
24931 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
24932 vec[vec_len++] = pop;
24933 }
24934
24935 if (TARGET_64BIT_MS_ABI
24936 && (!callarg2 || INTVAL (callarg2) != -2))
24937 {
24938 int const cregs_size
24939 = ARRAY_SIZE (x86_64_ms_sysv_extra_clobbered_registers);
24940 int i;
24941
24942 for (i = 0; i < cregs_size; i++)
24943 {
24944 int regno = x86_64_ms_sysv_extra_clobbered_registers[i];
24945 enum machine_mode mode = SSE_REGNO_P (regno) ? TImode : DImode;
24946
24947 clobber_reg (&use, gen_rtx_REG (mode, regno));
24948 }
24949 }
24950
24951 if (vec_len > 1)
24952 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
24953 call = emit_call_insn (call);
24954 if (use)
24955 CALL_INSN_FUNCTION_USAGE (call) = use;
24956
24957 return call;
24958 }
24959
24960 /* Output the assembly for a call instruction. */
24961
24962 const char *
24963 ix86_output_call_insn (rtx_insn *insn, rtx call_op)
24964 {
24965 bool direct_p = constant_call_address_operand (call_op, VOIDmode);
24966 bool seh_nop_p = false;
24967 const char *xasm;
24968
24969 if (SIBLING_CALL_P (insn))
24970 {
24971 if (direct_p)
24972 xasm = "jmp\t%P0";
24973 /* SEH epilogue detection requires the indirect branch case
24974 to include REX.W. */
24975 else if (TARGET_SEH)
24976 xasm = "rex.W jmp %A0";
24977 else
24978 xasm = "jmp\t%A0";
24979
24980 output_asm_insn (xasm, &call_op);
24981 return "";
24982 }
24983
24984 /* SEH unwinding can require an extra nop to be emitted in several
24985 circumstances. Determine if we have one of those. */
24986 if (TARGET_SEH)
24987 {
24988 rtx_insn *i;
24989
24990 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
24991 {
24992 /* If we get to another real insn, we don't need the nop. */
24993 if (INSN_P (i))
24994 break;
24995
24996 /* If we get to the epilogue note, prevent a catch region from
24997 being adjacent to the standard epilogue sequence. If non-
24998 call-exceptions, we'll have done this during epilogue emission. */
24999 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
25000 && !flag_non_call_exceptions
25001 && !can_throw_internal (insn))
25002 {
25003 seh_nop_p = true;
25004 break;
25005 }
25006 }
25007
25008 /* If we didn't find a real insn following the call, prevent the
25009 unwinder from looking into the next function. */
25010 if (i == NULL)
25011 seh_nop_p = true;
25012 }
25013
25014 if (direct_p)
25015 xasm = "call\t%P0";
25016 else
25017 xasm = "call\t%A0";
25018
25019 output_asm_insn (xasm, &call_op);
25020
25021 if (seh_nop_p)
25022 return "nop";
25023
25024 return "";
25025 }
25026 \f
25027 /* Clear stack slot assignments remembered from previous functions.
25028 This is called from INIT_EXPANDERS once before RTL is emitted for each
25029 function. */
25030
25031 static struct machine_function *
25032 ix86_init_machine_status (void)
25033 {
25034 struct machine_function *f;
25035
25036 f = ggc_cleared_alloc<machine_function> ();
25037 f->use_fast_prologue_epilogue_nregs = -1;
25038 f->call_abi = ix86_abi;
25039
25040 return f;
25041 }
25042
25043 /* Return a MEM corresponding to a stack slot with mode MODE.
25044 Allocate a new slot if necessary.
25045
25046 The RTL for a function can have several slots available: N is
25047 which slot to use. */
25048
25049 rtx
25050 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
25051 {
25052 struct stack_local_entry *s;
25053
25054 gcc_assert (n < MAX_386_STACK_LOCALS);
25055
25056 for (s = ix86_stack_locals; s; s = s->next)
25057 if (s->mode == mode && s->n == n)
25058 return validize_mem (copy_rtx (s->rtl));
25059
25060 s = ggc_alloc<stack_local_entry> ();
25061 s->n = n;
25062 s->mode = mode;
25063 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
25064
25065 s->next = ix86_stack_locals;
25066 ix86_stack_locals = s;
25067 return validize_mem (copy_rtx (s->rtl));
25068 }
25069
25070 static void
25071 ix86_instantiate_decls (void)
25072 {
25073 struct stack_local_entry *s;
25074
25075 for (s = ix86_stack_locals; s; s = s->next)
25076 if (s->rtl != NULL_RTX)
25077 instantiate_decl_rtl (s->rtl);
25078 }
25079 \f
25080 /* Check whether x86 address PARTS is a pc-relative address. */
25081
25082 static bool
25083 rip_relative_addr_p (struct ix86_address *parts)
25084 {
25085 rtx base, index, disp;
25086
25087 base = parts->base;
25088 index = parts->index;
25089 disp = parts->disp;
25090
25091 if (disp && !base && !index)
25092 {
25093 if (TARGET_64BIT)
25094 {
25095 rtx symbol = disp;
25096
25097 if (GET_CODE (disp) == CONST)
25098 symbol = XEXP (disp, 0);
25099 if (GET_CODE (symbol) == PLUS
25100 && CONST_INT_P (XEXP (symbol, 1)))
25101 symbol = XEXP (symbol, 0);
25102
25103 if (GET_CODE (symbol) == LABEL_REF
25104 || (GET_CODE (symbol) == SYMBOL_REF
25105 && SYMBOL_REF_TLS_MODEL (symbol) == 0)
25106 || (GET_CODE (symbol) == UNSPEC
25107 && (XINT (symbol, 1) == UNSPEC_GOTPCREL
25108 || XINT (symbol, 1) == UNSPEC_PCREL
25109 || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
25110 return true;
25111 }
25112 }
25113 return false;
25114 }
25115
25116 /* Calculate the length of the memory address in the instruction encoding.
25117 Includes addr32 prefix, does not include the one-byte modrm, opcode,
25118 or other prefixes. We never generate addr32 prefix for LEA insn. */
25119
25120 int
25121 memory_address_length (rtx addr, bool lea)
25122 {
25123 struct ix86_address parts;
25124 rtx base, index, disp;
25125 int len;
25126 int ok;
25127
25128 if (GET_CODE (addr) == PRE_DEC
25129 || GET_CODE (addr) == POST_INC
25130 || GET_CODE (addr) == PRE_MODIFY
25131 || GET_CODE (addr) == POST_MODIFY)
25132 return 0;
25133
25134 ok = ix86_decompose_address (addr, &parts);
25135 gcc_assert (ok);
25136
25137 len = (parts.seg == SEG_DEFAULT) ? 0 : 1;
25138
25139   /* If this is not an LEA instruction, add the length of the addr32 prefix.  */
25140 if (TARGET_64BIT && !lea
25141 && (SImode_address_operand (addr, VOIDmode)
25142 || (parts.base && GET_MODE (parts.base) == SImode)
25143 || (parts.index && GET_MODE (parts.index) == SImode)))
25144 len++;
25145
25146 base = parts.base;
25147 index = parts.index;
25148 disp = parts.disp;
25149
25150 if (base && GET_CODE (base) == SUBREG)
25151 base = SUBREG_REG (base);
25152 if (index && GET_CODE (index) == SUBREG)
25153 index = SUBREG_REG (index);
25154
25155 gcc_assert (base == NULL_RTX || REG_P (base));
25156 gcc_assert (index == NULL_RTX || REG_P (index));
25157
25158 /* Rule of thumb:
25159 - esp as the base always wants an index,
25160 - ebp as the base always wants a displacement,
25161 - r12 as the base always wants an index,
25162 - r13 as the base always wants a displacement. */
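  /* As a concrete illustration of the rules above: the address
     8(%ebp,%esi,4) has a base, an index and a small displacement, so it
     costs 1 byte (the SIB byte implied by the index) plus 1 byte (disp8
     satisfies constraint K) on top of the modrm byte, which is not
     counted here; plain (%esp) likewise needs the SIB byte, giving 1.  */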
25163
25164 /* Register Indirect. */
25165 if (base && !index && !disp)
25166 {
25167 /* esp (for its index) and ebp (for its displacement) need
25168 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
25169 code. */
25170 if (base == arg_pointer_rtx
25171 || base == frame_pointer_rtx
25172 || REGNO (base) == SP_REG
25173 || REGNO (base) == BP_REG
25174 || REGNO (base) == R12_REG
25175 || REGNO (base) == R13_REG)
25176 len++;
25177 }
25178
25179 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
25180 is not disp32, but disp32(%rip), so for disp32
25181      a SIB byte is needed, unless print_operand_address
25182 optimizes it into disp32(%rip) or (%rip) is implied
25183 by UNSPEC. */
25184 else if (disp && !base && !index)
25185 {
25186 len += 4;
25187 if (rip_relative_addr_p (&parts))
25188 len++;
25189 }
25190 else
25191 {
25192 /* Find the length of the displacement constant. */
25193 if (disp)
25194 {
25195 if (base && satisfies_constraint_K (disp))
25196 len += 1;
25197 else
25198 len += 4;
25199 }
25200 /* ebp always wants a displacement. Similarly r13. */
25201 else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
25202 len++;
25203
25204 /* An index requires the two-byte modrm form.... */
25205 if (index
25206 /* ...like esp (or r12), which always wants an index. */
25207 || base == arg_pointer_rtx
25208 || base == frame_pointer_rtx
25209 || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
25210 len++;
25211 }
25212
25213 return len;
25214 }
25215
25216 /* Compute default value for "length_immediate" attribute. When SHORTFORM
25217    is set, expect the insn to have an 8-bit immediate alternative.  */
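/* For instance, with SHORTFORM set an insn like "addl $3, %eax" only needs
   a 1-byte immediate, since 3 fits in a signed byte, while "addl $300, %eax"
   still needs the full 4 bytes.  */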
25218 int
25219 ix86_attr_length_immediate_default (rtx_insn *insn, bool shortform)
25220 {
25221 int len = 0;
25222 int i;
25223 extract_insn_cached (insn);
25224 for (i = recog_data.n_operands - 1; i >= 0; --i)
25225 if (CONSTANT_P (recog_data.operand[i]))
25226 {
25227 enum attr_mode mode = get_attr_mode (insn);
25228
25229 gcc_assert (!len);
25230 if (shortform && CONST_INT_P (recog_data.operand[i]))
25231 {
25232 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
25233 switch (mode)
25234 {
25235 case MODE_QI:
25236 len = 1;
25237 continue;
25238 case MODE_HI:
25239 ival = trunc_int_for_mode (ival, HImode);
25240 break;
25241 case MODE_SI:
25242 ival = trunc_int_for_mode (ival, SImode);
25243 break;
25244 default:
25245 break;
25246 }
25247 if (IN_RANGE (ival, -128, 127))
25248 {
25249 len = 1;
25250 continue;
25251 }
25252 }
25253 switch (mode)
25254 {
25255 case MODE_QI:
25256 len = 1;
25257 break;
25258 case MODE_HI:
25259 len = 2;
25260 break;
25261 case MODE_SI:
25262 len = 4;
25263 break;
25264 /* Immediates for DImode instructions are encoded
25265 as 32bit sign extended values. */
25266 case MODE_DI:
25267 len = 4;
25268 break;
25269 default:
25270 fatal_insn ("unknown insn mode", insn);
25271 }
25272 }
25273 return len;
25274 }
25275
25276 /* Compute default value for "length_address" attribute. */
25277 int
25278 ix86_attr_length_address_default (rtx_insn *insn)
25279 {
25280 int i;
25281
25282 if (get_attr_type (insn) == TYPE_LEA)
25283 {
25284 rtx set = PATTERN (insn), addr;
25285
25286 if (GET_CODE (set) == PARALLEL)
25287 set = XVECEXP (set, 0, 0);
25288
25289 gcc_assert (GET_CODE (set) == SET);
25290
25291 addr = SET_SRC (set);
25292
25293 return memory_address_length (addr, true);
25294 }
25295
25296 extract_insn_cached (insn);
25297 for (i = recog_data.n_operands - 1; i >= 0; --i)
25298 if (MEM_P (recog_data.operand[i]))
25299 {
25300 constrain_operands_cached (reload_completed);
25301 if (which_alternative != -1)
25302 {
25303 const char *constraints = recog_data.constraints[i];
25304 int alt = which_alternative;
25305
25306 while (*constraints == '=' || *constraints == '+')
25307 constraints++;
25308 while (alt-- > 0)
25309 while (*constraints++ != ',')
25310 ;
25311 /* Skip ignored operands. */
25312 if (*constraints == 'X')
25313 continue;
25314 }
25315 return memory_address_length (XEXP (recog_data.operand[i], 0), false);
25316 }
25317 return 0;
25318 }
25319
25320 /* Compute default value for "length_vex" attribute. It includes
25321 2 or 3 byte VEX prefix and 1 opcode byte. */
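/* For example, an insn using the 0f escape whose operands need neither
   VEX.W nor the REX.X/REX.B bits gets the 2-byte prefix (2 + 1 = 3);
   a DImode general-register operand or a memory operand mentioning
   r8-r15 forces the 3-byte form (3 + 1 = 4).  */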
25322
25323 int
25324 ix86_attr_length_vex_default (rtx_insn *insn, bool has_0f_opcode,
25325 bool has_vex_w)
25326 {
25327 int i;
25328
25329   /* Only the 0f opcode can use the 2-byte VEX prefix, and the VEX W bit
25330      requires the 3-byte VEX prefix.  */
25331 if (!has_0f_opcode || has_vex_w)
25332 return 3 + 1;
25333
25334 /* We can always use 2 byte VEX prefix in 32bit. */
25335 if (!TARGET_64BIT)
25336 return 2 + 1;
25337
25338 extract_insn_cached (insn);
25339
25340 for (i = recog_data.n_operands - 1; i >= 0; --i)
25341 if (REG_P (recog_data.operand[i]))
25342 {
25343 /* REX.W bit uses 3 byte VEX prefix. */
25344 if (GET_MODE (recog_data.operand[i]) == DImode
25345 && GENERAL_REG_P (recog_data.operand[i]))
25346 return 3 + 1;
25347 }
25348 else
25349 {
25350 /* REX.X or REX.B bits use 3 byte VEX prefix. */
25351 if (MEM_P (recog_data.operand[i])
25352 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
25353 return 3 + 1;
25354 }
25355
25356 return 2 + 1;
25357 }
25358 \f
25359 /* Return the maximum number of instructions a cpu can issue. */
25360
25361 static int
25362 ix86_issue_rate (void)
25363 {
25364 switch (ix86_tune)
25365 {
25366 case PROCESSOR_PENTIUM:
25367 case PROCESSOR_BONNELL:
25368 case PROCESSOR_SILVERMONT:
25369 case PROCESSOR_INTEL:
25370 case PROCESSOR_K6:
25371 case PROCESSOR_BTVER2:
25372 case PROCESSOR_PENTIUM4:
25373 case PROCESSOR_NOCONA:
25374 return 2;
25375
25376 case PROCESSOR_PENTIUMPRO:
25377 case PROCESSOR_ATHLON:
25378 case PROCESSOR_K8:
25379 case PROCESSOR_AMDFAM10:
25380 case PROCESSOR_GENERIC:
25381 case PROCESSOR_BTVER1:
25382 return 3;
25383
25384 case PROCESSOR_BDVER1:
25385 case PROCESSOR_BDVER2:
25386 case PROCESSOR_BDVER3:
25387 case PROCESSOR_BDVER4:
25388 case PROCESSOR_CORE2:
25389 case PROCESSOR_NEHALEM:
25390 case PROCESSOR_SANDYBRIDGE:
25391 case PROCESSOR_HASWELL:
25392 return 4;
25393
25394 default:
25395 return 1;
25396 }
25397 }
25398
25399 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads the flags set
25400    by DEP_INSN and nothing else set by DEP_INSN.  */
25401
25402 static bool
25403 ix86_flags_dependent (rtx_insn *insn, rtx_insn *dep_insn, enum attr_type insn_type)
25404 {
25405 rtx set, set2;
25406
25407 /* Simplify the test for uninteresting insns. */
25408 if (insn_type != TYPE_SETCC
25409 && insn_type != TYPE_ICMOV
25410 && insn_type != TYPE_FCMOV
25411 && insn_type != TYPE_IBR)
25412 return false;
25413
25414 if ((set = single_set (dep_insn)) != 0)
25415 {
25416 set = SET_DEST (set);
25417 set2 = NULL_RTX;
25418 }
25419 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
25420 && XVECLEN (PATTERN (dep_insn), 0) == 2
25421 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
25422 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
25423 {
25424 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
25425       set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
25426 }
25427 else
25428 return false;
25429
25430 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
25431 return false;
25432
25433 /* This test is true if the dependent insn reads the flags but
25434 not any other potentially set register. */
25435 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
25436 return false;
25437
25438 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
25439 return false;
25440
25441 return true;
25442 }
25443
25444 /* Return true iff USE_INSN has a memory address with operands set by
25445 SET_INSN. */
25446
25447 bool
25448 ix86_agi_dependent (rtx_insn *set_insn, rtx_insn *use_insn)
25449 {
25450 int i;
25451 extract_insn_cached (use_insn);
25452 for (i = recog_data.n_operands - 1; i >= 0; --i)
25453 if (MEM_P (recog_data.operand[i]))
25454 {
25455 rtx addr = XEXP (recog_data.operand[i], 0);
25456 return modified_in_p (addr, set_insn) != 0;
25457 }
25458 return false;
25459 }
25460
25461 /* Helper function for exact_store_load_dependency.
25462 Return true if addr is found in insn. */
25463 static bool
25464 exact_dependency_1 (rtx addr, rtx insn)
25465 {
25466 enum rtx_code code;
25467 const char *format_ptr;
25468 int i, j;
25469
25470 code = GET_CODE (insn);
25471 switch (code)
25472 {
25473 case MEM:
25474 if (rtx_equal_p (addr, insn))
25475 return true;
25476 break;
25477 case REG:
25478 CASE_CONST_ANY:
25479 case SYMBOL_REF:
25480 case CODE_LABEL:
25481 case PC:
25482 case CC0:
25483 case EXPR_LIST:
25484 return false;
25485 default:
25486 break;
25487 }
25488
25489 format_ptr = GET_RTX_FORMAT (code);
25490 for (i = 0; i < GET_RTX_LENGTH (code); i++)
25491 {
25492 switch (*format_ptr++)
25493 {
25494 case 'e':
25495 if (exact_dependency_1 (addr, XEXP (insn, i)))
25496 return true;
25497 break;
25498 case 'E':
25499 for (j = 0; j < XVECLEN (insn, i); j++)
25500 if (exact_dependency_1 (addr, XVECEXP (insn, i, j)))
25501 return true;
25502 break;
25503 }
25504 }
25505 return false;
25506 }
25507
25508 /* Return true if there exists exact dependency for store & load, i.e.
25509 the same memory address is used in them. */
25510 static bool
25511 exact_store_load_dependency (rtx_insn *store, rtx_insn *load)
25512 {
25513 rtx set1, set2;
25514
25515 set1 = single_set (store);
25516 if (!set1)
25517 return false;
25518 if (!MEM_P (SET_DEST (set1)))
25519 return false;
25520 set2 = single_set (load);
25521 if (!set2)
25522 return false;
25523 if (exact_dependency_1 (SET_DEST (set1), SET_SRC (set2)))
25524 return true;
25525 return false;
25526 }
25527
25528 static int
25529 ix86_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
25530 {
25531 enum attr_type insn_type, dep_insn_type;
25532 enum attr_memory memory;
25533 rtx set, set2;
25534 int dep_insn_code_number;
25535
25536 /* Anti and output dependencies have zero cost on all CPUs. */
25537 if (REG_NOTE_KIND (link) != 0)
25538 return 0;
25539
25540 dep_insn_code_number = recog_memoized (dep_insn);
25541
25542 /* If we can't recognize the insns, we can't really do anything. */
25543 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
25544 return cost;
25545
25546 insn_type = get_attr_type (insn);
25547 dep_insn_type = get_attr_type (dep_insn);
25548
25549 switch (ix86_tune)
25550 {
25551 case PROCESSOR_PENTIUM:
25552 /* Address Generation Interlock adds a cycle of latency. */
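      /* For instance, "addl %eax, %ebx" immediately followed by
	 "movl (%ebx), %ecx" pays the extra cycle, because the load's
	 address depends on the result of the preceding ALU op.  */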
25553 if (insn_type == TYPE_LEA)
25554 {
25555 rtx addr = PATTERN (insn);
25556
25557 if (GET_CODE (addr) == PARALLEL)
25558 addr = XVECEXP (addr, 0, 0);
25559
25560 gcc_assert (GET_CODE (addr) == SET);
25561
25562 addr = SET_SRC (addr);
25563 if (modified_in_p (addr, dep_insn))
25564 cost += 1;
25565 }
25566 else if (ix86_agi_dependent (dep_insn, insn))
25567 cost += 1;
25568
25569 /* ??? Compares pair with jump/setcc. */
25570 if (ix86_flags_dependent (insn, dep_insn, insn_type))
25571 cost = 0;
25572
25573 /* Floating point stores require value to be ready one cycle earlier. */
25574 if (insn_type == TYPE_FMOV
25575 && get_attr_memory (insn) == MEMORY_STORE
25576 && !ix86_agi_dependent (dep_insn, insn))
25577 cost += 1;
25578 break;
25579
25580 case PROCESSOR_PENTIUMPRO:
25581 /* INT->FP conversion is expensive. */
25582 if (get_attr_fp_int_src (dep_insn))
25583 cost += 5;
25584
25585 /* There is one cycle extra latency between an FP op and a store. */
25586 if (insn_type == TYPE_FMOV
25587 && (set = single_set (dep_insn)) != NULL_RTX
25588 && (set2 = single_set (insn)) != NULL_RTX
25589 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
25590 && MEM_P (SET_DEST (set2)))
25591 cost += 1;
25592
25593 memory = get_attr_memory (insn);
25594
25595       /* Show the ability of the reorder buffer to hide the latency of a load
25596 	 by executing it in parallel with the previous instruction, in case the
25597 	 previous instruction is not needed to compute the address.  */
25598 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25599 && !ix86_agi_dependent (dep_insn, insn))
25600 {
25601 	  /* Claim moves to take one cycle, as the core can issue one load
25602 	     at a time and the next load can start a cycle later.  */
25603 if (dep_insn_type == TYPE_IMOV
25604 || dep_insn_type == TYPE_FMOV)
25605 cost = 1;
25606 else if (cost > 1)
25607 cost--;
25608 }
25609 break;
25610
25611 case PROCESSOR_K6:
25612 /* The esp dependency is resolved before
25613 the instruction is really finished. */
25614 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
25615 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
25616 return 1;
25617
25618 /* INT->FP conversion is expensive. */
25619 if (get_attr_fp_int_src (dep_insn))
25620 cost += 5;
25621
25622 memory = get_attr_memory (insn);
25623
25624       /* Show the ability of the reorder buffer to hide the latency of a load
25625 	 by executing it in parallel with the previous instruction, in case the
25626 	 previous instruction is not needed to compute the address.  */
25627 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25628 && !ix86_agi_dependent (dep_insn, insn))
25629 {
25630 	  /* Claim moves to take one cycle, as the core can issue one load
25631 	     at a time and the next load can start a cycle later.  */
25632 if (dep_insn_type == TYPE_IMOV
25633 || dep_insn_type == TYPE_FMOV)
25634 cost = 1;
25635 else if (cost > 2)
25636 cost -= 2;
25637 else
25638 cost = 1;
25639 }
25640 break;
25641
25642 case PROCESSOR_AMDFAM10:
25643 case PROCESSOR_BDVER1:
25644 case PROCESSOR_BDVER2:
25645 case PROCESSOR_BDVER3:
25646 case PROCESSOR_BDVER4:
25647 case PROCESSOR_BTVER1:
25648 case PROCESSOR_BTVER2:
25649 case PROCESSOR_GENERIC:
25650       /* The stack engine allows push&pop instructions to execute in parallel.  */
25651 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
25652 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
25653 return 0;
25654 /* FALLTHRU */
25655
25656 case PROCESSOR_ATHLON:
25657 case PROCESSOR_K8:
25658 memory = get_attr_memory (insn);
25659
25660       /* Show the ability of the reorder buffer to hide the latency of a load
25661 	 by executing it in parallel with the previous instruction, in case the
25662 	 previous instruction is not needed to compute the address.  */
25663 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25664 && !ix86_agi_dependent (dep_insn, insn))
25665 {
25666 enum attr_unit unit = get_attr_unit (insn);
25667 int loadcost = 3;
25668
25669 /* Because of the difference between the length of integer and
25670 floating unit pipeline preparation stages, the memory operands
25671 for floating point are cheaper.
25672
25673 	     ??? For Athlon the difference is most probably 2.  */
25674 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
25675 loadcost = 3;
25676 else
25677 loadcost = TARGET_ATHLON ? 2 : 0;
25678
25679 if (cost >= loadcost)
25680 cost -= loadcost;
25681 else
25682 cost = 0;
25683 }
25684 break;
25685
25686 case PROCESSOR_CORE2:
25687 case PROCESSOR_NEHALEM:
25688 case PROCESSOR_SANDYBRIDGE:
25689 case PROCESSOR_HASWELL:
25690       /* The stack engine allows push&pop instructions to execute in parallel.  */
25691 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
25692 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
25693 return 0;
25694
25695 memory = get_attr_memory (insn);
25696
25697       /* Show the ability of the reorder buffer to hide the latency of a load
25698 	 by executing it in parallel with the previous instruction, in case the
25699 	 previous instruction is not needed to compute the address.  */
25700 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25701 && !ix86_agi_dependent (dep_insn, insn))
25702 {
25703 if (cost >= 4)
25704 cost -= 4;
25705 else
25706 cost = 0;
25707 }
25708 break;
25709
25710 case PROCESSOR_SILVERMONT:
25711 case PROCESSOR_INTEL:
25712 if (!reload_completed)
25713 return cost;
25714
25715 /* Increase cost of integer loads. */
25716 memory = get_attr_memory (dep_insn);
25717 if (memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25718 {
25719 enum attr_unit unit = get_attr_unit (dep_insn);
25720 if (unit == UNIT_INTEGER && cost == 1)
25721 {
25722 if (memory == MEMORY_LOAD)
25723 cost = 3;
25724 else
25725 {
25726 /* Increase cost of ld/st for short int types only
25727 because of store forwarding issue. */
25728 rtx set = single_set (dep_insn);
25729 if (set && (GET_MODE (SET_DEST (set)) == QImode
25730 || GET_MODE (SET_DEST (set)) == HImode))
25731 {
25732 /* Increase cost of store/load insn if exact
25733 dependence exists and it is load insn. */
25734 enum attr_memory insn_memory = get_attr_memory (insn);
25735 if (insn_memory == MEMORY_LOAD
25736 && exact_store_load_dependency (dep_insn, insn))
25737 cost = 3;
25738 }
25739 }
25740 }
25741 }
25742       break;
25743 default:
25744 break;
25745 }
25746
25747 return cost;
25748 }
25749
25750 /* How many alternative schedules to try. This should be as wide as the
25751 scheduling freedom in the DFA, but no wider. Making this value too
25752    large results in extra work for the scheduler.  */
25753
25754 static int
25755 ia32_multipass_dfa_lookahead (void)
25756 {
25757 switch (ix86_tune)
25758 {
25759 case PROCESSOR_PENTIUM:
25760 return 2;
25761
25762 case PROCESSOR_PENTIUMPRO:
25763 case PROCESSOR_K6:
25764 return 1;
25765
25766 case PROCESSOR_BDVER1:
25767 case PROCESSOR_BDVER2:
25768 case PROCESSOR_BDVER3:
25769 case PROCESSOR_BDVER4:
25770 /* We use lookahead value 4 for BD both before and after reload
25771 schedules. Plan is to have value 8 included for O3. */
25772 return 4;
25773
25774 case PROCESSOR_CORE2:
25775 case PROCESSOR_NEHALEM:
25776 case PROCESSOR_SANDYBRIDGE:
25777 case PROCESSOR_HASWELL:
25778 case PROCESSOR_BONNELL:
25779 case PROCESSOR_SILVERMONT:
25780 case PROCESSOR_INTEL:
25781 /* Generally, we want haifa-sched:max_issue() to look ahead as far
25782 	 as the number of instructions that can be executed in a cycle, i.e.,
25783 issue_rate. I wonder why tuning for many CPUs does not do this. */
25784 if (reload_completed)
25785 return ix86_issue_rate ();
25786 /* Don't use lookahead for pre-reload schedule to save compile time. */
25787 return 0;
25788
25789 default:
25790 return 0;
25791 }
25792 }
25793
25794 /* Return true if target platform supports macro-fusion. */
25795
25796 static bool
25797 ix86_macro_fusion_p ()
25798 {
25799 return TARGET_FUSE_CMP_AND_BRANCH;
25800 }
25801
25802 /* Check whether the current microarchitecture supports macro fusion
25803 for insn pair "CONDGEN + CONDJMP". Refer to
25804 "Intel Architectures Optimization Reference Manual". */
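/* For instance, "cmpl %esi, %edi" directly followed by "jne .L3" can be
   fused into a single macro-op on these CPUs, while a compare of a memory
   operand against an immediate, or one whose address is RIP-relative,
   is rejected below.  */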
25805
25806 static bool
25807 ix86_macro_fusion_pair_p (rtx_insn *condgen, rtx_insn *condjmp)
25808 {
25809 rtx src, dest;
25810 enum rtx_code ccode;
25811 rtx compare_set = NULL_RTX, test_if, cond;
25812 rtx alu_set = NULL_RTX, addr = NULL_RTX;
25813
25814 if (!any_condjump_p (condjmp))
25815 return false;
25816
25817 if (get_attr_type (condgen) != TYPE_TEST
25818 && get_attr_type (condgen) != TYPE_ICMP
25819 && get_attr_type (condgen) != TYPE_INCDEC
25820 && get_attr_type (condgen) != TYPE_ALU)
25821 return false;
25822
25823 compare_set = single_set (condgen);
25824 if (compare_set == NULL_RTX
25825 && !TARGET_FUSE_ALU_AND_BRANCH)
25826 return false;
25827
25828 if (compare_set == NULL_RTX)
25829 {
25830 int i;
25831 rtx pat = PATTERN (condgen);
25832 for (i = 0; i < XVECLEN (pat, 0); i++)
25833 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
25834 {
25835 rtx set_src = SET_SRC (XVECEXP (pat, 0, i));
25836 if (GET_CODE (set_src) == COMPARE)
25837 compare_set = XVECEXP (pat, 0, i);
25838 else
25839 alu_set = XVECEXP (pat, 0, i);
25840 }
25841 }
25842 if (compare_set == NULL_RTX)
25843 return false;
25844 src = SET_SRC (compare_set);
25845 if (GET_CODE (src) != COMPARE)
25846 return false;
25847
25848 /* Macro-fusion for cmp/test MEM-IMM + conditional jmp is not
25849 supported. */
25850 if ((MEM_P (XEXP (src, 0))
25851 && CONST_INT_P (XEXP (src, 1)))
25852 || (MEM_P (XEXP (src, 1))
25853 && CONST_INT_P (XEXP (src, 0))))
25854 return false;
25855
25856 /* No fusion for RIP-relative address. */
25857 if (MEM_P (XEXP (src, 0)))
25858 addr = XEXP (XEXP (src, 0), 0);
25859 else if (MEM_P (XEXP (src, 1)))
25860 addr = XEXP (XEXP (src, 1), 0);
25861
25862 if (addr) {
25863 ix86_address parts;
25864 int ok = ix86_decompose_address (addr, &parts);
25865 gcc_assert (ok);
25866
25867 if (rip_relative_addr_p (&parts))
25868 return false;
25869 }
25870
25871 test_if = SET_SRC (pc_set (condjmp));
25872 cond = XEXP (test_if, 0);
25873 ccode = GET_CODE (cond);
25874   /* Check whether the conditional jump uses the Sign or Overflow flags.  */
25875 if (!TARGET_FUSE_CMP_AND_BRANCH_SOFLAGS
25876 && (ccode == GE
25877 || ccode == GT
25878 || ccode == LE
25879 || ccode == LT))
25880 return false;
25881
25882 /* Return true for TYPE_TEST and TYPE_ICMP. */
25883 if (get_attr_type (condgen) == TYPE_TEST
25884 || get_attr_type (condgen) == TYPE_ICMP)
25885 return true;
25886
25887   /* The following handles the macro-fusion case of alu + jmp.  */
25888 if (!TARGET_FUSE_ALU_AND_BRANCH || !alu_set)
25889 return false;
25890
25891 /* No fusion for alu op with memory destination operand. */
25892 dest = SET_DEST (alu_set);
25893 if (MEM_P (dest))
25894 return false;
25895
25896 /* Macro-fusion for inc/dec + unsigned conditional jump is not
25897 supported. */
25898 if (get_attr_type (condgen) == TYPE_INCDEC
25899 && (ccode == GEU
25900 || ccode == GTU
25901 || ccode == LEU
25902 || ccode == LTU))
25903 return false;
25904
25905 return true;
25906 }
25907
25908 /* Try to reorder the ready list to take advantage of Atom's pipelined IMUL
25909    execution.  It is applied if
25910    (1) an IMUL instruction is at the top of the list;
25911    (2) there is exactly one producer of an independent IMUL instruction in
25912        the ready list.
25913    Return the index of the IMUL producer if it was found and -1 otherwise.  */
25914 static int
25915 do_reorder_for_imul (rtx_insn **ready, int n_ready)
25916 {
25917 rtx_insn *insn;
25918 rtx set, insn1, insn2;
25919 sd_iterator_def sd_it;
25920 dep_t dep;
25921 int index = -1;
25922 int i;
25923
25924 if (!TARGET_BONNELL)
25925 return index;
25926
25927 /* Check that IMUL instruction is on the top of ready list. */
25928 insn = ready[n_ready - 1];
25929 set = single_set (insn);
25930 if (!set)
25931 return index;
25932 if (!(GET_CODE (SET_SRC (set)) == MULT
25933 && GET_MODE (SET_SRC (set)) == SImode))
25934 return index;
25935
25936 /* Search for producer of independent IMUL instruction. */
25937 for (i = n_ready - 2; i >= 0; i--)
25938 {
25939 insn = ready[i];
25940 if (!NONDEBUG_INSN_P (insn))
25941 continue;
25942 /* Skip IMUL instruction. */
25943 insn2 = PATTERN (insn);
25944 if (GET_CODE (insn2) == PARALLEL)
25945 insn2 = XVECEXP (insn2, 0, 0);
25946 if (GET_CODE (insn2) == SET
25947 && GET_CODE (SET_SRC (insn2)) == MULT
25948 && GET_MODE (SET_SRC (insn2)) == SImode)
25949 continue;
25950
25951 FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
25952 {
25953 rtx con;
25954 con = DEP_CON (dep);
25955 if (!NONDEBUG_INSN_P (con))
25956 continue;
25957 insn1 = PATTERN (con);
25958 if (GET_CODE (insn1) == PARALLEL)
25959 insn1 = XVECEXP (insn1, 0, 0);
25960
25961 if (GET_CODE (insn1) == SET
25962 && GET_CODE (SET_SRC (insn1)) == MULT
25963 && GET_MODE (SET_SRC (insn1)) == SImode)
25964 {
25965 sd_iterator_def sd_it1;
25966 dep_t dep1;
25967 /* Check if there is no other dependee for IMUL. */
25968 index = i;
25969 FOR_EACH_DEP (con, SD_LIST_BACK, sd_it1, dep1)
25970 {
25971 rtx pro;
25972 pro = DEP_PRO (dep1);
25973 if (!NONDEBUG_INSN_P (pro))
25974 continue;
25975 if (pro != insn)
25976 index = -1;
25977 }
25978 if (index >= 0)
25979 break;
25980 }
25981 }
25982 if (index >= 0)
25983 break;
25984 }
25985 return index;
25986 }
25987
25988 /* Try to find the best candidate for the top of the ready list if two insns
25989    have the same priority - the best candidate is the one whose dependees were
25990    scheduled earlier.  Applied for Silvermont only.
25991    Return true if the top 2 insns must be interchanged.  */
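/* For instance, when the producers of both candidates finished on the same
   cycle, a load sitting in the second slot is moved ahead of a non-load
   at the top.  */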
25992 static bool
25993 swap_top_of_ready_list (rtx_insn **ready, int n_ready)
25994 {
25995 rtx_insn *top = ready[n_ready - 1];
25996 rtx_insn *next = ready[n_ready - 2];
25997 rtx set;
25998 sd_iterator_def sd_it;
25999 dep_t dep;
26000 int clock1 = -1;
26001 int clock2 = -1;
26002 #define INSN_TICK(INSN) (HID (INSN)->tick)
26003
26004 if (!TARGET_SILVERMONT && !TARGET_INTEL)
26005 return false;
26006
26007 if (!NONDEBUG_INSN_P (top))
26008 return false;
26009 if (!NONJUMP_INSN_P (top))
26010 return false;
26011 if (!NONDEBUG_INSN_P (next))
26012 return false;
26013 if (!NONJUMP_INSN_P (next))
26014 return false;
26015 set = single_set (top);
26016 if (!set)
26017 return false;
26018 set = single_set (next);
26019 if (!set)
26020 return false;
26021
26022 if (INSN_PRIORITY_KNOWN (top) && INSN_PRIORITY_KNOWN (next))
26023 {
26024 if (INSN_PRIORITY (top) != INSN_PRIORITY (next))
26025 return false;
26026       /* Determine the winner more precisely.  */
26027 FOR_EACH_DEP (top, SD_LIST_RES_BACK, sd_it, dep)
26028 {
26029 rtx pro;
26030 pro = DEP_PRO (dep);
26031 if (!NONDEBUG_INSN_P (pro))
26032 continue;
26033 if (INSN_TICK (pro) > clock1)
26034 clock1 = INSN_TICK (pro);
26035 }
26036 FOR_EACH_DEP (next, SD_LIST_RES_BACK, sd_it, dep)
26037 {
26038 rtx pro;
26039 pro = DEP_PRO (dep);
26040 if (!NONDEBUG_INSN_P (pro))
26041 continue;
26042 if (INSN_TICK (pro) > clock2)
26043 clock2 = INSN_TICK (pro);
26044 }
26045
26046 if (clock1 == clock2)
26047 {
26048 /* Determine winner - load must win. */
26049 enum attr_memory memory1, memory2;
26050 memory1 = get_attr_memory (top);
26051 memory2 = get_attr_memory (next);
26052 if (memory2 == MEMORY_LOAD && memory1 != MEMORY_LOAD)
26053 return true;
26054 }
26055 return (bool) (clock2 < clock1);
26056 }
26057 return false;
26058 #undef INSN_TICK
26059 }
26060
26061 /* Perform possible reordering of the ready list for Atom/Silvermont only.
26062    Return the issue rate.  */
26063 static int
26064 ix86_sched_reorder (FILE *dump, int sched_verbose, rtx_insn **ready,
26065 int *pn_ready, int clock_var)
26066 {
26067 int issue_rate = -1;
26068 int n_ready = *pn_ready;
26069 int i;
26070 rtx_insn *insn;
26071 int index = -1;
26072
26073 /* Set up issue rate. */
26074 issue_rate = ix86_issue_rate ();
26075
26076   /* Do reordering for BONNELL/SILVERMONT only.  */
26077 if (!TARGET_BONNELL && !TARGET_SILVERMONT && !TARGET_INTEL)
26078 return issue_rate;
26079
26080 /* Nothing to do if ready list contains only 1 instruction. */
26081 if (n_ready <= 1)
26082 return issue_rate;
26083
26084   /* Do reordering for the post-reload scheduler only.  */
26085 if (!reload_completed)
26086 return issue_rate;
26087
26088 if ((index = do_reorder_for_imul (ready, n_ready)) >= 0)
26089 {
26090 if (sched_verbose > 1)
26091 fprintf (dump, ";;\tatom sched_reorder: put %d insn on top\n",
26092 INSN_UID (ready[index]));
26093
26094 /* Put IMUL producer (ready[index]) at the top of ready list. */
26095 insn = ready[index];
26096 for (i = index; i < n_ready - 1; i++)
26097 ready[i] = ready[i + 1];
26098 ready[n_ready - 1] = insn;
26099 return issue_rate;
26100 }
26101 if (clock_var != 0 && swap_top_of_ready_list (ready, n_ready))
26102 {
26103 if (sched_verbose > 1)
26104 fprintf (dump, ";;\tslm sched_reorder: swap %d and %d insns\n",
26105 INSN_UID (ready[n_ready - 1]), INSN_UID (ready[n_ready - 2]));
26106 /* Swap 2 top elements of ready list. */
26107 insn = ready[n_ready - 1];
26108 ready[n_ready - 1] = ready[n_ready - 2];
26109 ready[n_ready - 2] = insn;
26110 }
26111 return issue_rate;
26112 }
26113
26114 static bool
26115 ix86_class_likely_spilled_p (reg_class_t);
26116
26117 /* Return true if the LHS of INSN is a HW function argument register; set
26118    IS_SPILLED to true if it is a likely-spilled HW register.  */
26119 static bool
26120 insn_is_function_arg (rtx insn, bool* is_spilled)
26121 {
26122 rtx dst;
26123
26124 if (!NONDEBUG_INSN_P (insn))
26125 return false;
26126   /* Call instructions are not movable; ignore them.  */
26127 if (CALL_P (insn))
26128 return false;
26129 insn = PATTERN (insn);
26130 if (GET_CODE (insn) == PARALLEL)
26131 insn = XVECEXP (insn, 0, 0);
26132 if (GET_CODE (insn) != SET)
26133 return false;
26134 dst = SET_DEST (insn);
26135 if (REG_P (dst) && HARD_REGISTER_P (dst)
26136 && ix86_function_arg_regno_p (REGNO (dst)))
26137 {
26138 /* Is it likely spilled HW register? */
26139 if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
26140 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
26141 *is_spilled = true;
26142 return true;
26143 }
26144 return false;
26145 }
26146
26147 /* Add output dependencies for a chain of adjacent function arguments, but only
26148    if there is a move to a likely-spilled HW register.  Return the first argument
26149    if at least one dependence was added, or NULL otherwise.  */
26150 static rtx_insn *
26151 add_parameter_dependencies (rtx_insn *call, rtx_insn *head)
26152 {
26153 rtx_insn *insn;
26154 rtx_insn *last = call;
26155 rtx_insn *first_arg = NULL;
26156 bool is_spilled = false;
26157
26158 head = PREV_INSN (head);
26159
26160   /* Find the argument-passing instruction nearest to the call.  */
26161 while (true)
26162 {
26163 last = PREV_INSN (last);
26164 if (last == head)
26165 return NULL;
26166 if (!NONDEBUG_INSN_P (last))
26167 continue;
26168 if (insn_is_function_arg (last, &is_spilled))
26169 break;
26170 return NULL;
26171 }
26172
26173 first_arg = last;
26174 while (true)
26175 {
26176 insn = PREV_INSN (last);
26177 if (!INSN_P (insn))
26178 break;
26179 if (insn == head)
26180 break;
26181 if (!NONDEBUG_INSN_P (insn))
26182 {
26183 last = insn;
26184 continue;
26185 }
26186 if (insn_is_function_arg (insn, &is_spilled))
26187 {
26188 	  /* Add an output dependence between two function arguments if the chain
26189 	     of output arguments contains likely-spilled HW registers.  */
26190 if (is_spilled)
26191 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26192 first_arg = last = insn;
26193 }
26194 else
26195 break;
26196 }
26197 if (!is_spilled)
26198 return NULL;
26199 return first_arg;
26200 }
26201
26202 /* Add an output or anti dependency from INSN to FIRST_ARG to restrict its
26203    code motion.  */
26204 static void
26205 avoid_func_arg_motion (rtx_insn *first_arg, rtx_insn *insn)
26206 {
26207 rtx set;
26208 rtx tmp;
26209
26210 set = single_set (insn);
26211 if (!set)
26212 return;
26213 tmp = SET_DEST (set);
26214 if (REG_P (tmp))
26215 {
26216 /* Add output dependency to the first function argument. */
26217 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26218 return;
26219 }
26220 /* Add anti dependency. */
26221 add_dependence (first_arg, insn, REG_DEP_ANTI);
26222 }
26223
26224 /* Avoid cross-block motion of a function argument by adding a dependency
26225    from the first non-jump instruction in BB.  */
26226 static void
26227 add_dependee_for_func_arg (rtx_insn *arg, basic_block bb)
26228 {
26229 rtx_insn *insn = BB_END (bb);
26230
26231 while (insn)
26232 {
26233 if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
26234 {
26235 rtx set = single_set (insn);
26236 if (set)
26237 {
26238 avoid_func_arg_motion (arg, insn);
26239 return;
26240 }
26241 }
26242 if (insn == BB_HEAD (bb))
26243 return;
26244 insn = PREV_INSN (insn);
26245 }
26246 }
26247
26248 /* Hook for pre-reload schedule - avoid motion of function arguments
26249 passed in likely spilled HW registers. */
26250 static void
26251 ix86_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
26252 {
26253 rtx_insn *insn;
26254 rtx_insn *first_arg = NULL;
26255 if (reload_completed)
26256 return;
26257 while (head != tail && DEBUG_INSN_P (head))
26258 head = NEXT_INSN (head);
26259 for (insn = tail; insn != head; insn = PREV_INSN (insn))
26260 if (INSN_P (insn) && CALL_P (insn))
26261 {
26262 first_arg = add_parameter_dependencies (insn, head);
26263 if (first_arg)
26264 {
26265 	  /* Add a dependee for the first argument to predecessors, but only if the
26266 	     region contains more than one block.  */
26267 basic_block bb = BLOCK_FOR_INSN (insn);
26268 int rgn = CONTAINING_RGN (bb->index);
26269 int nr_blks = RGN_NR_BLOCKS (rgn);
26270 /* Skip trivial regions and region head blocks that can have
26271 predecessors outside of region. */
26272 if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
26273 {
26274 edge e;
26275 edge_iterator ei;
26276
26277 /* Regions are SCCs with the exception of selective
26278 scheduling with pipelining of outer blocks enabled.
26279 So also check that immediate predecessors of a non-head
26280 block are in the same region. */
26281 FOR_EACH_EDGE (e, ei, bb->preds)
26282 {
26283 		/* Avoid creating loop-carried dependencies by using the
26284 		   topological ordering in the region.  */
26285 if (rgn == CONTAINING_RGN (e->src->index)
26286 && BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
26287 add_dependee_for_func_arg (first_arg, e->src);
26288 }
26289 }
26290 insn = first_arg;
26291 if (insn == head)
26292 break;
26293 }
26294 }
26295 else if (first_arg)
26296 avoid_func_arg_motion (first_arg, insn);
26297 }
26298
26299 /* Hook for pre-reload schedule - set priority of moves from likely spilled
26300    HW registers to the maximum, to schedule them as soon as possible.  These are
26301 moves from function argument registers at the top of the function entry
26302 and moves from function return value registers after call. */
26303 static int
26304 ix86_adjust_priority (rtx_insn *insn, int priority)
26305 {
26306 rtx set;
26307
26308 if (reload_completed)
26309 return priority;
26310
26311 if (!NONDEBUG_INSN_P (insn))
26312 return priority;
26313
26314 set = single_set (insn);
26315 if (set)
26316 {
26317 rtx tmp = SET_SRC (set);
26318 if (REG_P (tmp)
26319 && HARD_REGISTER_P (tmp)
26320 && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
26321 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
26322 return current_sched_info->sched_max_insns_priority;
26323 }
26324
26325 return priority;
26326 }
26327
26328 /* Model decoder of Core 2/i7.
26329    The hooks below for multipass scheduling (see haifa-sched.c:max_issue)
26330 track the instruction fetch block boundaries and make sure that long
26331 (9+ bytes) instructions are assigned to D0. */
26332
26333 /* Maximum length of an insn that can be handled by
26334 a secondary decoder unit. '8' for Core 2/i7. */
26335 static int core2i7_secondary_decoder_max_insn_size;
26336
26337 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
26338 '16' for Core 2/i7. */
26339 static int core2i7_ifetch_block_size;
26340
26341 /* Maximum number of instructions decoder can handle per cycle.
26342 '6' for Core 2/i7. */
26343 static int core2i7_ifetch_block_max_insns;
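/* With the values set below (a 16-byte fetch window, at most 6 insns per
   cycle, and an 8-byte limit for the secondary decoders), a 9-byte
   instruction can only be taken by decoder D0, and an insn that would
   overflow the current 16-byte window is deferred to the next cycle.  */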
26344
26345 typedef struct ix86_first_cycle_multipass_data_ *
26346 ix86_first_cycle_multipass_data_t;
26347 typedef const struct ix86_first_cycle_multipass_data_ *
26348 const_ix86_first_cycle_multipass_data_t;
26349
26350 /* A variable to store target state across calls to max_issue within
26351 one cycle. */
26352 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
26353 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
26354
26355 /* Initialize DATA. */
26356 static void
26357 core2i7_first_cycle_multipass_init (void *_data)
26358 {
26359 ix86_first_cycle_multipass_data_t data
26360 = (ix86_first_cycle_multipass_data_t) _data;
26361
26362 data->ifetch_block_len = 0;
26363 data->ifetch_block_n_insns = 0;
26364 data->ready_try_change = NULL;
26365 data->ready_try_change_size = 0;
26366 }
26367
26368 /* Advancing the cycle; reset ifetch block counts. */
26369 static void
26370 core2i7_dfa_post_advance_cycle (void)
26371 {
26372 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
26373
26374 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
26375
26376 data->ifetch_block_len = 0;
26377 data->ifetch_block_n_insns = 0;
26378 }
26379
26380 static int min_insn_size (rtx_insn *);
26381
26382 /* Filter out insns from ready_try that the core will not be able to issue
26383 on current cycle due to decoder. */
26384 static void
26385 core2i7_first_cycle_multipass_filter_ready_try
26386 (const_ix86_first_cycle_multipass_data_t data,
26387 signed char *ready_try, int n_ready, bool first_cycle_insn_p)
26388 {
26389 while (n_ready--)
26390 {
26391 rtx_insn *insn;
26392 int insn_size;
26393
26394 if (ready_try[n_ready])
26395 continue;
26396
26397 insn = get_ready_element (n_ready);
26398 insn_size = min_insn_size (insn);
26399
26400       if (/* If this is too long an insn for a secondary decoder ...  */
26401 (!first_cycle_insn_p
26402 && insn_size > core2i7_secondary_decoder_max_insn_size)
26403 /* ... or it would not fit into the ifetch block ... */
26404 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
26405 /* ... or the decoder is full already ... */
26406 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
26407 /* ... mask the insn out. */
26408 {
26409 ready_try[n_ready] = 1;
26410
26411 if (data->ready_try_change)
26412 bitmap_set_bit (data->ready_try_change, n_ready);
26413 }
26414 }
26415 }
26416
26417 /* Prepare for a new round of multipass lookahead scheduling. */
26418 static void
26419 core2i7_first_cycle_multipass_begin (void *_data,
26420 signed char *ready_try, int n_ready,
26421 bool first_cycle_insn_p)
26422 {
26423 ix86_first_cycle_multipass_data_t data
26424 = (ix86_first_cycle_multipass_data_t) _data;
26425 const_ix86_first_cycle_multipass_data_t prev_data
26426 = ix86_first_cycle_multipass_data;
26427
26428 /* Restore the state from the end of the previous round. */
26429 data->ifetch_block_len = prev_data->ifetch_block_len;
26430 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
26431
26432 /* Filter instructions that cannot be issued on current cycle due to
26433 decoder restrictions. */
26434 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
26435 first_cycle_insn_p);
26436 }
26437
26438 /* INSN is being issued in current solution. Account for its impact on
26439 the decoder model. */
26440 static void
26441 core2i7_first_cycle_multipass_issue (void *_data,
26442 signed char *ready_try, int n_ready,
26443 rtx_insn *insn, const void *_prev_data)
26444 {
26445 ix86_first_cycle_multipass_data_t data
26446 = (ix86_first_cycle_multipass_data_t) _data;
26447 const_ix86_first_cycle_multipass_data_t prev_data
26448 = (const_ix86_first_cycle_multipass_data_t) _prev_data;
26449
26450 int insn_size = min_insn_size (insn);
26451
26452 data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
26453 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
26454 gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
26455 && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
26456
26457 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
26458 if (!data->ready_try_change)
26459 {
26460 data->ready_try_change = sbitmap_alloc (n_ready);
26461 data->ready_try_change_size = n_ready;
26462 }
26463 else if (data->ready_try_change_size < n_ready)
26464 {
26465 data->ready_try_change = sbitmap_resize (data->ready_try_change,
26466 n_ready, 0);
26467 data->ready_try_change_size = n_ready;
26468 }
26469 bitmap_clear (data->ready_try_change);
26470
26471 /* Filter out insns from ready_try that the core will not be able to issue
26472 on current cycle due to decoder. */
26473 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
26474 false);
26475 }
26476
26477 /* Revert the effect on ready_try. */
26478 static void
26479 core2i7_first_cycle_multipass_backtrack (const void *_data,
26480 signed char *ready_try,
26481 int n_ready ATTRIBUTE_UNUSED)
26482 {
26483 const_ix86_first_cycle_multipass_data_t data
26484 = (const_ix86_first_cycle_multipass_data_t) _data;
26485 unsigned int i = 0;
26486 sbitmap_iterator sbi;
26487
26488 gcc_assert (bitmap_last_set_bit (data->ready_try_change) < n_ready);
26489 EXECUTE_IF_SET_IN_BITMAP (data->ready_try_change, 0, i, sbi)
26490 {
26491 ready_try[i] = 0;
26492 }
26493 }
26494
26495 /* Save the result of multipass lookahead scheduling for the next round. */
26496 static void
26497 core2i7_first_cycle_multipass_end (const void *_data)
26498 {
26499 const_ix86_first_cycle_multipass_data_t data
26500 = (const_ix86_first_cycle_multipass_data_t) _data;
26501 ix86_first_cycle_multipass_data_t next_data
26502 = ix86_first_cycle_multipass_data;
26503
26504 if (data != NULL)
26505 {
26506 next_data->ifetch_block_len = data->ifetch_block_len;
26507 next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
26508 }
26509 }
26510
26511 /* Deallocate target data. */
26512 static void
26513 core2i7_first_cycle_multipass_fini (void *_data)
26514 {
26515 ix86_first_cycle_multipass_data_t data
26516 = (ix86_first_cycle_multipass_data_t) _data;
26517
26518 if (data->ready_try_change)
26519 {
26520 sbitmap_free (data->ready_try_change);
26521 data->ready_try_change = NULL;
26522 data->ready_try_change_size = 0;
26523 }
26524 }
26525
26526 /* Prepare for scheduling pass. */
26527 static void
26528 ix86_sched_init_global (FILE *, int, int)
26529 {
26530 /* Install scheduling hooks for current CPU. Some of these hooks are used
26531 in time-critical parts of the scheduler, so we only set them up when
26532 they are actually used. */
26533 switch (ix86_tune)
26534 {
26535 case PROCESSOR_CORE2:
26536 case PROCESSOR_NEHALEM:
26537 case PROCESSOR_SANDYBRIDGE:
26538 case PROCESSOR_HASWELL:
26539 /* Do not perform multipass scheduling for pre-reload schedule
26540 to save compile time. */
26541 if (reload_completed)
26542 {
26543 targetm.sched.dfa_post_advance_cycle
26544 = core2i7_dfa_post_advance_cycle;
26545 targetm.sched.first_cycle_multipass_init
26546 = core2i7_first_cycle_multipass_init;
26547 targetm.sched.first_cycle_multipass_begin
26548 = core2i7_first_cycle_multipass_begin;
26549 targetm.sched.first_cycle_multipass_issue
26550 = core2i7_first_cycle_multipass_issue;
26551 targetm.sched.first_cycle_multipass_backtrack
26552 = core2i7_first_cycle_multipass_backtrack;
26553 targetm.sched.first_cycle_multipass_end
26554 = core2i7_first_cycle_multipass_end;
26555 targetm.sched.first_cycle_multipass_fini
26556 = core2i7_first_cycle_multipass_fini;
26557
26558 /* Set decoder parameters. */
26559 core2i7_secondary_decoder_max_insn_size = 8;
26560 core2i7_ifetch_block_size = 16;
26561 core2i7_ifetch_block_max_insns = 6;
26562 break;
26563 }
26564 /* ... Fall through ... */
26565 default:
26566 targetm.sched.dfa_post_advance_cycle = NULL;
26567 targetm.sched.first_cycle_multipass_init = NULL;
26568 targetm.sched.first_cycle_multipass_begin = NULL;
26569 targetm.sched.first_cycle_multipass_issue = NULL;
26570 targetm.sched.first_cycle_multipass_backtrack = NULL;
26571 targetm.sched.first_cycle_multipass_end = NULL;
26572 targetm.sched.first_cycle_multipass_fini = NULL;
26573 break;
26574 }
26575 }
26576
26577 \f
26578 /* Compute the alignment given to a constant that is being placed in memory.
26579 EXP is the constant and ALIGN is the alignment that the object would
26580 ordinarily have.
26581 The value of this function is used instead of that alignment to align
26582 the object. */
26583
26584 int
26585 ix86_constant_alignment (tree exp, int align)
26586 {
26587 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
26588 || TREE_CODE (exp) == INTEGER_CST)
26589 {
26590 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
26591 return 64;
26592 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
26593 return 128;
26594 }
26595 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
26596 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
26597 return BITS_PER_WORD;
26598
26599 return align;
26600 }
26601
26602 /* Compute the alignment for a static variable.
26603 TYPE is the data type, and ALIGN is the alignment that
26604 the object would ordinarily have. The value of this function is used
26605 instead of that alignment to align the object. */
26606
26607 int
26608 ix86_data_alignment (tree type, int align, bool opt)
26609 {
26610 /* GCC 4.8 and earlier used to incorrectly assume this alignment even
26611 for symbols from other compilation units or symbols that don't need
26612 to bind locally. In order to preserve some ABI compatibility with
26613 those compilers, ensure we don't decrease alignment from what we
26614 used to assume. */
26615
26616 int max_align_compat
26617 = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
26618
26619   /* A data structure equal to or greater than the size of a cache line
26620      (64 bytes in the Pentium 4 and other recent Intel processors, including
26621      processors based on the Intel Core microarchitecture) should be aligned
26622      so that its base address is a multiple of the cache line size.  */
26623
26624 int max_align
26625 = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);
26626
26627 if (max_align < BITS_PER_WORD)
26628 max_align = BITS_PER_WORD;
26629
26630 if (opt
26631 && AGGREGATE_TYPE_P (type)
26632 && TYPE_SIZE (type)
26633 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
26634 {
26635 if (wi::geu_p (TYPE_SIZE (type), max_align_compat)
26636 && align < max_align_compat)
26637 align = max_align_compat;
26638 if (wi::geu_p (TYPE_SIZE (type), max_align)
26639 && align < max_align)
26640 align = max_align;
26641 }
26642
26643   /* The x86-64 ABI requires arrays of 16 bytes or more to be aligned
26644      to a 16-byte boundary.  */
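  /* For instance, a file-scope "char buf[32]" is raised to 16-byte
     alignment by the check below, while "char buf[8]" keeps its
     natural alignment.  */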
26645 if (TARGET_64BIT)
26646 {
26647 if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
26648 && TYPE_SIZE (type)
26649 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
26650 && wi::geu_p (TYPE_SIZE (type), 128)
26651 && align < 128)
26652 return 128;
26653 }
26654
26655 if (!opt)
26656 return align;
26657
26658 if (TREE_CODE (type) == ARRAY_TYPE)
26659 {
26660 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
26661 return 64;
26662 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
26663 return 128;
26664 }
26665 else if (TREE_CODE (type) == COMPLEX_TYPE)
26666 {
26667
26668 if (TYPE_MODE (type) == DCmode && align < 64)
26669 return 64;
26670 if ((TYPE_MODE (type) == XCmode
26671 || TYPE_MODE (type) == TCmode) && align < 128)
26672 return 128;
26673 }
26674 else if ((TREE_CODE (type) == RECORD_TYPE
26675 || TREE_CODE (type) == UNION_TYPE
26676 || TREE_CODE (type) == QUAL_UNION_TYPE)
26677 && TYPE_FIELDS (type))
26678 {
26679 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
26680 return 64;
26681 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
26682 return 128;
26683 }
26684 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
26685 || TREE_CODE (type) == INTEGER_TYPE)
26686 {
26687 if (TYPE_MODE (type) == DFmode && align < 64)
26688 return 64;
26689 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
26690 return 128;
26691 }
26692
26693 return align;
26694 }
26695
26696 /* Compute the alignment for a local variable or a stack slot. EXP is
26697 the data type or decl itself, MODE is the widest mode available and
26698 ALIGN is the alignment that the object would ordinarily have. The
26699 value of this macro is used instead of that alignment to align the
26700 object. */
26701
26702 unsigned int
26703 ix86_local_alignment (tree exp, enum machine_mode mode,
26704 unsigned int align)
26705 {
26706 tree type, decl;
26707
26708 if (exp && DECL_P (exp))
26709 {
26710 type = TREE_TYPE (exp);
26711 decl = exp;
26712 }
26713 else
26714 {
26715 type = exp;
26716 decl = NULL;
26717 }
26718
26719 /* Don't do dynamic stack realignment for long long objects with
26720 -mpreferred-stack-boundary=2. */
26721 if (!TARGET_64BIT
26722 && align == 64
26723 && ix86_preferred_stack_boundary < 64
26724 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
26725 && (!type || !TYPE_USER_ALIGN (type))
26726 && (!decl || !DECL_USER_ALIGN (decl)))
26727 align = 32;
26728
26729 /* If TYPE is NULL, we are allocating a stack slot for caller-save
26730 register in MODE. We will return the largest alignment of XF
26731 and DF. */
26732 if (!type)
26733 {
26734 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
26735 align = GET_MODE_ALIGNMENT (DFmode);
26736 return align;
26737 }
26738
26739   /* The x86-64 ABI requires arrays of 16 bytes or more to be aligned
26740      to a 16-byte boundary.  The exact wording is:
26741
26742 An array uses the same alignment as its elements, except that a local or
26743 global array variable of length at least 16 bytes or
26744 a C99 variable-length array variable always has alignment of at least 16 bytes.
26745
26746      This was added to allow use of aligned SSE instructions on arrays.  This
26747      rule is meant for static storage (where the compiler cannot do the analysis
26748      by itself).  We follow it for automatic variables only when convenient.
26749      We fully control everything in the function being compiled, and functions
26750      from other units cannot rely on the alignment.
26751
26752      Exclude the va_list type.  It is the common case of a local array where
26753      we cannot benefit from the alignment.
26754
26755 TODO: Probably one should optimize for size only when var is not escaping. */
26756 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
26757 && TARGET_SSE)
26758 {
26759 if (AGGREGATE_TYPE_P (type)
26760 && (va_list_type_node == NULL_TREE
26761 || (TYPE_MAIN_VARIANT (type)
26762 != TYPE_MAIN_VARIANT (va_list_type_node)))
26763 && TYPE_SIZE (type)
26764 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
26765 && wi::geu_p (TYPE_SIZE (type), 16)
26766 && align < 128)
26767 return 128;
26768 }
26769 if (TREE_CODE (type) == ARRAY_TYPE)
26770 {
26771 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
26772 return 64;
26773 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
26774 return 128;
26775 }
26776 else if (TREE_CODE (type) == COMPLEX_TYPE)
26777 {
26778 if (TYPE_MODE (type) == DCmode && align < 64)
26779 return 64;
26780 if ((TYPE_MODE (type) == XCmode
26781 || TYPE_MODE (type) == TCmode) && align < 128)
26782 return 128;
26783 }
26784 else if ((TREE_CODE (type) == RECORD_TYPE
26785 || TREE_CODE (type) == UNION_TYPE
26786 || TREE_CODE (type) == QUAL_UNION_TYPE)
26787 && TYPE_FIELDS (type))
26788 {
26789 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
26790 return 64;
26791 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
26792 return 128;
26793 }
26794 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
26795 || TREE_CODE (type) == INTEGER_TYPE)
26796 {
26797
26798 if (TYPE_MODE (type) == DFmode && align < 64)
26799 return 64;
26800 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
26801 return 128;
26802 }
26803 return align;
26804 }
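/* Illustrative example (added here; not part of the upstream comments):
   under the array rule above, a local array such as

       double buf[4];          32 bytes, ARRAY_TYPE, not a va_list

   compiled for x86-64 with SSE enabled and the function optimized for
   speed is handed back an alignment of at least 128 bits by
   ix86_local_alignment, so the middle end may use aligned 16-byte SSE
   loads and stores on it.  */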
26805
26806 /* Compute the minimum required alignment for dynamic stack realignment
26807 purposes for a local variable, parameter or a stack slot. EXP is
26808 the data type or decl itself, MODE is its mode and ALIGN is the
26809 alignment that the object would ordinarily have. */
26810
26811 unsigned int
26812 ix86_minimum_alignment (tree exp, enum machine_mode mode,
26813 unsigned int align)
26814 {
26815 tree type, decl;
26816
26817 if (exp && DECL_P (exp))
26818 {
26819 type = TREE_TYPE (exp);
26820 decl = exp;
26821 }
26822 else
26823 {
26824 type = exp;
26825 decl = NULL;
26826 }
26827
26828 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
26829 return align;
26830
26831 /* Don't do dynamic stack realignment for long long objects with
26832 -mpreferred-stack-boundary=2. */
26833 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
26834 && (!type || !TYPE_USER_ALIGN (type))
26835 && (!decl || !DECL_USER_ALIGN (decl)))
26836 return 32;
26837
26838 return align;
26839 }
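/* Illustrative example (added; derived from the checks above): when
   compiling with -m32 -mpreferred-stack-boundary=2, a "long long" local
   without a user-specified alignment reports a minimum alignment of only
   32 bits here, so no dynamic stack realignment is forced just to give it
   its natural 64-bit alignment.  */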
26840 \f
26841 /* Find a location for the static chain incoming to a nested function.
26842 This is a register, unless all free registers are used by arguments. */
26843
26844 static rtx
26845 ix86_static_chain (const_tree fndecl, bool incoming_p)
26846 {
26847 unsigned regno;
26848
26849 if (!DECL_STATIC_CHAIN (fndecl))
26850 return NULL;
26851
26852 if (TARGET_64BIT)
26853 {
26854 /* We always use R10 in 64-bit mode. */
26855 regno = R10_REG;
26856 }
26857 else
26858 {
26859 tree fntype;
26860 unsigned int ccvt;
26861
26862 /* By default in 32-bit mode we use ECX to pass the static chain. */
26863 regno = CX_REG;
26864
26865 fntype = TREE_TYPE (fndecl);
26866 ccvt = ix86_get_callcvt (fntype);
26867 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
26868 {
26869 /* Fastcall functions use ecx/edx for arguments, which leaves
26870 us with EAX for the static chain.
26871 Thiscall functions use ecx for arguments, which also
26872 leaves us with EAX for the static chain. */
26873 regno = AX_REG;
26874 }
26875 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
26876 {
 26877 	      /* Thiscall functions use ecx for arguments, which leaves
 26878 	         us with EAX and EDX for the static chain.
 26879 	         For ABI compatibility we use EAX.  */
26880 regno = AX_REG;
26881 }
26882 else if (ix86_function_regparm (fntype, fndecl) == 3)
26883 {
26884 /* For regparm 3, we have no free call-clobbered registers in
26885 which to store the static chain. In order to implement this,
26886 we have the trampoline push the static chain to the stack.
26887 However, we can't push a value below the return address when
26888 we call the nested function directly, so we have to use an
26889 alternate entry point. For this we use ESI, and have the
26890 alternate entry point push ESI, so that things appear the
26891 same once we're executing the nested function. */
26892 if (incoming_p)
26893 {
26894 if (fndecl == current_function_decl)
26895 ix86_static_chain_on_stack = true;
26896 return gen_frame_mem (SImode,
26897 plus_constant (Pmode,
26898 arg_pointer_rtx, -8));
26899 }
26900 regno = SI_REG;
26901 }
26902 }
26903
26904 return gen_rtx_REG (Pmode, regno);
26905 }
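/* Summary of the register choices made above (added for clarity; derived
   from the code rather than from the original comments):

     64-bit                              R10
     32-bit, default conventions         ECX
     32-bit fastcall or thiscall         EAX
     32-bit regparm(3)                   no free register: direct callers
                                         pass the chain in ESI, the nested
                                         function reads it from the stack.  */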
26906
26907 /* Emit RTL insns to initialize the variable parts of a trampoline.
26908 FNDECL is the decl of the target address; M_TRAMP is a MEM for
26909 the trampoline, and CHAIN_VALUE is an RTX for the static chain
26910 to be passed to the target function. */
26911
26912 static void
26913 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
26914 {
26915 rtx mem, fnaddr;
26916 int opcode;
26917 int offset = 0;
26918
26919 fnaddr = XEXP (DECL_RTL (fndecl), 0);
26920
26921 if (TARGET_64BIT)
26922 {
26923 int size;
26924
 26925 	      /* Load the function address into r11.  Try to load the address
 26926 	         using the shorter movl instead of movabs.  We may want to support
 26927 	         movq for kernel mode, but the kernel does not use trampolines at
 26928 	         the moment.  FNADDR is a 32-bit address and may not be in
 26929 	         DImode when ptr_mode == SImode.  Always use movl in this
 26930 	         case.  */
26931 if (ptr_mode == SImode
26932 || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
26933 {
26934 fnaddr = copy_addr_to_reg (fnaddr);
26935
26936 mem = adjust_address (m_tramp, HImode, offset);
26937 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
26938
26939 mem = adjust_address (m_tramp, SImode, offset + 2);
26940 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
26941 offset += 6;
26942 }
26943 else
26944 {
26945 mem = adjust_address (m_tramp, HImode, offset);
26946 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
26947
26948 mem = adjust_address (m_tramp, DImode, offset + 2);
26949 emit_move_insn (mem, fnaddr);
26950 offset += 10;
26951 }
26952
 26953 	      /* Load the static chain into r10 using movabs.  Use the shorter
 26954 	         movl instead of movabs when ptr_mode == SImode.  */
26955 if (ptr_mode == SImode)
26956 {
26957 opcode = 0xba41;
26958 size = 6;
26959 }
26960 else
26961 {
26962 opcode = 0xba49;
26963 size = 10;
26964 }
26965
26966 mem = adjust_address (m_tramp, HImode, offset);
26967 emit_move_insn (mem, gen_int_mode (opcode, HImode));
26968
26969 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
26970 emit_move_insn (mem, chain_value);
26971 offset += size;
26972
26973 /* Jump to r11; the last (unused) byte is a nop, only there to
26974 pad the write out to a single 32-bit store. */
26975 mem = adjust_address (m_tramp, SImode, offset);
26976 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
26977 offset += 4;
26978 }
26979 else
26980 {
26981 rtx disp, chain;
26982
26983 /* Depending on the static chain location, either load a register
26984 with a constant, or push the constant to the stack. All of the
26985 instructions are the same size. */
26986 chain = ix86_static_chain (fndecl, true);
26987 if (REG_P (chain))
26988 {
26989 switch (REGNO (chain))
26990 {
26991 case AX_REG:
26992 opcode = 0xb8; break;
26993 case CX_REG:
26994 opcode = 0xb9; break;
26995 default:
26996 gcc_unreachable ();
26997 }
26998 }
26999 else
27000 opcode = 0x68;
27001
27002 mem = adjust_address (m_tramp, QImode, offset);
27003 emit_move_insn (mem, gen_int_mode (opcode, QImode));
27004
27005 mem = adjust_address (m_tramp, SImode, offset + 1);
27006 emit_move_insn (mem, chain_value);
27007 offset += 5;
27008
27009 mem = adjust_address (m_tramp, QImode, offset);
27010 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
27011
27012 mem = adjust_address (m_tramp, SImode, offset + 1);
27013
 27014 	      /* Compute the offset from the end of the jmp to the target function.
 27015 	         When the trampoline stores the static chain on the stack, we need
 27016 	         to skip the first insn of the target, which pushes the (call-saved)
 27017 	         static chain register; this push is 1 byte.  */
27018 offset += 5;
27019 disp = expand_binop (SImode, sub_optab, fnaddr,
27020 plus_constant (Pmode, XEXP (m_tramp, 0),
27021 offset - (MEM_P (chain) ? 1 : 0)),
27022 NULL_RTX, 1, OPTAB_DIRECT);
27023 emit_move_insn (mem, disp);
27024 }
27025
27026 gcc_assert (offset <= TRAMPOLINE_SIZE);
27027
27028 #ifdef HAVE_ENABLE_EXECUTE_STACK
27029 #ifdef CHECK_EXECUTE_STACK_ENABLED
27030 if (CHECK_EXECUTE_STACK_ENABLED)
27031 #endif
27032 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
27033 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
27034 #endif
27035 }
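/* For reference (added; derived from the opcodes emitted above), the
   64-bit trampoline written by ix86_trampoline_init is, when movabs is
   needed for both immediates:

       49 bb <imm64>    movabs $fnaddr, %r11
       49 ba <imm64>    movabs $chain,  %r10
       49 ff e3         jmp    *%r11
       90               nop              (pads the final 32-bit store)

   with the 41 bb / 41 ba (movl) forms used instead when a 32-bit
   immediate suffices.  The 32-bit trampoline is either

       b8/b9 <imm32>    mov    $chain, %eax / %ecx
       e9 <rel32>       jmp    fnaddr

   or, for regparm(3) targets where no register is free,

       68 <imm32>       push   $chain
       e9 <rel32>       jmp    fnaddr+1   (skip the 1-byte push of %esi).  */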
27036 \f
27037 /* The following file contains several enumerations and data structures
27038 built from the definitions in i386-builtin-types.def. */
27039
27040 #include "i386-builtin-types.inc"
27041
27042 /* Table for the ix86 builtin non-function types. */
27043 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
27044
27045 /* Retrieve an element from the above table, building some of
27046 the types lazily. */
27047
27048 static tree
27049 ix86_get_builtin_type (enum ix86_builtin_type tcode)
27050 {
27051 unsigned int index;
27052 tree type, itype;
27053
27054 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
27055
27056 type = ix86_builtin_type_tab[(int) tcode];
27057 if (type != NULL)
27058 return type;
27059
27060 gcc_assert (tcode > IX86_BT_LAST_PRIM);
27061 if (tcode <= IX86_BT_LAST_VECT)
27062 {
27063 enum machine_mode mode;
27064
27065 index = tcode - IX86_BT_LAST_PRIM - 1;
27066 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
27067 mode = ix86_builtin_type_vect_mode[index];
27068
27069 type = build_vector_type_for_mode (itype, mode);
27070 }
27071 else
27072 {
27073 int quals;
27074
27075 index = tcode - IX86_BT_LAST_VECT - 1;
27076 if (tcode <= IX86_BT_LAST_PTR)
27077 quals = TYPE_UNQUALIFIED;
27078 else
27079 quals = TYPE_QUAL_CONST;
27080
27081 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
27082 if (quals != TYPE_UNQUALIFIED)
27083 itype = build_qualified_type (itype, quals);
27084
27085 type = build_pointer_type (itype);
27086 }
27087
27088 ix86_builtin_type_tab[(int) tcode] = type;
27089 return type;
27090 }
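/* Illustrative example (added; the exact enumerator names come from
   i386-builtin-types.def and are assumed here): a vector code such as
   IX86_BT_V4SF is built lazily as
   build_vector_type_for_mode (float_type_node, V4SFmode) the first time
   it is requested, and a pointer code such as IX86_BT_PCFLOAT becomes a
   pointer to const float; both are then cached in ix86_builtin_type_tab
   so later lookups are a plain table read.  */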
27091
27092 /* Table for the ix86 builtin function types. */
27093 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
27094
27095 /* Retrieve an element from the above table, building some of
27096 the types lazily. */
27097
27098 static tree
27099 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
27100 {
27101 tree type;
27102
27103 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
27104
27105 type = ix86_builtin_func_type_tab[(int) tcode];
27106 if (type != NULL)
27107 return type;
27108
27109 if (tcode <= IX86_BT_LAST_FUNC)
27110 {
27111 unsigned start = ix86_builtin_func_start[(int) tcode];
27112 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
27113 tree rtype, atype, args = void_list_node;
27114 unsigned i;
27115
27116 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
27117 for (i = after - 1; i > start; --i)
27118 {
27119 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
27120 args = tree_cons (NULL, atype, args);
27121 }
27122
27123 type = build_function_type (rtype, args);
27124 }
27125 else
27126 {
27127 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
27128 enum ix86_builtin_func_type icode;
27129
27130 icode = ix86_builtin_func_alias_base[index];
27131 type = ix86_get_builtin_func_type (icode);
27132 }
27133
27134 ix86_builtin_func_type_tab[(int) tcode] = type;
27135 return type;
27136 }
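/* Illustrative example (added; the enumerator name follows the FTYPE
   naming scheme generated from i386-builtin-types.def): a code such as
   V4SF_FTYPE_V4SF_V4SF is turned into the function type
   "V4SF (V4SF, V4SF)" by reading its slice of ix86_builtin_func_args
   (return type first, then the arguments), while the alias codes past
   IX86_BT_LAST_FUNC simply reuse the type built for the function code
   they alias.  */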
27137
27138
27139 /* Codes for all the SSE/MMX builtins. */
27140 enum ix86_builtins
27141 {
27142 IX86_BUILTIN_ADDPS,
27143 IX86_BUILTIN_ADDSS,
27144 IX86_BUILTIN_DIVPS,
27145 IX86_BUILTIN_DIVSS,
27146 IX86_BUILTIN_MULPS,
27147 IX86_BUILTIN_MULSS,
27148 IX86_BUILTIN_SUBPS,
27149 IX86_BUILTIN_SUBSS,
27150
27151 IX86_BUILTIN_CMPEQPS,
27152 IX86_BUILTIN_CMPLTPS,
27153 IX86_BUILTIN_CMPLEPS,
27154 IX86_BUILTIN_CMPGTPS,
27155 IX86_BUILTIN_CMPGEPS,
27156 IX86_BUILTIN_CMPNEQPS,
27157 IX86_BUILTIN_CMPNLTPS,
27158 IX86_BUILTIN_CMPNLEPS,
27159 IX86_BUILTIN_CMPNGTPS,
27160 IX86_BUILTIN_CMPNGEPS,
27161 IX86_BUILTIN_CMPORDPS,
27162 IX86_BUILTIN_CMPUNORDPS,
27163 IX86_BUILTIN_CMPEQSS,
27164 IX86_BUILTIN_CMPLTSS,
27165 IX86_BUILTIN_CMPLESS,
27166 IX86_BUILTIN_CMPNEQSS,
27167 IX86_BUILTIN_CMPNLTSS,
27168 IX86_BUILTIN_CMPNLESS,
27169 IX86_BUILTIN_CMPORDSS,
27170 IX86_BUILTIN_CMPUNORDSS,
27171
27172 IX86_BUILTIN_COMIEQSS,
27173 IX86_BUILTIN_COMILTSS,
27174 IX86_BUILTIN_COMILESS,
27175 IX86_BUILTIN_COMIGTSS,
27176 IX86_BUILTIN_COMIGESS,
27177 IX86_BUILTIN_COMINEQSS,
27178 IX86_BUILTIN_UCOMIEQSS,
27179 IX86_BUILTIN_UCOMILTSS,
27180 IX86_BUILTIN_UCOMILESS,
27181 IX86_BUILTIN_UCOMIGTSS,
27182 IX86_BUILTIN_UCOMIGESS,
27183 IX86_BUILTIN_UCOMINEQSS,
27184
27185 IX86_BUILTIN_CVTPI2PS,
27186 IX86_BUILTIN_CVTPS2PI,
27187 IX86_BUILTIN_CVTSI2SS,
27188 IX86_BUILTIN_CVTSI642SS,
27189 IX86_BUILTIN_CVTSS2SI,
27190 IX86_BUILTIN_CVTSS2SI64,
27191 IX86_BUILTIN_CVTTPS2PI,
27192 IX86_BUILTIN_CVTTSS2SI,
27193 IX86_BUILTIN_CVTTSS2SI64,
27194
27195 IX86_BUILTIN_MAXPS,
27196 IX86_BUILTIN_MAXSS,
27197 IX86_BUILTIN_MINPS,
27198 IX86_BUILTIN_MINSS,
27199
27200 IX86_BUILTIN_LOADUPS,
27201 IX86_BUILTIN_STOREUPS,
27202 IX86_BUILTIN_MOVSS,
27203
27204 IX86_BUILTIN_MOVHLPS,
27205 IX86_BUILTIN_MOVLHPS,
27206 IX86_BUILTIN_LOADHPS,
27207 IX86_BUILTIN_LOADLPS,
27208 IX86_BUILTIN_STOREHPS,
27209 IX86_BUILTIN_STORELPS,
27210
27211 IX86_BUILTIN_MASKMOVQ,
27212 IX86_BUILTIN_MOVMSKPS,
27213 IX86_BUILTIN_PMOVMSKB,
27214
27215 IX86_BUILTIN_MOVNTPS,
27216 IX86_BUILTIN_MOVNTQ,
27217
27218 IX86_BUILTIN_LOADDQU,
27219 IX86_BUILTIN_STOREDQU,
27220
27221 IX86_BUILTIN_PACKSSWB,
27222 IX86_BUILTIN_PACKSSDW,
27223 IX86_BUILTIN_PACKUSWB,
27224
27225 IX86_BUILTIN_PADDB,
27226 IX86_BUILTIN_PADDW,
27227 IX86_BUILTIN_PADDD,
27228 IX86_BUILTIN_PADDQ,
27229 IX86_BUILTIN_PADDSB,
27230 IX86_BUILTIN_PADDSW,
27231 IX86_BUILTIN_PADDUSB,
27232 IX86_BUILTIN_PADDUSW,
27233 IX86_BUILTIN_PSUBB,
27234 IX86_BUILTIN_PSUBW,
27235 IX86_BUILTIN_PSUBD,
27236 IX86_BUILTIN_PSUBQ,
27237 IX86_BUILTIN_PSUBSB,
27238 IX86_BUILTIN_PSUBSW,
27239 IX86_BUILTIN_PSUBUSB,
27240 IX86_BUILTIN_PSUBUSW,
27241
27242 IX86_BUILTIN_PAND,
27243 IX86_BUILTIN_PANDN,
27244 IX86_BUILTIN_POR,
27245 IX86_BUILTIN_PXOR,
27246
27247 IX86_BUILTIN_PAVGB,
27248 IX86_BUILTIN_PAVGW,
27249
27250 IX86_BUILTIN_PCMPEQB,
27251 IX86_BUILTIN_PCMPEQW,
27252 IX86_BUILTIN_PCMPEQD,
27253 IX86_BUILTIN_PCMPGTB,
27254 IX86_BUILTIN_PCMPGTW,
27255 IX86_BUILTIN_PCMPGTD,
27256
27257 IX86_BUILTIN_PMADDWD,
27258
27259 IX86_BUILTIN_PMAXSW,
27260 IX86_BUILTIN_PMAXUB,
27261 IX86_BUILTIN_PMINSW,
27262 IX86_BUILTIN_PMINUB,
27263
27264 IX86_BUILTIN_PMULHUW,
27265 IX86_BUILTIN_PMULHW,
27266 IX86_BUILTIN_PMULLW,
27267
27268 IX86_BUILTIN_PSADBW,
27269 IX86_BUILTIN_PSHUFW,
27270
27271 IX86_BUILTIN_PSLLW,
27272 IX86_BUILTIN_PSLLD,
27273 IX86_BUILTIN_PSLLQ,
27274 IX86_BUILTIN_PSRAW,
27275 IX86_BUILTIN_PSRAD,
27276 IX86_BUILTIN_PSRLW,
27277 IX86_BUILTIN_PSRLD,
27278 IX86_BUILTIN_PSRLQ,
27279 IX86_BUILTIN_PSLLWI,
27280 IX86_BUILTIN_PSLLDI,
27281 IX86_BUILTIN_PSLLQI,
27282 IX86_BUILTIN_PSRAWI,
27283 IX86_BUILTIN_PSRADI,
27284 IX86_BUILTIN_PSRLWI,
27285 IX86_BUILTIN_PSRLDI,
27286 IX86_BUILTIN_PSRLQI,
27287
27288 IX86_BUILTIN_PUNPCKHBW,
27289 IX86_BUILTIN_PUNPCKHWD,
27290 IX86_BUILTIN_PUNPCKHDQ,
27291 IX86_BUILTIN_PUNPCKLBW,
27292 IX86_BUILTIN_PUNPCKLWD,
27293 IX86_BUILTIN_PUNPCKLDQ,
27294
27295 IX86_BUILTIN_SHUFPS,
27296
27297 IX86_BUILTIN_RCPPS,
27298 IX86_BUILTIN_RCPSS,
27299 IX86_BUILTIN_RSQRTPS,
27300 IX86_BUILTIN_RSQRTPS_NR,
27301 IX86_BUILTIN_RSQRTSS,
27302 IX86_BUILTIN_RSQRTF,
27303 IX86_BUILTIN_SQRTPS,
27304 IX86_BUILTIN_SQRTPS_NR,
27305 IX86_BUILTIN_SQRTSS,
27306
27307 IX86_BUILTIN_UNPCKHPS,
27308 IX86_BUILTIN_UNPCKLPS,
27309
27310 IX86_BUILTIN_ANDPS,
27311 IX86_BUILTIN_ANDNPS,
27312 IX86_BUILTIN_ORPS,
27313 IX86_BUILTIN_XORPS,
27314
27315 IX86_BUILTIN_EMMS,
27316 IX86_BUILTIN_LDMXCSR,
27317 IX86_BUILTIN_STMXCSR,
27318 IX86_BUILTIN_SFENCE,
27319
27320 IX86_BUILTIN_FXSAVE,
27321 IX86_BUILTIN_FXRSTOR,
27322 IX86_BUILTIN_FXSAVE64,
27323 IX86_BUILTIN_FXRSTOR64,
27324
27325 IX86_BUILTIN_XSAVE,
27326 IX86_BUILTIN_XRSTOR,
27327 IX86_BUILTIN_XSAVE64,
27328 IX86_BUILTIN_XRSTOR64,
27329
27330 IX86_BUILTIN_XSAVEOPT,
27331 IX86_BUILTIN_XSAVEOPT64,
27332
27333 IX86_BUILTIN_XSAVEC,
27334 IX86_BUILTIN_XSAVEC64,
27335
27336 IX86_BUILTIN_XSAVES,
27337 IX86_BUILTIN_XRSTORS,
27338 IX86_BUILTIN_XSAVES64,
27339 IX86_BUILTIN_XRSTORS64,
27340
27341 /* 3DNow! Original */
27342 IX86_BUILTIN_FEMMS,
27343 IX86_BUILTIN_PAVGUSB,
27344 IX86_BUILTIN_PF2ID,
27345 IX86_BUILTIN_PFACC,
27346 IX86_BUILTIN_PFADD,
27347 IX86_BUILTIN_PFCMPEQ,
27348 IX86_BUILTIN_PFCMPGE,
27349 IX86_BUILTIN_PFCMPGT,
27350 IX86_BUILTIN_PFMAX,
27351 IX86_BUILTIN_PFMIN,
27352 IX86_BUILTIN_PFMUL,
27353 IX86_BUILTIN_PFRCP,
27354 IX86_BUILTIN_PFRCPIT1,
27355 IX86_BUILTIN_PFRCPIT2,
27356 IX86_BUILTIN_PFRSQIT1,
27357 IX86_BUILTIN_PFRSQRT,
27358 IX86_BUILTIN_PFSUB,
27359 IX86_BUILTIN_PFSUBR,
27360 IX86_BUILTIN_PI2FD,
27361 IX86_BUILTIN_PMULHRW,
27362
27363 /* 3DNow! Athlon Extensions */
27364 IX86_BUILTIN_PF2IW,
27365 IX86_BUILTIN_PFNACC,
27366 IX86_BUILTIN_PFPNACC,
27367 IX86_BUILTIN_PI2FW,
27368 IX86_BUILTIN_PSWAPDSI,
27369 IX86_BUILTIN_PSWAPDSF,
27370
27371 /* SSE2 */
27372 IX86_BUILTIN_ADDPD,
27373 IX86_BUILTIN_ADDSD,
27374 IX86_BUILTIN_DIVPD,
27375 IX86_BUILTIN_DIVSD,
27376 IX86_BUILTIN_MULPD,
27377 IX86_BUILTIN_MULSD,
27378 IX86_BUILTIN_SUBPD,
27379 IX86_BUILTIN_SUBSD,
27380
27381 IX86_BUILTIN_CMPEQPD,
27382 IX86_BUILTIN_CMPLTPD,
27383 IX86_BUILTIN_CMPLEPD,
27384 IX86_BUILTIN_CMPGTPD,
27385 IX86_BUILTIN_CMPGEPD,
27386 IX86_BUILTIN_CMPNEQPD,
27387 IX86_BUILTIN_CMPNLTPD,
27388 IX86_BUILTIN_CMPNLEPD,
27389 IX86_BUILTIN_CMPNGTPD,
27390 IX86_BUILTIN_CMPNGEPD,
27391 IX86_BUILTIN_CMPORDPD,
27392 IX86_BUILTIN_CMPUNORDPD,
27393 IX86_BUILTIN_CMPEQSD,
27394 IX86_BUILTIN_CMPLTSD,
27395 IX86_BUILTIN_CMPLESD,
27396 IX86_BUILTIN_CMPNEQSD,
27397 IX86_BUILTIN_CMPNLTSD,
27398 IX86_BUILTIN_CMPNLESD,
27399 IX86_BUILTIN_CMPORDSD,
27400 IX86_BUILTIN_CMPUNORDSD,
27401
27402 IX86_BUILTIN_COMIEQSD,
27403 IX86_BUILTIN_COMILTSD,
27404 IX86_BUILTIN_COMILESD,
27405 IX86_BUILTIN_COMIGTSD,
27406 IX86_BUILTIN_COMIGESD,
27407 IX86_BUILTIN_COMINEQSD,
27408 IX86_BUILTIN_UCOMIEQSD,
27409 IX86_BUILTIN_UCOMILTSD,
27410 IX86_BUILTIN_UCOMILESD,
27411 IX86_BUILTIN_UCOMIGTSD,
27412 IX86_BUILTIN_UCOMIGESD,
27413 IX86_BUILTIN_UCOMINEQSD,
27414
27415 IX86_BUILTIN_MAXPD,
27416 IX86_BUILTIN_MAXSD,
27417 IX86_BUILTIN_MINPD,
27418 IX86_BUILTIN_MINSD,
27419
27420 IX86_BUILTIN_ANDPD,
27421 IX86_BUILTIN_ANDNPD,
27422 IX86_BUILTIN_ORPD,
27423 IX86_BUILTIN_XORPD,
27424
27425 IX86_BUILTIN_SQRTPD,
27426 IX86_BUILTIN_SQRTSD,
27427
27428 IX86_BUILTIN_UNPCKHPD,
27429 IX86_BUILTIN_UNPCKLPD,
27430
27431 IX86_BUILTIN_SHUFPD,
27432
27433 IX86_BUILTIN_LOADUPD,
27434 IX86_BUILTIN_STOREUPD,
27435 IX86_BUILTIN_MOVSD,
27436
27437 IX86_BUILTIN_LOADHPD,
27438 IX86_BUILTIN_LOADLPD,
27439
27440 IX86_BUILTIN_CVTDQ2PD,
27441 IX86_BUILTIN_CVTDQ2PS,
27442
27443 IX86_BUILTIN_CVTPD2DQ,
27444 IX86_BUILTIN_CVTPD2PI,
27445 IX86_BUILTIN_CVTPD2PS,
27446 IX86_BUILTIN_CVTTPD2DQ,
27447 IX86_BUILTIN_CVTTPD2PI,
27448
27449 IX86_BUILTIN_CVTPI2PD,
27450 IX86_BUILTIN_CVTSI2SD,
27451 IX86_BUILTIN_CVTSI642SD,
27452
27453 IX86_BUILTIN_CVTSD2SI,
27454 IX86_BUILTIN_CVTSD2SI64,
27455 IX86_BUILTIN_CVTSD2SS,
27456 IX86_BUILTIN_CVTSS2SD,
27457 IX86_BUILTIN_CVTTSD2SI,
27458 IX86_BUILTIN_CVTTSD2SI64,
27459
27460 IX86_BUILTIN_CVTPS2DQ,
27461 IX86_BUILTIN_CVTPS2PD,
27462 IX86_BUILTIN_CVTTPS2DQ,
27463
27464 IX86_BUILTIN_MOVNTI,
27465 IX86_BUILTIN_MOVNTI64,
27466 IX86_BUILTIN_MOVNTPD,
27467 IX86_BUILTIN_MOVNTDQ,
27468
27469 IX86_BUILTIN_MOVQ128,
27470
27471 /* SSE2 MMX */
27472 IX86_BUILTIN_MASKMOVDQU,
27473 IX86_BUILTIN_MOVMSKPD,
27474 IX86_BUILTIN_PMOVMSKB128,
27475
27476 IX86_BUILTIN_PACKSSWB128,
27477 IX86_BUILTIN_PACKSSDW128,
27478 IX86_BUILTIN_PACKUSWB128,
27479
27480 IX86_BUILTIN_PADDB128,
27481 IX86_BUILTIN_PADDW128,
27482 IX86_BUILTIN_PADDD128,
27483 IX86_BUILTIN_PADDQ128,
27484 IX86_BUILTIN_PADDSB128,
27485 IX86_BUILTIN_PADDSW128,
27486 IX86_BUILTIN_PADDUSB128,
27487 IX86_BUILTIN_PADDUSW128,
27488 IX86_BUILTIN_PSUBB128,
27489 IX86_BUILTIN_PSUBW128,
27490 IX86_BUILTIN_PSUBD128,
27491 IX86_BUILTIN_PSUBQ128,
27492 IX86_BUILTIN_PSUBSB128,
27493 IX86_BUILTIN_PSUBSW128,
27494 IX86_BUILTIN_PSUBUSB128,
27495 IX86_BUILTIN_PSUBUSW128,
27496
27497 IX86_BUILTIN_PAND128,
27498 IX86_BUILTIN_PANDN128,
27499 IX86_BUILTIN_POR128,
27500 IX86_BUILTIN_PXOR128,
27501
27502 IX86_BUILTIN_PAVGB128,
27503 IX86_BUILTIN_PAVGW128,
27504
27505 IX86_BUILTIN_PCMPEQB128,
27506 IX86_BUILTIN_PCMPEQW128,
27507 IX86_BUILTIN_PCMPEQD128,
27508 IX86_BUILTIN_PCMPGTB128,
27509 IX86_BUILTIN_PCMPGTW128,
27510 IX86_BUILTIN_PCMPGTD128,
27511
27512 IX86_BUILTIN_PMADDWD128,
27513
27514 IX86_BUILTIN_PMAXSW128,
27515 IX86_BUILTIN_PMAXUB128,
27516 IX86_BUILTIN_PMINSW128,
27517 IX86_BUILTIN_PMINUB128,
27518
27519 IX86_BUILTIN_PMULUDQ,
27520 IX86_BUILTIN_PMULUDQ128,
27521 IX86_BUILTIN_PMULHUW128,
27522 IX86_BUILTIN_PMULHW128,
27523 IX86_BUILTIN_PMULLW128,
27524
27525 IX86_BUILTIN_PSADBW128,
27526 IX86_BUILTIN_PSHUFHW,
27527 IX86_BUILTIN_PSHUFLW,
27528 IX86_BUILTIN_PSHUFD,
27529
27530 IX86_BUILTIN_PSLLDQI128,
27531 IX86_BUILTIN_PSLLWI128,
27532 IX86_BUILTIN_PSLLDI128,
27533 IX86_BUILTIN_PSLLQI128,
27534 IX86_BUILTIN_PSRAWI128,
27535 IX86_BUILTIN_PSRADI128,
27536 IX86_BUILTIN_PSRLDQI128,
27537 IX86_BUILTIN_PSRLWI128,
27538 IX86_BUILTIN_PSRLDI128,
27539 IX86_BUILTIN_PSRLQI128,
27540
27541 IX86_BUILTIN_PSLLDQ128,
27542 IX86_BUILTIN_PSLLW128,
27543 IX86_BUILTIN_PSLLD128,
27544 IX86_BUILTIN_PSLLQ128,
27545 IX86_BUILTIN_PSRAW128,
27546 IX86_BUILTIN_PSRAD128,
27547 IX86_BUILTIN_PSRLW128,
27548 IX86_BUILTIN_PSRLD128,
27549 IX86_BUILTIN_PSRLQ128,
27550
27551 IX86_BUILTIN_PUNPCKHBW128,
27552 IX86_BUILTIN_PUNPCKHWD128,
27553 IX86_BUILTIN_PUNPCKHDQ128,
27554 IX86_BUILTIN_PUNPCKHQDQ128,
27555 IX86_BUILTIN_PUNPCKLBW128,
27556 IX86_BUILTIN_PUNPCKLWD128,
27557 IX86_BUILTIN_PUNPCKLDQ128,
27558 IX86_BUILTIN_PUNPCKLQDQ128,
27559
27560 IX86_BUILTIN_CLFLUSH,
27561 IX86_BUILTIN_MFENCE,
27562 IX86_BUILTIN_LFENCE,
27563 IX86_BUILTIN_PAUSE,
27564
27565 IX86_BUILTIN_FNSTENV,
27566 IX86_BUILTIN_FLDENV,
27567 IX86_BUILTIN_FNSTSW,
27568 IX86_BUILTIN_FNCLEX,
27569
27570 IX86_BUILTIN_BSRSI,
27571 IX86_BUILTIN_BSRDI,
27572 IX86_BUILTIN_RDPMC,
27573 IX86_BUILTIN_RDTSC,
27574 IX86_BUILTIN_RDTSCP,
27575 IX86_BUILTIN_ROLQI,
27576 IX86_BUILTIN_ROLHI,
27577 IX86_BUILTIN_RORQI,
27578 IX86_BUILTIN_RORHI,
27579
27580 /* SSE3. */
27581 IX86_BUILTIN_ADDSUBPS,
27582 IX86_BUILTIN_HADDPS,
27583 IX86_BUILTIN_HSUBPS,
27584 IX86_BUILTIN_MOVSHDUP,
27585 IX86_BUILTIN_MOVSLDUP,
27586 IX86_BUILTIN_ADDSUBPD,
27587 IX86_BUILTIN_HADDPD,
27588 IX86_BUILTIN_HSUBPD,
27589 IX86_BUILTIN_LDDQU,
27590
27591 IX86_BUILTIN_MONITOR,
27592 IX86_BUILTIN_MWAIT,
27593
27594 /* SSSE3. */
27595 IX86_BUILTIN_PHADDW,
27596 IX86_BUILTIN_PHADDD,
27597 IX86_BUILTIN_PHADDSW,
27598 IX86_BUILTIN_PHSUBW,
27599 IX86_BUILTIN_PHSUBD,
27600 IX86_BUILTIN_PHSUBSW,
27601 IX86_BUILTIN_PMADDUBSW,
27602 IX86_BUILTIN_PMULHRSW,
27603 IX86_BUILTIN_PSHUFB,
27604 IX86_BUILTIN_PSIGNB,
27605 IX86_BUILTIN_PSIGNW,
27606 IX86_BUILTIN_PSIGND,
27607 IX86_BUILTIN_PALIGNR,
27608 IX86_BUILTIN_PABSB,
27609 IX86_BUILTIN_PABSW,
27610 IX86_BUILTIN_PABSD,
27611
27612 IX86_BUILTIN_PHADDW128,
27613 IX86_BUILTIN_PHADDD128,
27614 IX86_BUILTIN_PHADDSW128,
27615 IX86_BUILTIN_PHSUBW128,
27616 IX86_BUILTIN_PHSUBD128,
27617 IX86_BUILTIN_PHSUBSW128,
27618 IX86_BUILTIN_PMADDUBSW128,
27619 IX86_BUILTIN_PMULHRSW128,
27620 IX86_BUILTIN_PSHUFB128,
27621 IX86_BUILTIN_PSIGNB128,
27622 IX86_BUILTIN_PSIGNW128,
27623 IX86_BUILTIN_PSIGND128,
27624 IX86_BUILTIN_PALIGNR128,
27625 IX86_BUILTIN_PABSB128,
27626 IX86_BUILTIN_PABSW128,
27627 IX86_BUILTIN_PABSD128,
27628
27629 /* AMDFAM10 - SSE4A New Instructions. */
27630 IX86_BUILTIN_MOVNTSD,
27631 IX86_BUILTIN_MOVNTSS,
27632 IX86_BUILTIN_EXTRQI,
27633 IX86_BUILTIN_EXTRQ,
27634 IX86_BUILTIN_INSERTQI,
27635 IX86_BUILTIN_INSERTQ,
27636
27637 /* SSE4.1. */
27638 IX86_BUILTIN_BLENDPD,
27639 IX86_BUILTIN_BLENDPS,
27640 IX86_BUILTIN_BLENDVPD,
27641 IX86_BUILTIN_BLENDVPS,
27642 IX86_BUILTIN_PBLENDVB128,
27643 IX86_BUILTIN_PBLENDW128,
27644
27645 IX86_BUILTIN_DPPD,
27646 IX86_BUILTIN_DPPS,
27647
27648 IX86_BUILTIN_INSERTPS128,
27649
27650 IX86_BUILTIN_MOVNTDQA,
27651 IX86_BUILTIN_MPSADBW128,
27652 IX86_BUILTIN_PACKUSDW128,
27653 IX86_BUILTIN_PCMPEQQ,
27654 IX86_BUILTIN_PHMINPOSUW128,
27655
27656 IX86_BUILTIN_PMAXSB128,
27657 IX86_BUILTIN_PMAXSD128,
27658 IX86_BUILTIN_PMAXUD128,
27659 IX86_BUILTIN_PMAXUW128,
27660
27661 IX86_BUILTIN_PMINSB128,
27662 IX86_BUILTIN_PMINSD128,
27663 IX86_BUILTIN_PMINUD128,
27664 IX86_BUILTIN_PMINUW128,
27665
27666 IX86_BUILTIN_PMOVSXBW128,
27667 IX86_BUILTIN_PMOVSXBD128,
27668 IX86_BUILTIN_PMOVSXBQ128,
27669 IX86_BUILTIN_PMOVSXWD128,
27670 IX86_BUILTIN_PMOVSXWQ128,
27671 IX86_BUILTIN_PMOVSXDQ128,
27672
27673 IX86_BUILTIN_PMOVZXBW128,
27674 IX86_BUILTIN_PMOVZXBD128,
27675 IX86_BUILTIN_PMOVZXBQ128,
27676 IX86_BUILTIN_PMOVZXWD128,
27677 IX86_BUILTIN_PMOVZXWQ128,
27678 IX86_BUILTIN_PMOVZXDQ128,
27679
27680 IX86_BUILTIN_PMULDQ128,
27681 IX86_BUILTIN_PMULLD128,
27682
27683 IX86_BUILTIN_ROUNDSD,
27684 IX86_BUILTIN_ROUNDSS,
27685
27686 IX86_BUILTIN_ROUNDPD,
27687 IX86_BUILTIN_ROUNDPS,
27688
27689 IX86_BUILTIN_FLOORPD,
27690 IX86_BUILTIN_CEILPD,
27691 IX86_BUILTIN_TRUNCPD,
27692 IX86_BUILTIN_RINTPD,
27693 IX86_BUILTIN_ROUNDPD_AZ,
27694
27695 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX,
27696 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX,
27697 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX,
27698
27699 IX86_BUILTIN_FLOORPS,
27700 IX86_BUILTIN_CEILPS,
27701 IX86_BUILTIN_TRUNCPS,
27702 IX86_BUILTIN_RINTPS,
27703 IX86_BUILTIN_ROUNDPS_AZ,
27704
27705 IX86_BUILTIN_FLOORPS_SFIX,
27706 IX86_BUILTIN_CEILPS_SFIX,
27707 IX86_BUILTIN_ROUNDPS_AZ_SFIX,
27708
27709 IX86_BUILTIN_PTESTZ,
27710 IX86_BUILTIN_PTESTC,
27711 IX86_BUILTIN_PTESTNZC,
27712
27713 IX86_BUILTIN_VEC_INIT_V2SI,
27714 IX86_BUILTIN_VEC_INIT_V4HI,
27715 IX86_BUILTIN_VEC_INIT_V8QI,
27716 IX86_BUILTIN_VEC_EXT_V2DF,
27717 IX86_BUILTIN_VEC_EXT_V2DI,
27718 IX86_BUILTIN_VEC_EXT_V4SF,
27719 IX86_BUILTIN_VEC_EXT_V4SI,
27720 IX86_BUILTIN_VEC_EXT_V8HI,
27721 IX86_BUILTIN_VEC_EXT_V2SI,
27722 IX86_BUILTIN_VEC_EXT_V4HI,
27723 IX86_BUILTIN_VEC_EXT_V16QI,
27724 IX86_BUILTIN_VEC_SET_V2DI,
27725 IX86_BUILTIN_VEC_SET_V4SF,
27726 IX86_BUILTIN_VEC_SET_V4SI,
27727 IX86_BUILTIN_VEC_SET_V8HI,
27728 IX86_BUILTIN_VEC_SET_V4HI,
27729 IX86_BUILTIN_VEC_SET_V16QI,
27730
27731 IX86_BUILTIN_VEC_PACK_SFIX,
27732 IX86_BUILTIN_VEC_PACK_SFIX256,
27733
27734 /* SSE4.2. */
27735 IX86_BUILTIN_CRC32QI,
27736 IX86_BUILTIN_CRC32HI,
27737 IX86_BUILTIN_CRC32SI,
27738 IX86_BUILTIN_CRC32DI,
27739
27740 IX86_BUILTIN_PCMPESTRI128,
27741 IX86_BUILTIN_PCMPESTRM128,
27742 IX86_BUILTIN_PCMPESTRA128,
27743 IX86_BUILTIN_PCMPESTRC128,
27744 IX86_BUILTIN_PCMPESTRO128,
27745 IX86_BUILTIN_PCMPESTRS128,
27746 IX86_BUILTIN_PCMPESTRZ128,
27747 IX86_BUILTIN_PCMPISTRI128,
27748 IX86_BUILTIN_PCMPISTRM128,
27749 IX86_BUILTIN_PCMPISTRA128,
27750 IX86_BUILTIN_PCMPISTRC128,
27751 IX86_BUILTIN_PCMPISTRO128,
27752 IX86_BUILTIN_PCMPISTRS128,
27753 IX86_BUILTIN_PCMPISTRZ128,
27754
27755 IX86_BUILTIN_PCMPGTQ,
27756
27757 /* AES instructions */
27758 IX86_BUILTIN_AESENC128,
27759 IX86_BUILTIN_AESENCLAST128,
27760 IX86_BUILTIN_AESDEC128,
27761 IX86_BUILTIN_AESDECLAST128,
27762 IX86_BUILTIN_AESIMC128,
27763 IX86_BUILTIN_AESKEYGENASSIST128,
27764
27765 /* PCLMUL instruction */
27766 IX86_BUILTIN_PCLMULQDQ128,
27767
27768 /* AVX */
27769 IX86_BUILTIN_ADDPD256,
27770 IX86_BUILTIN_ADDPS256,
27771 IX86_BUILTIN_ADDSUBPD256,
27772 IX86_BUILTIN_ADDSUBPS256,
27773 IX86_BUILTIN_ANDPD256,
27774 IX86_BUILTIN_ANDPS256,
27775 IX86_BUILTIN_ANDNPD256,
27776 IX86_BUILTIN_ANDNPS256,
27777 IX86_BUILTIN_BLENDPD256,
27778 IX86_BUILTIN_BLENDPS256,
27779 IX86_BUILTIN_BLENDVPD256,
27780 IX86_BUILTIN_BLENDVPS256,
27781 IX86_BUILTIN_DIVPD256,
27782 IX86_BUILTIN_DIVPS256,
27783 IX86_BUILTIN_DPPS256,
27784 IX86_BUILTIN_HADDPD256,
27785 IX86_BUILTIN_HADDPS256,
27786 IX86_BUILTIN_HSUBPD256,
27787 IX86_BUILTIN_HSUBPS256,
27788 IX86_BUILTIN_MAXPD256,
27789 IX86_BUILTIN_MAXPS256,
27790 IX86_BUILTIN_MINPD256,
27791 IX86_BUILTIN_MINPS256,
27792 IX86_BUILTIN_MULPD256,
27793 IX86_BUILTIN_MULPS256,
27794 IX86_BUILTIN_ORPD256,
27795 IX86_BUILTIN_ORPS256,
27796 IX86_BUILTIN_SHUFPD256,
27797 IX86_BUILTIN_SHUFPS256,
27798 IX86_BUILTIN_SUBPD256,
27799 IX86_BUILTIN_SUBPS256,
27800 IX86_BUILTIN_XORPD256,
27801 IX86_BUILTIN_XORPS256,
27802 IX86_BUILTIN_CMPSD,
27803 IX86_BUILTIN_CMPSS,
27804 IX86_BUILTIN_CMPPD,
27805 IX86_BUILTIN_CMPPS,
27806 IX86_BUILTIN_CMPPD256,
27807 IX86_BUILTIN_CMPPS256,
27808 IX86_BUILTIN_CVTDQ2PD256,
27809 IX86_BUILTIN_CVTDQ2PS256,
27810 IX86_BUILTIN_CVTPD2PS256,
27811 IX86_BUILTIN_CVTPS2DQ256,
27812 IX86_BUILTIN_CVTPS2PD256,
27813 IX86_BUILTIN_CVTTPD2DQ256,
27814 IX86_BUILTIN_CVTPD2DQ256,
27815 IX86_BUILTIN_CVTTPS2DQ256,
27816 IX86_BUILTIN_EXTRACTF128PD256,
27817 IX86_BUILTIN_EXTRACTF128PS256,
27818 IX86_BUILTIN_EXTRACTF128SI256,
27819 IX86_BUILTIN_VZEROALL,
27820 IX86_BUILTIN_VZEROUPPER,
27821 IX86_BUILTIN_VPERMILVARPD,
27822 IX86_BUILTIN_VPERMILVARPS,
27823 IX86_BUILTIN_VPERMILVARPD256,
27824 IX86_BUILTIN_VPERMILVARPS256,
27825 IX86_BUILTIN_VPERMILPD,
27826 IX86_BUILTIN_VPERMILPS,
27827 IX86_BUILTIN_VPERMILPD256,
27828 IX86_BUILTIN_VPERMILPS256,
27829 IX86_BUILTIN_VPERMIL2PD,
27830 IX86_BUILTIN_VPERMIL2PS,
27831 IX86_BUILTIN_VPERMIL2PD256,
27832 IX86_BUILTIN_VPERMIL2PS256,
27833 IX86_BUILTIN_VPERM2F128PD256,
27834 IX86_BUILTIN_VPERM2F128PS256,
27835 IX86_BUILTIN_VPERM2F128SI256,
27836 IX86_BUILTIN_VBROADCASTSS,
27837 IX86_BUILTIN_VBROADCASTSD256,
27838 IX86_BUILTIN_VBROADCASTSS256,
27839 IX86_BUILTIN_VBROADCASTPD256,
27840 IX86_BUILTIN_VBROADCASTPS256,
27841 IX86_BUILTIN_VINSERTF128PD256,
27842 IX86_BUILTIN_VINSERTF128PS256,
27843 IX86_BUILTIN_VINSERTF128SI256,
27844 IX86_BUILTIN_LOADUPD256,
27845 IX86_BUILTIN_LOADUPS256,
27846 IX86_BUILTIN_STOREUPD256,
27847 IX86_BUILTIN_STOREUPS256,
27848 IX86_BUILTIN_LDDQU256,
27849 IX86_BUILTIN_MOVNTDQ256,
27850 IX86_BUILTIN_MOVNTPD256,
27851 IX86_BUILTIN_MOVNTPS256,
27852 IX86_BUILTIN_LOADDQU256,
27853 IX86_BUILTIN_STOREDQU256,
27854 IX86_BUILTIN_MASKLOADPD,
27855 IX86_BUILTIN_MASKLOADPS,
27856 IX86_BUILTIN_MASKSTOREPD,
27857 IX86_BUILTIN_MASKSTOREPS,
27858 IX86_BUILTIN_MASKLOADPD256,
27859 IX86_BUILTIN_MASKLOADPS256,
27860 IX86_BUILTIN_MASKSTOREPD256,
27861 IX86_BUILTIN_MASKSTOREPS256,
27862 IX86_BUILTIN_MOVSHDUP256,
27863 IX86_BUILTIN_MOVSLDUP256,
27864 IX86_BUILTIN_MOVDDUP256,
27865
27866 IX86_BUILTIN_SQRTPD256,
27867 IX86_BUILTIN_SQRTPS256,
27868 IX86_BUILTIN_SQRTPS_NR256,
27869 IX86_BUILTIN_RSQRTPS256,
27870 IX86_BUILTIN_RSQRTPS_NR256,
27871
27872 IX86_BUILTIN_RCPPS256,
27873
27874 IX86_BUILTIN_ROUNDPD256,
27875 IX86_BUILTIN_ROUNDPS256,
27876
27877 IX86_BUILTIN_FLOORPD256,
27878 IX86_BUILTIN_CEILPD256,
27879 IX86_BUILTIN_TRUNCPD256,
27880 IX86_BUILTIN_RINTPD256,
27881 IX86_BUILTIN_ROUNDPD_AZ256,
27882
27883 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256,
27884 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256,
27885 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256,
27886
27887 IX86_BUILTIN_FLOORPS256,
27888 IX86_BUILTIN_CEILPS256,
27889 IX86_BUILTIN_TRUNCPS256,
27890 IX86_BUILTIN_RINTPS256,
27891 IX86_BUILTIN_ROUNDPS_AZ256,
27892
27893 IX86_BUILTIN_FLOORPS_SFIX256,
27894 IX86_BUILTIN_CEILPS_SFIX256,
27895 IX86_BUILTIN_ROUNDPS_AZ_SFIX256,
27896
27897 IX86_BUILTIN_UNPCKHPD256,
27898 IX86_BUILTIN_UNPCKLPD256,
27899 IX86_BUILTIN_UNPCKHPS256,
27900 IX86_BUILTIN_UNPCKLPS256,
27901
27902 IX86_BUILTIN_SI256_SI,
27903 IX86_BUILTIN_PS256_PS,
27904 IX86_BUILTIN_PD256_PD,
27905 IX86_BUILTIN_SI_SI256,
27906 IX86_BUILTIN_PS_PS256,
27907 IX86_BUILTIN_PD_PD256,
27908
27909 IX86_BUILTIN_VTESTZPD,
27910 IX86_BUILTIN_VTESTCPD,
27911 IX86_BUILTIN_VTESTNZCPD,
27912 IX86_BUILTIN_VTESTZPS,
27913 IX86_BUILTIN_VTESTCPS,
27914 IX86_BUILTIN_VTESTNZCPS,
27915 IX86_BUILTIN_VTESTZPD256,
27916 IX86_BUILTIN_VTESTCPD256,
27917 IX86_BUILTIN_VTESTNZCPD256,
27918 IX86_BUILTIN_VTESTZPS256,
27919 IX86_BUILTIN_VTESTCPS256,
27920 IX86_BUILTIN_VTESTNZCPS256,
27921 IX86_BUILTIN_PTESTZ256,
27922 IX86_BUILTIN_PTESTC256,
27923 IX86_BUILTIN_PTESTNZC256,
27924
27925 IX86_BUILTIN_MOVMSKPD256,
27926 IX86_BUILTIN_MOVMSKPS256,
27927
27928 /* AVX2 */
27929 IX86_BUILTIN_MPSADBW256,
27930 IX86_BUILTIN_PABSB256,
27931 IX86_BUILTIN_PABSW256,
27932 IX86_BUILTIN_PABSD256,
27933 IX86_BUILTIN_PACKSSDW256,
27934 IX86_BUILTIN_PACKSSWB256,
27935 IX86_BUILTIN_PACKUSDW256,
27936 IX86_BUILTIN_PACKUSWB256,
27937 IX86_BUILTIN_PADDB256,
27938 IX86_BUILTIN_PADDW256,
27939 IX86_BUILTIN_PADDD256,
27940 IX86_BUILTIN_PADDQ256,
27941 IX86_BUILTIN_PADDSB256,
27942 IX86_BUILTIN_PADDSW256,
27943 IX86_BUILTIN_PADDUSB256,
27944 IX86_BUILTIN_PADDUSW256,
27945 IX86_BUILTIN_PALIGNR256,
27946 IX86_BUILTIN_AND256I,
27947 IX86_BUILTIN_ANDNOT256I,
27948 IX86_BUILTIN_PAVGB256,
27949 IX86_BUILTIN_PAVGW256,
27950 IX86_BUILTIN_PBLENDVB256,
27951 IX86_BUILTIN_PBLENDVW256,
27952 IX86_BUILTIN_PCMPEQB256,
27953 IX86_BUILTIN_PCMPEQW256,
27954 IX86_BUILTIN_PCMPEQD256,
27955 IX86_BUILTIN_PCMPEQQ256,
27956 IX86_BUILTIN_PCMPGTB256,
27957 IX86_BUILTIN_PCMPGTW256,
27958 IX86_BUILTIN_PCMPGTD256,
27959 IX86_BUILTIN_PCMPGTQ256,
27960 IX86_BUILTIN_PHADDW256,
27961 IX86_BUILTIN_PHADDD256,
27962 IX86_BUILTIN_PHADDSW256,
27963 IX86_BUILTIN_PHSUBW256,
27964 IX86_BUILTIN_PHSUBD256,
27965 IX86_BUILTIN_PHSUBSW256,
27966 IX86_BUILTIN_PMADDUBSW256,
27967 IX86_BUILTIN_PMADDWD256,
27968 IX86_BUILTIN_PMAXSB256,
27969 IX86_BUILTIN_PMAXSW256,
27970 IX86_BUILTIN_PMAXSD256,
27971 IX86_BUILTIN_PMAXUB256,
27972 IX86_BUILTIN_PMAXUW256,
27973 IX86_BUILTIN_PMAXUD256,
27974 IX86_BUILTIN_PMINSB256,
27975 IX86_BUILTIN_PMINSW256,
27976 IX86_BUILTIN_PMINSD256,
27977 IX86_BUILTIN_PMINUB256,
27978 IX86_BUILTIN_PMINUW256,
27979 IX86_BUILTIN_PMINUD256,
27980 IX86_BUILTIN_PMOVMSKB256,
27981 IX86_BUILTIN_PMOVSXBW256,
27982 IX86_BUILTIN_PMOVSXBD256,
27983 IX86_BUILTIN_PMOVSXBQ256,
27984 IX86_BUILTIN_PMOVSXWD256,
27985 IX86_BUILTIN_PMOVSXWQ256,
27986 IX86_BUILTIN_PMOVSXDQ256,
27987 IX86_BUILTIN_PMOVZXBW256,
27988 IX86_BUILTIN_PMOVZXBD256,
27989 IX86_BUILTIN_PMOVZXBQ256,
27990 IX86_BUILTIN_PMOVZXWD256,
27991 IX86_BUILTIN_PMOVZXWQ256,
27992 IX86_BUILTIN_PMOVZXDQ256,
27993 IX86_BUILTIN_PMULDQ256,
27994 IX86_BUILTIN_PMULHRSW256,
27995 IX86_BUILTIN_PMULHUW256,
27996 IX86_BUILTIN_PMULHW256,
27997 IX86_BUILTIN_PMULLW256,
27998 IX86_BUILTIN_PMULLD256,
27999 IX86_BUILTIN_PMULUDQ256,
28000 IX86_BUILTIN_POR256,
28001 IX86_BUILTIN_PSADBW256,
28002 IX86_BUILTIN_PSHUFB256,
28003 IX86_BUILTIN_PSHUFD256,
28004 IX86_BUILTIN_PSHUFHW256,
28005 IX86_BUILTIN_PSHUFLW256,
28006 IX86_BUILTIN_PSIGNB256,
28007 IX86_BUILTIN_PSIGNW256,
28008 IX86_BUILTIN_PSIGND256,
28009 IX86_BUILTIN_PSLLDQI256,
28010 IX86_BUILTIN_PSLLWI256,
28011 IX86_BUILTIN_PSLLW256,
28012 IX86_BUILTIN_PSLLDI256,
28013 IX86_BUILTIN_PSLLD256,
28014 IX86_BUILTIN_PSLLQI256,
28015 IX86_BUILTIN_PSLLQ256,
28016 IX86_BUILTIN_PSRAWI256,
28017 IX86_BUILTIN_PSRAW256,
28018 IX86_BUILTIN_PSRADI256,
28019 IX86_BUILTIN_PSRAD256,
28020 IX86_BUILTIN_PSRLDQI256,
28021 IX86_BUILTIN_PSRLWI256,
28022 IX86_BUILTIN_PSRLW256,
28023 IX86_BUILTIN_PSRLDI256,
28024 IX86_BUILTIN_PSRLD256,
28025 IX86_BUILTIN_PSRLQI256,
28026 IX86_BUILTIN_PSRLQ256,
28027 IX86_BUILTIN_PSUBB256,
28028 IX86_BUILTIN_PSUBW256,
28029 IX86_BUILTIN_PSUBD256,
28030 IX86_BUILTIN_PSUBQ256,
28031 IX86_BUILTIN_PSUBSB256,
28032 IX86_BUILTIN_PSUBSW256,
28033 IX86_BUILTIN_PSUBUSB256,
28034 IX86_BUILTIN_PSUBUSW256,
28035 IX86_BUILTIN_PUNPCKHBW256,
28036 IX86_BUILTIN_PUNPCKHWD256,
28037 IX86_BUILTIN_PUNPCKHDQ256,
28038 IX86_BUILTIN_PUNPCKHQDQ256,
28039 IX86_BUILTIN_PUNPCKLBW256,
28040 IX86_BUILTIN_PUNPCKLWD256,
28041 IX86_BUILTIN_PUNPCKLDQ256,
28042 IX86_BUILTIN_PUNPCKLQDQ256,
28043 IX86_BUILTIN_PXOR256,
28044 IX86_BUILTIN_MOVNTDQA256,
28045 IX86_BUILTIN_VBROADCASTSS_PS,
28046 IX86_BUILTIN_VBROADCASTSS_PS256,
28047 IX86_BUILTIN_VBROADCASTSD_PD256,
28048 IX86_BUILTIN_VBROADCASTSI256,
28049 IX86_BUILTIN_PBLENDD256,
28050 IX86_BUILTIN_PBLENDD128,
28051 IX86_BUILTIN_PBROADCASTB256,
28052 IX86_BUILTIN_PBROADCASTW256,
28053 IX86_BUILTIN_PBROADCASTD256,
28054 IX86_BUILTIN_PBROADCASTQ256,
28055 IX86_BUILTIN_PBROADCASTB128,
28056 IX86_BUILTIN_PBROADCASTW128,
28057 IX86_BUILTIN_PBROADCASTD128,
28058 IX86_BUILTIN_PBROADCASTQ128,
28059 IX86_BUILTIN_VPERMVARSI256,
28060 IX86_BUILTIN_VPERMDF256,
28061 IX86_BUILTIN_VPERMVARSF256,
28062 IX86_BUILTIN_VPERMDI256,
28063 IX86_BUILTIN_VPERMTI256,
28064 IX86_BUILTIN_VEXTRACT128I256,
28065 IX86_BUILTIN_VINSERT128I256,
28066 IX86_BUILTIN_MASKLOADD,
28067 IX86_BUILTIN_MASKLOADQ,
28068 IX86_BUILTIN_MASKLOADD256,
28069 IX86_BUILTIN_MASKLOADQ256,
28070 IX86_BUILTIN_MASKSTORED,
28071 IX86_BUILTIN_MASKSTOREQ,
28072 IX86_BUILTIN_MASKSTORED256,
28073 IX86_BUILTIN_MASKSTOREQ256,
28074 IX86_BUILTIN_PSLLVV4DI,
28075 IX86_BUILTIN_PSLLVV2DI,
28076 IX86_BUILTIN_PSLLVV8SI,
28077 IX86_BUILTIN_PSLLVV4SI,
28078 IX86_BUILTIN_PSRAVV8SI,
28079 IX86_BUILTIN_PSRAVV4SI,
28080 IX86_BUILTIN_PSRLVV4DI,
28081 IX86_BUILTIN_PSRLVV2DI,
28082 IX86_BUILTIN_PSRLVV8SI,
28083 IX86_BUILTIN_PSRLVV4SI,
28084
28085 IX86_BUILTIN_GATHERSIV2DF,
28086 IX86_BUILTIN_GATHERSIV4DF,
28087 IX86_BUILTIN_GATHERDIV2DF,
28088 IX86_BUILTIN_GATHERDIV4DF,
28089 IX86_BUILTIN_GATHERSIV4SF,
28090 IX86_BUILTIN_GATHERSIV8SF,
28091 IX86_BUILTIN_GATHERDIV4SF,
28092 IX86_BUILTIN_GATHERDIV8SF,
28093 IX86_BUILTIN_GATHERSIV2DI,
28094 IX86_BUILTIN_GATHERSIV4DI,
28095 IX86_BUILTIN_GATHERDIV2DI,
28096 IX86_BUILTIN_GATHERDIV4DI,
28097 IX86_BUILTIN_GATHERSIV4SI,
28098 IX86_BUILTIN_GATHERSIV8SI,
28099 IX86_BUILTIN_GATHERDIV4SI,
28100 IX86_BUILTIN_GATHERDIV8SI,
28101
28102 /* AVX512F */
28103 IX86_BUILTIN_SI512_SI256,
28104 IX86_BUILTIN_PD512_PD256,
28105 IX86_BUILTIN_PS512_PS256,
28106 IX86_BUILTIN_SI512_SI,
28107 IX86_BUILTIN_PD512_PD,
28108 IX86_BUILTIN_PS512_PS,
28109 IX86_BUILTIN_ADDPD512,
28110 IX86_BUILTIN_ADDPS512,
28111 IX86_BUILTIN_ADDSD_ROUND,
28112 IX86_BUILTIN_ADDSS_ROUND,
28113 IX86_BUILTIN_ALIGND512,
28114 IX86_BUILTIN_ALIGNQ512,
28115 IX86_BUILTIN_BLENDMD512,
28116 IX86_BUILTIN_BLENDMPD512,
28117 IX86_BUILTIN_BLENDMPS512,
28118 IX86_BUILTIN_BLENDMQ512,
28119 IX86_BUILTIN_BROADCASTF32X4_512,
28120 IX86_BUILTIN_BROADCASTF64X4_512,
28121 IX86_BUILTIN_BROADCASTI32X4_512,
28122 IX86_BUILTIN_BROADCASTI64X4_512,
28123 IX86_BUILTIN_BROADCASTSD512,
28124 IX86_BUILTIN_BROADCASTSS512,
28125 IX86_BUILTIN_CMPD512,
28126 IX86_BUILTIN_CMPPD512,
28127 IX86_BUILTIN_CMPPS512,
28128 IX86_BUILTIN_CMPQ512,
28129 IX86_BUILTIN_CMPSD_MASK,
28130 IX86_BUILTIN_CMPSS_MASK,
28131 IX86_BUILTIN_COMIDF,
28132 IX86_BUILTIN_COMISF,
28133 IX86_BUILTIN_COMPRESSPD512,
28134 IX86_BUILTIN_COMPRESSPDSTORE512,
28135 IX86_BUILTIN_COMPRESSPS512,
28136 IX86_BUILTIN_COMPRESSPSSTORE512,
28137 IX86_BUILTIN_CVTDQ2PD512,
28138 IX86_BUILTIN_CVTDQ2PS512,
28139 IX86_BUILTIN_CVTPD2DQ512,
28140 IX86_BUILTIN_CVTPD2PS512,
28141 IX86_BUILTIN_CVTPD2UDQ512,
28142 IX86_BUILTIN_CVTPH2PS512,
28143 IX86_BUILTIN_CVTPS2DQ512,
28144 IX86_BUILTIN_CVTPS2PD512,
28145 IX86_BUILTIN_CVTPS2PH512,
28146 IX86_BUILTIN_CVTPS2UDQ512,
28147 IX86_BUILTIN_CVTSD2SS_ROUND,
28148 IX86_BUILTIN_CVTSI2SD64,
28149 IX86_BUILTIN_CVTSI2SS32,
28150 IX86_BUILTIN_CVTSI2SS64,
28151 IX86_BUILTIN_CVTSS2SD_ROUND,
28152 IX86_BUILTIN_CVTTPD2DQ512,
28153 IX86_BUILTIN_CVTTPD2UDQ512,
28154 IX86_BUILTIN_CVTTPS2DQ512,
28155 IX86_BUILTIN_CVTTPS2UDQ512,
28156 IX86_BUILTIN_CVTUDQ2PD512,
28157 IX86_BUILTIN_CVTUDQ2PS512,
28158 IX86_BUILTIN_CVTUSI2SD32,
28159 IX86_BUILTIN_CVTUSI2SD64,
28160 IX86_BUILTIN_CVTUSI2SS32,
28161 IX86_BUILTIN_CVTUSI2SS64,
28162 IX86_BUILTIN_DIVPD512,
28163 IX86_BUILTIN_DIVPS512,
28164 IX86_BUILTIN_DIVSD_ROUND,
28165 IX86_BUILTIN_DIVSS_ROUND,
28166 IX86_BUILTIN_EXPANDPD512,
28167 IX86_BUILTIN_EXPANDPD512Z,
28168 IX86_BUILTIN_EXPANDPDLOAD512,
28169 IX86_BUILTIN_EXPANDPDLOAD512Z,
28170 IX86_BUILTIN_EXPANDPS512,
28171 IX86_BUILTIN_EXPANDPS512Z,
28172 IX86_BUILTIN_EXPANDPSLOAD512,
28173 IX86_BUILTIN_EXPANDPSLOAD512Z,
28174 IX86_BUILTIN_EXTRACTF32X4,
28175 IX86_BUILTIN_EXTRACTF64X4,
28176 IX86_BUILTIN_EXTRACTI32X4,
28177 IX86_BUILTIN_EXTRACTI64X4,
28178 IX86_BUILTIN_FIXUPIMMPD512_MASK,
28179 IX86_BUILTIN_FIXUPIMMPD512_MASKZ,
28180 IX86_BUILTIN_FIXUPIMMPS512_MASK,
28181 IX86_BUILTIN_FIXUPIMMPS512_MASKZ,
28182 IX86_BUILTIN_FIXUPIMMSD128_MASK,
28183 IX86_BUILTIN_FIXUPIMMSD128_MASKZ,
28184 IX86_BUILTIN_FIXUPIMMSS128_MASK,
28185 IX86_BUILTIN_FIXUPIMMSS128_MASKZ,
28186 IX86_BUILTIN_GETEXPPD512,
28187 IX86_BUILTIN_GETEXPPS512,
28188 IX86_BUILTIN_GETEXPSD128,
28189 IX86_BUILTIN_GETEXPSS128,
28190 IX86_BUILTIN_GETMANTPD512,
28191 IX86_BUILTIN_GETMANTPS512,
28192 IX86_BUILTIN_GETMANTSD128,
28193 IX86_BUILTIN_GETMANTSS128,
28194 IX86_BUILTIN_INSERTF32X4,
28195 IX86_BUILTIN_INSERTF64X4,
28196 IX86_BUILTIN_INSERTI32X4,
28197 IX86_BUILTIN_INSERTI64X4,
28198 IX86_BUILTIN_LOADAPD512,
28199 IX86_BUILTIN_LOADAPS512,
28200 IX86_BUILTIN_LOADDQUDI512,
28201 IX86_BUILTIN_LOADDQUSI512,
28202 IX86_BUILTIN_LOADUPD512,
28203 IX86_BUILTIN_LOADUPS512,
28204 IX86_BUILTIN_MAXPD512,
28205 IX86_BUILTIN_MAXPS512,
28206 IX86_BUILTIN_MAXSD_ROUND,
28207 IX86_BUILTIN_MAXSS_ROUND,
28208 IX86_BUILTIN_MINPD512,
28209 IX86_BUILTIN_MINPS512,
28210 IX86_BUILTIN_MINSD_ROUND,
28211 IX86_BUILTIN_MINSS_ROUND,
28212 IX86_BUILTIN_MOVAPD512,
28213 IX86_BUILTIN_MOVAPS512,
28214 IX86_BUILTIN_MOVDDUP512,
28215 IX86_BUILTIN_MOVDQA32LOAD512,
28216 IX86_BUILTIN_MOVDQA32STORE512,
28217 IX86_BUILTIN_MOVDQA32_512,
28218 IX86_BUILTIN_MOVDQA64LOAD512,
28219 IX86_BUILTIN_MOVDQA64STORE512,
28220 IX86_BUILTIN_MOVDQA64_512,
28221 IX86_BUILTIN_MOVNTDQ512,
28222 IX86_BUILTIN_MOVNTDQA512,
28223 IX86_BUILTIN_MOVNTPD512,
28224 IX86_BUILTIN_MOVNTPS512,
28225 IX86_BUILTIN_MOVSHDUP512,
28226 IX86_BUILTIN_MOVSLDUP512,
28227 IX86_BUILTIN_MULPD512,
28228 IX86_BUILTIN_MULPS512,
28229 IX86_BUILTIN_MULSD_ROUND,
28230 IX86_BUILTIN_MULSS_ROUND,
28231 IX86_BUILTIN_PABSD512,
28232 IX86_BUILTIN_PABSQ512,
28233 IX86_BUILTIN_PADDD512,
28234 IX86_BUILTIN_PADDQ512,
28235 IX86_BUILTIN_PANDD512,
28236 IX86_BUILTIN_PANDND512,
28237 IX86_BUILTIN_PANDNQ512,
28238 IX86_BUILTIN_PANDQ512,
28239 IX86_BUILTIN_PBROADCASTD512,
28240 IX86_BUILTIN_PBROADCASTD512_GPR,
28241 IX86_BUILTIN_PBROADCASTMB512,
28242 IX86_BUILTIN_PBROADCASTMW512,
28243 IX86_BUILTIN_PBROADCASTQ512,
28244 IX86_BUILTIN_PBROADCASTQ512_GPR,
28245 IX86_BUILTIN_PBROADCASTQ512_MEM,
28246 IX86_BUILTIN_PCMPEQD512_MASK,
28247 IX86_BUILTIN_PCMPEQQ512_MASK,
28248 IX86_BUILTIN_PCMPGTD512_MASK,
28249 IX86_BUILTIN_PCMPGTQ512_MASK,
28250 IX86_BUILTIN_PCOMPRESSD512,
28251 IX86_BUILTIN_PCOMPRESSDSTORE512,
28252 IX86_BUILTIN_PCOMPRESSQ512,
28253 IX86_BUILTIN_PCOMPRESSQSTORE512,
28254 IX86_BUILTIN_PEXPANDD512,
28255 IX86_BUILTIN_PEXPANDD512Z,
28256 IX86_BUILTIN_PEXPANDDLOAD512,
28257 IX86_BUILTIN_PEXPANDDLOAD512Z,
28258 IX86_BUILTIN_PEXPANDQ512,
28259 IX86_BUILTIN_PEXPANDQ512Z,
28260 IX86_BUILTIN_PEXPANDQLOAD512,
28261 IX86_BUILTIN_PEXPANDQLOAD512Z,
28262 IX86_BUILTIN_PMAXSD512,
28263 IX86_BUILTIN_PMAXSQ512,
28264 IX86_BUILTIN_PMAXUD512,
28265 IX86_BUILTIN_PMAXUQ512,
28266 IX86_BUILTIN_PMINSD512,
28267 IX86_BUILTIN_PMINSQ512,
28268 IX86_BUILTIN_PMINUD512,
28269 IX86_BUILTIN_PMINUQ512,
28270 IX86_BUILTIN_PMOVDB512,
28271 IX86_BUILTIN_PMOVDB512_MEM,
28272 IX86_BUILTIN_PMOVDW512,
28273 IX86_BUILTIN_PMOVDW512_MEM,
28274 IX86_BUILTIN_PMOVQB512,
28275 IX86_BUILTIN_PMOVQB512_MEM,
28276 IX86_BUILTIN_PMOVQD512,
28277 IX86_BUILTIN_PMOVQD512_MEM,
28278 IX86_BUILTIN_PMOVQW512,
28279 IX86_BUILTIN_PMOVQW512_MEM,
28280 IX86_BUILTIN_PMOVSDB512,
28281 IX86_BUILTIN_PMOVSDB512_MEM,
28282 IX86_BUILTIN_PMOVSDW512,
28283 IX86_BUILTIN_PMOVSDW512_MEM,
28284 IX86_BUILTIN_PMOVSQB512,
28285 IX86_BUILTIN_PMOVSQB512_MEM,
28286 IX86_BUILTIN_PMOVSQD512,
28287 IX86_BUILTIN_PMOVSQD512_MEM,
28288 IX86_BUILTIN_PMOVSQW512,
28289 IX86_BUILTIN_PMOVSQW512_MEM,
28290 IX86_BUILTIN_PMOVSXBD512,
28291 IX86_BUILTIN_PMOVSXBQ512,
28292 IX86_BUILTIN_PMOVSXDQ512,
28293 IX86_BUILTIN_PMOVSXWD512,
28294 IX86_BUILTIN_PMOVSXWQ512,
28295 IX86_BUILTIN_PMOVUSDB512,
28296 IX86_BUILTIN_PMOVUSDB512_MEM,
28297 IX86_BUILTIN_PMOVUSDW512,
28298 IX86_BUILTIN_PMOVUSDW512_MEM,
28299 IX86_BUILTIN_PMOVUSQB512,
28300 IX86_BUILTIN_PMOVUSQB512_MEM,
28301 IX86_BUILTIN_PMOVUSQD512,
28302 IX86_BUILTIN_PMOVUSQD512_MEM,
28303 IX86_BUILTIN_PMOVUSQW512,
28304 IX86_BUILTIN_PMOVUSQW512_MEM,
28305 IX86_BUILTIN_PMOVZXBD512,
28306 IX86_BUILTIN_PMOVZXBQ512,
28307 IX86_BUILTIN_PMOVZXDQ512,
28308 IX86_BUILTIN_PMOVZXWD512,
28309 IX86_BUILTIN_PMOVZXWQ512,
28310 IX86_BUILTIN_PMULDQ512,
28311 IX86_BUILTIN_PMULLD512,
28312 IX86_BUILTIN_PMULUDQ512,
28313 IX86_BUILTIN_PORD512,
28314 IX86_BUILTIN_PORQ512,
28315 IX86_BUILTIN_PROLD512,
28316 IX86_BUILTIN_PROLQ512,
28317 IX86_BUILTIN_PROLVD512,
28318 IX86_BUILTIN_PROLVQ512,
28319 IX86_BUILTIN_PRORD512,
28320 IX86_BUILTIN_PRORQ512,
28321 IX86_BUILTIN_PRORVD512,
28322 IX86_BUILTIN_PRORVQ512,
28323 IX86_BUILTIN_PSHUFD512,
28324 IX86_BUILTIN_PSLLD512,
28325 IX86_BUILTIN_PSLLDI512,
28326 IX86_BUILTIN_PSLLQ512,
28327 IX86_BUILTIN_PSLLQI512,
28328 IX86_BUILTIN_PSLLVV16SI,
28329 IX86_BUILTIN_PSLLVV8DI,
28330 IX86_BUILTIN_PSRAD512,
28331 IX86_BUILTIN_PSRADI512,
28332 IX86_BUILTIN_PSRAQ512,
28333 IX86_BUILTIN_PSRAQI512,
28334 IX86_BUILTIN_PSRAVV16SI,
28335 IX86_BUILTIN_PSRAVV8DI,
28336 IX86_BUILTIN_PSRLD512,
28337 IX86_BUILTIN_PSRLDI512,
28338 IX86_BUILTIN_PSRLQ512,
28339 IX86_BUILTIN_PSRLQI512,
28340 IX86_BUILTIN_PSRLVV16SI,
28341 IX86_BUILTIN_PSRLVV8DI,
28342 IX86_BUILTIN_PSUBD512,
28343 IX86_BUILTIN_PSUBQ512,
28344 IX86_BUILTIN_PTESTMD512,
28345 IX86_BUILTIN_PTESTMQ512,
28346 IX86_BUILTIN_PTESTNMD512,
28347 IX86_BUILTIN_PTESTNMQ512,
28348 IX86_BUILTIN_PUNPCKHDQ512,
28349 IX86_BUILTIN_PUNPCKHQDQ512,
28350 IX86_BUILTIN_PUNPCKLDQ512,
28351 IX86_BUILTIN_PUNPCKLQDQ512,
28352 IX86_BUILTIN_PXORD512,
28353 IX86_BUILTIN_PXORQ512,
28354 IX86_BUILTIN_RCP14PD512,
28355 IX86_BUILTIN_RCP14PS512,
28356 IX86_BUILTIN_RCP14SD,
28357 IX86_BUILTIN_RCP14SS,
28358 IX86_BUILTIN_RNDSCALEPD,
28359 IX86_BUILTIN_RNDSCALEPS,
28360 IX86_BUILTIN_RNDSCALESD,
28361 IX86_BUILTIN_RNDSCALESS,
28362 IX86_BUILTIN_RSQRT14PD512,
28363 IX86_BUILTIN_RSQRT14PS512,
28364 IX86_BUILTIN_RSQRT14SD,
28365 IX86_BUILTIN_RSQRT14SS,
28366 IX86_BUILTIN_SCALEFPD512,
28367 IX86_BUILTIN_SCALEFPS512,
28368 IX86_BUILTIN_SCALEFSD,
28369 IX86_BUILTIN_SCALEFSS,
28370 IX86_BUILTIN_SHUFPD512,
28371 IX86_BUILTIN_SHUFPS512,
28372 IX86_BUILTIN_SHUF_F32x4,
28373 IX86_BUILTIN_SHUF_F64x2,
28374 IX86_BUILTIN_SHUF_I32x4,
28375 IX86_BUILTIN_SHUF_I64x2,
28376 IX86_BUILTIN_SQRTPD512,
28377 IX86_BUILTIN_SQRTPD512_MASK,
28378 IX86_BUILTIN_SQRTPS512_MASK,
28379 IX86_BUILTIN_SQRTPS_NR512,
28380 IX86_BUILTIN_SQRTSD_ROUND,
28381 IX86_BUILTIN_SQRTSS_ROUND,
28382 IX86_BUILTIN_STOREAPD512,
28383 IX86_BUILTIN_STOREAPS512,
28384 IX86_BUILTIN_STOREDQUDI512,
28385 IX86_BUILTIN_STOREDQUSI512,
28386 IX86_BUILTIN_STOREUPD512,
28387 IX86_BUILTIN_STOREUPS512,
28388 IX86_BUILTIN_SUBPD512,
28389 IX86_BUILTIN_SUBPS512,
28390 IX86_BUILTIN_SUBSD_ROUND,
28391 IX86_BUILTIN_SUBSS_ROUND,
28392 IX86_BUILTIN_UCMPD512,
28393 IX86_BUILTIN_UCMPQ512,
28394 IX86_BUILTIN_UNPCKHPD512,
28395 IX86_BUILTIN_UNPCKHPS512,
28396 IX86_BUILTIN_UNPCKLPD512,
28397 IX86_BUILTIN_UNPCKLPS512,
28398 IX86_BUILTIN_VCVTSD2SI32,
28399 IX86_BUILTIN_VCVTSD2SI64,
28400 IX86_BUILTIN_VCVTSD2USI32,
28401 IX86_BUILTIN_VCVTSD2USI64,
28402 IX86_BUILTIN_VCVTSS2SI32,
28403 IX86_BUILTIN_VCVTSS2SI64,
28404 IX86_BUILTIN_VCVTSS2USI32,
28405 IX86_BUILTIN_VCVTSS2USI64,
28406 IX86_BUILTIN_VCVTTSD2SI32,
28407 IX86_BUILTIN_VCVTTSD2SI64,
28408 IX86_BUILTIN_VCVTTSD2USI32,
28409 IX86_BUILTIN_VCVTTSD2USI64,
28410 IX86_BUILTIN_VCVTTSS2SI32,
28411 IX86_BUILTIN_VCVTTSS2SI64,
28412 IX86_BUILTIN_VCVTTSS2USI32,
28413 IX86_BUILTIN_VCVTTSS2USI64,
28414 IX86_BUILTIN_VFMADDPD512_MASK,
28415 IX86_BUILTIN_VFMADDPD512_MASK3,
28416 IX86_BUILTIN_VFMADDPD512_MASKZ,
28417 IX86_BUILTIN_VFMADDPS512_MASK,
28418 IX86_BUILTIN_VFMADDPS512_MASK3,
28419 IX86_BUILTIN_VFMADDPS512_MASKZ,
28420 IX86_BUILTIN_VFMADDSD3_ROUND,
28421 IX86_BUILTIN_VFMADDSS3_ROUND,
28422 IX86_BUILTIN_VFMADDSUBPD512_MASK,
28423 IX86_BUILTIN_VFMADDSUBPD512_MASK3,
28424 IX86_BUILTIN_VFMADDSUBPD512_MASKZ,
28425 IX86_BUILTIN_VFMADDSUBPS512_MASK,
28426 IX86_BUILTIN_VFMADDSUBPS512_MASK3,
28427 IX86_BUILTIN_VFMADDSUBPS512_MASKZ,
28428 IX86_BUILTIN_VFMSUBADDPD512_MASK3,
28429 IX86_BUILTIN_VFMSUBADDPS512_MASK3,
28430 IX86_BUILTIN_VFMSUBPD512_MASK3,
28431 IX86_BUILTIN_VFMSUBPS512_MASK3,
28432 IX86_BUILTIN_VFMSUBSD3_MASK3,
28433 IX86_BUILTIN_VFMSUBSS3_MASK3,
28434 IX86_BUILTIN_VFNMADDPD512_MASK,
28435 IX86_BUILTIN_VFNMADDPS512_MASK,
28436 IX86_BUILTIN_VFNMSUBPD512_MASK,
28437 IX86_BUILTIN_VFNMSUBPD512_MASK3,
28438 IX86_BUILTIN_VFNMSUBPS512_MASK,
28439 IX86_BUILTIN_VFNMSUBPS512_MASK3,
28440 IX86_BUILTIN_VPCLZCNTD512,
28441 IX86_BUILTIN_VPCLZCNTQ512,
28442 IX86_BUILTIN_VPCONFLICTD512,
28443 IX86_BUILTIN_VPCONFLICTQ512,
28444 IX86_BUILTIN_VPERMDF512,
28445 IX86_BUILTIN_VPERMDI512,
28446 IX86_BUILTIN_VPERMI2VARD512,
28447 IX86_BUILTIN_VPERMI2VARPD512,
28448 IX86_BUILTIN_VPERMI2VARPS512,
28449 IX86_BUILTIN_VPERMI2VARQ512,
28450 IX86_BUILTIN_VPERMILPD512,
28451 IX86_BUILTIN_VPERMILPS512,
28452 IX86_BUILTIN_VPERMILVARPD512,
28453 IX86_BUILTIN_VPERMILVARPS512,
28454 IX86_BUILTIN_VPERMT2VARD512,
28455 IX86_BUILTIN_VPERMT2VARD512_MASKZ,
28456 IX86_BUILTIN_VPERMT2VARPD512,
28457 IX86_BUILTIN_VPERMT2VARPD512_MASKZ,
28458 IX86_BUILTIN_VPERMT2VARPS512,
28459 IX86_BUILTIN_VPERMT2VARPS512_MASKZ,
28460 IX86_BUILTIN_VPERMT2VARQ512,
28461 IX86_BUILTIN_VPERMT2VARQ512_MASKZ,
28462 IX86_BUILTIN_VPERMVARDF512,
28463 IX86_BUILTIN_VPERMVARDI512,
28464 IX86_BUILTIN_VPERMVARSF512,
28465 IX86_BUILTIN_VPERMVARSI512,
28466 IX86_BUILTIN_VTERNLOGD512_MASK,
28467 IX86_BUILTIN_VTERNLOGD512_MASKZ,
28468 IX86_BUILTIN_VTERNLOGQ512_MASK,
28469 IX86_BUILTIN_VTERNLOGQ512_MASKZ,
28470
28471 /* Mask arithmetic operations */
28472 IX86_BUILTIN_KAND16,
28473 IX86_BUILTIN_KANDN16,
28474 IX86_BUILTIN_KNOT16,
28475 IX86_BUILTIN_KOR16,
28476 IX86_BUILTIN_KORTESTC16,
28477 IX86_BUILTIN_KORTESTZ16,
28478 IX86_BUILTIN_KUNPCKBW,
28479 IX86_BUILTIN_KXNOR16,
28480 IX86_BUILTIN_KXOR16,
28481 IX86_BUILTIN_KMOV16,
28482
 28483   /* Alternate 4- and 8-element gather/scatter for the vectorizer,
 28484      where all operands are 32-byte or 64-byte wide respectively.  */
28485 IX86_BUILTIN_GATHERALTSIV4DF,
28486 IX86_BUILTIN_GATHERALTDIV8SF,
28487 IX86_BUILTIN_GATHERALTSIV4DI,
28488 IX86_BUILTIN_GATHERALTDIV8SI,
28489 IX86_BUILTIN_GATHER3ALTDIV16SF,
28490 IX86_BUILTIN_GATHER3ALTDIV16SI,
28491 IX86_BUILTIN_GATHER3ALTSIV8DF,
28492 IX86_BUILTIN_GATHER3ALTSIV8DI,
28493 IX86_BUILTIN_GATHER3DIV16SF,
28494 IX86_BUILTIN_GATHER3DIV16SI,
28495 IX86_BUILTIN_GATHER3DIV8DF,
28496 IX86_BUILTIN_GATHER3DIV8DI,
28497 IX86_BUILTIN_GATHER3SIV16SF,
28498 IX86_BUILTIN_GATHER3SIV16SI,
28499 IX86_BUILTIN_GATHER3SIV8DF,
28500 IX86_BUILTIN_GATHER3SIV8DI,
28501 IX86_BUILTIN_SCATTERDIV16SF,
28502 IX86_BUILTIN_SCATTERDIV16SI,
28503 IX86_BUILTIN_SCATTERDIV8DF,
28504 IX86_BUILTIN_SCATTERDIV8DI,
28505 IX86_BUILTIN_SCATTERSIV16SF,
28506 IX86_BUILTIN_SCATTERSIV16SI,
28507 IX86_BUILTIN_SCATTERSIV8DF,
28508 IX86_BUILTIN_SCATTERSIV8DI,
28509
28510 /* AVX512PF */
28511 IX86_BUILTIN_GATHERPFQPD,
28512 IX86_BUILTIN_GATHERPFDPS,
28513 IX86_BUILTIN_GATHERPFDPD,
28514 IX86_BUILTIN_GATHERPFQPS,
28515 IX86_BUILTIN_SCATTERPFDPD,
28516 IX86_BUILTIN_SCATTERPFDPS,
28517 IX86_BUILTIN_SCATTERPFQPD,
28518 IX86_BUILTIN_SCATTERPFQPS,
28519
28520 /* AVX-512ER */
28521 IX86_BUILTIN_EXP2PD_MASK,
28522 IX86_BUILTIN_EXP2PS_MASK,
28523 IX86_BUILTIN_EXP2PS,
28524 IX86_BUILTIN_RCP28PD,
28525 IX86_BUILTIN_RCP28PS,
28526 IX86_BUILTIN_RCP28SD,
28527 IX86_BUILTIN_RCP28SS,
28528 IX86_BUILTIN_RSQRT28PD,
28529 IX86_BUILTIN_RSQRT28PS,
28530 IX86_BUILTIN_RSQRT28SD,
28531 IX86_BUILTIN_RSQRT28SS,
28532
28533 /* SHA builtins. */
28534 IX86_BUILTIN_SHA1MSG1,
28535 IX86_BUILTIN_SHA1MSG2,
28536 IX86_BUILTIN_SHA1NEXTE,
28537 IX86_BUILTIN_SHA1RNDS4,
28538 IX86_BUILTIN_SHA256MSG1,
28539 IX86_BUILTIN_SHA256MSG2,
28540 IX86_BUILTIN_SHA256RNDS2,
28541
28542 /* CLFLUSHOPT instructions. */
28543 IX86_BUILTIN_CLFLUSHOPT,
28544
28545 /* TFmode support builtins. */
28546 IX86_BUILTIN_INFQ,
28547 IX86_BUILTIN_HUGE_VALQ,
28548 IX86_BUILTIN_FABSQ,
28549 IX86_BUILTIN_COPYSIGNQ,
28550
28551 /* Vectorizer support builtins. */
28552 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512,
28553 IX86_BUILTIN_CPYSGNPS,
28554 IX86_BUILTIN_CPYSGNPD,
28555 IX86_BUILTIN_CPYSGNPS256,
28556 IX86_BUILTIN_CPYSGNPS512,
28557 IX86_BUILTIN_CPYSGNPD256,
28558 IX86_BUILTIN_CPYSGNPD512,
28559 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512,
28560 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512,
28561
28562
28563 /* FMA4 instructions. */
28564 IX86_BUILTIN_VFMADDSS,
28565 IX86_BUILTIN_VFMADDSD,
28566 IX86_BUILTIN_VFMADDPS,
28567 IX86_BUILTIN_VFMADDPD,
28568 IX86_BUILTIN_VFMADDPS256,
28569 IX86_BUILTIN_VFMADDPD256,
28570 IX86_BUILTIN_VFMADDSUBPS,
28571 IX86_BUILTIN_VFMADDSUBPD,
28572 IX86_BUILTIN_VFMADDSUBPS256,
28573 IX86_BUILTIN_VFMADDSUBPD256,
28574
28575 /* FMA3 instructions. */
28576 IX86_BUILTIN_VFMADDSS3,
28577 IX86_BUILTIN_VFMADDSD3,
28578
28579 /* XOP instructions. */
28580 IX86_BUILTIN_VPCMOV,
28581 IX86_BUILTIN_VPCMOV_V2DI,
28582 IX86_BUILTIN_VPCMOV_V4SI,
28583 IX86_BUILTIN_VPCMOV_V8HI,
28584 IX86_BUILTIN_VPCMOV_V16QI,
28585 IX86_BUILTIN_VPCMOV_V4SF,
28586 IX86_BUILTIN_VPCMOV_V2DF,
28587 IX86_BUILTIN_VPCMOV256,
28588 IX86_BUILTIN_VPCMOV_V4DI256,
28589 IX86_BUILTIN_VPCMOV_V8SI256,
28590 IX86_BUILTIN_VPCMOV_V16HI256,
28591 IX86_BUILTIN_VPCMOV_V32QI256,
28592 IX86_BUILTIN_VPCMOV_V8SF256,
28593 IX86_BUILTIN_VPCMOV_V4DF256,
28594
28595 IX86_BUILTIN_VPPERM,
28596
28597 IX86_BUILTIN_VPMACSSWW,
28598 IX86_BUILTIN_VPMACSWW,
28599 IX86_BUILTIN_VPMACSSWD,
28600 IX86_BUILTIN_VPMACSWD,
28601 IX86_BUILTIN_VPMACSSDD,
28602 IX86_BUILTIN_VPMACSDD,
28603 IX86_BUILTIN_VPMACSSDQL,
28604 IX86_BUILTIN_VPMACSSDQH,
28605 IX86_BUILTIN_VPMACSDQL,
28606 IX86_BUILTIN_VPMACSDQH,
28607 IX86_BUILTIN_VPMADCSSWD,
28608 IX86_BUILTIN_VPMADCSWD,
28609
28610 IX86_BUILTIN_VPHADDBW,
28611 IX86_BUILTIN_VPHADDBD,
28612 IX86_BUILTIN_VPHADDBQ,
28613 IX86_BUILTIN_VPHADDWD,
28614 IX86_BUILTIN_VPHADDWQ,
28615 IX86_BUILTIN_VPHADDDQ,
28616 IX86_BUILTIN_VPHADDUBW,
28617 IX86_BUILTIN_VPHADDUBD,
28618 IX86_BUILTIN_VPHADDUBQ,
28619 IX86_BUILTIN_VPHADDUWD,
28620 IX86_BUILTIN_VPHADDUWQ,
28621 IX86_BUILTIN_VPHADDUDQ,
28622 IX86_BUILTIN_VPHSUBBW,
28623 IX86_BUILTIN_VPHSUBWD,
28624 IX86_BUILTIN_VPHSUBDQ,
28625
28626 IX86_BUILTIN_VPROTB,
28627 IX86_BUILTIN_VPROTW,
28628 IX86_BUILTIN_VPROTD,
28629 IX86_BUILTIN_VPROTQ,
28630 IX86_BUILTIN_VPROTB_IMM,
28631 IX86_BUILTIN_VPROTW_IMM,
28632 IX86_BUILTIN_VPROTD_IMM,
28633 IX86_BUILTIN_VPROTQ_IMM,
28634
28635 IX86_BUILTIN_VPSHLB,
28636 IX86_BUILTIN_VPSHLW,
28637 IX86_BUILTIN_VPSHLD,
28638 IX86_BUILTIN_VPSHLQ,
28639 IX86_BUILTIN_VPSHAB,
28640 IX86_BUILTIN_VPSHAW,
28641 IX86_BUILTIN_VPSHAD,
28642 IX86_BUILTIN_VPSHAQ,
28643
28644 IX86_BUILTIN_VFRCZSS,
28645 IX86_BUILTIN_VFRCZSD,
28646 IX86_BUILTIN_VFRCZPS,
28647 IX86_BUILTIN_VFRCZPD,
28648 IX86_BUILTIN_VFRCZPS256,
28649 IX86_BUILTIN_VFRCZPD256,
28650
28651 IX86_BUILTIN_VPCOMEQUB,
28652 IX86_BUILTIN_VPCOMNEUB,
28653 IX86_BUILTIN_VPCOMLTUB,
28654 IX86_BUILTIN_VPCOMLEUB,
28655 IX86_BUILTIN_VPCOMGTUB,
28656 IX86_BUILTIN_VPCOMGEUB,
28657 IX86_BUILTIN_VPCOMFALSEUB,
28658 IX86_BUILTIN_VPCOMTRUEUB,
28659
28660 IX86_BUILTIN_VPCOMEQUW,
28661 IX86_BUILTIN_VPCOMNEUW,
28662 IX86_BUILTIN_VPCOMLTUW,
28663 IX86_BUILTIN_VPCOMLEUW,
28664 IX86_BUILTIN_VPCOMGTUW,
28665 IX86_BUILTIN_VPCOMGEUW,
28666 IX86_BUILTIN_VPCOMFALSEUW,
28667 IX86_BUILTIN_VPCOMTRUEUW,
28668
28669 IX86_BUILTIN_VPCOMEQUD,
28670 IX86_BUILTIN_VPCOMNEUD,
28671 IX86_BUILTIN_VPCOMLTUD,
28672 IX86_BUILTIN_VPCOMLEUD,
28673 IX86_BUILTIN_VPCOMGTUD,
28674 IX86_BUILTIN_VPCOMGEUD,
28675 IX86_BUILTIN_VPCOMFALSEUD,
28676 IX86_BUILTIN_VPCOMTRUEUD,
28677
28678 IX86_BUILTIN_VPCOMEQUQ,
28679 IX86_BUILTIN_VPCOMNEUQ,
28680 IX86_BUILTIN_VPCOMLTUQ,
28681 IX86_BUILTIN_VPCOMLEUQ,
28682 IX86_BUILTIN_VPCOMGTUQ,
28683 IX86_BUILTIN_VPCOMGEUQ,
28684 IX86_BUILTIN_VPCOMFALSEUQ,
28685 IX86_BUILTIN_VPCOMTRUEUQ,
28686
28687 IX86_BUILTIN_VPCOMEQB,
28688 IX86_BUILTIN_VPCOMNEB,
28689 IX86_BUILTIN_VPCOMLTB,
28690 IX86_BUILTIN_VPCOMLEB,
28691 IX86_BUILTIN_VPCOMGTB,
28692 IX86_BUILTIN_VPCOMGEB,
28693 IX86_BUILTIN_VPCOMFALSEB,
28694 IX86_BUILTIN_VPCOMTRUEB,
28695
28696 IX86_BUILTIN_VPCOMEQW,
28697 IX86_BUILTIN_VPCOMNEW,
28698 IX86_BUILTIN_VPCOMLTW,
28699 IX86_BUILTIN_VPCOMLEW,
28700 IX86_BUILTIN_VPCOMGTW,
28701 IX86_BUILTIN_VPCOMGEW,
28702 IX86_BUILTIN_VPCOMFALSEW,
28703 IX86_BUILTIN_VPCOMTRUEW,
28704
28705 IX86_BUILTIN_VPCOMEQD,
28706 IX86_BUILTIN_VPCOMNED,
28707 IX86_BUILTIN_VPCOMLTD,
28708 IX86_BUILTIN_VPCOMLED,
28709 IX86_BUILTIN_VPCOMGTD,
28710 IX86_BUILTIN_VPCOMGED,
28711 IX86_BUILTIN_VPCOMFALSED,
28712 IX86_BUILTIN_VPCOMTRUED,
28713
28714 IX86_BUILTIN_VPCOMEQQ,
28715 IX86_BUILTIN_VPCOMNEQ,
28716 IX86_BUILTIN_VPCOMLTQ,
28717 IX86_BUILTIN_VPCOMLEQ,
28718 IX86_BUILTIN_VPCOMGTQ,
28719 IX86_BUILTIN_VPCOMGEQ,
28720 IX86_BUILTIN_VPCOMFALSEQ,
28721 IX86_BUILTIN_VPCOMTRUEQ,
28722
28723 /* LWP instructions. */
28724 IX86_BUILTIN_LLWPCB,
28725 IX86_BUILTIN_SLWPCB,
28726 IX86_BUILTIN_LWPVAL32,
28727 IX86_BUILTIN_LWPVAL64,
28728 IX86_BUILTIN_LWPINS32,
28729 IX86_BUILTIN_LWPINS64,
28730
28731 IX86_BUILTIN_CLZS,
28732
28733 /* RTM */
28734 IX86_BUILTIN_XBEGIN,
28735 IX86_BUILTIN_XEND,
28736 IX86_BUILTIN_XABORT,
28737 IX86_BUILTIN_XTEST,
28738
28739 /* BMI instructions. */
28740 IX86_BUILTIN_BEXTR32,
28741 IX86_BUILTIN_BEXTR64,
28742 IX86_BUILTIN_CTZS,
28743
28744 /* TBM instructions. */
28745 IX86_BUILTIN_BEXTRI32,
28746 IX86_BUILTIN_BEXTRI64,
28747
28748 /* BMI2 instructions. */
28749 IX86_BUILTIN_BZHI32,
28750 IX86_BUILTIN_BZHI64,
28751 IX86_BUILTIN_PDEP32,
28752 IX86_BUILTIN_PDEP64,
28753 IX86_BUILTIN_PEXT32,
28754 IX86_BUILTIN_PEXT64,
28755
28756 /* ADX instructions. */
28757 IX86_BUILTIN_ADDCARRYX32,
28758 IX86_BUILTIN_ADDCARRYX64,
28759
28760 /* SBB instructions. */
28761 IX86_BUILTIN_SBB32,
28762 IX86_BUILTIN_SBB64,
28763
28764 /* FSGSBASE instructions. */
28765 IX86_BUILTIN_RDFSBASE32,
28766 IX86_BUILTIN_RDFSBASE64,
28767 IX86_BUILTIN_RDGSBASE32,
28768 IX86_BUILTIN_RDGSBASE64,
28769 IX86_BUILTIN_WRFSBASE32,
28770 IX86_BUILTIN_WRFSBASE64,
28771 IX86_BUILTIN_WRGSBASE32,
28772 IX86_BUILTIN_WRGSBASE64,
28773
28774 /* RDRND instructions. */
28775 IX86_BUILTIN_RDRAND16_STEP,
28776 IX86_BUILTIN_RDRAND32_STEP,
28777 IX86_BUILTIN_RDRAND64_STEP,
28778
28779 /* RDSEED instructions. */
28780 IX86_BUILTIN_RDSEED16_STEP,
28781 IX86_BUILTIN_RDSEED32_STEP,
28782 IX86_BUILTIN_RDSEED64_STEP,
28783
28784 /* F16C instructions. */
28785 IX86_BUILTIN_CVTPH2PS,
28786 IX86_BUILTIN_CVTPH2PS256,
28787 IX86_BUILTIN_CVTPS2PH,
28788 IX86_BUILTIN_CVTPS2PH256,
28789
28790 /* CFString built-in for darwin */
28791 IX86_BUILTIN_CFSTRING,
28792
28793 /* Builtins to get CPU type and supported features. */
28794 IX86_BUILTIN_CPU_INIT,
28795 IX86_BUILTIN_CPU_IS,
28796 IX86_BUILTIN_CPU_SUPPORTS,
28797
28798 /* Read/write FLAGS register built-ins. */
28799 IX86_BUILTIN_READ_FLAGS,
28800 IX86_BUILTIN_WRITE_FLAGS,
28801
28802 IX86_BUILTIN_MAX
28803 };
28804
28805 /* Table for the ix86 builtin decls. */
28806 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
28807
28808 /* Table of all of the builtin functions that are possible with different ISAs
28809    but are waiting to be built until a function is declared to use that
28810    ISA.  */
28811 struct builtin_isa {
28812 const char *name; /* function name */
28813 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
28814 HOST_WIDE_INT isa; /* isa_flags this builtin is defined for */
28815 bool const_p; /* true if the declaration is constant */
28816   bool set_and_not_built_p;	     /* true if recorded but the decl has not been built yet */
28817 };
28818
28819 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
28820
28821
28822 /* Add an ix86 target builtin function with CODE, NAME and TYPE.  Record MASK,
28823    the isa_flags the builtin requires, in the ix86_builtins_isa array.  Store
28824    the function decl in the ix86_builtins array.  Return the function decl,
28825    or NULL_TREE if the builtin was not added.
28826
28827 If the front end has a special hook for builtin functions, delay adding
28828 builtin functions that aren't in the current ISA until the ISA is changed
28829    with function specific optimization.  Doing so can save about 300K for the
28830 default compiler. When the builtin is expanded, check at that time whether
28831 it is valid.
28832
28833    If the front end doesn't have a special hook, record all builtins, even if
28834    they aren't part of the current ISA, in case the user uses function
28835    specific options for a different ISA, so that we don't get scope errors
28836    if a builtin is added in the middle of a function scope.  */
28837
28838 static inline tree
28839 def_builtin (HOST_WIDE_INT mask, const char *name,
28840 enum ix86_builtin_func_type tcode,
28841 enum ix86_builtins code)
28842 {
28843 tree decl = NULL_TREE;
28844
28845 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
28846 {
28847 ix86_builtins_isa[(int) code].isa = mask;
28848
28849 mask &= ~OPTION_MASK_ISA_64BIT;
28850 if (mask == 0
28851 || (mask & ix86_isa_flags) != 0
28852 || (lang_hooks.builtin_function
28853 == lang_hooks.builtin_function_ext_scope))
28855 	{
28856 tree type = ix86_get_builtin_func_type (tcode);
28857 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
28858 NULL, NULL_TREE);
28859 ix86_builtins[(int) code] = decl;
28860 ix86_builtins_isa[(int) code].set_and_not_built_p = false;
28861 }
28862 else
28863 {
28864 ix86_builtins[(int) code] = NULL_TREE;
28865 ix86_builtins_isa[(int) code].tcode = tcode;
28866 ix86_builtins_isa[(int) code].name = name;
28867 ix86_builtins_isa[(int) code].const_p = false;
28868 ix86_builtins_isa[(int) code].set_and_not_built_p = true;
28869 }
28870 }
28871
28872 return decl;
28873 }
28874
28875 /* Like def_builtin, but also marks the function decl "const". */
28876
28877 static inline tree
28878 def_builtin_const (HOST_WIDE_INT mask, const char *name,
28879 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
28880 {
28881 tree decl = def_builtin (mask, name, tcode, code);
28882 if (decl)
28883 TREE_READONLY (decl) = 1;
28884 else
28885 ix86_builtins_isa[(int) code].const_p = true;
28886
28887 return decl;
28888 }
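
/* A minimal usage sketch (hypothetical builtin, for illustration only;
   the real calls are emitted from ix86_init_mmx_sse_builtins further down
   in this file):

     def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_example",
			V2DF_FTYPE_V2DF_V2DF, IX86_BUILTIN_EXAMPLE);

   If SSE2 is already in ix86_isa_flags (or the front end registers builtins
   at extern scope), the decl is built immediately and marked TREE_READONLY.
   Otherwise only the name, type code, ISA mask and const_p are recorded in
   ix86_builtins_isa, and the decl is built later by ix86_add_new_builtins
   once the ISA becomes available.  */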
28889
28890 /* Add any new builtin functions for a given ISA that may not have been
28891    declared yet.  This saves a bit of space compared to adding all of the
28892    declarations to the tree up front, whether or not they are used.  */
28893
28894 static void
28895 ix86_add_new_builtins (HOST_WIDE_INT isa)
28896 {
28897 int i;
28898
28899 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
28900 {
28901 if ((ix86_builtins_isa[i].isa & isa) != 0
28902 && ix86_builtins_isa[i].set_and_not_built_p)
28903 {
28904 tree decl, type;
28905
28906 /* Don't define the builtin again. */
28907 ix86_builtins_isa[i].set_and_not_built_p = false;
28908
28909 type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
28910 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
28911 type, i, BUILT_IN_MD, NULL,
28912 NULL_TREE);
28913
28914 ix86_builtins[i] = decl;
28915 if (ix86_builtins_isa[i].const_p)
28916 TREE_READONLY (decl) = 1;
28917 }
28918 }
28919 }
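
/* Illustration (hypothetical user code, not part of this file): in a
   translation unit compiled without -mavx2,

     __attribute__ ((target ("avx2")))
     __m256i add8 (__m256i x, __m256i y) { return _mm256_add_epi32 (x, y); }

   switches the active ISA while the target attribute is in effect; the AVX2
   builtins that were previously only recorded in ix86_builtins_isa are then
   built here, so the intrinsic can expand to the corresponding builtin.  */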
28920
28921 /* Bits for builtin_description.flag. */
28922
28923 /* Set when we don't support the comparison natively, and should
28924    swap the comparison operands in order to support it.  */
28925 #define BUILTIN_DESC_SWAP_OPERANDS 1
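
/* For example, a hypothetical description of a greater-than comparison that
   has no native pattern could name the corresponding less-than insn and set
   BUILTIN_DESC_SWAP_OPERANDS; the expander then exchanges the two input
   operands before emitting the insn, which gives the intended result.  */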
28926
28927 struct builtin_description
28928 {
28929 const HOST_WIDE_INT mask;
28930 const enum insn_code icode;
28931 const char *const name;
28932 const enum ix86_builtins code;
28933 const enum rtx_code comparison;
28934 const int flag;
28935 };
28936
28937 static const struct builtin_description bdesc_comi[] =
28938 {
28939 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
28940 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
28941 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
28942 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
28943 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
28944 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
28945 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
28946 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
28947 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
28948 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
28949 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
28950 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
28951 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
28952 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
28953 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
28954 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
28955 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
28956 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
28957 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
28958 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
28959 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
28960 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
28961 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
28962 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
28963 };
28964
28965 static const struct builtin_description bdesc_pcmpestr[] =
28966 {
28967 /* SSE4.2 */
28968 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
28969 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
28970 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
28971 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
28972 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
28973 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
28974 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
28975 };
28976
28977 static const struct builtin_description bdesc_pcmpistr[] =
28978 {
28979 /* SSE4.2 */
28980 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
28981 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
28982 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
28983 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
28984 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
28985 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
28986 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
28987 };
28988
28989 /* Special builtins with variable number of arguments. */
28990 static const struct builtin_description bdesc_special_args[] =
28991 {
28992 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
28993 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
28994 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },
28995
28996   /* 80387 (used internally for atomic compound assignment).  */
28997 { 0, CODE_FOR_fnstenv, "__builtin_ia32_fnstenv", IX86_BUILTIN_FNSTENV, UNKNOWN, (int) VOID_FTYPE_PVOID },
28998 { 0, CODE_FOR_fldenv, "__builtin_ia32_fldenv", IX86_BUILTIN_FLDENV, UNKNOWN, (int) VOID_FTYPE_PCVOID },
28999 { 0, CODE_FOR_fnstsw, "__builtin_ia32_fnstsw", IX86_BUILTIN_FNSTSW, UNKNOWN, (int) USHORT_FTYPE_VOID },
29000 { 0, CODE_FOR_fnclex, "__builtin_ia32_fnclex", IX86_BUILTIN_FNCLEX, UNKNOWN, (int) VOID_FTYPE_VOID },
29001
29002 /* MMX */
29003 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
29004
29005 /* 3DNow! */
29006 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
29007
29008 /* FXSR, XSAVE, XSAVEOPT, XSAVEC and XSAVES. */
29009 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxsave", IX86_BUILTIN_FXSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID },
29010 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxrstor", IX86_BUILTIN_FXRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID },
29011 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xsave", IX86_BUILTIN_XSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29012 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xrstor", IX86_BUILTIN_XRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29013 { OPTION_MASK_ISA_XSAVEOPT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt", IX86_BUILTIN_XSAVEOPT, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29014 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xsaves", IX86_BUILTIN_XSAVES, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29015 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xrstors", IX86_BUILTIN_XRSTORS, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29016 { OPTION_MASK_ISA_XSAVEC, CODE_FOR_nothing, "__builtin_ia32_xsavec", IX86_BUILTIN_XSAVEC, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29017
29018 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxsave64", IX86_BUILTIN_FXSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID },
29019 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxrstor64", IX86_BUILTIN_FXRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID },
29020 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsave64", IX86_BUILTIN_XSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29021 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstor64", IX86_BUILTIN_XRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29022 { OPTION_MASK_ISA_XSAVEOPT | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt64", IX86_BUILTIN_XSAVEOPT64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29023 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaves64", IX86_BUILTIN_XSAVES64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29024 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstors64", IX86_BUILTIN_XRSTORS64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29025 { OPTION_MASK_ISA_XSAVEC | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsavec64", IX86_BUILTIN_XSAVEC64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29026
29027 /* SSE */
29028 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storeups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
29029 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
29030 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
29031
29032 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
29033 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
29034 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
29035 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
29036
29037 /* SSE or 3DNow!A */
29038 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
29039 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntq, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
29040
29041 /* SSE2 */
29042 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
29043 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
29044 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storeupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
29045 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storedquv16qi, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
29046 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
29047 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
29048 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
29049 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG },
29050 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
29051 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loaddquv16qi, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
29052
29053 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
29054 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
29055
29056 /* SSE3 */
29057 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
29058
29059 /* SSE4.1 */
29060 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
29061
29062 /* SSE4A */
29063 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
29064 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
29065
29066 /* AVX */
29067 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
29068 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
29069
29070 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
29071 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
29072 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
29073 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
29074 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
29075
29076 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
29077 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
29078 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
29079 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
29080 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loaddquv32qi, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
29081 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storedquv32qi, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
29082 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
29083
29084 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
29085 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
29086 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
29087
29088 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
29089 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
29090 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
29091 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
29092 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
29093 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
29094 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
29095 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },
29096
29097 /* AVX2 */
29098 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_movntdqa, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256, UNKNOWN, (int) V4DI_FTYPE_PV4DI },
29099 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI },
29100 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI },
29101 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd256, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI },
29102 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq256, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI },
29103 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_V4SI },
29104 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_V2DI },
29105 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI },
29106 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI },
29107
29108 /* AVX512F */
29109 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16sf_mask, "__builtin_ia32_compressstoresf512_mask", IX86_BUILTIN_COMPRESSPSSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
29110 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16si_mask, "__builtin_ia32_compressstoresi512_mask", IX86_BUILTIN_PCOMPRESSDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
29111 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8df_mask, "__builtin_ia32_compressstoredf512_mask", IX86_BUILTIN_COMPRESSPDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
29112 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8di_mask, "__builtin_ia32_compressstoredi512_mask", IX86_BUILTIN_PCOMPRESSQSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
29113 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandloadsf512_mask", IX86_BUILTIN_EXPANDPSLOAD512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29114 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandloadsf512_maskz", IX86_BUILTIN_EXPANDPSLOAD512Z, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29115 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandloadsi512_mask", IX86_BUILTIN_PEXPANDDLOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29116 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandloadsi512_maskz", IX86_BUILTIN_PEXPANDDLOAD512Z, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29117 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expandloaddf512_mask", IX86_BUILTIN_EXPANDPDLOAD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29118 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expandloaddf512_maskz", IX86_BUILTIN_EXPANDPDLOAD512Z, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29119 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expandloaddi512_mask", IX86_BUILTIN_PEXPANDQLOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29120 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expandloaddi512_maskz", IX86_BUILTIN_PEXPANDQLOAD512Z, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29121 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv16si_mask, "__builtin_ia32_loaddqusi512_mask", IX86_BUILTIN_LOADDQUSI512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29122 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv8di_mask, "__builtin_ia32_loaddqudi512_mask", IX86_BUILTIN_LOADDQUDI512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29123 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadupd512_mask, "__builtin_ia32_loadupd512_mask", IX86_BUILTIN_LOADUPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29124 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadups512_mask, "__builtin_ia32_loadups512_mask", IX86_BUILTIN_LOADUPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29125 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_loadaps512_mask", IX86_BUILTIN_LOADAPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29126 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32load512_mask", IX86_BUILTIN_MOVDQA32LOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29127 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_loadapd512_mask", IX86_BUILTIN_LOADAPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29128 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64load512_mask", IX86_BUILTIN_MOVDQA64LOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29129 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv16sf, "__builtin_ia32_movntps512", IX86_BUILTIN_MOVNTPS512, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V16SF },
29130 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8df, "__builtin_ia32_movntpd512", IX86_BUILTIN_MOVNTPD512, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V8DF },
29131 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8di, "__builtin_ia32_movntdq512", IX86_BUILTIN_MOVNTDQ512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI },
29132 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntdqa, "__builtin_ia32_movntdqa512", IX86_BUILTIN_MOVNTDQA512, UNKNOWN, (int) V8DI_FTYPE_PV8DI },
29133 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv16si_mask, "__builtin_ia32_storedqusi512_mask", IX86_BUILTIN_STOREDQUSI512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
29134 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv8di_mask, "__builtin_ia32_storedqudi512_mask", IX86_BUILTIN_STOREDQUDI512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
29135 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeupd512_mask, "__builtin_ia32_storeupd512_mask", IX86_BUILTIN_STOREUPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
29136 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask_store, "__builtin_ia32_pmovusqd512mem_mask", IX86_BUILTIN_PMOVUSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
29137 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask_store, "__builtin_ia32_pmovsqd512mem_mask", IX86_BUILTIN_PMOVSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
29138 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask_store, "__builtin_ia32_pmovqd512mem_mask", IX86_BUILTIN_PMOVQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
29139 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovusqw512mem_mask", IX86_BUILTIN_PMOVUSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
29140 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovsqw512mem_mask", IX86_BUILTIN_PMOVSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
29141 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovqw512mem_mask", IX86_BUILTIN_PMOVQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
29142 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovusdw512mem_mask", IX86_BUILTIN_PMOVUSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
29143 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovsdw512mem_mask", IX86_BUILTIN_PMOVSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
29144 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovdw512mem_mask", IX86_BUILTIN_PMOVDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
29145 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovqb512mem_mask", IX86_BUILTIN_PMOVQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
29146 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovusqb512mem_mask", IX86_BUILTIN_PMOVUSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
29147 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovsqb512mem_mask", IX86_BUILTIN_PMOVSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
29148 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovusdb512mem_mask", IX86_BUILTIN_PMOVUSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
29149 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovsdb512mem_mask", IX86_BUILTIN_PMOVSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
29150 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovdb512mem_mask", IX86_BUILTIN_PMOVDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
29151 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeups512_mask, "__builtin_ia32_storeups512_mask", IX86_BUILTIN_STOREUPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
29152 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16sf_mask, "__builtin_ia32_storeaps512_mask", IX86_BUILTIN_STOREAPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
29153 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16si_mask, "__builtin_ia32_movdqa32store512_mask", IX86_BUILTIN_MOVDQA32STORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
29154 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8df_mask, "__builtin_ia32_storeapd512_mask", IX86_BUILTIN_STOREAPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
29155 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8di_mask, "__builtin_ia32_movdqa64store512_mask", IX86_BUILTIN_MOVDQA64STORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
29156
29157 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
29158 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
29159 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
29160 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
29161 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
29162 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
29163
29164 /* FSGSBASE */
29165 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
29166 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
29167 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
29168 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
29169 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
29170 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
29171 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
29172 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
29173
29174 /* RTM */
29175 { OPTION_MASK_ISA_RTM, CODE_FOR_xbegin, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
29176 { OPTION_MASK_ISA_RTM, CODE_FOR_xend, "__builtin_ia32_xend", IX86_BUILTIN_XEND, UNKNOWN, (int) VOID_FTYPE_VOID },
29177 { OPTION_MASK_ISA_RTM, CODE_FOR_xtest, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST, UNKNOWN, (int) INT_FTYPE_VOID },
29178 };
29179
29180 /* Builtins with variable number of arguments. */
29181 static const struct builtin_description bdesc_args[] =
29182 {
29183 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
29184 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
29185 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
29186 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
29187 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
29188 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
29189 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
29190
29191 /* MMX */
29192 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29193 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29194 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29195 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29196 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29197 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29198
29199 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29200 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29201 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29202 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29203 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29204 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29205 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29206 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29207
29208 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29209 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29210
29211 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29212 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29213 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29214 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29215
29216 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29217 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29218 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29219 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29220 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29221 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29222
29223 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29224 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29225 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29226 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29227   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29228   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29229
29230 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
29231 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
29232 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
29233
29234 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
29235
29236 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
29237 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
29238 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
29239 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
29240 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
29241 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
29242
29243 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
29244 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
29245 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
29246 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
29247 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
29248 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
29249
29250 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
29251 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
29252 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
29253 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
29254
29255 /* 3DNow! */
29256 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
29257 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
29258 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
29259 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
29260
29261 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29262 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29263 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29264 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
29265 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
29266 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
29267 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29268 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29269 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29270 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29271 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29272 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29273 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29274 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29275 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29276
29277 /* 3DNow!A */
29278 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
29279 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
29280 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
29281 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
29282 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29283 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29284
29285 /* SSE */
29286 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
29287 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29288 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29289 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29290 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29291 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29292 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
29293 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
29294 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
29295 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
29296 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
29297 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
29298
29299 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29300
29301 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29302 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29303 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29304 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29305 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29306 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29307 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29308 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29309
29310 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
29311 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
29312 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
29313 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29314 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29315 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29316 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
29317 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
29318 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
29319 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29320   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29321 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29322 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
29323 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
29324 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
29325 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29326 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
29327 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
29328 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
29329 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29330
29331 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29332 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29333 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29334 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29335
29336 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29337 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29338 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29339 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29340
29341 { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29342
29343 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29344 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29345 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29346 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29347 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29348
29349 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
29350 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
29351   { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },
29352
29353 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
29354
29355 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
29356 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
29357 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
29358
29359 { OPTION_MASK_ISA_SSE, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
29360 { OPTION_MASK_ISA_SSE, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
29361
29362 /* SSE MMX or 3Dnow!A */
29363 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29364 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29365 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29366
29367 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29368 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29369 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29370 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29371
29372 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
29373 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
29374
29375 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
29376
29377 /* SSE2 */
29378 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29379
29380 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
29381 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
29382 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
29383 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
29384 { OPTION_MASK_ISA_SSE2, CODE_FOR_floatv4siv4sf2, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
29385
29386 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
29387 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
29388 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
29389 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
29390 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
29391
29392 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
29393
29394 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
29395 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
29396 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
29397 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
29398
29399 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_fix_notruncv4sfv4si, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
29400 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
29401 { OPTION_MASK_ISA_SSE2, CODE_FOR_fix_truncv4sfv4si2, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
29402
29403 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29404 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29405 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29406 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29407 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29408 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29409 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29410 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29411
29412 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
29413 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
29414 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
29415 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29416 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP},
29417 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29418 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
29419 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
29420 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
29421 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29422 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29423 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29424 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
29425 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
29426 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
29427 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29428 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
29429 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
29430 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
29431 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29432
29433 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29434 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29435 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29436 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29437
29438 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29439 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29440 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29441 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29442
29443 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29444
29445 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29446 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29447 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29448
29449 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
29450
29451 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29452 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29453 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29454 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29455 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29456 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29457 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29458 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29459
29460 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29461 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29462 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29463 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29464 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29465 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29466 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29467 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29468
29469 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29470   { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29471
29472 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29473 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29474 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29475 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29476
29477 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29478 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29479
29480 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29481 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29482 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29483 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29484 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29485 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29486
29487 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29488 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29489 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29490 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29491
29492 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29493 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29494 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29495 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29496 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29497 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29498 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29499 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29500
29501 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
29502 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
29503 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
29504
29505 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29506 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
29507
29508 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
29509 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_widen_umult_even_v4si, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
29510
29511 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
29512
29513 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
29514 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
29515 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
29516 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
29517
29518 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
29519 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
29520 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
29521 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
29522 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
29523 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
29524 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
29525
29526 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
29527 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
29528 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
29529 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
29530 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
29531 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
29532 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
29533
29534 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
29535 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
29536 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
29537 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
29538
29539 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
29540 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
29541 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
29542
29543 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
29544
29545 { OPTION_MASK_ISA_SSE, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
29546
29547 /* SSE2 MMX */
29548 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
29549 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
29550
29551 /* SSE3 */
29552   { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29553 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29554
29555 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29556 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29557 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29558 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29559 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29560 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29561
29562 /* SSSE3 */
29563 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
29564 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
29565 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
29566 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
29567 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
29568 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
29569
29570 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29571 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29572 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29573 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29574 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29575 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29576 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29577 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29578 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29579 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29580 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29581 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29582 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
29583 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
29584 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29585 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29586 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29587 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29588 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29589 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29590 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29591 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29592 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29593 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29594
29595 /* SSSE3. */
29596 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
29597 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
29598
29599 /* SSE4.1 */
29600 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29601 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29602 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
29603 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
29604 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29605 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29606 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29607 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
29608 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
29609 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
29610
29611 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
29612 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
29613 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
29614 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
29615 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
29616 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
29617 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
29618 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
29619 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
29620 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
29621 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
29622 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
29623 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
29624
29625 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
29626 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29627 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29628 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29629 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29630 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29631 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29632 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29633 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29634 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29635 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
29636 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29637
29638 /* SSE4.1 */
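  /* Entries below that pass (enum rtx_code) ROUND_* reuse the comparison
     field to carry the rounding-mode immediate.  */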
29639 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
29640 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
29641 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29642 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29643
29644 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
29645 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
29646 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
29647 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },
29648
29649 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
29650 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
29651
29652 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF },
29653 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
29654
29655 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
29656 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
29657 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
29658 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },
29659
29660 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V4SF_ROUND },
29661 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V4SF_ROUND },
29662
29663 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29664 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2_sfix, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX, UNKNOWN, (int) V4SI_FTYPE_V4SF },
29665
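  /* For the ptest entries the comparison field selects which flag the result
     tests: EQ = ZF (testz), LTU = CF (testc), GTU = neither (testnzc).  */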
29666 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
29667 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
29668 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
29669
29670 /* SSE4.2 */
29671 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29672 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
29673 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
29674 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29675 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
29676
29677 /* SSE4A */
29678 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
29679 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
29680 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
29681 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29682
29683 /* AES */
29684 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
29685 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
29686
29687 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29688 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29689 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29690 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29691
29692 /* PCLMUL */
29693 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
29694
29695 /* AVX */
29696 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29697 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29698 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29699 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29700 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29701 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29702 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29703 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29704 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29705 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29706 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29707 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29708 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29709 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29710 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29711 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29712 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29713 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29714 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29715 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29716 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29717 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29718 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29719 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29720 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29721 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29722
29723 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
29724 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
29725 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
29726 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
29727
29728 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29729 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29730 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
29731 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
29732 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29733 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29734 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29735 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29736 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29737 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29738 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29739 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29740 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29741 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
29742 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
29743 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
29744 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv4siv4df2, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
29745 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv8siv8sf2, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
29746 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
29747 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_fix_notruncv8sfv8si, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
29748 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
29749 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv4dfv4si2, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
29750 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
29751 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv8sfv8si2, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
29752 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29753 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29754 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
29755 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
29756 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
29757 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
29758 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
29759 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
29760 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
29761 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
29762
29763 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29764 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29765 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
29766
29767 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
29768 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29769 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29770 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29771 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29772
29773 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29774
29775 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
29776 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
29777
29778 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
29779 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
29780 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
29781 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },
29782
29783 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
29784 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
29785
29786 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
29787 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
29788
29789 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
29790 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
29791 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
29792 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },
29793
29794 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V8SF_ROUND },
29795 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V8SF_ROUND },
29796
29797 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29798 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2_sfix, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
29799
29800 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29801 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29802 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29803 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29804
29805 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
29806 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
29807 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
29808 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
29809 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
29810 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
29811
29812 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
29813 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
29814 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
29815 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
29816 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
29817 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
29818 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
29819 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
29820 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
29821 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
29822 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
29823 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
29824 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
29825 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
29826 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
29827
29828 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
29829 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
29830
29831 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29832 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29833
29834 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_pack_sfix_v4df, "__builtin_ia32_vec_pack_sfix256", IX86_BUILTIN_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
29835
29836 /* AVX2 */
29837 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT },
29838 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI },
29839 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv16hi2, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI },
29840 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv8si2, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI },
29841 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packssdw, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
29842 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packsswb, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
29843 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packusdw, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
29844 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packuswb, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
29845 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv32qi3, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29846 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv16hi3, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29847 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv8si3, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29848 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv4di3, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29849 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv32qi3, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29850 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv16hi3, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29851 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv32qi3, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29852 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv16hi3, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29853 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_palignrv2ti, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT },
29854 { OPTION_MASK_ISA_AVX2, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29855 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29856 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29857 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29858 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI },
29859 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT },
29860 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv32qi3, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29861 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv16hi3, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29862 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv8si3, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29863 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv4di3, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29864 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv32qi3, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29865 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv16hi3, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29866 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv8si3, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29867 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv4di3, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29868 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddwv16hi3, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29869 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phadddv8si3, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29870 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddswv16hi3, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29871 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubwv16hi3, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29872 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubdv8si3, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29873 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubswv16hi3, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29874 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddubsw256, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
29875 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddwd, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI },
29876 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv32qi3, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29877 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv16hi3, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29878 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv8si3 , "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29879 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv32qi3, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29880 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv16hi3, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29881 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv8si3 , "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29882 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv32qi3, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29883 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv16hi3, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29884 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv8si3 , "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29885 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv32qi3, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29886 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv16hi3, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29887 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv8si3 , "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29888 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmovmskb, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256, UNKNOWN, (int) INT_FTYPE_V32QI },
29889 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv16qiv16hi2, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
29890 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8qiv8si2 , "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
29891 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4qiv4di2 , "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
29892 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8hiv8si2 , "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
29893 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4hiv4di2 , "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
29894 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4siv4di2 , "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
29895 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv16qiv16hi2, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
29896 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8qiv8si2 , "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
29897 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4qiv4di2 , "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
29898 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8hiv8si2 , "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
29899 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2 , "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
29900 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2 , "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
29901 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_smult_even_v8si, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
29902 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmulhrswv16hi3 , "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29903 { OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256" , IX86_BUILTIN_PMULHUW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29904 { OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256" , IX86_BUILTIN_PMULHW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29905 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256" , IX86_BUILTIN_PMULLW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29906 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256" , IX86_BUILTIN_PMULLD256 , UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29907 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_umult_even_v8si, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
29908 { OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29909 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
29910 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29911 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufdv3, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT },
29912 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufhwv3, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
29913 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshuflwv3, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
29914 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv32qi3, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29915 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv16hi3, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29916 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv8si3 , "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
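  /* In the shift rows below, a _COUNT suffix on the function-type flag
     marks the last operand as a shift count that may be either an
     immediate or the low element of a vector register, while _CONVERT
     marks rows (palignr256, pslldqi256, psrldqi256) whose operands are
     reinterpreted in a wider mode before the insn is emitted.  Both are
     handled in ix86_expand_args_builtin; the description here is an
     informal summary, not a restatement of that function.  */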
29917 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlv2ti3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
29918 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
29919 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
29920 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
29921 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
29922 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
29923 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
29924 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
29925 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
29926 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
29927 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
29928 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrv2ti3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
29929 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
29930 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
29931 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
29932 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
29933 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
29934 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
29935 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv32qi3, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29936 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv16hi3, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29937 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv8si3, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29938 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv4di3, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29939 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv32qi3, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29940 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv16hi3, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29941 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv32qi3, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29942 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv16hi3, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29943 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv32qi, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29944 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv16hi, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29945 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv8si, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29946 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv4di, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29947 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv32qi, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29948 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv16hi, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29949 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv8si, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29950 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv4di, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29951 { OPTION_MASK_ISA_AVX2, CODE_FOR_xorv4di3, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29952 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4sf, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29953 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv8sf, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256, UNKNOWN, (int) V8SF_FTYPE_V4SF },
29954 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4df, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256, UNKNOWN, (int) V4DF_FTYPE_V2DF },
29955 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vbroadcasti128_v4di, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
29956 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv4si, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
29957 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv8si, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
29958 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv32qi, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256, UNKNOWN, (int) V32QI_FTYPE_V16QI },
29959 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16hi, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256, UNKNOWN, (int) V16HI_FTYPE_V8HI },
29960 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8si, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256, UNKNOWN, (int) V8SI_FTYPE_V4SI },
29961 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4di, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
29962 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16qi, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
29963 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8hi, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
29964 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4si, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
29965 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
29966 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29967 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
29968 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
29969 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
29970 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
29971 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vextractf128v4di, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
29972 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vinsertf128v4di, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
29973 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29974 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29975 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29976 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4si, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29977 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv8si, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29978 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv4si, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29979 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4di, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29980 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv2di, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29981 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv8si, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29982 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4si, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29983
29984 { OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
29985
29986 /* BMI */
29987 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29988 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
29989 { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
29990
29991 /* TBM */
29992 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29993 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
29994
29995 /* F16C */
29996 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
29997 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
29998 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
29999 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
30000
30001 /* BMI2 */
30002 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
30003 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
30004 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
30005 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
30006 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
30007 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
30008
30009 /* AVX512F */
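  /* Naming convention for the AVX-512 rows: a _mask builtin takes an extra
     merge-source vector plus a QI/HI write-mask operand (the trailing
     _QI/_HI in the function type), with masked-off elements taken from the
     merge source; a _maskz builtin instead zeroes those elements.  This is
     an informal summary of the AVX-512 write-mask semantics these patterns
     implement.  */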
30010 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_si512_256si, "__builtin_ia32_si512_256si", IX86_BUILTIN_SI512_SI256, UNKNOWN, (int) V16SI_FTYPE_V8SI },
30011 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ps512_256ps, "__builtin_ia32_ps512_256ps", IX86_BUILTIN_PS512_PS256, UNKNOWN, (int) V16SF_FTYPE_V8SF },
30012 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pd512_256pd, "__builtin_ia32_pd512_256pd", IX86_BUILTIN_PD512_PD256, UNKNOWN, (int) V8DF_FTYPE_V4DF },
30013 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_si512_si, "__builtin_ia32_si512_si", IX86_BUILTIN_SI512_SI, UNKNOWN, (int) V16SI_FTYPE_V4SI },
30014 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ps512_ps, "__builtin_ia32_ps512_ps", IX86_BUILTIN_PS512_PS, UNKNOWN, (int) V16SF_FTYPE_V4SF },
30015 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pd512_pd, "__builtin_ia32_pd512_pd", IX86_BUILTIN_PD512_PD, UNKNOWN, (int) V8DF_FTYPE_V2DF },
30016 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv16si_mask, "__builtin_ia32_alignd512_mask", IX86_BUILTIN_ALIGND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
30017 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv8di_mask, "__builtin_ia32_alignq512_mask", IX86_BUILTIN_ALIGNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
30018 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16si, "__builtin_ia32_blendmd_512_mask", IX86_BUILTIN_BLENDMD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30019 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8df, "__builtin_ia32_blendmpd_512_mask", IX86_BUILTIN_BLENDMPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30020 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16sf, "__builtin_ia32_blendmps_512_mask", IX86_BUILTIN_BLENDMPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30021 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8di, "__builtin_ia32_blendmq_512_mask", IX86_BUILTIN_BLENDMQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30022 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x4_512", IX86_BUILTIN_BROADCASTF32X4_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
30023 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8df_mask, "__builtin_ia32_broadcastf64x4_512", IX86_BUILTIN_BROADCASTF64X4_512, UNKNOWN, (int) V8DF_FTYPE_V4DF_V8DF_QI },
30024 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16si_mask, "__builtin_ia32_broadcasti32x4_512", IX86_BUILTIN_BROADCASTI32X4_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
30025 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8di_mask, "__builtin_ia32_broadcasti64x4_512", IX86_BUILTIN_BROADCASTI64X4_512, UNKNOWN, (int) V8DI_FTYPE_V4DI_V8DI_QI },
30026 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8df_mask, "__builtin_ia32_broadcastsd512", IX86_BUILTIN_BROADCASTSD512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_QI },
30027 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16sf_mask, "__builtin_ia32_broadcastss512", IX86_BUILTIN_BROADCASTSS512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
30028 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16si3_mask, "__builtin_ia32_cmpd512_mask", IX86_BUILTIN_CMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
30029 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8di3_mask, "__builtin_ia32_cmpq512_mask", IX86_BUILTIN_CMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
30030 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8df_mask, "__builtin_ia32_compressdf512_mask", IX86_BUILTIN_COMPRESSPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30031 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16sf_mask, "__builtin_ia32_compresssf512_mask", IX86_BUILTIN_COMPRESSPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30032 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv8siv8df2_mask, "__builtin_ia32_cvtdq2pd512_mask", IX86_BUILTIN_CVTDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
30033 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtps2ph512_mask, "__builtin_ia32_vcvtps2ph512_mask", IX86_BUILTIN_CVTPS2PH512, UNKNOWN, (int) V16HI_FTYPE_V16SF_INT_V16HI_HI },
30034 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv8siv8df2_mask, "__builtin_ia32_cvtudq2pd512_mask", IX86_BUILTIN_CVTUDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
30035 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2sd32, "__builtin_ia32_cvtusi2sd32", IX86_BUILTIN_CVTUSI2SD32, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT },
30036 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expanddf512_mask", IX86_BUILTIN_EXPANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30037 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expanddf512_maskz", IX86_BUILTIN_EXPANDPD512Z, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30038 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandsf512_mask", IX86_BUILTIN_EXPANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30039 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandsf512_maskz", IX86_BUILTIN_EXPANDPS512Z, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30040 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf32x4_mask, "__builtin_ia32_extractf32x4_mask", IX86_BUILTIN_EXTRACTF32X4, UNKNOWN, (int) V4SF_FTYPE_V16SF_INT_V4SF_QI },
30041 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf64x4_mask, "__builtin_ia32_extractf64x4_mask", IX86_BUILTIN_EXTRACTF64X4, UNKNOWN, (int) V4DF_FTYPE_V8DF_INT_V4DF_QI },
30042 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti32x4_mask, "__builtin_ia32_extracti32x4_mask", IX86_BUILTIN_EXTRACTI32X4, UNKNOWN, (int) V4SI_FTYPE_V16SI_INT_V4SI_QI },
30043 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti64x4_mask, "__builtin_ia32_extracti64x4_mask", IX86_BUILTIN_EXTRACTI64X4, UNKNOWN, (int) V4DI_FTYPE_V8DI_INT_V4DI_QI },
30044 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf32x4_mask, "__builtin_ia32_insertf32x4_mask", IX86_BUILTIN_INSERTF32X4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI },
30045 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf64x4_mask, "__builtin_ia32_insertf64x4_mask", IX86_BUILTIN_INSERTF64X4, UNKNOWN, (int) V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI },
30046 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti32x4_mask, "__builtin_ia32_inserti32x4_mask", IX86_BUILTIN_INSERTI32X4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI },
30047 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti64x4_mask, "__builtin_ia32_inserti64x4_mask", IX86_BUILTIN_INSERTI64X4, UNKNOWN, (int) V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI },
30048 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_movapd512_mask", IX86_BUILTIN_MOVAPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30049 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_movaps512_mask", IX86_BUILTIN_MOVAPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30050 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movddup512_mask, "__builtin_ia32_movddup512_mask", IX86_BUILTIN_MOVDDUP512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30051 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32_512_mask", IX86_BUILTIN_MOVDQA32_512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30052 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64_512_mask", IX86_BUILTIN_MOVDQA64_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30053 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movshdup512_mask, "__builtin_ia32_movshdup512_mask", IX86_BUILTIN_MOVSHDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30054 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movsldup512_mask, "__builtin_ia32_movsldup512_mask", IX86_BUILTIN_MOVSLDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30055 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv16si2_mask, "__builtin_ia32_pabsd512_mask", IX86_BUILTIN_PABSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30056 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv8di2_mask, "__builtin_ia32_pabsq512_mask", IX86_BUILTIN_PABSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30057 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16si3_mask, "__builtin_ia32_paddd512_mask", IX86_BUILTIN_PADDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30058 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8di3_mask, "__builtin_ia32_paddq512_mask", IX86_BUILTIN_PADDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30059 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv16si3_mask, "__builtin_ia32_pandd512_mask", IX86_BUILTIN_PANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30060 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv16si3_mask, "__builtin_ia32_pandnd512_mask", IX86_BUILTIN_PANDND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30061 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv8di3_mask, "__builtin_ia32_pandnq512_mask", IX86_BUILTIN_PANDNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30062 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv8di3_mask, "__builtin_ia32_pandq512_mask", IX86_BUILTIN_PANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30063 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16si_mask, "__builtin_ia32_pbroadcastd512", IX86_BUILTIN_PBROADCASTD512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
30064 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv16si_mask, "__builtin_ia32_pbroadcastd512_gpr_mask", IX86_BUILTIN_PBROADCASTD512_GPR, UNKNOWN, (int) V16SI_FTYPE_SI_V16SI_HI },
30065 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv8di, "__builtin_ia32_broadcastmb512", IX86_BUILTIN_PBROADCASTMB512, UNKNOWN, (int) V8DI_FTYPE_QI },
30066 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv16si, "__builtin_ia32_broadcastmw512", IX86_BUILTIN_PBROADCASTMW512, UNKNOWN, (int) V16SI_FTYPE_HI },
30067 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8di_mask, "__builtin_ia32_pbroadcastq512", IX86_BUILTIN_PBROADCASTQ512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI },
30068 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vec_dup_gprv8di_mask, "__builtin_ia32_pbroadcastq512_gpr_mask", IX86_BUILTIN_PBROADCASTQ512_GPR, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
30069 { OPTION_MASK_ISA_AVX512F & ~OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vec_dup_memv8di_mask, "__builtin_ia32_pbroadcastq512_mem_mask", IX86_BUILTIN_PBROADCASTQ512_MEM, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
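  /* The two pbroadcastq512 variants above differ only in where the scalar
     comes from: the _gpr form broadcasts a 64-bit general register and is
     therefore gated on OPTION_MASK_ISA_64BIT, while the _mem form is the
     fallback that broadcasts a 64-bit value from memory when 64-bit GPRs
     are unavailable (an informal reading of the ISA masks on these rows).  */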
30070 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv16si3_mask, "__builtin_ia32_pcmpeqd512_mask", IX86_BUILTIN_PCMPEQD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
30071 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv8di3_mask, "__builtin_ia32_pcmpeqq512_mask", IX86_BUILTIN_PCMPEQQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
30072 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv16si3_mask, "__builtin_ia32_pcmpgtd512_mask", IX86_BUILTIN_PCMPGTD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
30073 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv8di3_mask, "__builtin_ia32_pcmpgtq512_mask", IX86_BUILTIN_PCMPGTQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
30074 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16si_mask, "__builtin_ia32_compresssi512_mask", IX86_BUILTIN_PCOMPRESSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30075 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8di_mask, "__builtin_ia32_compressdi512_mask", IX86_BUILTIN_PCOMPRESSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30076 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandsi512_mask", IX86_BUILTIN_PEXPANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30077 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandsi512_maskz", IX86_BUILTIN_PEXPANDD512Z, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30078 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expanddi512_mask", IX86_BUILTIN_PEXPANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30079 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expanddi512_maskz", IX86_BUILTIN_PEXPANDQ512Z, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30080 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16si3_mask, "__builtin_ia32_pmaxsd512_mask", IX86_BUILTIN_PMAXSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30081 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8di3_mask, "__builtin_ia32_pmaxsq512_mask", IX86_BUILTIN_PMAXSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30082 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv16si3_mask, "__builtin_ia32_pmaxud512_mask", IX86_BUILTIN_PMAXUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30083 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv8di3_mask, "__builtin_ia32_pmaxuq512_mask", IX86_BUILTIN_PMAXUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30084 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16si3_mask, "__builtin_ia32_pminsd512_mask", IX86_BUILTIN_PMINSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30085 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8di3_mask, "__builtin_ia32_pminsq512_mask", IX86_BUILTIN_PMINSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30086 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv16si3_mask, "__builtin_ia32_pminud512_mask", IX86_BUILTIN_PMINUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30087 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv8di3_mask, "__builtin_ia32_pminuq512_mask", IX86_BUILTIN_PMINUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30088 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask, "__builtin_ia32_pmovdb512_mask", IX86_BUILTIN_PMOVDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
30089 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask, "__builtin_ia32_pmovdw512_mask", IX86_BUILTIN_PMOVDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
30090 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask, "__builtin_ia32_pmovqb512_mask", IX86_BUILTIN_PMOVQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
30091 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask, "__builtin_ia32_pmovqd512_mask", IX86_BUILTIN_PMOVQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
30092 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask, "__builtin_ia32_pmovqw512_mask", IX86_BUILTIN_PMOVQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
30093 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask, "__builtin_ia32_pmovsdb512_mask", IX86_BUILTIN_PMOVSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
30094 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask, "__builtin_ia32_pmovsdw512_mask", IX86_BUILTIN_PMOVSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
30095 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask, "__builtin_ia32_pmovsqb512_mask", IX86_BUILTIN_PMOVSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
30096 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask, "__builtin_ia32_pmovsqd512_mask", IX86_BUILTIN_PMOVSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
30097 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask, "__builtin_ia32_pmovsqw512_mask", IX86_BUILTIN_PMOVSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
30098 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16qiv16si2_mask, "__builtin_ia32_pmovsxbd512_mask", IX86_BUILTIN_PMOVSXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
30099 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8qiv8di2_mask, "__builtin_ia32_pmovsxbq512_mask", IX86_BUILTIN_PMOVSXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
30100 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8siv8di2_mask, "__builtin_ia32_pmovsxdq512_mask", IX86_BUILTIN_PMOVSXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
30101 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16hiv16si2_mask, "__builtin_ia32_pmovsxwd512_mask", IX86_BUILTIN_PMOVSXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
30102 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8hiv8di2_mask, "__builtin_ia32_pmovsxwq512_mask", IX86_BUILTIN_PMOVSXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
30103 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask, "__builtin_ia32_pmovusdb512_mask", IX86_BUILTIN_PMOVUSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
30104 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask, "__builtin_ia32_pmovusdw512_mask", IX86_BUILTIN_PMOVUSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
30105 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask, "__builtin_ia32_pmovusqb512_mask", IX86_BUILTIN_PMOVUSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
30106 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask, "__builtin_ia32_pmovusqd512_mask", IX86_BUILTIN_PMOVUSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
30107 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask, "__builtin_ia32_pmovusqw512_mask", IX86_BUILTIN_PMOVUSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
30108 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16qiv16si2_mask, "__builtin_ia32_pmovzxbd512_mask", IX86_BUILTIN_PMOVZXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
30109 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8qiv8di2_mask, "__builtin_ia32_pmovzxbq512_mask", IX86_BUILTIN_PMOVZXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
30110 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8siv8di2_mask, "__builtin_ia32_pmovzxdq512_mask", IX86_BUILTIN_PMOVZXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
30111 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16hiv16si2_mask, "__builtin_ia32_pmovzxwd512_mask", IX86_BUILTIN_PMOVZXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
30112 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8hiv8di2_mask, "__builtin_ia32_pmovzxwq512_mask", IX86_BUILTIN_PMOVZXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
30113 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_smult_even_v16si_mask, "__builtin_ia32_pmuldq512_mask", IX86_BUILTIN_PMULDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
30114 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16si3_mask, "__builtin_ia32_pmulld512_mask" , IX86_BUILTIN_PMULLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30115 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_umult_even_v16si_mask, "__builtin_ia32_pmuludq512_mask", IX86_BUILTIN_PMULUDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
30116 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv16si3_mask, "__builtin_ia32_pord512_mask", IX86_BUILTIN_PORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30117 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv8di3_mask, "__builtin_ia32_porq512_mask", IX86_BUILTIN_PORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30118 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv16si_mask, "__builtin_ia32_prold512_mask", IX86_BUILTIN_PROLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30119 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv8di_mask, "__builtin_ia32_prolq512_mask", IX86_BUILTIN_PROLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30120 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv16si_mask, "__builtin_ia32_prolvd512_mask", IX86_BUILTIN_PROLVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30121 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv8di_mask, "__builtin_ia32_prolvq512_mask", IX86_BUILTIN_PROLVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30122 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv16si_mask, "__builtin_ia32_prord512_mask", IX86_BUILTIN_PRORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30123 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv8di_mask, "__builtin_ia32_prorq512_mask", IX86_BUILTIN_PRORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30124 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv16si_mask, "__builtin_ia32_prorvd512_mask", IX86_BUILTIN_PRORVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30125 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv8di_mask, "__builtin_ia32_prorvq512_mask", IX86_BUILTIN_PRORVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30126 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pshufdv3_mask, "__builtin_ia32_pshufd512_mask", IX86_BUILTIN_PSHUFD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30127 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslld512_mask", IX86_BUILTIN_PSLLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
30128 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslldi512_mask", IX86_BUILTIN_PSLLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30129 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllq512_mask", IX86_BUILTIN_PSLLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
30130 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllqi512_mask", IX86_BUILTIN_PSLLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30131 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv16si_mask, "__builtin_ia32_psllv16si_mask", IX86_BUILTIN_PSLLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30132 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv8di_mask, "__builtin_ia32_psllv8di_mask", IX86_BUILTIN_PSLLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30133 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psrad512_mask", IX86_BUILTIN_PSRAD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
30134 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psradi512_mask", IX86_BUILTIN_PSRADI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30135 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraq512_mask", IX86_BUILTIN_PSRAQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
30136 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraqi512_mask", IX86_BUILTIN_PSRAQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30137 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv16si_mask, "__builtin_ia32_psrav16si_mask", IX86_BUILTIN_PSRAVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30138 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv8di_mask, "__builtin_ia32_psrav8di_mask", IX86_BUILTIN_PSRAVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30139 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrld512_mask", IX86_BUILTIN_PSRLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
30140 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrldi512_mask", IX86_BUILTIN_PSRLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30141 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlq512_mask", IX86_BUILTIN_PSRLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
30142 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlqi512_mask", IX86_BUILTIN_PSRLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30143 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv16si_mask, "__builtin_ia32_psrlv16si_mask", IX86_BUILTIN_PSRLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30144 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv8di_mask, "__builtin_ia32_psrlv8di_mask", IX86_BUILTIN_PSRLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30145 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16si3_mask, "__builtin_ia32_psubd512_mask", IX86_BUILTIN_PSUBD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30146 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8di3_mask, "__builtin_ia32_psubq512_mask", IX86_BUILTIN_PSUBQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30147 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv16si3_mask, "__builtin_ia32_ptestmd512", IX86_BUILTIN_PTESTMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
30148 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv8di3_mask, "__builtin_ia32_ptestmq512", IX86_BUILTIN_PTESTMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
30149 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv16si3_mask, "__builtin_ia32_ptestnmd512", IX86_BUILTIN_PTESTNMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
30150 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv8di3_mask, "__builtin_ia32_ptestnmq512", IX86_BUILTIN_PTESTNMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
30151 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv16si_mask, "__builtin_ia32_punpckhdq512_mask", IX86_BUILTIN_PUNPCKHDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30152 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv8di_mask, "__builtin_ia32_punpckhqdq512_mask", IX86_BUILTIN_PUNPCKHQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30153 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv16si_mask, "__builtin_ia32_punpckldq512_mask", IX86_BUILTIN_PUNPCKLDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30154 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv8di_mask, "__builtin_ia32_punpcklqdq512_mask", IX86_BUILTIN_PUNPCKLQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30155 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv16si3_mask, "__builtin_ia32_pxord512_mask", IX86_BUILTIN_PXORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30156 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv8di3_mask, "__builtin_ia32_pxorq512_mask", IX86_BUILTIN_PXORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30157 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v8df_mask, "__builtin_ia32_rcp14pd512_mask", IX86_BUILTIN_RCP14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30158 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v16sf_mask, "__builtin_ia32_rcp14ps512_mask", IX86_BUILTIN_RCP14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30159 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v2df, "__builtin_ia32_rcp14sd", IX86_BUILTIN_RCP14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
30160 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v4sf, "__builtin_ia32_rcp14ss", IX86_BUILTIN_RCP14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30161 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v8df_mask, "__builtin_ia32_rsqrt14pd512_mask", IX86_BUILTIN_RSQRT14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30162 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v16sf_mask, "__builtin_ia32_rsqrt14ps512_mask", IX86_BUILTIN_RSQRT14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30163 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v2df, "__builtin_ia32_rsqrt14sd", IX86_BUILTIN_RSQRT14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
30164 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v4sf, "__builtin_ia32_rsqrt14ss", IX86_BUILTIN_RSQRT14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30165 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufpd512_mask, "__builtin_ia32_shufpd512_mask", IX86_BUILTIN_SHUFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
30166 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufps512_mask, "__builtin_ia32_shufps512_mask", IX86_BUILTIN_SHUFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
30167 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_mask", IX86_BUILTIN_SHUF_F32x4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
30168 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_mask", IX86_BUILTIN_SHUF_F64x2, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
30169 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_mask", IX86_BUILTIN_SHUF_I32x4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
30170 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_mask", IX86_BUILTIN_SHUF_I64x2, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
30171 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv16si3_mask, "__builtin_ia32_ucmpd512_mask", IX86_BUILTIN_UCMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
30172 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv8di3_mask, "__builtin_ia32_ucmpq512_mask", IX86_BUILTIN_UCMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
30173 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhpd512_mask, "__builtin_ia32_unpckhpd512_mask", IX86_BUILTIN_UNPCKHPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
30174 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhps512_mask, "__builtin_ia32_unpckhps512_mask", IX86_BUILTIN_UNPCKHPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
30175 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklpd512_mask, "__builtin_ia32_unpcklpd512_mask", IX86_BUILTIN_UNPCKLPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
30176 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklps512_mask, "__builtin_ia32_unpcklps512_mask", IX86_BUILTIN_UNPCKLPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
30177 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv16si2_mask, "__builtin_ia32_vplzcntd_512_mask", IX86_BUILTIN_VPCLZCNTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30178 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv8di2_mask, "__builtin_ia32_vplzcntq_512_mask", IX86_BUILTIN_VPCLZCNTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30179 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv16si_mask, "__builtin_ia32_vpconflictsi_512_mask", IX86_BUILTIN_VPCONFLICTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30180 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv8di_mask, "__builtin_ia32_vpconflictdi_512_mask", IX86_BUILTIN_VPCONFLICTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30181 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8df_mask, "__builtin_ia32_permdf512_mask", IX86_BUILTIN_VPERMDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
30182 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8di_mask, "__builtin_ia32_permdi512_mask", IX86_BUILTIN_VPERMDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30183 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16si3_mask, "__builtin_ia32_vpermi2vard512_mask", IX86_BUILTIN_VPERMI2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30184 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8df3_mask, "__builtin_ia32_vpermi2varpd512_mask", IX86_BUILTIN_VPERMI2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
30185 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16sf3_mask, "__builtin_ia32_vpermi2varps512_mask", IX86_BUILTIN_VPERMI2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
30186 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8di3_mask, "__builtin_ia32_vpermi2varq512_mask", IX86_BUILTIN_VPERMI2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30187 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv8df_mask, "__builtin_ia32_vpermilpd512_mask", IX86_BUILTIN_VPERMILPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
30188 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv16sf_mask, "__builtin_ia32_vpermilps512_mask", IX86_BUILTIN_VPERMILPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI },
30189 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv8df3_mask, "__builtin_ia32_vpermilvarpd512_mask", IX86_BUILTIN_VPERMILVARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
30190 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv16sf3_mask, "__builtin_ia32_vpermilvarps512_mask", IX86_BUILTIN_VPERMILVARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
30191 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_mask, "__builtin_ia32_vpermt2vard512_mask", IX86_BUILTIN_VPERMT2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30192 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_maskz, "__builtin_ia32_vpermt2vard512_maskz", IX86_BUILTIN_VPERMT2VARD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30193 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_mask, "__builtin_ia32_vpermt2varpd512_mask", IX86_BUILTIN_VPERMT2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
30194 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_maskz, "__builtin_ia32_vpermt2varpd512_maskz", IX86_BUILTIN_VPERMT2VARPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
30195 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_mask, "__builtin_ia32_vpermt2varps512_mask", IX86_BUILTIN_VPERMT2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
30196 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_maskz, "__builtin_ia32_vpermt2varps512_maskz", IX86_BUILTIN_VPERMT2VARPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
30197 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_mask, "__builtin_ia32_vpermt2varq512_mask", IX86_BUILTIN_VPERMT2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30198 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_maskz, "__builtin_ia32_vpermt2varq512_maskz", IX86_BUILTIN_VPERMT2VARQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30199 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8df_mask, "__builtin_ia32_permvardf512_mask", IX86_BUILTIN_VPERMVARDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
30200 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8di_mask, "__builtin_ia32_permvardi512_mask", IX86_BUILTIN_VPERMVARDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30201 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16sf_mask, "__builtin_ia32_permvarsf512_mask", IX86_BUILTIN_VPERMVARSF512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
30202 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16si_mask, "__builtin_ia32_permvarsi512_mask", IX86_BUILTIN_VPERMVARSI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30203 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_mask, "__builtin_ia32_pternlogd512_mask", IX86_BUILTIN_VTERNLOGD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
30204 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_maskz, "__builtin_ia32_pternlogd512_maskz", IX86_BUILTIN_VTERNLOGD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
30205 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_mask, "__builtin_ia32_pternlogq512_mask", IX86_BUILTIN_VTERNLOGQ512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
30206 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_maskz, "__builtin_ia32_pternlogq512_maskz", IX86_BUILTIN_VTERNLOGQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
30207
30208 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv16sf3, "__builtin_ia32_copysignps512", IX86_BUILTIN_CPYSGNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF },
30209 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv8df3, "__builtin_ia32_copysignpd512", IX86_BUILTIN_CPYSGNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF },
30210 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2, "__builtin_ia32_sqrtpd512", IX86_BUILTIN_SQRTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF },
30211 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sqrtv16sf2, "__builtin_ia32_sqrtps512", IX86_BUILTIN_SQRTPS_NR512, UNKNOWN, (int) V16SF_FTYPE_V16SF },
30212 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf, "__builtin_ia32_exp2ps", IX86_BUILTIN_EXP2PS, UNKNOWN, (int) V16SF_FTYPE_V16SF },
30213 { OPTION_MASK_ISA_AVX512F, CODE_FOR_roundv8df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix512", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF },
30214 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_floorpd_vec_pack_sfix512", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
30215 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_ceilpd_vec_pack_sfix512", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
30216
30217 /* Mask arithmetic operations */
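  /* These map the AVX-512 opmask (k-register) operations onto plain HImode
     patterns, since a 16-bit mask is modelled as HI.  As an illustrative
     sketch (not the definitive header wording), _mm512_kand in
     avx512fintrin.h is expected to expand to __builtin_ia32_kandhi (a, b).  */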
30218 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andhi3, "__builtin_ia32_kandhi", IX86_BUILTIN_KAND16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30219 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kandnhi, "__builtin_ia32_kandnhi", IX86_BUILTIN_KANDN16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30220 { OPTION_MASK_ISA_AVX512F, CODE_FOR_one_cmplhi2, "__builtin_ia32_knothi", IX86_BUILTIN_KNOT16, UNKNOWN, (int) HI_FTYPE_HI },
30221 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorhi3, "__builtin_ia32_korhi", IX86_BUILTIN_KOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30222 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestchi, "__builtin_ia32_kortestchi", IX86_BUILTIN_KORTESTC16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30223 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestzhi, "__builtin_ia32_kortestzhi", IX86_BUILTIN_KORTESTZ16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30224 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kunpckhi, "__builtin_ia32_kunpckhi", IX86_BUILTIN_KUNPCKBW, UNKNOWN, (int) HI_FTYPE_HI_HI },
30225 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kxnorhi, "__builtin_ia32_kxnorhi", IX86_BUILTIN_KXNOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30226 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorhi3, "__builtin_ia32_kxorhi", IX86_BUILTIN_KXOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30227 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kmovw, "__builtin_ia32_kmov16", IX86_BUILTIN_KMOV16, UNKNOWN, (int) HI_FTYPE_HI },
30228
30229 /* SHA */
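  /* SHA extension builtins; each operates on 128-bit V4SI state, and
     sha1rnds4 additionally takes an immediate selecting the round function.  */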
30230 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg1, 0, IX86_BUILTIN_SHA1MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30231 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg2, 0, IX86_BUILTIN_SHA1MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30232 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1nexte, 0, IX86_BUILTIN_SHA1NEXTE, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30233 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1rnds4, 0, IX86_BUILTIN_SHA1RNDS4, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
30234 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg1, 0, IX86_BUILTIN_SHA256MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30235 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg2, 0, IX86_BUILTIN_SHA256MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30236 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256rnds2, 0, IX86_BUILTIN_SHA256RNDS2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
30237 };
30238
30239 /* Builtins with rounding support. */
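/* Each entry is { ISA mask, insn code, name, builtin code, comparison or
   rounding code, function type }; the trailing INT in the function type is
   the explicit rounding-mode operand.  As a rough sketch of a user-level
   call (argument names are illustrative only):
     __builtin_ia32_addpd512_mask (a, b, src, mask, _MM_FROUND_CUR_DIRECTION);  */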
30240 static const struct builtin_description bdesc_round_args[] =
30241 {
30242 /* AVX512F */
30243 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8df3_mask_round, "__builtin_ia32_addpd512_mask", IX86_BUILTIN_ADDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30244 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16sf3_mask_round, "__builtin_ia32_addps512_mask", IX86_BUILTIN_ADDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30245 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmaddv2df3_round, "__builtin_ia32_addsd_round", IX86_BUILTIN_ADDSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30246 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmaddv4sf3_round, "__builtin_ia32_addss_round", IX86_BUILTIN_ADDSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30247 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8df3_mask_round, "__builtin_ia32_cmppd512_mask", IX86_BUILTIN_CMPPD512, UNKNOWN, (int) QI_FTYPE_V8DF_V8DF_INT_QI_INT },
30248 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16sf3_mask_round, "__builtin_ia32_cmpps512_mask", IX86_BUILTIN_CMPPS512, UNKNOWN, (int) HI_FTYPE_V16SF_V16SF_INT_HI_INT },
30249 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv2df3_mask_round, "__builtin_ia32_cmpsd_mask", IX86_BUILTIN_CMPSD_MASK, UNKNOWN, (int) QI_FTYPE_V2DF_V2DF_INT_QI_INT },
30250 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv4sf3_mask_round, "__builtin_ia32_cmpss_mask", IX86_BUILTIN_CMPSS_MASK, UNKNOWN, (int) QI_FTYPE_V4SF_V4SF_INT_QI_INT },
30251 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_comi_round, "__builtin_ia32_vcomisd", IX86_BUILTIN_COMIDF, UNKNOWN, (int) INT_FTYPE_V2DF_V2DF_INT_INT },
30252 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_comi_round, "__builtin_ia32_vcomiss", IX86_BUILTIN_COMISF, UNKNOWN, (int) INT_FTYPE_V4SF_V4SF_INT_INT },
30253 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv16siv16sf2_mask_round, "__builtin_ia32_cvtdq2ps512_mask", IX86_BUILTIN_CVTDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
30254 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2dq512_mask_round, "__builtin_ia32_cvtpd2dq512_mask", IX86_BUILTIN_CVTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30255 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2ps512_mask_round, "__builtin_ia32_cvtpd2ps512_mask", IX86_BUILTIN_CVTPD2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DF_V8SF_QI_INT },
30256 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_notruncv8dfv8si2_mask_round, "__builtin_ia32_cvtpd2udq512_mask", IX86_BUILTIN_CVTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30257 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtph2ps512_mask_round, "__builtin_ia32_vcvtph2ps512_mask", IX86_BUILTIN_CVTPH2PS512, UNKNOWN, (int) V16SF_FTYPE_V16HI_V16SF_HI_INT },
30258 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2dq512_mask", IX86_BUILTIN_CVTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30259 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtps2pd512_mask_round, "__builtin_ia32_cvtps2pd512_mask", IX86_BUILTIN_CVTPS2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SF_V8DF_QI_INT },
30260 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2udq512_mask", IX86_BUILTIN_CVTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30261 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2ss_round, "__builtin_ia32_cvtsd2ss_round", IX86_BUILTIN_CVTSD2SS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF_INT },
30262 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq_round, "__builtin_ia32_cvtsi2sd64", IX86_BUILTIN_CVTSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT64_INT },
30263 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtsi2ss_round, "__builtin_ia32_cvtsi2ss32", IX86_BUILTIN_CVTSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_INT },
30264 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq_round, "__builtin_ia32_cvtsi2ss64", IX86_BUILTIN_CVTSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT64_INT },
30265 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtss2sd_round, "__builtin_ia32_cvtss2sd_round", IX86_BUILTIN_CVTSS2SD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF_INT },
30266 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2dq512_mask", IX86_BUILTIN_CVTTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30267 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2udq512_mask", IX86_BUILTIN_CVTTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30268 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2dq512_mask", IX86_BUILTIN_CVTTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30269 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2udq512_mask", IX86_BUILTIN_CVTTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30270 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv16siv16sf2_mask_round, "__builtin_ia32_cvtudq2ps512_mask", IX86_BUILTIN_CVTUDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
30271 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2sd64_round, "__builtin_ia32_cvtusi2sd64", IX86_BUILTIN_CVTUSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT64_INT },
30272 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2ss32_round, "__builtin_ia32_cvtusi2ss32", IX86_BUILTIN_CVTUSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT_INT },
30273 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2ss64_round, "__builtin_ia32_cvtusi2ss64", IX86_BUILTIN_CVTUSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT64_INT },
30274 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv8df3_mask_round, "__builtin_ia32_divpd512_mask", IX86_BUILTIN_DIVPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30275 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv16sf3_mask_round, "__builtin_ia32_divps512_mask", IX86_BUILTIN_DIVPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30276 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmdivv2df3_round, "__builtin_ia32_divsd_round", IX86_BUILTIN_DIVSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30277 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmdivv4sf3_round, "__builtin_ia32_divss_round", IX86_BUILTIN_DIVSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30278 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_mask_round, "__builtin_ia32_fixupimmpd512_mask", IX86_BUILTIN_FIXUPIMMPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
30279 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_maskz_round, "__builtin_ia32_fixupimmpd512_maskz", IX86_BUILTIN_FIXUPIMMPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
30280 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_mask_round, "__builtin_ia32_fixupimmps512_mask", IX86_BUILTIN_FIXUPIMMPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
30281 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_maskz_round, "__builtin_ia32_fixupimmps512_maskz", IX86_BUILTIN_FIXUPIMMPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
30282 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_mask_round, "__builtin_ia32_fixupimmsd_mask", IX86_BUILTIN_FIXUPIMMSD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
30283 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_maskz_round, "__builtin_ia32_fixupimmsd_maskz", IX86_BUILTIN_FIXUPIMMSD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
30284 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_mask_round, "__builtin_ia32_fixupimmss_mask", IX86_BUILTIN_FIXUPIMMSS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
30285 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_maskz_round, "__builtin_ia32_fixupimmss_maskz", IX86_BUILTIN_FIXUPIMMSS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
30286 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv8df_mask_round, "__builtin_ia32_getexppd512_mask", IX86_BUILTIN_GETEXPPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30287 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv16sf_mask_round, "__builtin_ia32_getexpps512_mask", IX86_BUILTIN_GETEXPPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30288 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv2df_round, "__builtin_ia32_getexpsd128_round", IX86_BUILTIN_GETEXPSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30289 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv4sf_round, "__builtin_ia32_getexpss128_round", IX86_BUILTIN_GETEXPSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30290 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv8df_mask_round, "__builtin_ia32_getmantpd512_mask", IX86_BUILTIN_GETMANTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
30291 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv16sf_mask_round, "__builtin_ia32_getmantps512_mask", IX86_BUILTIN_GETMANTPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
30292 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv2df_round, "__builtin_ia32_getmantsd_round", IX86_BUILTIN_GETMANTSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
30293 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv4sf_round, "__builtin_ia32_getmantss_round", IX86_BUILTIN_GETMANTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
30294 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8df3_mask_round, "__builtin_ia32_maxpd512_mask", IX86_BUILTIN_MAXPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30295 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16sf3_mask_round, "__builtin_ia32_maxps512_mask", IX86_BUILTIN_MAXPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30296 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsmaxv2df3_round, "__builtin_ia32_maxsd_round", IX86_BUILTIN_MAXSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30297 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsmaxv4sf3_round, "__builtin_ia32_maxss_round", IX86_BUILTIN_MAXSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30298 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8df3_mask_round, "__builtin_ia32_minpd512_mask", IX86_BUILTIN_MINPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30299 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16sf3_mask_round, "__builtin_ia32_minps512_mask", IX86_BUILTIN_MINPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30300 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsminv2df3_round, "__builtin_ia32_minsd_round", IX86_BUILTIN_MINSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30301 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsminv4sf3_round, "__builtin_ia32_minss_round", IX86_BUILTIN_MINSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30302 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv8df3_mask_round, "__builtin_ia32_mulpd512_mask", IX86_BUILTIN_MULPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30303 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16sf3_mask_round, "__builtin_ia32_mulps512_mask", IX86_BUILTIN_MULPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30304 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmmulv2df3_round, "__builtin_ia32_mulsd_round", IX86_BUILTIN_MULSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30305 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmmulv4sf3_round, "__builtin_ia32_mulss_round", IX86_BUILTIN_MULSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30306 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev8df_mask_round, "__builtin_ia32_rndscalepd_mask", IX86_BUILTIN_RNDSCALEPD, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
30307 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev16sf_mask_round, "__builtin_ia32_rndscaleps_mask", IX86_BUILTIN_RNDSCALEPS, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
30308 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev2df_round, "__builtin_ia32_rndscalesd_round", IX86_BUILTIN_RNDSCALESD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
30309 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev4sf_round, "__builtin_ia32_rndscaless_round", IX86_BUILTIN_RNDSCALESS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
30310 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv8df_mask_round, "__builtin_ia32_scalefpd512_mask", IX86_BUILTIN_SCALEFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30311 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv16sf_mask_round, "__builtin_ia32_scalefps512_mask", IX86_BUILTIN_SCALEFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30312 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv2df_round, "__builtin_ia32_scalefsd_round", IX86_BUILTIN_SCALEFSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30313 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv4sf_round, "__builtin_ia32_scalefss_round", IX86_BUILTIN_SCALEFSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30314 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2_mask_round, "__builtin_ia32_sqrtpd512_mask", IX86_BUILTIN_SQRTPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30315 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv16sf2_mask_round, "__builtin_ia32_sqrtps512_mask", IX86_BUILTIN_SQRTPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30316 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsqrtv2df2_round, "__builtin_ia32_sqrtsd_round", IX86_BUILTIN_SQRTSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30317 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsqrtv4sf2_round, "__builtin_ia32_sqrtss_round", IX86_BUILTIN_SQRTSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30318 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8df3_mask_round, "__builtin_ia32_subpd512_mask", IX86_BUILTIN_SUBPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30319 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16sf3_mask_round, "__builtin_ia32_subps512_mask", IX86_BUILTIN_SUBPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30320 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsubv2df3_round, "__builtin_ia32_subsd_round", IX86_BUILTIN_SUBSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30321 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsubv4sf3_round, "__builtin_ia32_subss_round", IX86_BUILTIN_SUBSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30322 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2si_round, "__builtin_ia32_vcvtsd2si32", IX86_BUILTIN_VCVTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
30323 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq_round, "__builtin_ia32_vcvtsd2si64", IX86_BUILTIN_VCVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
30324 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtsd2usi_round, "__builtin_ia32_vcvtsd2usi32", IX86_BUILTIN_VCVTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
30325 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtsd2usiq_round, "__builtin_ia32_vcvtsd2usi64", IX86_BUILTIN_VCVTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
30326 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtss2si_round, "__builtin_ia32_vcvtss2si32", IX86_BUILTIN_VCVTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
30327 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq_round, "__builtin_ia32_vcvtss2si64", IX86_BUILTIN_VCVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
30328 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtss2usi_round, "__builtin_ia32_vcvtss2usi32", IX86_BUILTIN_VCVTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
30329 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtss2usiq_round, "__builtin_ia32_vcvtss2usi64", IX86_BUILTIN_VCVTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
30330 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvttsd2si_round, "__builtin_ia32_vcvttsd2si32", IX86_BUILTIN_VCVTTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
30331 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq_round, "__builtin_ia32_vcvttsd2si64", IX86_BUILTIN_VCVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
30332 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttsd2usi_round, "__builtin_ia32_vcvttsd2usi32", IX86_BUILTIN_VCVTTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
30333 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttsd2usiq_round, "__builtin_ia32_vcvttsd2usi64", IX86_BUILTIN_VCVTTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
30334 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvttss2si_round, "__builtin_ia32_vcvttss2si32", IX86_BUILTIN_VCVTTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
30335 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq_round, "__builtin_ia32_vcvttss2si64", IX86_BUILTIN_VCVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
30336 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttss2usi_round, "__builtin_ia32_vcvttss2usi32", IX86_BUILTIN_VCVTTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
30337 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttss2usiq_round, "__builtin_ia32_vcvttss2usi64", IX86_BUILTIN_VCVTTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
30338 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask_round, "__builtin_ia32_vfmaddpd512_mask", IX86_BUILTIN_VFMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30339 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask3_round, "__builtin_ia32_vfmaddpd512_mask3", IX86_BUILTIN_VFMADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30340 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_maskz_round, "__builtin_ia32_vfmaddpd512_maskz", IX86_BUILTIN_VFMADDPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30341 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask_round, "__builtin_ia32_vfmaddps512_mask", IX86_BUILTIN_VFMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30342 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask3_round, "__builtin_ia32_vfmaddps512_mask3", IX86_BUILTIN_VFMADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30343 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_maskz_round, "__builtin_ia32_vfmaddps512_maskz", IX86_BUILTIN_VFMADDPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30344 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v2df_round, "__builtin_ia32_vfmaddsd3_round", IX86_BUILTIN_VFMADDSD3_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_INT },
30345 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v4sf_round, "__builtin_ia32_vfmaddss3_round", IX86_BUILTIN_VFMADDSS3_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_INT },
30346 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask_round, "__builtin_ia32_vfmaddsubpd512_mask", IX86_BUILTIN_VFMADDSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30347 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask3_round, "__builtin_ia32_vfmaddsubpd512_mask3", IX86_BUILTIN_VFMADDSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30348 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_maskz_round, "__builtin_ia32_vfmaddsubpd512_maskz", IX86_BUILTIN_VFMADDSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30349 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask_round, "__builtin_ia32_vfmaddsubps512_mask", IX86_BUILTIN_VFMADDSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30350 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask3_round, "__builtin_ia32_vfmaddsubps512_mask3", IX86_BUILTIN_VFMADDSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30351 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_maskz_round, "__builtin_ia32_vfmaddsubps512_maskz", IX86_BUILTIN_VFMADDSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30352 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v8df_mask3_round, "__builtin_ia32_vfmsubaddpd512_mask3", IX86_BUILTIN_VFMSUBADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30353 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v16sf_mask3_round, "__builtin_ia32_vfmsubaddps512_mask3", IX86_BUILTIN_VFMSUBADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30354 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v8df_mask3_round, "__builtin_ia32_vfmsubpd512_mask3", IX86_BUILTIN_VFMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30355 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v16sf_mask3_round, "__builtin_ia32_vfmsubps512_mask3", IX86_BUILTIN_VFMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30356 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v8df_mask_round, "__builtin_ia32_vfnmaddpd512_mask", IX86_BUILTIN_VFNMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30357 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v16sf_mask_round, "__builtin_ia32_vfnmaddps512_mask", IX86_BUILTIN_VFNMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30358 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask_round, "__builtin_ia32_vfnmsubpd512_mask", IX86_BUILTIN_VFNMSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30359 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask3_round, "__builtin_ia32_vfnmsubpd512_mask3", IX86_BUILTIN_VFNMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30360 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask_round, "__builtin_ia32_vfnmsubps512_mask", IX86_BUILTIN_VFNMSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30361 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask3_round, "__builtin_ia32_vfnmsubps512_mask3", IX86_BUILTIN_VFNMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30362
30363 /* AVX512ER */
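  /* AVX-512ER approximation builtins (exp2, rcp28, rsqrt28); the "28" in the
     names refers to the documented relative-error bound of roughly 2^-28, and
     each takes the same trailing rounding operand as the entries above.  */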
30364 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v8df_mask_round, "__builtin_ia32_exp2pd_mask", IX86_BUILTIN_EXP2PD_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30365 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf_mask_round, "__builtin_ia32_exp2ps_mask", IX86_BUILTIN_EXP2PS_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30366 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v8df_mask_round, "__builtin_ia32_rcp28pd_mask", IX86_BUILTIN_RCP28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30367 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v16sf_mask_round, "__builtin_ia32_rcp28ps_mask", IX86_BUILTIN_RCP28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30368 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v2df_round, "__builtin_ia32_rcp28sd_round", IX86_BUILTIN_RCP28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30369 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v4sf_round, "__builtin_ia32_rcp28ss_round", IX86_BUILTIN_RCP28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30370 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v8df_mask_round, "__builtin_ia32_rsqrt28pd_mask", IX86_BUILTIN_RSQRT28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30371 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v16sf_mask_round, "__builtin_ia32_rsqrt28ps_mask", IX86_BUILTIN_RSQRT28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30372 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v2df_round, "__builtin_ia32_rsqrt28sd_round", IX86_BUILTIN_RSQRT28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30373 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v4sf_round, "__builtin_ia32_rsqrt28ss_round", IX86_BUILTIN_RSQRT28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30374 };
30375
30376 /* FMA4 and XOP. */
30377 #define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
30378 #define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
30379 #define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
30380 #define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
30381 #define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
30382 #define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
30383 #define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
30384 #define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
30385 #define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
30386 #define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
30387 #define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
30388 #define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
30389 #define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
30390 #define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
30391 #define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
30392 #define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
30393 #define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
30394 #define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
30395 #define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
30396 #define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
30397 #define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
30398 #define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
30399 #define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
30400 #define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
30401 #define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
30402 #define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
30403 #define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
30404 #define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
30405 #define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
30406 #define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
30407 #define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
30408 #define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
30409 #define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
30410 #define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
30411 #define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
30412 #define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
30413 #define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
30414 #define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
30415 #define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
30416 #define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
30417 #define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
30418 #define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
30419 #define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
30420 #define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
30421 #define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
30422 #define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
30423 #define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
30424 #define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
30425 #define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
30426 #define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
30427 #define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
30428 #define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
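/* The MULTI_ARG_<nargs>_<mode>... names above encode the operand count and
   vector modes of the underlying function types, e.g. MULTI_ARG_3_SF is a
   three-operand V4SF function, MULTI_ARG_2_DI_CMP a two-operand V2DI
   comparison, and MULTI_ARG_1_QI_HI a one-operand V16QI-to-V8HI widening.  */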
30429
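/* Entries below whose mask is OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4
   describe builtins shared by the FMA and FMA4 support; either ISA being
   enabled should be enough to make them available.  */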
30430 static const struct builtin_description bdesc_multi_arg[] =
30431 {
30432 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
30433 "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
30434 UNKNOWN, (int)MULTI_ARG_3_SF },
30435 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
30436 "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
30437 UNKNOWN, (int)MULTI_ARG_3_DF },
30438
30439 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf,
30440 "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3,
30441 UNKNOWN, (int)MULTI_ARG_3_SF },
30442 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df,
30443 "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3,
30444 UNKNOWN, (int)MULTI_ARG_3_DF },
30445
30446 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
30447 "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
30448 UNKNOWN, (int)MULTI_ARG_3_SF },
30449 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
30450 "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
30451 UNKNOWN, (int)MULTI_ARG_3_DF },
30452 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
30453 "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
30454 UNKNOWN, (int)MULTI_ARG_3_SF2 },
30455 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
30456 "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
30457 UNKNOWN, (int)MULTI_ARG_3_DF2 },
30458
30459 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
30460 "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
30461 UNKNOWN, (int)MULTI_ARG_3_SF },
30462 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
30463 "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
30464 UNKNOWN, (int)MULTI_ARG_3_DF },
30465 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
30466 "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
30467 UNKNOWN, (int)MULTI_ARG_3_SF2 },
30468 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
30469 "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
30470 UNKNOWN, (int)MULTI_ARG_3_DF2 },
30471
30472 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
30473 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
30474 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
30475 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
30476 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi",IX86_BUILTIN_VPCMOV_V16QI,UNKNOWN, (int)MULTI_ARG_3_QI },
30477 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
30478 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
30479
30480 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
30481 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
30482 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
30483 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
30484 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
30485 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
30486 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
30487
30488 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
30489
30490 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
30491 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
30492 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30493 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30494 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
30495 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
30496 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30497 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30498 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30499 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30500 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30501 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30502
30503 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
30504 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
30505 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
30506 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
30507 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
30508 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
30509 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
30510 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
30511 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
30512 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
30513 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
30514 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
30515 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
30516 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
30517 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
30518 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
30519
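  /* The XOP vfrcz builtins extract the fractional part of each element,
     hence the single-operand MULTI_ARG_1_* types.  */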
30520 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_1_SF },
30521 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_1_DF },
30522 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
30523 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
30524 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
30525 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
30526
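  /* The XOP horizontal add/subtract builtins widen the element type, which
     is why they use the mixed MULTI_ARG_1_<src>_<dst> types (e.g. QI_HI is
     bytes to words); the vphaddu* forms are the unsigned-widening variants.  */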
30527 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
30528 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
30529 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
30530 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
30531 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
30532 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
30533 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
30534 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
30535 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
30536 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
30537 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
30538 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
30539 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
30540 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
30541 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
30542
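  /* All of the XOP vpcom builtins below share one maskcmp pattern per mode;
     the actual condition is carried in the rtx_code field (EQ, NE, LT, ...).
     The "neq" spellings intentionally reuse the same IX86_BUILTIN_VPCOMNE*
     codes as "ne", since they are just alternative intrinsic names.  */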
30543 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
30544 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
30545 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
30546 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
30547 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
30548 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
30549 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
30550
30551 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
30552 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
30553 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
30554 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
30555 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
30556 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
30557 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
30558
30559 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
30560 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
30561 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
30562 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
30563 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
30564 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
30565 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
30566
30567 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
30568 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
30569 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
30570 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
30571 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
30572 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
30573 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
30574
30575 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
30576 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
30577 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
30578 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
30579 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
30580 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
30581 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
30582
30583 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
30584 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
30585 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
30586 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
30587 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
30588 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
30589 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
30590
30591 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
30592 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
30593 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
30594 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
30595 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
30596 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
30597 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
30598
30599 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
30600 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
30601 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
30602 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
30603 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
30604 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
30605 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
30606
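  /* PCOM_FALSE and PCOM_TRUE are pseudo comparison codes, cast through
     enum rtx_code, selecting the vpcom immediates whose result is all zeros
     or all ones regardless of the inputs.  */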
30607 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
30608 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
30609 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
30610 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
30611 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
30612 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
30613 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
30614 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
30615
30616 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
30617 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
30618 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
30619 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
30620 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
30621 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
30622 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
30623 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
30624
30625 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
30626 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
30627 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
30628 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
30629
30630 };
30631 \f
30632 /* TM vector builtins. */
30633
30634 /* Reuse the existing x86-specific `struct builtin_description' because
30635    it is convenient; add casts so the generic BUILT_IN_TM_* codes fit.  */
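/* Each entry pairs a vector width (MMX 64-bit, SSE 128-bit, AVX 256-bit)
   with the generic BUILT_IN_TM_* code for the corresponding libitm barrier;
   the names follow the _ITM_ ABI, e.g. WM128 is the plain 128-bit write and
   WaR/WaW are the write-after-read/write-after-write variants.  */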
30636 static const struct builtin_description bdesc_tm[] =
30637 {
30638 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
30639 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
30640 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
30641 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30642 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30643 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30644 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30645
30646 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
30647 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
30648 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
30649 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30650 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30651 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30652 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30653
30654 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
30655 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
30656 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
30657 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30658 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30659 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30660 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30661
30662 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
30663 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
30664 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
30665 };
30666
30667 /* TM callbacks. */
30668
30669 /* Return the builtin decl needed to load a vector of TYPE. */
30670
30671 static tree
30672 ix86_builtin_tm_load (tree type)
30673 {
30674 if (TREE_CODE (type) == VECTOR_TYPE)
30675 {
30676 switch (tree_to_uhwi (TYPE_SIZE (type)))
30677 {
30678 case 64:
30679 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64);
30680 case 128:
30681 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128);
30682 case 256:
30683 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256);
30684 }
30685 }
30686 return NULL_TREE;
30687 }
30688
30689 /* Return the builtin decl needed to store a vector of TYPE. */
30690
30691 static tree
30692 ix86_builtin_tm_store (tree type)
30693 {
30694 if (TREE_CODE (type) == VECTOR_TYPE)
30695 {
30696 switch (tree_to_uhwi (TYPE_SIZE (type)))
30697 {
30698 case 64:
30699 return builtin_decl_explicit (BUILT_IN_TM_STORE_M64);
30700 case 128:
30701 return builtin_decl_explicit (BUILT_IN_TM_STORE_M128);
30702 case 256:
30703 return builtin_decl_explicit (BUILT_IN_TM_STORE_M256);
30704 }
30705 }
30706 return NULL_TREE;
30707 }
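/* Editor's note (illustrative sketch, not part of the original source):
   the two hooks above key off the bit size of the vector type, so with
   -fgnu-tm a whole-vector access inside a transaction can be instrumented
   with the matching TM builtin from bdesc_tm, e.g.

     typedef float v4sf __attribute__ ((vector_size (16)));

     void
     tm_store (v4sf *p, v4sf v)
     {
       __transaction_atomic { *p = v; }
     }

   where the 128-bit store is lowered through BUILT_IN_TM_STORE_M128,
   i.e. the _ITM_WM128 entry defined above.  */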
30708 \f
30709 /* Initialize the transactional memory vector load/store builtins. */
30710
30711 static void
30712 ix86_init_tm_builtins (void)
30713 {
30714 enum ix86_builtin_func_type ftype;
30715 const struct builtin_description *d;
30716 size_t i;
30717 tree decl;
30718 tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
30719 tree attrs_log, attrs_type_log;
30720
30721 if (!flag_tm)
30722 return;
30723
30724 /* If there are no builtins defined, we must be compiling in a
30725 language without trans-mem support. */
30726 if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
30727 return;
30728
30729 /* Use whatever attributes a normal TM load has. */
30730 decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
30731 attrs_load = DECL_ATTRIBUTES (decl);
30732 attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
30733 /* Use whatever attributes a normal TM store has. */
30734 decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
30735 attrs_store = DECL_ATTRIBUTES (decl);
30736 attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
30737 /* Use whatever attributes a normal TM log has. */
30738 decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
30739 attrs_log = DECL_ATTRIBUTES (decl);
30740 attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));
30741
30742 for (i = 0, d = bdesc_tm;
30743 i < ARRAY_SIZE (bdesc_tm);
30744 i++, d++)
30745 {
30746 if ((d->mask & ix86_isa_flags) != 0
30747 || (lang_hooks.builtin_function
30748 == lang_hooks.builtin_function_ext_scope))
30749 {
30750 tree type, attrs, attrs_type;
30751 enum built_in_function code = (enum built_in_function) d->code;
30752
30753 ftype = (enum ix86_builtin_func_type) d->flag;
30754 type = ix86_get_builtin_func_type (ftype);
30755
30756 if (BUILTIN_TM_LOAD_P (code))
30757 {
30758 attrs = attrs_load;
30759 attrs_type = attrs_type_load;
30760 }
30761 else if (BUILTIN_TM_STORE_P (code))
30762 {
30763 attrs = attrs_store;
30764 attrs_type = attrs_type_store;
30765 }
30766 else
30767 {
30768 attrs = attrs_log;
30769 attrs_type = attrs_type_log;
30770 }
30771 decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
30772 /* The builtin without the prefix for
30773 calling it directly. */
30774 d->name + strlen ("__builtin_"),
30775 attrs);
30776 /* add_builtin_function() will set the DECL_ATTRIBUTES, now
30777 set the TYPE_ATTRIBUTES. */
30778 decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);
30779
30780 set_builtin_decl (code, decl, false);
30781 }
30782 }
30783 }
30784
30785 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
30786 in the current target ISA to allow the user to compile particular modules
30787 with different target specific options that differ from the command line
30788 options. */
30789 static void
30790 ix86_init_mmx_sse_builtins (void)
30791 {
30792 const struct builtin_description * d;
30793 enum ix86_builtin_func_type ftype;
30794 size_t i;
30795
30796 /* Add all special builtins with variable number of operands. */
30797 for (i = 0, d = bdesc_special_args;
30798 i < ARRAY_SIZE (bdesc_special_args);
30799 i++, d++)
30800 {
30801 if (d->name == 0)
30802 continue;
30803
30804 ftype = (enum ix86_builtin_func_type) d->flag;
30805 def_builtin (d->mask, d->name, ftype, d->code);
30806 }
30807
30808 /* Add all builtins with variable number of operands. */
30809 for (i = 0, d = bdesc_args;
30810 i < ARRAY_SIZE (bdesc_args);
30811 i++, d++)
30812 {
30813 if (d->name == 0)
30814 continue;
30815
30816 ftype = (enum ix86_builtin_func_type) d->flag;
30817 def_builtin_const (d->mask, d->name, ftype, d->code);
30818 }
30819
30820 /* Add all builtins with rounding. */
30821 for (i = 0, d = bdesc_round_args;
30822 i < ARRAY_SIZE (bdesc_round_args);
30823 i++, d++)
30824 {
30825 if (d->name == 0)
30826 continue;
30827
30828 ftype = (enum ix86_builtin_func_type) d->flag;
30829 def_builtin_const (d->mask, d->name, ftype, d->code);
30830 }
30831
30832 /* pcmpestr[im] insns. */
30833 for (i = 0, d = bdesc_pcmpestr;
30834 i < ARRAY_SIZE (bdesc_pcmpestr);
30835 i++, d++)
30836 {
30837 if (d->code == IX86_BUILTIN_PCMPESTRM128)
30838 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
30839 else
30840 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
30841 def_builtin_const (d->mask, d->name, ftype, d->code);
30842 }
30843
30844 /* pcmpistr[im] insns. */
30845 for (i = 0, d = bdesc_pcmpistr;
30846 i < ARRAY_SIZE (bdesc_pcmpistr);
30847 i++, d++)
30848 {
30849 if (d->code == IX86_BUILTIN_PCMPISTRM128)
30850 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
30851 else
30852 ftype = INT_FTYPE_V16QI_V16QI_INT;
30853 def_builtin_const (d->mask, d->name, ftype, d->code);
30854 }
30855
30856 /* comi/ucomi insns. */
30857 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
30858 {
30859 if (d->mask == OPTION_MASK_ISA_SSE2)
30860 ftype = INT_FTYPE_V2DF_V2DF;
30861 else
30862 ftype = INT_FTYPE_V4SF_V4SF;
30863 def_builtin_const (d->mask, d->name, ftype, d->code);
30864 }
30865
30866 /* SSE */
30867 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
30868 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
30869 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
30870 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
30871
30872 /* SSE or 3DNow!A */
30873 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
30874 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
30875 IX86_BUILTIN_MASKMOVQ);
30876
30877 /* SSE2 */
30878 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
30879 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
30880
30881 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
30882 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
30883 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
30884 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
30885
30886 /* SSE3. */
30887 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
30888 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
30889 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
30890 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
30891
30892 /* AES */
30893 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
30894 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
30895 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
30896 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
30897 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
30898 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
30899 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
30900 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
30901 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
30902 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
30903 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
30904 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
30905
30906 /* PCLMUL */
30907 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
30908 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
30909
30910 /* RDRND */
30911 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
30912 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
30913 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
30914 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
30915 def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
30916 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
30917 IX86_BUILTIN_RDRAND64_STEP);
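  /* Editor's note (illustrative, not part of the original source): the
     *_step builtins return nonzero on success and write the random value
     through the pointer argument, mirroring RDRAND's CF flag.  A hedged
     user-level sketch, compiled with -mrdrnd:

       unsigned int
       get_random_u32 (void)
       {
         unsigned int val;
         while (!__builtin_ia32_rdrand32_step (&val))
           ;
         return val;
       }

     The empty loop body simply retries, since RDRAND may transiently
     fail.  */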
30918
30919 /* AVX2 */
30920 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
30921 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
30922 IX86_BUILTIN_GATHERSIV2DF);
30923
30924 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
30925 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
30926 IX86_BUILTIN_GATHERSIV4DF);
30927
30928 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
30929 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
30930 IX86_BUILTIN_GATHERDIV2DF);
30931
30932 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
30933 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
30934 IX86_BUILTIN_GATHERDIV4DF);
30935
30936 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
30937 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
30938 IX86_BUILTIN_GATHERSIV4SF);
30939
30940 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
30941 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
30942 IX86_BUILTIN_GATHERSIV8SF);
30943
30944 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
30945 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
30946 IX86_BUILTIN_GATHERDIV4SF);
30947
30948 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
30949 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
30950 IX86_BUILTIN_GATHERDIV8SF);
30951
30952 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
30953 V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
30954 IX86_BUILTIN_GATHERSIV2DI);
30955
30956 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
30957 V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
30958 IX86_BUILTIN_GATHERSIV4DI);
30959
30960 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
30961 V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
30962 IX86_BUILTIN_GATHERDIV2DI);
30963
30964 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
30965 V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
30966 IX86_BUILTIN_GATHERDIV4DI);
30967
30968 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
30969 V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
30970 IX86_BUILTIN_GATHERSIV4SI);
30971
30972 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
30973 V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
30974 IX86_BUILTIN_GATHERSIV8SI);
30975
30976 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
30977 V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
30978 IX86_BUILTIN_GATHERDIV4SI);
30979
30980 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
30981 V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
30982 IX86_BUILTIN_GATHERDIV8SI);
30983
30984 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df ",
30985 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
30986 IX86_BUILTIN_GATHERALTSIV4DF);
30987
30988 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256 ",
30989 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
30990 IX86_BUILTIN_GATHERALTDIV8SF);
30991
30992 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di ",
30993 V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
30994 IX86_BUILTIN_GATHERALTSIV4DI);
30995
30996 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256 ",
30997 V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
30998 IX86_BUILTIN_GATHERALTDIV8SI);
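  /* Editor's note (hedged sketch, not part of the original source): the
     gather builtins above back the AVX2 gather intrinsics in <immintrin.h>;
     for instance __builtin_ia32_gathersiv8sf is reached (with an all-ones
     mask) through _mm256_i32gather_ps, roughly:

       #include <immintrin.h>

       __m256
       gather8 (const float *base, __m256i idx)
       {
         return _mm256_i32gather_ps (base, idx, 4);
       }

     with scale 4 (the element size in bytes) and -mavx2 in effect.  */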
30999
31000 /* AVX512F */
31001 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16sf",
31002 V16SF_FTYPE_V16SF_PCFLOAT_V16SI_HI_INT,
31003 IX86_BUILTIN_GATHER3SIV16SF);
31004
31005 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8df",
31006 V8DF_FTYPE_V8DF_PCDOUBLE_V8SI_QI_INT,
31007 IX86_BUILTIN_GATHER3SIV8DF);
31008
31009 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16sf",
31010 V8SF_FTYPE_V8SF_PCFLOAT_V8DI_QI_INT,
31011 IX86_BUILTIN_GATHER3DIV16SF);
31012
31013 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8df",
31014 V8DF_FTYPE_V8DF_PCDOUBLE_V8DI_QI_INT,
31015 IX86_BUILTIN_GATHER3DIV8DF);
31016
31017 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16si",
31018 V16SI_FTYPE_V16SI_PCINT_V16SI_HI_INT,
31019 IX86_BUILTIN_GATHER3SIV16SI);
31020
31021 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8di",
31022 V8DI_FTYPE_V8DI_PCINT64_V8SI_QI_INT,
31023 IX86_BUILTIN_GATHER3SIV8DI);
31024
31025 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16si",
31026 V8SI_FTYPE_V8SI_PCINT_V8DI_QI_INT,
31027 IX86_BUILTIN_GATHER3DIV16SI);
31028
31029 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8di",
31030 V8DI_FTYPE_V8DI_PCINT64_V8DI_QI_INT,
31031 IX86_BUILTIN_GATHER3DIV8DI);
31032
31033 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8df ",
31034 V8DF_FTYPE_V8DF_PCDOUBLE_V16SI_QI_INT,
31035 IX86_BUILTIN_GATHER3ALTSIV8DF);
31036
31037 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8sf ",
31038 V16SF_FTYPE_V16SF_PCFLOAT_V8DI_HI_INT,
31039 IX86_BUILTIN_GATHER3ALTDIV16SF);
31040
31041 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8di ",
31042 V8DI_FTYPE_V8DI_PCINT64_V16SI_QI_INT,
31043 IX86_BUILTIN_GATHER3ALTSIV8DI);
31044
31045 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8si ",
31046 V16SI_FTYPE_V16SI_PCINT_V8DI_HI_INT,
31047 IX86_BUILTIN_GATHER3ALTDIV16SI);
31048
31049 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16sf",
31050 VOID_FTYPE_PFLOAT_HI_V16SI_V16SF_INT,
31051 IX86_BUILTIN_SCATTERSIV16SF);
31052
31053 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8df",
31054 VOID_FTYPE_PDOUBLE_QI_V8SI_V8DF_INT,
31055 IX86_BUILTIN_SCATTERSIV8DF);
31056
31057 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16sf",
31058 VOID_FTYPE_PFLOAT_QI_V8DI_V8SF_INT,
31059 IX86_BUILTIN_SCATTERDIV16SF);
31060
31061 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8df",
31062 VOID_FTYPE_PDOUBLE_QI_V8DI_V8DF_INT,
31063 IX86_BUILTIN_SCATTERDIV8DF);
31064
31065 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16si",
31066 VOID_FTYPE_PINT_HI_V16SI_V16SI_INT,
31067 IX86_BUILTIN_SCATTERSIV16SI);
31068
31069 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8di",
31070 VOID_FTYPE_PLONGLONG_QI_V8SI_V8DI_INT,
31071 IX86_BUILTIN_SCATTERSIV8DI);
31072
31073 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16si",
31074 VOID_FTYPE_PINT_QI_V8DI_V8SI_INT,
31075 IX86_BUILTIN_SCATTERDIV16SI);
31076
31077 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8di",
31078 VOID_FTYPE_PLONGLONG_QI_V8DI_V8DI_INT,
31079 IX86_BUILTIN_SCATTERDIV8DI);
31080
31081 /* AVX512PF */
31082 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdpd",
31083 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
31084 IX86_BUILTIN_GATHERPFDPD);
31085 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdps",
31086 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
31087 IX86_BUILTIN_GATHERPFDPS);
31088 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqpd",
31089 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
31090 IX86_BUILTIN_GATHERPFQPD);
31091 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqps",
31092 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
31093 IX86_BUILTIN_GATHERPFQPS);
31094 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdpd",
31095 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
31096 IX86_BUILTIN_SCATTERPFDPD);
31097 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdps",
31098 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
31099 IX86_BUILTIN_SCATTERPFDPS);
31100 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqpd",
31101 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
31102 IX86_BUILTIN_SCATTERPFQPD);
31103 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqps",
31104 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
31105 IX86_BUILTIN_SCATTERPFQPS);
31106
31107 /* SHA */
31108 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg1",
31109 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG1);
31110 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg2",
31111 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG2);
31112 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1nexte",
31113 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1NEXTE);
31114 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1rnds4",
31115 V4SI_FTYPE_V4SI_V4SI_INT, IX86_BUILTIN_SHA1RNDS4);
31116 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg1",
31117 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG1);
31118 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg2",
31119 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG2);
31120 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256rnds2",
31121 V4SI_FTYPE_V4SI_V4SI_V4SI, IX86_BUILTIN_SHA256RNDS2);
31122
31123 /* RTM. */
31124 def_builtin (OPTION_MASK_ISA_RTM, "__builtin_ia32_xabort",
31125 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT);
31126
31127 /* MMX access to the vec_init patterns. */
31128 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
31129 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
31130
31131 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
31132 V4HI_FTYPE_HI_HI_HI_HI,
31133 IX86_BUILTIN_VEC_INIT_V4HI);
31134
31135 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
31136 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
31137 IX86_BUILTIN_VEC_INIT_V8QI);
31138
31139 /* Access to the vec_extract patterns. */
31140 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
31141 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
31142 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
31143 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
31144 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
31145 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
31146 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
31147 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
31148 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
31149 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
31150
31151 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
31152 "__builtin_ia32_vec_ext_v4hi",
31153 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
31154
31155 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
31156 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
31157
31158 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
31159 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
31160
31161 /* Access to the vec_set patterns. */
31162 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
31163 "__builtin_ia32_vec_set_v2di",
31164 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
31165
31166 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
31167 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
31168
31169 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
31170 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
31171
31172 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
31173 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
31174
31175 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
31176 "__builtin_ia32_vec_set_v4hi",
31177 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
31178
31179 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
31180 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
31181
31182 /* RDSEED */
31183 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_hi_step",
31184 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDSEED16_STEP);
31185 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_si_step",
31186 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDSEED32_STEP);
31187 def_builtin (OPTION_MASK_ISA_RDSEED | OPTION_MASK_ISA_64BIT,
31188 "__builtin_ia32_rdseed_di_step",
31189 INT_FTYPE_PULONGLONG, IX86_BUILTIN_RDSEED64_STEP);
31190
31191 /* ADCX */
31192 def_builtin (0, "__builtin_ia32_addcarryx_u32",
31193 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_ADDCARRYX32);
31194 def_builtin (OPTION_MASK_ISA_64BIT,
31195 "__builtin_ia32_addcarryx_u64",
31196 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
31197 IX86_BUILTIN_ADDCARRYX64);
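  /* Editor's note (illustrative, not part of the original source): the
     addcarryx builtins take a carry-in, two operands and a result pointer,
     and return the carry-out, so multi-word additions chain naturally.
     Hedged sketch adding two 64-bit values held as 32-bit limbs
     (a_lo/a_hi/b_lo/b_hi are placeholder operands):

       unsigned int lo, hi;
       unsigned char c;
       c = __builtin_ia32_addcarryx_u32 (0, a_lo, b_lo, &lo);
       c = __builtin_ia32_addcarryx_u32 (c, a_hi, b_hi, &hi);

     after which c holds the final carry-out.  */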
31198
31199 /* SBB */
31200 def_builtin (0, "__builtin_ia32_sbb_u32",
31201 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_SBB32);
31202 def_builtin (OPTION_MASK_ISA_64BIT,
31203 "__builtin_ia32_sbb_u64",
31204 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
31205 IX86_BUILTIN_SBB64);
31206
31207 /* Read/write FLAGS. */
31208 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u32",
31209 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
31210 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u64",
31211 UINT64_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
31212 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u32",
31213 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_WRITE_FLAGS);
31214 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u64",
31215 VOID_FTYPE_UINT64, IX86_BUILTIN_WRITE_FLAGS);
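  /* Editor's note (illustrative, not part of the original source): a
     hedged sketch of the flags builtins on a 64-bit target; bit 10 of
     EFLAGS is the direction flag:

       unsigned long long flags = __builtin_ia32_readeflags_u64 ();
       flags &= ~0x400ULL;
       __builtin_ia32_writeeflags_u64 (flags);
  */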
31216
31217 /* CLFLUSHOPT. */
31218 def_builtin (OPTION_MASK_ISA_CLFLUSHOPT, "__builtin_ia32_clflushopt",
31219 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSHOPT);
31220
31221 /* Add FMA4 multi-arg argument instructions */
31222 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
31223 {
31224 if (d->name == 0)
31225 continue;
31226
31227 ftype = (enum ix86_builtin_func_type) d->flag;
31228 def_builtin_const (d->mask, d->name, ftype, d->code);
31229 }
31230 }
31231
31232 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
31233 to return a pointer to VERSION_DECL if the outcome of the expression
31234 formed by PREDICATE_CHAIN is true. This function will be called during
31235 version dispatch to decide which function version to execute. It returns
31236 the basic block at the end, to which more conditions can be added. */
31237
31238 static basic_block
31239 add_condition_to_bb (tree function_decl, tree version_decl,
31240 tree predicate_chain, basic_block new_bb)
31241 {
31242 gimple return_stmt;
31243 tree convert_expr, result_var;
31244 gimple convert_stmt;
31245 gimple call_cond_stmt;
31246 gimple if_else_stmt;
31247
31248 basic_block bb1, bb2, bb3;
31249 edge e12, e23;
31250
31251 tree cond_var, and_expr_var = NULL_TREE;
31252 gimple_seq gseq;
31253
31254 tree predicate_decl, predicate_arg;
31255
31256 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
31257
31258 gcc_assert (new_bb != NULL);
31259 gseq = bb_seq (new_bb);
31260
31261
31262 convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
31263 build_fold_addr_expr (version_decl));
31264 result_var = create_tmp_var (ptr_type_node, NULL);
31265 convert_stmt = gimple_build_assign (result_var, convert_expr);
31266 return_stmt = gimple_build_return (result_var);
31267
31268 if (predicate_chain == NULL_TREE)
31269 {
31270 gimple_seq_add_stmt (&gseq, convert_stmt);
31271 gimple_seq_add_stmt (&gseq, return_stmt);
31272 set_bb_seq (new_bb, gseq);
31273 gimple_set_bb (convert_stmt, new_bb);
31274 gimple_set_bb (return_stmt, new_bb);
31275 pop_cfun ();
31276 return new_bb;
31277 }
31278
31279 while (predicate_chain != NULL)
31280 {
31281 cond_var = create_tmp_var (integer_type_node, NULL);
31282 predicate_decl = TREE_PURPOSE (predicate_chain);
31283 predicate_arg = TREE_VALUE (predicate_chain);
31284 call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
31285 gimple_call_set_lhs (call_cond_stmt, cond_var);
31286
31287 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
31288 gimple_set_bb (call_cond_stmt, new_bb);
31289 gimple_seq_add_stmt (&gseq, call_cond_stmt);
31290
31291 predicate_chain = TREE_CHAIN (predicate_chain);
31292
31293 if (and_expr_var == NULL)
31294 and_expr_var = cond_var;
31295 else
31296 {
31297 gimple assign_stmt;
31298 /* Use MIN_EXPR to check whether any integer is zero:
31299 and_expr_var = min_expr <cond_var, and_expr_var> */
31300 assign_stmt = gimple_build_assign (and_expr_var,
31301 build2 (MIN_EXPR, integer_type_node,
31302 cond_var, and_expr_var));
31303
31304 gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
31305 gimple_set_bb (assign_stmt, new_bb);
31306 gimple_seq_add_stmt (&gseq, assign_stmt);
31307 }
31308 }
31309
31310 if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
31311 integer_zero_node,
31312 NULL_TREE, NULL_TREE);
31313 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
31314 gimple_set_bb (if_else_stmt, new_bb);
31315 gimple_seq_add_stmt (&gseq, if_else_stmt);
31316
31317 gimple_seq_add_stmt (&gseq, convert_stmt);
31318 gimple_seq_add_stmt (&gseq, return_stmt);
31319 set_bb_seq (new_bb, gseq);
31320
31321 bb1 = new_bb;
31322 e12 = split_block (bb1, if_else_stmt);
31323 bb2 = e12->dest;
31324 e12->flags &= ~EDGE_FALLTHRU;
31325 e12->flags |= EDGE_TRUE_VALUE;
31326
31327 e23 = split_block (bb2, return_stmt);
31328
31329 gimple_set_bb (convert_stmt, bb2);
31330 gimple_set_bb (return_stmt, bb2);
31331
31332 bb3 = e23->dest;
31333 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
31334
31335 remove_edge (e23);
31336 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
31337
31338 pop_cfun ();
31339
31340 return bb3;
31341 }
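/* Editor's note (hedged sketch, not part of the original source): for a
   version guarded by two predicates, the block built above behaves like

     c1 = __builtin_cpu_is ("corei7");
     c2 = __builtin_cpu_supports ("sse4.2");
     if (MIN (c1, c2) > 0)
       return (void *) &foo_corei7_sse42;

   where MIN (c1, c2) > 0 holds exactly when every predicate is nonzero,
   and foo_corei7_sse42 stands in for the version decl; on the false edge
   control falls through to the block for the next version.  */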
31342
31343 /* This parses the attribute arguments to target in DECL and determines
31344 the right builtin to use to match the platform specification.
31345 It returns the priority value for this version decl. If PREDICATE_LIST
31346 is not NULL, it stores the list of cpu features that need to be checked
31347 before dispatching this function. */
31348
31349 static unsigned int
31350 get_builtin_code_for_version (tree decl, tree *predicate_list)
31351 {
31352 tree attrs;
31353 struct cl_target_option cur_target;
31354 tree target_node;
31355 struct cl_target_option *new_target;
31356 const char *arg_str = NULL;
31357 const char *attrs_str = NULL;
31358 char *tok_str = NULL;
31359 char *token;
31360
31361 /* Priority of i386 features, greater value is higher priority. This is
31362 used to decide the order in which function dispatch must happen. For
31363 instance, a version specialized for SSE4.2 should be checked for dispatch
31364 before a version for SSE3, as SSE4.2 implies SSE3. */
31365 enum feature_priority
31366 {
31367 P_ZERO = 0,
31368 P_MMX,
31369 P_SSE,
31370 P_SSE2,
31371 P_SSE3,
31372 P_SSSE3,
31373 P_PROC_SSSE3,
31374 P_SSE4_A,
31375 P_PROC_SSE4_A,
31376 P_SSE4_1,
31377 P_SSE4_2,
31378 P_PROC_SSE4_2,
31379 P_POPCNT,
31380 P_AVX,
31381 P_PROC_AVX,
31382 P_FMA4,
31383 P_XOP,
31384 P_PROC_XOP,
31385 P_FMA,
31386 P_PROC_FMA,
31387 P_AVX2,
31388 P_PROC_AVX2
31389 };
31390
31391 enum feature_priority priority = P_ZERO;
31392
31393 /* These are the target attribute strings for which a dispatcher is
31394 available, from fold_builtin_cpu. */
31395
31396 static struct _feature_list
31397 {
31398 const char *const name;
31399 const enum feature_priority priority;
31400 }
31401 const feature_list[] =
31402 {
31403 {"mmx", P_MMX},
31404 {"sse", P_SSE},
31405 {"sse2", P_SSE2},
31406 {"sse3", P_SSE3},
31407 {"sse4a", P_SSE4_A},
31408 {"ssse3", P_SSSE3},
31409 {"sse4.1", P_SSE4_1},
31410 {"sse4.2", P_SSE4_2},
31411 {"popcnt", P_POPCNT},
31412 {"avx", P_AVX},
31413 {"fma4", P_FMA4},
31414 {"xop", P_XOP},
31415 {"fma", P_FMA},
31416 {"avx2", P_AVX2}
31417 };
31418
31419
31420 static unsigned int NUM_FEATURES
31421 = sizeof (feature_list) / sizeof (struct _feature_list);
31422
31423 unsigned int i;
31424
31425 tree predicate_chain = NULL_TREE;
31426 tree predicate_decl, predicate_arg;
31427
31428 attrs = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
31429 gcc_assert (attrs != NULL);
31430
31431 attrs = TREE_VALUE (TREE_VALUE (attrs));
31432
31433 gcc_assert (TREE_CODE (attrs) == STRING_CST);
31434 attrs_str = TREE_STRING_POINTER (attrs);
31435
31436 /* Return priority zero for default function. */
31437 if (strcmp (attrs_str, "default") == 0)
31438 return 0;
31439
31440 /* Handle arch= if specified. For priority, set it to be 1 more than
31441 the best instruction set the processor can handle. For instance, if
31442 there is a version for atom and a version for ssse3 (the highest ISA
31443 priority for atom), the atom version must be checked for dispatch
31444 before the ssse3 version. */
31445 if (strstr (attrs_str, "arch=") != NULL)
31446 {
31447 cl_target_option_save (&cur_target, &global_options);
31448 target_node = ix86_valid_target_attribute_tree (attrs, &global_options,
31449 &global_options_set);
31450
31451 gcc_assert (target_node);
31452 new_target = TREE_TARGET_OPTION (target_node);
31453 gcc_assert (new_target);
31454
31455 if (new_target->arch_specified && new_target->arch > 0)
31456 {
31457 switch (new_target->arch)
31458 {
31459 case PROCESSOR_CORE2:
31460 arg_str = "core2";
31461 priority = P_PROC_SSSE3;
31462 break;
31463 case PROCESSOR_NEHALEM:
31464 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AES)
31465 arg_str = "westmere";
31466 else
31467 /* We translate "arch=corei7" and "arch=nehalem" to
31468 "corei7" so that it will be mapped to M_INTEL_COREI7
31469 as cpu type to cover all M_INTEL_COREI7_XXXs. */
31470 arg_str = "corei7";
31471 priority = P_PROC_SSE4_2;
31472 break;
31473 case PROCESSOR_SANDYBRIDGE:
31474 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_F16C)
31475 arg_str = "ivybridge";
31476 else
31477 arg_str = "sandybridge";
31478 priority = P_PROC_AVX;
31479 break;
31480 case PROCESSOR_HASWELL:
31481 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_ADX)
31482 arg_str = "broadwell";
31483 else
31484 arg_str = "haswell";
31485 priority = P_PROC_AVX2;
31486 break;
31487 case PROCESSOR_BONNELL:
31488 arg_str = "bonnell";
31489 priority = P_PROC_SSSE3;
31490 break;
31491 case PROCESSOR_SILVERMONT:
31492 arg_str = "silvermont";
31493 priority = P_PROC_SSE4_2;
31494 break;
31495 case PROCESSOR_AMDFAM10:
31496 arg_str = "amdfam10h";
31497 priority = P_PROC_SSE4_A;
31498 break;
31499 case PROCESSOR_BTVER1:
31500 arg_str = "btver1";
31501 priority = P_PROC_SSE4_A;
31502 break;
31503 case PROCESSOR_BTVER2:
31504 arg_str = "btver2";
31505 priority = P_PROC_AVX;
31506 break;
31507 case PROCESSOR_BDVER1:
31508 arg_str = "bdver1";
31509 priority = P_PROC_XOP;
31510 break;
31511 case PROCESSOR_BDVER2:
31512 arg_str = "bdver2";
31513 priority = P_PROC_FMA;
31514 break;
31515 case PROCESSOR_BDVER3:
31516 arg_str = "bdver3";
31517 priority = P_PROC_FMA;
31518 break;
31519 case PROCESSOR_BDVER4:
31520 arg_str = "bdver4";
31521 priority = P_PROC_AVX2;
31522 break;
31523 }
31524 }
31525
31526 cl_target_option_restore (&global_options, &cur_target);
31527
31528 if (predicate_list && arg_str == NULL)
31529 {
31530 error_at (DECL_SOURCE_LOCATION (decl),
31531 "No dispatcher found for the versioning attributes");
31532 return 0;
31533 }
31534
31535 if (predicate_list)
31536 {
31537 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_IS];
31538 /* For a C string literal the length includes the trailing NULL. */
31539 predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
31540 predicate_chain = tree_cons (predicate_decl, predicate_arg,
31541 predicate_chain);
31542 }
31543 }
31544
31545 /* Process feature name. */
31546 tok_str = (char *) xmalloc (strlen (attrs_str) + 1);
31547 strcpy (tok_str, attrs_str);
31548 token = strtok (tok_str, ",");
31549 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_SUPPORTS];
31550
31551 while (token != NULL)
31552 {
31553 /* Do not process "arch=" */
31554 if (strncmp (token, "arch=", 5) == 0)
31555 {
31556 token = strtok (NULL, ",");
31557 continue;
31558 }
31559 for (i = 0; i < NUM_FEATURES; ++i)
31560 {
31561 if (strcmp (token, feature_list[i].name) == 0)
31562 {
31563 if (predicate_list)
31564 {
31565 predicate_arg = build_string_literal (
31566 strlen (feature_list[i].name) + 1,
31567 feature_list[i].name);
31568 predicate_chain = tree_cons (predicate_decl, predicate_arg,
31569 predicate_chain);
31570 }
31571 /* Find the maximum priority feature. */
31572 if (feature_list[i].priority > priority)
31573 priority = feature_list[i].priority;
31574
31575 break;
31576 }
31577 }
31578 if (predicate_list && i == NUM_FEATURES)
31579 {
31580 error_at (DECL_SOURCE_LOCATION (decl),
31581 "No dispatcher found for %s", token);
31582 return 0;
31583 }
31584 token = strtok (NULL, ",");
31585 }
31586 free (tok_str);
31587
31588 if (predicate_list && predicate_chain == NULL_TREE)
31589 {
31590 error_at (DECL_SOURCE_LOCATION (decl),
31591 "No dispatcher found for the versioning attributes : %s",
31592 attrs_str);
31593 return 0;
31594 }
31595 else if (predicate_list)
31596 {
31597 predicate_chain = nreverse (predicate_chain);
31598 *predicate_list = predicate_chain;
31599 }
31600
31601 return priority;
31602 }
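/* Editor's note (illustrative, not part of the original source): given
   versions such as

     __attribute__ ((target ("default")))      int f (void);
     __attribute__ ((target ("sse4.2")))       int f (void);
     __attribute__ ((target ("arch=corei7")))  int f (void);

   the arch=corei7 version gets priority P_PROC_SSE4_2, which outranks the
   plain sse4.2 version's P_SSE4_2, so it is tested first at dispatch time,
   while the "default" version (priority 0) is tried last.  */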
31603
31604 /* This compares the priority of target features in function DECL1
31605 and DECL2. It returns positive value if DECL1 is higher priority,
31606 negative value if DECL2 is higher priority and 0 if they are the
31607 same. */
31608
31609 static int
31610 ix86_compare_version_priority (tree decl1, tree decl2)
31611 {
31612 unsigned int priority1 = get_builtin_code_for_version (decl1, NULL);
31613 unsigned int priority2 = get_builtin_code_for_version (decl2, NULL);
31614
31615 return (int)priority1 - (int)priority2;
31616 }
31617
31618 /* V1 and V2 point to function versions with different priorities
31619 based on the target ISA. This function compares their priorities. */
31620
31621 static int
31622 feature_compare (const void *v1, const void *v2)
31623 {
31624 typedef struct _function_version_info
31625 {
31626 tree version_decl;
31627 tree predicate_chain;
31628 unsigned int dispatch_priority;
31629 } function_version_info;
31630
31631 const function_version_info c1 = *(const function_version_info *)v1;
31632 const function_version_info c2 = *(const function_version_info *)v2;
31633 return (c2.dispatch_priority - c1.dispatch_priority);
31634 }
31635
31636 /* This function generates the dispatch function for
31637 multi-versioned functions. DISPATCH_DECL is the function which will
31638 contain the dispatch logic. FNDECLS are the function choices for
31639 dispatch, and is a tree chain. EMPTY_BB is the basic block pointer
31640 in DISPATCH_DECL in which the dispatch code is generated. */
31641
31642 static int
31643 dispatch_function_versions (tree dispatch_decl,
31644 void *fndecls_p,
31645 basic_block *empty_bb)
31646 {
31647 tree default_decl;
31648 gimple ifunc_cpu_init_stmt;
31649 gimple_seq gseq;
31650 int ix;
31651 tree ele;
31652 vec<tree> *fndecls;
31653 unsigned int num_versions = 0;
31654 unsigned int actual_versions = 0;
31655 unsigned int i;
31656
31657 struct _function_version_info
31658 {
31659 tree version_decl;
31660 tree predicate_chain;
31661 unsigned int dispatch_priority;
31662 }*function_version_info;
31663
31664 gcc_assert (dispatch_decl != NULL
31665 && fndecls_p != NULL
31666 && empty_bb != NULL);
31667
31668 /* fndecls_p is actually a vector.  */
31669 fndecls = static_cast<vec<tree> *> (fndecls_p);
31670
31671 /* At least one more version other than the default. */
31672 num_versions = fndecls->length ();
31673 gcc_assert (num_versions >= 2);
31674
31675 function_version_info = (struct _function_version_info *)
31676 XNEWVEC (struct _function_version_info, (num_versions - 1));
31677
31678 /* The first version in the vector is the default decl. */
31679 default_decl = (*fndecls)[0];
31680
31681 push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));
31682
31683 gseq = bb_seq (*empty_bb);
31684 /* Function version dispatch is via IFUNC. IFUNC resolvers fire before
31685 constructors, so explicitly call __builtin_cpu_init here. */
31686 ifunc_cpu_init_stmt = gimple_build_call_vec (
31687 ix86_builtins [(int) IX86_BUILTIN_CPU_INIT], vNULL);
31688 gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
31689 gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
31690 set_bb_seq (*empty_bb, gseq);
31691
31692 pop_cfun ();
31693
31694
31695 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
31696 {
31697 tree version_decl = ele;
31698 tree predicate_chain = NULL_TREE;
31699 unsigned int priority;
31700 /* Get attribute string, parse it and find the right predicate decl.
31701 The predicate function could be a lengthy combination of many
31702 features, like arch-type and various isa-variants. */
31703 priority = get_builtin_code_for_version (version_decl,
31704 &predicate_chain);
31705
31706 if (predicate_chain == NULL_TREE)
31707 continue;
31708
31709 function_version_info [actual_versions].version_decl = version_decl;
31710 function_version_info [actual_versions].predicate_chain
31711 = predicate_chain;
31712 function_version_info [actual_versions].dispatch_priority = priority;
31713 actual_versions++;
31714 }
31715
31716 /* Sort the versions according to descending order of dispatch priority. The
31717 priority is based on the ISA. This is not a perfect solution. There
31718 could still be ambiguity. If more than one function version is suitable
31719 to execute, which one should be dispatched? In future, allow the user
31720 to specify a dispatch priority next to the version. */
31721 qsort (function_version_info, actual_versions,
31722 sizeof (struct _function_version_info), feature_compare);
31723
31724 for (i = 0; i < actual_versions; ++i)
31725 *empty_bb = add_condition_to_bb (dispatch_decl,
31726 function_version_info[i].version_decl,
31727 function_version_info[i].predicate_chain,
31728 *empty_bb);
31729
31730 /* dispatch default version at the end. */
31731 *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
31732 NULL, *empty_bb);
31733
31734 free (function_version_info);
31735 return 0;
31736 }
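/* Editor's note (hedged overview, not part of the original source): the
   resolver body assembled above is conceptually equivalent to hand-written
   IFUNC code such as

     static void *
     f_resolver (void)
     {
       __builtin_cpu_init ();
       if (__builtin_cpu_supports ("avx2"))
         return (void *) &f_avx2;
       if (__builtin_cpu_supports ("sse4.2"))
         return (void *) &f_sse42;
       return (void *) &f_default;
     }

   with the highest-priority version tested first and the default returned
   last; f_avx2, f_sse42 and f_default are placeholder names for the
   versioned decls.  */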
31737
31738 /* Comparator function to be used in qsort routine to sort attribute
31739 specification strings to "target". */
31740
31741 static int
31742 attr_strcmp (const void *v1, const void *v2)
31743 {
31744 const char *c1 = *(char *const*)v1;
31745 const char *c2 = *(char *const*)v2;
31746 return strcmp (c1, c2);
31747 }
31748
31749 /* ARGLIST is the argument to target attribute. This function tokenizes
31750 the comma separated arguments, sorts them and returns a string which
31751 is a unique identifier for the comma separated arguments. It also
31752 replaces non-identifier characters "=,-" with "_". */
31753
31754 static char *
31755 sorted_attr_string (tree arglist)
31756 {
31757 tree arg;
31758 size_t str_len_sum = 0;
31759 char **args = NULL;
31760 char *attr_str, *ret_str;
31761 char *attr = NULL;
31762 unsigned int argnum = 1;
31763 unsigned int i;
31764
31765 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
31766 {
31767 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
31768 size_t len = strlen (str);
31769 str_len_sum += len + 1;
31770 if (arg != arglist)
31771 argnum++;
31772 for (i = 0; i < strlen (str); i++)
31773 if (str[i] == ',')
31774 argnum++;
31775 }
31776
31777 attr_str = XNEWVEC (char, str_len_sum);
31778 str_len_sum = 0;
31779 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
31780 {
31781 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
31782 size_t len = strlen (str);
31783 memcpy (attr_str + str_len_sum, str, len);
31784 attr_str[str_len_sum + len] = TREE_CHAIN (arg) ? ',' : '\0';
31785 str_len_sum += len + 1;
31786 }
31787
31788 /* Replace "=,-" with "_". */
31789 for (i = 0; i < strlen (attr_str); i++)
31790 if (attr_str[i] == '=' || attr_str[i]== '-')
31791 attr_str[i] = '_';
31792
31793 if (argnum == 1)
31794 return attr_str;
31795
31796 args = XNEWVEC (char *, argnum);
31797
31798 i = 0;
31799 attr = strtok (attr_str, ",");
31800 while (attr != NULL)
31801 {
31802 args[i] = attr;
31803 i++;
31804 attr = strtok (NULL, ",");
31805 }
31806
31807 qsort (args, argnum, sizeof (char *), attr_strcmp);
31808
31809 ret_str = XNEWVEC (char, str_len_sum);
31810 str_len_sum = 0;
31811 for (i = 0; i < argnum; i++)
31812 {
31813 size_t len = strlen (args[i]);
31814 memcpy (ret_str + str_len_sum, args[i], len);
31815 ret_str[str_len_sum + len] = i < argnum - 1 ? '_' : '\0';
31816 str_len_sum += len + 1;
31817 }
31818
31819 XDELETEVEC (args);
31820 XDELETEVEC (attr_str);
31821 return ret_str;
31822 }
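/* Editor's note (worked example, not part of the original source): for
   target ("avx,arch=core2") the steps above produce

     "avx,arch=core2"   ->  "avx,arch_core2"         ('=' and '-' become '_')
                        ->  { "avx", "arch_core2" }  (split on ',')
                        ->  { "arch_core2", "avx" }  (qsort)
                        ->  "arch_core2_avx"         (joined with '_')  */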
31823
31824 /* This function changes the assembler name for functions that are
31825 versions. If DECL is a function version and has a "target"
31826 attribute, it appends the attribute string to its assembler name. */
31827
31828 static tree
31829 ix86_mangle_function_version_assembler_name (tree decl, tree id)
31830 {
31831 tree version_attr;
31832 const char *orig_name, *version_string;
31833 char *attr_str, *assembler_name;
31834
31835 if (DECL_DECLARED_INLINE_P (decl)
31836 && lookup_attribute ("gnu_inline",
31837 DECL_ATTRIBUTES (decl)))
31838 error_at (DECL_SOURCE_LOCATION (decl),
31839 "Function versions cannot be marked as gnu_inline,"
31840 " bodies have to be generated");
31841
31842 if (DECL_VIRTUAL_P (decl)
31843 || DECL_VINDEX (decl))
31844 sorry ("Virtual function multiversioning not supported");
31845
31846 version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
31847
31848 /* target attribute string cannot be NULL. */
31849 gcc_assert (version_attr != NULL_TREE);
31850
31851 orig_name = IDENTIFIER_POINTER (id);
31852 version_string
31853 = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr)));
31854
31855 if (strcmp (version_string, "default") == 0)
31856 return id;
31857
31858 attr_str = sorted_attr_string (TREE_VALUE (version_attr));
31859 assembler_name = XNEWVEC (char, strlen (orig_name) + strlen (attr_str) + 2);
31860
31861 sprintf (assembler_name, "%s.%s", orig_name, attr_str);
31862
31863 /* Allow assembler name to be modified if already set. */
31864 if (DECL_ASSEMBLER_NAME_SET_P (decl))
31865 SET_DECL_RTL (decl, NULL);
31866
31867 tree ret = get_identifier (assembler_name);
31868 XDELETEVEC (attr_str);
31869 XDELETEVEC (assembler_name);
31870 return ret;
31871 }
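/* Editor's note (illustrative, not part of the original source): so a
   version declared with __attribute__ ((target ("sse4.2,popcnt"))) gets
   the sorted attribute string appended to its assembler name after a dot,
   e.g. "<original name>.popcnt_sse4.2", while the "default" version keeps
   the original assembler name.  */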
31872
31873 /* This function returns true if FN1 and FN2 are versions of the same function,
31874 that is, the target strings of the function decls are different. This assumes
31875 that FN1 and FN2 have the same signature. */
31876
31877 static bool
31878 ix86_function_versions (tree fn1, tree fn2)
31879 {
31880 tree attr1, attr2;
31881 char *target1, *target2;
31882 bool result;
31883
31884 if (TREE_CODE (fn1) != FUNCTION_DECL
31885 || TREE_CODE (fn2) != FUNCTION_DECL)
31886 return false;
31887
31888 attr1 = lookup_attribute ("target", DECL_ATTRIBUTES (fn1));
31889 attr2 = lookup_attribute ("target", DECL_ATTRIBUTES (fn2));
31890
31891 /* At least one function decl should have the target attribute specified. */
31892 if (attr1 == NULL_TREE && attr2 == NULL_TREE)
31893 return false;
31894
31895 /* Diagnose missing target attribute if one of the decls is already
31896 multi-versioned. */
31897 if (attr1 == NULL_TREE || attr2 == NULL_TREE)
31898 {
31899 if (DECL_FUNCTION_VERSIONED (fn1) || DECL_FUNCTION_VERSIONED (fn2))
31900 {
31901 if (attr2 != NULL_TREE)
31902 {
31903 tree tem = fn1;
31904 fn1 = fn2;
31905 fn2 = tem;
31906 attr1 = attr2;
31907 }
31908 error_at (DECL_SOURCE_LOCATION (fn2),
31909 "missing %<target%> attribute for multi-versioned %D",
31910 fn2);
31911 inform (DECL_SOURCE_LOCATION (fn1),
31912 "previous declaration of %D", fn1);
31913 /* Prevent diagnosing of the same error multiple times. */
31914 DECL_ATTRIBUTES (fn2)
31915 = tree_cons (get_identifier ("target"),
31916 copy_node (TREE_VALUE (attr1)),
31917 DECL_ATTRIBUTES (fn2));
31918 }
31919 return false;
31920 }
31921
31922 target1 = sorted_attr_string (TREE_VALUE (attr1));
31923 target2 = sorted_attr_string (TREE_VALUE (attr2));
31924
31925 /* The sorted target strings must be different for fn1 and fn2
31926 to be versions. */
31927 if (strcmp (target1, target2) == 0)
31928 result = false;
31929 else
31930 result = true;
31931
31932 XDELETEVEC (target1);
31933 XDELETEVEC (target2);
31934
31935 return result;
31936 }
31937
31938 static tree
31939 ix86_mangle_decl_assembler_name (tree decl, tree id)
31940 {
31941 /* For function version, add the target suffix to the assembler name. */
31942 if (TREE_CODE (decl) == FUNCTION_DECL
31943 && DECL_FUNCTION_VERSIONED (decl))
31944 id = ix86_mangle_function_version_assembler_name (decl, id);
31945 #ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
31946 id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id);
31947 #endif
31948
31949 return id;
31950 }
31951
31952 /* Return a new name by appending SUFFIX to the DECL name. If make_unique
31953 is true, append the full path name of the source file. */
31954
31955 static char *
31956 make_name (tree decl, const char *suffix, bool make_unique)
31957 {
31958 char *global_var_name;
31959 int name_len;
31960 const char *name;
31961 const char *unique_name = NULL;
31962
31963 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
31964
31965 /* Get a unique name that can be used globally without any chances
31966 of collision at link time. */
31967 if (make_unique)
31968 unique_name = IDENTIFIER_POINTER (get_file_function_name ("\0"));
31969
31970 name_len = strlen (name) + strlen (suffix) + 2;
31971
31972 if (make_unique)
31973 name_len += strlen (unique_name) + 1;
31974 global_var_name = XNEWVEC (char, name_len);
31975
31976 /* Use '.' to concatenate names as it is demangler friendly. */
31977 if (make_unique)
31978 snprintf (global_var_name, name_len, "%s.%s.%s", name, unique_name,
31979 suffix);
31980 else
31981 snprintf (global_var_name, name_len, "%s.%s", name, suffix);
31982
31983 return global_var_name;
31984 }
31985
31986 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
31987
31988 /* Make a dispatcher declaration for the multi-versioned function DECL.
31989 Calls to DECL function will be replaced with calls to the dispatcher
31990 by the front-end. Return the decl created. */
31991
31992 static tree
31993 make_dispatcher_decl (const tree decl)
31994 {
31995 tree func_decl;
31996 char *func_name;
31997 tree fn_type, func_type;
31998 bool is_uniq = false;
31999
32000 if (TREE_PUBLIC (decl) == 0)
32001 is_uniq = true;
32002
32003 func_name = make_name (decl, "ifunc", is_uniq);
32004
32005 fn_type = TREE_TYPE (decl);
32006 func_type = build_function_type (TREE_TYPE (fn_type),
32007 TYPE_ARG_TYPES (fn_type));
32008
32009 func_decl = build_fn_decl (func_name, func_type);
32010 XDELETEVEC (func_name);
32011 TREE_USED (func_decl) = 1;
32012 DECL_CONTEXT (func_decl) = NULL_TREE;
32013 DECL_INITIAL (func_decl) = error_mark_node;
32014 DECL_ARTIFICIAL (func_decl) = 1;
32015 /* Mark this function as external; the resolver will flip it again if
32016 it gets generated. */
32017 DECL_EXTERNAL (func_decl) = 1;
32018 /* IFUNCs have to be externally visible. */
32019 TREE_PUBLIC (func_decl) = 1;
32020
32021 return func_decl;
32022 }
32023
32024 #endif
32025
32026 /* Returns true if DECL is multi-versioned and is the default version,
32027 that is, it is not tagged with target-specific options. */
32028
32029 static bool
32030 is_function_default_version (const tree decl)
32031 {
32032 if (TREE_CODE (decl) != FUNCTION_DECL
32033 || !DECL_FUNCTION_VERSIONED (decl))
32034 return false;
32035 tree attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
32036 gcc_assert (attr);
32037 attr = TREE_VALUE (TREE_VALUE (attr));
32038 return (TREE_CODE (attr) == STRING_CST
32039 && strcmp (TREE_STRING_POINTER (attr), "default") == 0);
32040 }
32041
32042 /* Make a dispatcher declaration for the multi-versioned function DECL.
32043 Calls to DECL function will be replaced with calls to the dispatcher
32044 by the front-end. Returns the decl of the dispatcher function. */
32045
32046 static tree
32047 ix86_get_function_versions_dispatcher (void *decl)
32048 {
32049 tree fn = (tree) decl;
32050 struct cgraph_node *node = NULL;
32051 struct cgraph_node *default_node = NULL;
32052 struct cgraph_function_version_info *node_v = NULL;
32053 struct cgraph_function_version_info *first_v = NULL;
32054
32055 tree dispatch_decl = NULL;
32056
32057 struct cgraph_function_version_info *default_version_info = NULL;
32058
32059 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
32060
32061 node = cgraph_node::get (fn);
32062 gcc_assert (node != NULL);
32063
32064 node_v = node->function_version ();
32065 gcc_assert (node_v != NULL);
32066
32067 if (node_v->dispatcher_resolver != NULL)
32068 return node_v->dispatcher_resolver;
32069
32070 /* Find the default version and make it the first node. */
32071 first_v = node_v;
32072 /* Go to the beginning of the chain. */
32073 while (first_v->prev != NULL)
32074 first_v = first_v->prev;
32075 default_version_info = first_v;
32076 while (default_version_info != NULL)
32077 {
32078 if (is_function_default_version
32079 (default_version_info->this_node->decl))
32080 break;
32081 default_version_info = default_version_info->next;
32082 }
32083
32084 /* If there is no default node, just return NULL. */
32085 if (default_version_info == NULL)
32086 return NULL;
32087
32088 /* Make default info the first node. */
32089 if (first_v != default_version_info)
32090 {
32091 default_version_info->prev->next = default_version_info->next;
32092 if (default_version_info->next)
32093 default_version_info->next->prev = default_version_info->prev;
32094 first_v->prev = default_version_info;
32095 default_version_info->next = first_v;
32096 default_version_info->prev = NULL;
32097 }
32098
32099 default_node = default_version_info->this_node;
32100
32101 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
32102 if (targetm.has_ifunc_p ())
32103 {
32104 struct cgraph_function_version_info *it_v = NULL;
32105 struct cgraph_node *dispatcher_node = NULL;
32106 struct cgraph_function_version_info *dispatcher_version_info = NULL;
32107
32108 /* Right now, the dispatching is done via ifunc. */
32109 dispatch_decl = make_dispatcher_decl (default_node->decl);
32110
32111 dispatcher_node = cgraph_node::get_create (dispatch_decl);
32112 gcc_assert (dispatcher_node != NULL);
32113 dispatcher_node->dispatcher_function = 1;
32114 dispatcher_version_info
32115 = dispatcher_node->insert_new_function_version ();
32116 dispatcher_version_info->next = default_version_info;
32117 dispatcher_node->definition = 1;
32118
32119 /* Set the dispatcher for all the versions. */
32120 it_v = default_version_info;
32121 while (it_v != NULL)
32122 {
32123 it_v->dispatcher_resolver = dispatch_decl;
32124 it_v = it_v->next;
32125 }
32126 }
32127 else
32128 #endif
32129 {
32130 error_at (DECL_SOURCE_LOCATION (default_node->decl),
32131 "multiversioning needs ifunc which is not supported "
32132 "on this target");
32133 }
32134
32135 return dispatch_decl;
32136 }
32137
32138 /* Makes a function attribute of the form NAME(ARG_NAME) and chains
32139 it to CHAIN. */
32140
32141 static tree
32142 make_attribute (const char *name, const char *arg_name, tree chain)
32143 {
32144 tree attr_name;
32145 tree attr_arg_name;
32146 tree attr_args;
32147 tree attr;
32148
32149 attr_name = get_identifier (name);
32150 attr_arg_name = build_string (strlen (arg_name), arg_name);
32151 attr_args = tree_cons (NULL_TREE, attr_arg_name, NULL_TREE);
32152 attr = tree_cons (attr_name, attr_args, chain);
32153 return attr;
32154 }
32155
32156 /* Make the resolver function decl to dispatch the versions of
32157 a multi-versioned function, DEFAULT_DECL. Create an
32158 empty basic block in the resolver and store the pointer in
32159 EMPTY_BB. Return the decl of the resolver function. */
32160
32161 static tree
32162 make_resolver_func (const tree default_decl,
32163 const tree dispatch_decl,
32164 basic_block *empty_bb)
32165 {
32166 char *resolver_name;
32167 tree decl, type, decl_name, t;
32168 bool is_uniq = false;
32169
32170 /* IFUNCs have to be globally visible. So, if the default_decl is
32171 not, then the name of the IFUNC should be made unique. */
32172 if (TREE_PUBLIC (default_decl) == 0)
32173 is_uniq = true;
32174
32175 /* Append the filename to the resolver function if the versions are
32176 not externally visible. This is because the resolver function has
32177 to be externally visible for the loader to find it. So, appending
32178 the filename will prevent conflicts with a resolver function from
32179 another module which is based on the same version name. */
32180 resolver_name = make_name (default_decl, "resolver", is_uniq);
32181
32182 /* The resolver function should return a (void *). */
32183 type = build_function_type_list (ptr_type_node, NULL_TREE);
32184
32185 decl = build_fn_decl (resolver_name, type);
32186 decl_name = get_identifier (resolver_name);
32187 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
32188
32189 DECL_NAME (decl) = decl_name;
32190 TREE_USED (decl) = 1;
32191 DECL_ARTIFICIAL (decl) = 1;
32192 DECL_IGNORED_P (decl) = 0;
32193 /* IFUNC resolvers have to be externally visible. */
32194 TREE_PUBLIC (decl) = 1;
32195 DECL_UNINLINABLE (decl) = 1;
32196
32197 /* Resolver is not external, body is generated. */
32198 DECL_EXTERNAL (decl) = 0;
32199 DECL_EXTERNAL (dispatch_decl) = 0;
32200
32201 DECL_CONTEXT (decl) = NULL_TREE;
32202 DECL_INITIAL (decl) = make_node (BLOCK);
32203 DECL_STATIC_CONSTRUCTOR (decl) = 0;
32204
32205 if (DECL_COMDAT_GROUP (default_decl)
32206 || TREE_PUBLIC (default_decl))
32207 {
32208 /* In this case, each translation unit with a call to this
32209 versioned function will put out a resolver. Ensure it
32210 is comdat to keep just one copy. */
32211 DECL_COMDAT (decl) = 1;
32212 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
32213 }
32214 /* Build result decl and add to function_decl. */
32215 t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
32216 DECL_ARTIFICIAL (t) = 1;
32217 DECL_IGNORED_P (t) = 1;
32218 DECL_RESULT (decl) = t;
32219
32220 gimplify_function_tree (decl);
32221 push_cfun (DECL_STRUCT_FUNCTION (decl));
32222 *empty_bb = init_lowered_empty_function (decl, false);
32223
32224 cgraph_node::add_new_function (decl, true);
32225 symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
32226
32227 pop_cfun ();
32228
32229 gcc_assert (dispatch_decl != NULL);
32230 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
32231 DECL_ATTRIBUTES (dispatch_decl)
32232 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
32233
32234 /* Create the alias for dispatch to resolver here. */
32235 /*cgraph_create_function_alias (dispatch_decl, decl);*/
32236 cgraph_node::create_same_body_alias (dispatch_decl, decl);
32237 XDELETEVEC (resolver_name);
32238 return decl;
32239 }
32240
32241 /* Generate the dispatching code body to dispatch multi-versioned function
32242 DECL. The target hook is called to process the "target" attributes and
32243 provide the code to dispatch the right function at run-time. NODE points
32244 to the dispatcher decl whose body will be created. */
32245
32246 static tree
32247 ix86_generate_version_dispatcher_body (void *node_p)
32248 {
32249 tree resolver_decl;
32250 basic_block empty_bb;
32251 tree default_ver_decl;
32252 struct cgraph_node *versn;
32253 struct cgraph_node *node;
32254
32255 struct cgraph_function_version_info *node_version_info = NULL;
32256 struct cgraph_function_version_info *versn_info = NULL;
32257
32258 node = (cgraph_node *)node_p;
32259
32260 node_version_info = node->function_version ();
32261 gcc_assert (node->dispatcher_function
32262 && node_version_info != NULL);
32263
32264 if (node_version_info->dispatcher_resolver)
32265 return node_version_info->dispatcher_resolver;
32266
32267 /* The first version in the chain corresponds to the default version. */
32268 default_ver_decl = node_version_info->next->this_node->decl;
32269
32270 /* node is going to be an alias, so remove the finalized bit. */
32271 node->definition = false;
32272
32273 resolver_decl = make_resolver_func (default_ver_decl,
32274 node->decl, &empty_bb);
32275
32276 node_version_info->dispatcher_resolver = resolver_decl;
32277
32278 push_cfun (DECL_STRUCT_FUNCTION (resolver_decl));
32279
32280 auto_vec<tree, 2> fn_ver_vec;
32281
32282 for (versn_info = node_version_info->next; versn_info;
32283 versn_info = versn_info->next)
32284 {
32285 versn = versn_info->this_node;
32286 /* Check for virtual functions here again, as by this time it should
32287 have been determined if this function needs a vtable index or
32288 not. This happens for methods in derived classes that override
32289 virtual methods in base classes but are not explicitly marked as
32290 virtual. */
32291 if (DECL_VINDEX (versn->decl))
32292 sorry ("Virtual function multiversioning not supported");
32293
32294 fn_ver_vec.safe_push (versn->decl);
32295 }
32296
32297 dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
32298 cgraph_edge::rebuild_edges ();
32299 pop_cfun ();
32300 return resolver_decl;
32301 }
32302 /* This builds the processor_model struct type defined in
32303 libgcc/config/i386/cpuinfo.c.  */
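
/* A sketch of that struct as it appears in cpuinfo.c (field names match
   those used in build_processor_model_struct below):

     struct __processor_model
     {
       unsigned int __cpu_vendor;
       unsigned int __cpu_type;
       unsigned int __cpu_subtype;
       unsigned int __cpu_features[1];
     };  */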
32304
32305 static tree
32306 build_processor_model_struct (void)
32307 {
32308 const char *field_name[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
32309 "__cpu_features"};
32310 tree field = NULL_TREE, field_chain = NULL_TREE;
32311 int i;
32312 tree type = make_node (RECORD_TYPE);
32313
32314 /* The first 3 fields are unsigned int. */
32315 for (i = 0; i < 3; ++i)
32316 {
32317 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
32318 get_identifier (field_name[i]), unsigned_type_node);
32319 if (field_chain != NULL_TREE)
32320 DECL_CHAIN (field) = field_chain;
32321 field_chain = field;
32322 }
32323
32324 /* The last field is an array of unsigned integers of size one. */
32325 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
32326 get_identifier (field_name[3]),
32327 build_array_type (unsigned_type_node,
32328 build_index_type (size_one_node)));
32329 if (field_chain != NULL_TREE)
32330 DECL_CHAIN (field) = field_chain;
32331 field_chain = field;
32332
32333 finish_builtin_struct (type, "__processor_model", field_chain, NULL_TREE);
32334 return type;
32335 }
32336
32337 /* Returns an extern, comdat VAR_DECL of type TYPE and name NAME. */
32338
32339 static tree
32340 make_var_decl (tree type, const char *name)
32341 {
32342 tree new_decl;
32343
32344 new_decl = build_decl (UNKNOWN_LOCATION,
32345 VAR_DECL,
32346 get_identifier (name),
32347 type);
32348
32349 DECL_EXTERNAL (new_decl) = 1;
32350 TREE_STATIC (new_decl) = 1;
32351 TREE_PUBLIC (new_decl) = 1;
32352 DECL_INITIAL (new_decl) = 0;
32353 DECL_ARTIFICIAL (new_decl) = 0;
32354 DECL_PRESERVE_P (new_decl) = 1;
32355
32356 make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl));
32357 assemble_variable (new_decl, 0, 0, 0);
32358
32359 return new_decl;
32360 }
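
/* For the __cpu_model use below this amounts to declaring

     extern struct __processor_model __cpu_model;

   and making the symbol comdat and preserved, so every translation unit
   refers to the single copy defined in libgcc (a sketch of the effect,
   not literal output of this function).  */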
32361
32362 /* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded
32363 into a check of the data defined in libgcc/config/i386/cpuinfo.c.  */
32364
32365 static tree
32366 fold_builtin_cpu (tree fndecl, tree *args)
32367 {
32368 unsigned int i;
32369 enum ix86_builtins fn_code = (enum ix86_builtins)
32370 DECL_FUNCTION_CODE (fndecl);
32371 tree param_string_cst = NULL;
32372
32373 /* This is the order of bit-fields in __processor_features in cpuinfo.c */
32374 enum processor_features
32375 {
32376 F_CMOV = 0,
32377 F_MMX,
32378 F_POPCNT,
32379 F_SSE,
32380 F_SSE2,
32381 F_SSE3,
32382 F_SSSE3,
32383 F_SSE4_1,
32384 F_SSE4_2,
32385 F_AVX,
32386 F_AVX2,
32387 F_SSE4_A,
32388 F_FMA4,
32389 F_XOP,
32390 F_FMA,
32391 F_MAX
32392 };
32393
32394 /* These are the values for vendor types, CPU types and CPU subtypes
32395 in cpuinfo.c.  CPU types and subtypes should have the corresponding
32396 start value subtracted from them. */
32397 enum processor_model
32398 {
32399 M_INTEL = 1,
32400 M_AMD,
32401 M_CPU_TYPE_START,
32402 M_INTEL_BONNELL,
32403 M_INTEL_CORE2,
32404 M_INTEL_COREI7,
32405 M_AMDFAM10H,
32406 M_AMDFAM15H,
32407 M_INTEL_SILVERMONT,
32408 M_AMD_BTVER1,
32409 M_AMD_BTVER2,
32410 M_CPU_SUBTYPE_START,
32411 M_INTEL_COREI7_NEHALEM,
32412 M_INTEL_COREI7_WESTMERE,
32413 M_INTEL_COREI7_SANDYBRIDGE,
32414 M_AMDFAM10H_BARCELONA,
32415 M_AMDFAM10H_SHANGHAI,
32416 M_AMDFAM10H_ISTANBUL,
32417 M_AMDFAM15H_BDVER1,
32418 M_AMDFAM15H_BDVER2,
32419 M_AMDFAM15H_BDVER3,
32420 M_AMDFAM15H_BDVER4,
32421 M_INTEL_COREI7_IVYBRIDGE,
32422 M_INTEL_COREI7_HASWELL
32423 };
32424
32425 static struct _arch_names_table
32426 {
32427 const char *const name;
32428 const enum processor_model model;
32429 }
32430 const arch_names_table[] =
32431 {
32432 {"amd", M_AMD},
32433 {"intel", M_INTEL},
32434 {"atom", M_INTEL_BONNELL},
32435 {"slm", M_INTEL_SILVERMONT},
32436 {"core2", M_INTEL_CORE2},
32437 {"corei7", M_INTEL_COREI7},
32438 {"nehalem", M_INTEL_COREI7_NEHALEM},
32439 {"westmere", M_INTEL_COREI7_WESTMERE},
32440 {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE},
32441 {"ivybridge", M_INTEL_COREI7_IVYBRIDGE},
32442 {"haswell", M_INTEL_COREI7_HASWELL},
32443 {"bonnell", M_INTEL_BONNELL},
32444 {"silvermont", M_INTEL_SILVERMONT},
32445 {"amdfam10h", M_AMDFAM10H},
32446 {"barcelona", M_AMDFAM10H_BARCELONA},
32447 {"shanghai", M_AMDFAM10H_SHANGHAI},
32448 {"istanbul", M_AMDFAM10H_ISTANBUL},
32449 {"btver1", M_AMD_BTVER1},
32450 {"amdfam15h", M_AMDFAM15H},
32451 {"bdver1", M_AMDFAM15H_BDVER1},
32452 {"bdver2", M_AMDFAM15H_BDVER2},
32453 {"bdver3", M_AMDFAM15H_BDVER3},
32454 {"bdver4", M_AMDFAM15H_BDVER4},
32455 {"btver2", M_AMD_BTVER2},
32456 };
32457
32458 static struct _isa_names_table
32459 {
32460 const char *const name;
32461 const enum processor_features feature;
32462 }
32463 const isa_names_table[] =
32464 {
32465 {"cmov", F_CMOV},
32466 {"mmx", F_MMX},
32467 {"popcnt", F_POPCNT},
32468 {"sse", F_SSE},
32469 {"sse2", F_SSE2},
32470 {"sse3", F_SSE3},
32471 {"ssse3", F_SSSE3},
32472 {"sse4a", F_SSE4_A},
32473 {"sse4.1", F_SSE4_1},
32474 {"sse4.2", F_SSE4_2},
32475 {"avx", F_AVX},
32476 {"fma4", F_FMA4},
32477 {"xop", F_XOP},
32478 {"fma", F_FMA},
32479 {"avx2", F_AVX2}
32480 };
32481
32482 tree __processor_model_type = build_processor_model_struct ();
32483 tree __cpu_model_var = make_var_decl (__processor_model_type,
32484 "__cpu_model");
32485
32486
32487 varpool_node::add (__cpu_model_var);
32488
32489 gcc_assert ((args != NULL) && (*args != NULL));
32490
32491 param_string_cst = *args;
32492 while (param_string_cst
32493 && TREE_CODE (param_string_cst) != STRING_CST)
32494 {
32495 /* *args must be an expr that can contain other EXPRs leading to a
32496 STRING_CST. */
32497 if (!EXPR_P (param_string_cst))
32498 {
32499 error ("Parameter to builtin must be a string constant or literal");
32500 return integer_zero_node;
32501 }
32502 param_string_cst = TREE_OPERAND (EXPR_CHECK (param_string_cst), 0);
32503 }
32504
32505 gcc_assert (param_string_cst);
32506
32507 if (fn_code == IX86_BUILTIN_CPU_IS)
32508 {
32509 tree ref;
32510 tree field;
32511 tree final;
32512
32513 unsigned int field_val = 0;
32514 unsigned int NUM_ARCH_NAMES
32515 = sizeof (arch_names_table) / sizeof (struct _arch_names_table);
32516
32517 for (i = 0; i < NUM_ARCH_NAMES; i++)
32518 if (strcmp (arch_names_table[i].name,
32519 TREE_STRING_POINTER (param_string_cst)) == 0)
32520 break;
32521
32522 if (i == NUM_ARCH_NAMES)
32523 {
32524 error ("Parameter to builtin not valid: %s",
32525 TREE_STRING_POINTER (param_string_cst));
32526 return integer_zero_node;
32527 }
32528
32529 field = TYPE_FIELDS (__processor_model_type);
32530 field_val = arch_names_table[i].model;
32531
32532 /* CPU types are stored in the next field. */
32533 if (field_val > M_CPU_TYPE_START
32534 && field_val < M_CPU_SUBTYPE_START)
32535 {
32536 field = DECL_CHAIN (field);
32537 field_val -= M_CPU_TYPE_START;
32538 }
32539
32540 /* CPU subtypes are stored two fields down, in __cpu_subtype. */
32541 if (field_val > M_CPU_SUBTYPE_START)
32542 {
32543 field = DECL_CHAIN (DECL_CHAIN (field));
32544 field_val -= M_CPU_SUBTYPE_START;
32545 }
32546
32547 /* Get the appropriate field in __cpu_model. */
32548 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
32549 field, NULL_TREE);
32550
32551 /* Check the value. */
32552 final = build2 (EQ_EXPR, unsigned_type_node, ref,
32553 build_int_cstu (unsigned_type_node, field_val));
32554 return build1 (CONVERT_EXPR, integer_type_node, final);
32555 }
32556 else if (fn_code == IX86_BUILTIN_CPU_SUPPORTS)
32557 {
32558 tree ref;
32559 tree array_elt;
32560 tree field;
32561 tree final;
32562
32563 unsigned int field_val = 0;
32564 unsigned int NUM_ISA_NAMES
32565 = sizeof (isa_names_table) / sizeof (struct _isa_names_table);
32566
32567 for (i = 0; i < NUM_ISA_NAMES; i++)
32568 if (strcmp (isa_names_table[i].name,
32569 TREE_STRING_POINTER (param_string_cst)) == 0)
32570 break;
32571
32572 if (i == NUM_ISA_NAMES)
32573 {
32574 error ("Parameter to builtin not valid: %s",
32575 TREE_STRING_POINTER (param_string_cst));
32576 return integer_zero_node;
32577 }
32578
32579 field = TYPE_FIELDS (__processor_model_type);
32580 /* Get the last field, which is __cpu_features. */
32581 while (DECL_CHAIN (field))
32582 field = DECL_CHAIN (field);
32583
32584 /* Get the appropriate field: __cpu_model.__cpu_features */
32585 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
32586 field, NULL_TREE);
32587
32588 /* Access the 0th element of __cpu_features array. */
32589 array_elt = build4 (ARRAY_REF, unsigned_type_node, ref,
32590 integer_zero_node, NULL_TREE, NULL_TREE);
32591
32592 field_val = (1 << isa_names_table[i].feature);
32593 /* Return __cpu_model.__cpu_features[0] & field_val */
32594 final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
32595 build_int_cstu (unsigned_type_node, field_val));
32596 return build1 (CONVERT_EXPR, integer_type_node, final);
32597 }
32598 gcc_unreachable ();
32599 }
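
/* As a concrete illustration of the folding above (a sketch using the
   local enum values): __builtin_cpu_supports ("avx2") becomes

     (int) (__cpu_model.__cpu_features[0] & (1 << F_AVX2))

   and __builtin_cpu_is ("nehalem") becomes

     (int) (__cpu_model.__cpu_subtype
            == M_INTEL_COREI7_NEHALEM - M_CPU_SUBTYPE_START)

   where the subtraction reflects the M_CPU_SUBTYPE_START bias applied
   above.  */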
32600
32601 static tree
32602 ix86_fold_builtin (tree fndecl, int n_args,
32603 tree *args, bool ignore ATTRIBUTE_UNUSED)
32604 {
32605 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
32606 {
32607 enum ix86_builtins fn_code = (enum ix86_builtins)
32608 DECL_FUNCTION_CODE (fndecl);
32609 if (fn_code == IX86_BUILTIN_CPU_IS
32610 || fn_code == IX86_BUILTIN_CPU_SUPPORTS)
32611 {
32612 gcc_assert (n_args == 1);
32613 return fold_builtin_cpu (fndecl, args);
32614 }
32615 }
32616
32617 #ifdef SUBTARGET_FOLD_BUILTIN
32618 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
32619 #endif
32620
32621 return NULL_TREE;
32622 }
32623
32624 /* Make builtins to detect cpu type and features supported. NAME is
32625 the builtin name, CODE is the builtin code, and FTYPE is the function
32626 type of the builtin. */
32627
32628 static void
32629 make_cpu_type_builtin (const char* name, int code,
32630 enum ix86_builtin_func_type ftype, bool is_const)
32631 {
32632 tree decl;
32633 tree type;
32634
32635 type = ix86_get_builtin_func_type (ftype);
32636 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
32637 NULL, NULL_TREE);
32638 gcc_assert (decl != NULL_TREE);
32639 ix86_builtins[(int) code] = decl;
32640 TREE_READONLY (decl) = is_const;
32641 }
32642
32643 /* Make builtins to get CPU type and features supported. The created
32644 builtins are:
32645
32646 __builtin_cpu_init (), to detect cpu type and features,
32647 __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
32648 __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>
32649 */
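
/* A usage sketch (hypothetical user code, with names taken from the
   tables in fold_builtin_cpu above):

     int
     pick_impl (void)
     {
       __builtin_cpu_init ();
       if (__builtin_cpu_is ("corei7"))
         return 1;
       if (__builtin_cpu_supports ("avx2"))
         return 2;
       return 0;
     }

   The explicit __builtin_cpu_init call is only needed in code that may
   run before the libgcc constructors have initialized __cpu_model.  */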
32650
32651 static void
32652 ix86_init_platform_type_builtins (void)
32653 {
32654 make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT,
32655 INT_FTYPE_VOID, false);
32656 make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS,
32657 INT_FTYPE_PCCHAR, true);
32658 make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS,
32659 INT_FTYPE_PCCHAR, true);
32660 }
32661
32662 /* Internal method for ix86_init_builtins. */
32663
32664 static void
32665 ix86_init_builtins_va_builtins_abi (void)
32666 {
32667 tree ms_va_ref, sysv_va_ref;
32668 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
32669 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
32670 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
32671 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
32672
32673 if (!TARGET_64BIT)
32674 return;
32675 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
32676 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
32677 ms_va_ref = build_reference_type (ms_va_list_type_node);
32678 sysv_va_ref =
32679 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
32680
32681 fnvoid_va_end_ms =
32682 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
32683 fnvoid_va_start_ms =
32684 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
32685 fnvoid_va_end_sysv =
32686 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
32687 fnvoid_va_start_sysv =
32688 build_varargs_function_type_list (void_type_node, sysv_va_ref,
32689 NULL_TREE);
32690 fnvoid_va_copy_ms =
32691 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
32692 NULL_TREE);
32693 fnvoid_va_copy_sysv =
32694 build_function_type_list (void_type_node, sysv_va_ref,
32695 sysv_va_ref, NULL_TREE);
32696
32697 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
32698 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
32699 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
32700 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
32701 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
32702 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
32703 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
32704 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
32705 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
32706 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
32707 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
32708 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
32709 }
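
/* A usage sketch for the ms_abi variants registered above (hypothetical
   user code):

     int __attribute__ ((ms_abi))
     sum_ints (int n, ...)
     {
       __builtin_ms_va_list ap;
       int i, s = 0;
       __builtin_ms_va_start (ap, n);
       for (i = 0; i < n; i++)
         s += __builtin_va_arg (ap, int);
       __builtin_ms_va_end (ap);
       return s;
     }

   The __builtin_sysv_va_* builtins are the mirror case for sysv_abi
   varargs functions.  */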
32710
32711 static void
32712 ix86_init_builtin_types (void)
32713 {
32714 tree float128_type_node, float80_type_node;
32715
32716 /* The __float80 type. */
32717 float80_type_node = long_double_type_node;
32718 if (TYPE_MODE (float80_type_node) != XFmode)
32719 {
32720 /* long double is not the 80-bit type, so build __float80 separately. */
32721 float80_type_node = make_node (REAL_TYPE);
32722
32723 TYPE_PRECISION (float80_type_node) = 80;
32724 layout_type (float80_type_node);
32725 }
32726 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
32727
32728 /* The __float128 type. */
32729 float128_type_node = make_node (REAL_TYPE);
32730 TYPE_PRECISION (float128_type_node) = 128;
32731 layout_type (float128_type_node);
32732 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
32733
32734 /* This macro is built by i386-builtin-types.awk. */
32735 DEFINE_BUILTIN_PRIMITIVE_TYPES;
32736 }
32737
32738 static void
32739 ix86_init_builtins (void)
32740 {
32741 tree t;
32742
32743 ix86_init_builtin_types ();
32744
32745 /* Builtins to get CPU type and features. */
32746 ix86_init_platform_type_builtins ();
32747
32748 /* TFmode support builtins. */
32749 def_builtin_const (0, "__builtin_infq",
32750 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
32751 def_builtin_const (0, "__builtin_huge_valq",
32752 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
32753
32754 /* We will expand them to a normal call if SSE isn't available, since
32755 they are used by libgcc. */
32756 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
32757 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
32758 BUILT_IN_MD, "__fabstf2", NULL_TREE);
32759 TREE_READONLY (t) = 1;
32760 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
32761
32762 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
32763 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
32764 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
32765 TREE_READONLY (t) = 1;
32766 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
32767
32768 ix86_init_tm_builtins ();
32769 ix86_init_mmx_sse_builtins ();
32770
32771 if (TARGET_LP64)
32772 ix86_init_builtins_va_builtins_abi ();
32773
32774 #ifdef SUBTARGET_INIT_BUILTINS
32775 SUBTARGET_INIT_BUILTINS;
32776 #endif
32777 }
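
/* A usage sketch for the TFmode builtins registered above (hypothetical
   user code):

     __float128 x = __builtin_infq ();
     __float128 y = __builtin_copysignq (__builtin_fabsq (x), x);

   When SSE is unavailable, __builtin_fabsq and __builtin_copysignq fall
   back to calls to __fabstf2 and __copysigntf3 in libgcc, as noted
   above.  */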
32778
32779 /* Return the ix86 builtin for CODE. */
32780
32781 static tree
32782 ix86_builtin_decl (unsigned code, bool)
32783 {
32784 if (code >= IX86_BUILTIN_MAX)
32785 return error_mark_node;
32786
32787 return ix86_builtins[code];
32788 }
32789
32790 /* Errors in the source file can cause expand_expr to return const0_rtx
32791 where we expect a vector. To avoid crashing, use one of the vector
32792 clear instructions. */
32793 static rtx
32794 safe_vector_operand (rtx x, enum machine_mode mode)
32795 {
32796 if (x == const0_rtx)
32797 x = CONST0_RTX (mode);
32798 return x;
32799 }
32800
32801 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
32802
32803 static rtx
32804 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
32805 {
32806 rtx pat;
32807 tree arg0 = CALL_EXPR_ARG (exp, 0);
32808 tree arg1 = CALL_EXPR_ARG (exp, 1);
32809 rtx op0 = expand_normal (arg0);
32810 rtx op1 = expand_normal (arg1);
32811 enum machine_mode tmode = insn_data[icode].operand[0].mode;
32812 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
32813 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
32814
32815 if (VECTOR_MODE_P (mode0))
32816 op0 = safe_vector_operand (op0, mode0);
32817 if (VECTOR_MODE_P (mode1))
32818 op1 = safe_vector_operand (op1, mode1);
32819
32820 if (optimize || !target
32821 || GET_MODE (target) != tmode
32822 || !insn_data[icode].operand[0].predicate (target, tmode))
32823 target = gen_reg_rtx (tmode);
32824
32825 if (GET_MODE (op1) == SImode && mode1 == TImode)
32826 {
32827 rtx x = gen_reg_rtx (V4SImode);
32828 emit_insn (gen_sse2_loadd (x, op1));
32829 op1 = gen_lowpart (TImode, x);
32830 }
32831
32832 if (!insn_data[icode].operand[1].predicate (op0, mode0))
32833 op0 = copy_to_mode_reg (mode0, op0);
32834 if (!insn_data[icode].operand[2].predicate (op1, mode1))
32835 op1 = copy_to_mode_reg (mode1, op1);
32836
32837 pat = GEN_FCN (icode) (target, op0, op1);
32838 if (! pat)
32839 return 0;
32840
32841 emit_insn (pat);
32842
32843 return target;
32844 }
32845
32846 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
32847
32848 static rtx
32849 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
32850 enum ix86_builtin_func_type m_type,
32851 enum rtx_code sub_code)
32852 {
32853 rtx pat;
32854 int i;
32855 int nargs;
32856 bool comparison_p = false;
32857 bool tf_p = false;
32858 bool last_arg_constant = false;
32859 int num_memory = 0;
32860 struct {
32861 rtx op;
32862 enum machine_mode mode;
32863 } args[4];
32864
32865 enum machine_mode tmode = insn_data[icode].operand[0].mode;
32866
32867 switch (m_type)
32868 {
32869 case MULTI_ARG_4_DF2_DI_I:
32870 case MULTI_ARG_4_DF2_DI_I1:
32871 case MULTI_ARG_4_SF2_SI_I:
32872 case MULTI_ARG_4_SF2_SI_I1:
32873 nargs = 4;
32874 last_arg_constant = true;
32875 break;
32876
32877 case MULTI_ARG_3_SF:
32878 case MULTI_ARG_3_DF:
32879 case MULTI_ARG_3_SF2:
32880 case MULTI_ARG_3_DF2:
32881 case MULTI_ARG_3_DI:
32882 case MULTI_ARG_3_SI:
32883 case MULTI_ARG_3_SI_DI:
32884 case MULTI_ARG_3_HI:
32885 case MULTI_ARG_3_HI_SI:
32886 case MULTI_ARG_3_QI:
32887 case MULTI_ARG_3_DI2:
32888 case MULTI_ARG_3_SI2:
32889 case MULTI_ARG_3_HI2:
32890 case MULTI_ARG_3_QI2:
32891 nargs = 3;
32892 break;
32893
32894 case MULTI_ARG_2_SF:
32895 case MULTI_ARG_2_DF:
32896 case MULTI_ARG_2_DI:
32897 case MULTI_ARG_2_SI:
32898 case MULTI_ARG_2_HI:
32899 case MULTI_ARG_2_QI:
32900 nargs = 2;
32901 break;
32902
32903 case MULTI_ARG_2_DI_IMM:
32904 case MULTI_ARG_2_SI_IMM:
32905 case MULTI_ARG_2_HI_IMM:
32906 case MULTI_ARG_2_QI_IMM:
32907 nargs = 2;
32908 last_arg_constant = true;
32909 break;
32910
32911 case MULTI_ARG_1_SF:
32912 case MULTI_ARG_1_DF:
32913 case MULTI_ARG_1_SF2:
32914 case MULTI_ARG_1_DF2:
32915 case MULTI_ARG_1_DI:
32916 case MULTI_ARG_1_SI:
32917 case MULTI_ARG_1_HI:
32918 case MULTI_ARG_1_QI:
32919 case MULTI_ARG_1_SI_DI:
32920 case MULTI_ARG_1_HI_DI:
32921 case MULTI_ARG_1_HI_SI:
32922 case MULTI_ARG_1_QI_DI:
32923 case MULTI_ARG_1_QI_SI:
32924 case MULTI_ARG_1_QI_HI:
32925 nargs = 1;
32926 break;
32927
32928 case MULTI_ARG_2_DI_CMP:
32929 case MULTI_ARG_2_SI_CMP:
32930 case MULTI_ARG_2_HI_CMP:
32931 case MULTI_ARG_2_QI_CMP:
32932 nargs = 2;
32933 comparison_p = true;
32934 break;
32935
32936 case MULTI_ARG_2_SF_TF:
32937 case MULTI_ARG_2_DF_TF:
32938 case MULTI_ARG_2_DI_TF:
32939 case MULTI_ARG_2_SI_TF:
32940 case MULTI_ARG_2_HI_TF:
32941 case MULTI_ARG_2_QI_TF:
32942 nargs = 2;
32943 tf_p = true;
32944 break;
32945
32946 default:
32947 gcc_unreachable ();
32948 }
32949
32950 if (optimize || !target
32951 || GET_MODE (target) != tmode
32952 || !insn_data[icode].operand[0].predicate (target, tmode))
32953 target = gen_reg_rtx (tmode);
32954
32955 gcc_assert (nargs <= 4);
32956
32957 for (i = 0; i < nargs; i++)
32958 {
32959 tree arg = CALL_EXPR_ARG (exp, i);
32960 rtx op = expand_normal (arg);
32961 int adjust = (comparison_p) ? 1 : 0;
32962 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
32963
32964 if (last_arg_constant && i == nargs - 1)
32965 {
32966 if (!insn_data[icode].operand[i + 1].predicate (op, mode))
32967 {
32968 enum insn_code new_icode = icode;
32969 switch (icode)
32970 {
32971 case CODE_FOR_xop_vpermil2v2df3:
32972 case CODE_FOR_xop_vpermil2v4sf3:
32973 case CODE_FOR_xop_vpermil2v4df3:
32974 case CODE_FOR_xop_vpermil2v8sf3:
32975 error ("the last argument must be a 2-bit immediate");
32976 return gen_reg_rtx (tmode);
32977 case CODE_FOR_xop_rotlv2di3:
32978 new_icode = CODE_FOR_rotlv2di3;
32979 goto xop_rotl;
32980 case CODE_FOR_xop_rotlv4si3:
32981 new_icode = CODE_FOR_rotlv4si3;
32982 goto xop_rotl;
32983 case CODE_FOR_xop_rotlv8hi3:
32984 new_icode = CODE_FOR_rotlv8hi3;
32985 goto xop_rotl;
32986 case CODE_FOR_xop_rotlv16qi3:
32987 new_icode = CODE_FOR_rotlv16qi3;
32988 xop_rotl:
32989 if (CONST_INT_P (op))
32990 {
32991 int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
32992 op = GEN_INT (INTVAL (op) & mask);
32993 gcc_checking_assert
32994 (insn_data[icode].operand[i + 1].predicate (op, mode));
32995 }
32996 else
32997 {
32998 gcc_checking_assert
32999 (nargs == 2
33000 && insn_data[new_icode].operand[0].mode == tmode
33001 && insn_data[new_icode].operand[1].mode == tmode
33002 && insn_data[new_icode].operand[2].mode == mode
33003 && insn_data[new_icode].operand[0].predicate
33004 == insn_data[icode].operand[0].predicate
33005 && insn_data[new_icode].operand[1].predicate
33006 == insn_data[icode].operand[1].predicate);
33007 icode = new_icode;
33008 goto non_constant;
33009 }
33010 break;
33011 default:
33012 gcc_unreachable ();
33013 }
33014 }
33015 }
33016 else
33017 {
33018 non_constant:
33019 if (VECTOR_MODE_P (mode))
33020 op = safe_vector_operand (op, mode);
33021
33022 /* If we aren't optimizing, only allow one memory operand to be
33023 generated. */
33024 if (memory_operand (op, mode))
33025 num_memory++;
33026
33027 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
33028
33029 if (optimize
33030 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
33031 || num_memory > 1)
33032 op = force_reg (mode, op);
33033 }
33034
33035 args[i].op = op;
33036 args[i].mode = mode;
33037 }
33038
33039 switch (nargs)
33040 {
33041 case 1:
33042 pat = GEN_FCN (icode) (target, args[0].op);
33043 break;
33044
33045 case 2:
33046 if (tf_p)
33047 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
33048 GEN_INT ((int)sub_code));
33049 else if (! comparison_p)
33050 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
33051 else
33052 {
33053 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
33054 args[0].op,
33055 args[1].op);
33056
33057 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
33058 }
33059 break;
33060
33061 case 3:
33062 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
33063 break;
33064
33065 case 4:
33066 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
33067 break;
33068
33069 default:
33070 gcc_unreachable ();
33071 }
33072
33073 if (! pat)
33074 return 0;
33075
33076 emit_insn (pat);
33077 return target;
33078 }
33079
33080 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
33081 insns with vec_merge. */
33082
33083 static rtx
33084 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
33085 rtx target)
33086 {
33087 rtx pat;
33088 tree arg0 = CALL_EXPR_ARG (exp, 0);
33089 rtx op1, op0 = expand_normal (arg0);
33090 enum machine_mode tmode = insn_data[icode].operand[0].mode;
33091 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
33092
33093 if (optimize || !target
33094 || GET_MODE (target) != tmode
33095 || !insn_data[icode].operand[0].predicate (target, tmode))
33096 target = gen_reg_rtx (tmode);
33097
33098 if (VECTOR_MODE_P (mode0))
33099 op0 = safe_vector_operand (op0, mode0);
33100
33101 if ((optimize && !register_operand (op0, mode0))
33102 || !insn_data[icode].operand[1].predicate (op0, mode0))
33103 op0 = copy_to_mode_reg (mode0, op0);
33104
33105 op1 = op0;
33106 if (!insn_data[icode].operand[2].predicate (op1, mode0))
33107 op1 = copy_to_mode_reg (mode0, op1);
33108
33109 pat = GEN_FCN (icode) (target, op0, op1);
33110 if (! pat)
33111 return 0;
33112 emit_insn (pat);
33113 return target;
33114 }
33115
33116 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
33117
33118 static rtx
33119 ix86_expand_sse_compare (const struct builtin_description *d,
33120 tree exp, rtx target, bool swap)
33121 {
33122 rtx pat;
33123 tree arg0 = CALL_EXPR_ARG (exp, 0);
33124 tree arg1 = CALL_EXPR_ARG (exp, 1);
33125 rtx op0 = expand_normal (arg0);
33126 rtx op1 = expand_normal (arg1);
33127 rtx op2;
33128 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
33129 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
33130 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
33131 enum rtx_code comparison = d->comparison;
33132
33133 if (VECTOR_MODE_P (mode0))
33134 op0 = safe_vector_operand (op0, mode0);
33135 if (VECTOR_MODE_P (mode1))
33136 op1 = safe_vector_operand (op1, mode1);
33137
33138 /* Swap operands if we have a comparison that isn't available in
33139 hardware. */
33140 if (swap)
33141 {
33142 rtx tmp = gen_reg_rtx (mode1);
33143 emit_move_insn (tmp, op1);
33144 op1 = op0;
33145 op0 = tmp;
33146 }
33147
33148 if (optimize || !target
33149 || GET_MODE (target) != tmode
33150 || !insn_data[d->icode].operand[0].predicate (target, tmode))
33151 target = gen_reg_rtx (tmode);
33152
33153 if ((optimize && !register_operand (op0, mode0))
33154 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
33155 op0 = copy_to_mode_reg (mode0, op0);
33156 if ((optimize && !register_operand (op1, mode1))
33157 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
33158 op1 = copy_to_mode_reg (mode1, op1);
33159
33160 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
33161 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
33162 if (! pat)
33163 return 0;
33164 emit_insn (pat);
33165 return target;
33166 }
33167
33168 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
33169
33170 static rtx
33171 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
33172 rtx target)
33173 {
33174 rtx pat;
33175 tree arg0 = CALL_EXPR_ARG (exp, 0);
33176 tree arg1 = CALL_EXPR_ARG (exp, 1);
33177 rtx op0 = expand_normal (arg0);
33178 rtx op1 = expand_normal (arg1);
33179 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
33180 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
33181 enum rtx_code comparison = d->comparison;
33182
33183 if (VECTOR_MODE_P (mode0))
33184 op0 = safe_vector_operand (op0, mode0);
33185 if (VECTOR_MODE_P (mode1))
33186 op1 = safe_vector_operand (op1, mode1);
33187
33188 /* Swap operands if we have a comparison that isn't available in
33189 hardware. */
33190 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
33191 {
33192 rtx tmp = op1;
33193 op1 = op0;
33194 op0 = tmp;
33195 }
33196
33197 target = gen_reg_rtx (SImode);
33198 emit_move_insn (target, const0_rtx);
33199 target = gen_rtx_SUBREG (QImode, target, 0);
33200
33201 if ((optimize && !register_operand (op0, mode0))
33202 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33203 op0 = copy_to_mode_reg (mode0, op0);
33204 if ((optimize && !register_operand (op1, mode1))
33205 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
33206 op1 = copy_to_mode_reg (mode1, op1);
33207
33208 pat = GEN_FCN (d->icode) (op0, op1);
33209 if (! pat)
33210 return 0;
33211 emit_insn (pat);
33212 emit_insn (gen_rtx_SET (VOIDmode,
33213 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33214 gen_rtx_fmt_ee (comparison, QImode,
33215 SET_DEST (pat),
33216 const0_rtx)));
33217
33218 return SUBREG_REG (target);
33219 }
33220
33221 /* Subroutines of ix86_expand_args_builtin to take care of round insns. */
33222
33223 static rtx
33224 ix86_expand_sse_round (const struct builtin_description *d, tree exp,
33225 rtx target)
33226 {
33227 rtx pat;
33228 tree arg0 = CALL_EXPR_ARG (exp, 0);
33229 rtx op1, op0 = expand_normal (arg0);
33230 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
33231 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
33232
33233 if (optimize || target == 0
33234 || GET_MODE (target) != tmode
33235 || !insn_data[d->icode].operand[0].predicate (target, tmode))
33236 target = gen_reg_rtx (tmode);
33237
33238 if (VECTOR_MODE_P (mode0))
33239 op0 = safe_vector_operand (op0, mode0);
33240
33241 if ((optimize && !register_operand (op0, mode0))
33242 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33243 op0 = copy_to_mode_reg (mode0, op0);
33244
33245 op1 = GEN_INT (d->comparison);
33246
33247 pat = GEN_FCN (d->icode) (target, op0, op1);
33248 if (! pat)
33249 return 0;
33250 emit_insn (pat);
33251 return target;
33252 }
33253
33254 static rtx
33255 ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d,
33256 tree exp, rtx target)
33257 {
33258 rtx pat;
33259 tree arg0 = CALL_EXPR_ARG (exp, 0);
33260 tree arg1 = CALL_EXPR_ARG (exp, 1);
33261 rtx op0 = expand_normal (arg0);
33262 rtx op1 = expand_normal (arg1);
33263 rtx op2;
33264 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
33265 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
33266 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
33267
33268 if (optimize || target == 0
33269 || GET_MODE (target) != tmode
33270 || !insn_data[d->icode].operand[0].predicate (target, tmode))
33271 target = gen_reg_rtx (tmode);
33272
33273 op0 = safe_vector_operand (op0, mode0);
33274 op1 = safe_vector_operand (op1, mode1);
33275
33276 if ((optimize && !register_operand (op0, mode0))
33277 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33278 op0 = copy_to_mode_reg (mode0, op0);
33279 if ((optimize && !register_operand (op1, mode1))
33280 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
33281 op1 = copy_to_mode_reg (mode1, op1);
33282
33283 op2 = GEN_INT (d->comparison);
33284
33285 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
33286 if (! pat)
33287 return 0;
33288 emit_insn (pat);
33289 return target;
33290 }
33291
33292 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
33293
33294 static rtx
33295 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
33296 rtx target)
33297 {
33298 rtx pat;
33299 tree arg0 = CALL_EXPR_ARG (exp, 0);
33300 tree arg1 = CALL_EXPR_ARG (exp, 1);
33301 rtx op0 = expand_normal (arg0);
33302 rtx op1 = expand_normal (arg1);
33303 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
33304 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
33305 enum rtx_code comparison = d->comparison;
33306
33307 if (VECTOR_MODE_P (mode0))
33308 op0 = safe_vector_operand (op0, mode0);
33309 if (VECTOR_MODE_P (mode1))
33310 op1 = safe_vector_operand (op1, mode1);
33311
33312 target = gen_reg_rtx (SImode);
33313 emit_move_insn (target, const0_rtx);
33314 target = gen_rtx_SUBREG (QImode, target, 0);
33315
33316 if ((optimize && !register_operand (op0, mode0))
33317 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33318 op0 = copy_to_mode_reg (mode0, op0);
33319 if ((optimize && !register_operand (op1, mode1))
33320 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
33321 op1 = copy_to_mode_reg (mode1, op1);
33322
33323 pat = GEN_FCN (d->icode) (op0, op1);
33324 if (! pat)
33325 return 0;
33326 emit_insn (pat);
33327 emit_insn (gen_rtx_SET (VOIDmode,
33328 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33329 gen_rtx_fmt_ee (comparison, QImode,
33330 SET_DEST (pat),
33331 const0_rtx)));
33332
33333 return SUBREG_REG (target);
33334 }
33335
33336 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
33337
33338 static rtx
33339 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
33340 tree exp, rtx target)
33341 {
33342 rtx pat;
33343 tree arg0 = CALL_EXPR_ARG (exp, 0);
33344 tree arg1 = CALL_EXPR_ARG (exp, 1);
33345 tree arg2 = CALL_EXPR_ARG (exp, 2);
33346 tree arg3 = CALL_EXPR_ARG (exp, 3);
33347 tree arg4 = CALL_EXPR_ARG (exp, 4);
33348 rtx scratch0, scratch1;
33349 rtx op0 = expand_normal (arg0);
33350 rtx op1 = expand_normal (arg1);
33351 rtx op2 = expand_normal (arg2);
33352 rtx op3 = expand_normal (arg3);
33353 rtx op4 = expand_normal (arg4);
33354 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
33355
33356 tmode0 = insn_data[d->icode].operand[0].mode;
33357 tmode1 = insn_data[d->icode].operand[1].mode;
33358 modev2 = insn_data[d->icode].operand[2].mode;
33359 modei3 = insn_data[d->icode].operand[3].mode;
33360 modev4 = insn_data[d->icode].operand[4].mode;
33361 modei5 = insn_data[d->icode].operand[5].mode;
33362 modeimm = insn_data[d->icode].operand[6].mode;
33363
33364 if (VECTOR_MODE_P (modev2))
33365 op0 = safe_vector_operand (op0, modev2);
33366 if (VECTOR_MODE_P (modev4))
33367 op2 = safe_vector_operand (op2, modev4);
33368
33369 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
33370 op0 = copy_to_mode_reg (modev2, op0);
33371 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
33372 op1 = copy_to_mode_reg (modei3, op1);
33373 if ((optimize && !register_operand (op2, modev4))
33374 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
33375 op2 = copy_to_mode_reg (modev4, op2);
33376 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
33377 op3 = copy_to_mode_reg (modei5, op3);
33378
33379 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
33380 {
33381 error ("the fifth argument must be an 8-bit immediate");
33382 return const0_rtx;
33383 }
33384
33385 if (d->code == IX86_BUILTIN_PCMPESTRI128)
33386 {
33387 if (optimize || !target
33388 || GET_MODE (target) != tmode0
33389 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
33390 target = gen_reg_rtx (tmode0);
33391
33392 scratch1 = gen_reg_rtx (tmode1);
33393
33394 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
33395 }
33396 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
33397 {
33398 if (optimize || !target
33399 || GET_MODE (target) != tmode1
33400 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
33401 target = gen_reg_rtx (tmode1);
33402
33403 scratch0 = gen_reg_rtx (tmode0);
33404
33405 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
33406 }
33407 else
33408 {
33409 gcc_assert (d->flag);
33410
33411 scratch0 = gen_reg_rtx (tmode0);
33412 scratch1 = gen_reg_rtx (tmode1);
33413
33414 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
33415 }
33416
33417 if (! pat)
33418 return 0;
33419
33420 emit_insn (pat);
33421
33422 if (d->flag)
33423 {
33424 target = gen_reg_rtx (SImode);
33425 emit_move_insn (target, const0_rtx);
33426 target = gen_rtx_SUBREG (QImode, target, 0);
33427
33428 emit_insn
33429 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33430 gen_rtx_fmt_ee (EQ, QImode,
33431 gen_rtx_REG ((enum machine_mode) d->flag,
33432 FLAGS_REG),
33433 const0_rtx)));
33434 return SUBREG_REG (target);
33435 }
33436 else
33437 return target;
33438 }
33439
33440
33441 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
33442
33443 static rtx
33444 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
33445 tree exp, rtx target)
33446 {
33447 rtx pat;
33448 tree arg0 = CALL_EXPR_ARG (exp, 0);
33449 tree arg1 = CALL_EXPR_ARG (exp, 1);
33450 tree arg2 = CALL_EXPR_ARG (exp, 2);
33451 rtx scratch0, scratch1;
33452 rtx op0 = expand_normal (arg0);
33453 rtx op1 = expand_normal (arg1);
33454 rtx op2 = expand_normal (arg2);
33455 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
33456
33457 tmode0 = insn_data[d->icode].operand[0].mode;
33458 tmode1 = insn_data[d->icode].operand[1].mode;
33459 modev2 = insn_data[d->icode].operand[2].mode;
33460 modev3 = insn_data[d->icode].operand[3].mode;
33461 modeimm = insn_data[d->icode].operand[4].mode;
33462
33463 if (VECTOR_MODE_P (modev2))
33464 op0 = safe_vector_operand (op0, modev2);
33465 if (VECTOR_MODE_P (modev3))
33466 op1 = safe_vector_operand (op1, modev3);
33467
33468 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
33469 op0 = copy_to_mode_reg (modev2, op0);
33470 if ((optimize && !register_operand (op1, modev3))
33471 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
33472 op1 = copy_to_mode_reg (modev3, op1);
33473
33474 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
33475 {
33476 error ("the third argument must be an 8-bit immediate");
33477 return const0_rtx;
33478 }
33479
33480 if (d->code == IX86_BUILTIN_PCMPISTRI128)
33481 {
33482 if (optimize || !target
33483 || GET_MODE (target) != tmode0
33484 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
33485 target = gen_reg_rtx (tmode0);
33486
33487 scratch1 = gen_reg_rtx (tmode1);
33488
33489 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
33490 }
33491 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
33492 {
33493 if (optimize || !target
33494 || GET_MODE (target) != tmode1
33495 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
33496 target = gen_reg_rtx (tmode1);
33497
33498 scratch0 = gen_reg_rtx (tmode0);
33499
33500 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
33501 }
33502 else
33503 {
33504 gcc_assert (d->flag);
33505
33506 scratch0 = gen_reg_rtx (tmode0);
33507 scratch1 = gen_reg_rtx (tmode1);
33508
33509 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
33510 }
33511
33512 if (! pat)
33513 return 0;
33514
33515 emit_insn (pat);
33516
33517 if (d->flag)
33518 {
33519 target = gen_reg_rtx (SImode);
33520 emit_move_insn (target, const0_rtx);
33521 target = gen_rtx_SUBREG (QImode, target, 0);
33522
33523 emit_insn
33524 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33525 gen_rtx_fmt_ee (EQ, QImode,
33526 gen_rtx_REG ((enum machine_mode) d->flag,
33527 FLAGS_REG),
33528 const0_rtx)));
33529 return SUBREG_REG (target);
33530 }
33531 else
33532 return target;
33533 }
33534
33535 /* Subroutine of ix86_expand_builtin to take care of insns with
33536 a variable number of operands. */
33537
33538 static rtx
33539 ix86_expand_args_builtin (const struct builtin_description *d,
33540 tree exp, rtx target)
33541 {
33542 rtx pat, real_target;
33543 unsigned int i, nargs;
33544 unsigned int nargs_constant = 0;
33545 unsigned int mask_pos = 0;
33546 int num_memory = 0;
33547 struct
33548 {
33549 rtx op;
33550 enum machine_mode mode;
33551 } args[6];
33552 bool last_arg_count = false;
33553 enum insn_code icode = d->icode;
33554 const struct insn_data_d *insn_p = &insn_data[icode];
33555 enum machine_mode tmode = insn_p->operand[0].mode;
33556 enum machine_mode rmode = VOIDmode;
33557 bool swap = false;
33558 enum rtx_code comparison = d->comparison;
33559
33560 switch ((enum ix86_builtin_func_type) d->flag)
33561 {
33562 case V2DF_FTYPE_V2DF_ROUND:
33563 case V4DF_FTYPE_V4DF_ROUND:
33564 case V4SF_FTYPE_V4SF_ROUND:
33565 case V8SF_FTYPE_V8SF_ROUND:
33566 case V4SI_FTYPE_V4SF_ROUND:
33567 case V8SI_FTYPE_V8SF_ROUND:
33568 return ix86_expand_sse_round (d, exp, target);
33569 case V4SI_FTYPE_V2DF_V2DF_ROUND:
33570 case V8SI_FTYPE_V4DF_V4DF_ROUND:
33571 case V16SI_FTYPE_V8DF_V8DF_ROUND:
33572 return ix86_expand_sse_round_vec_pack_sfix (d, exp, target);
33573 case INT_FTYPE_V8SF_V8SF_PTEST:
33574 case INT_FTYPE_V4DI_V4DI_PTEST:
33575 case INT_FTYPE_V4DF_V4DF_PTEST:
33576 case INT_FTYPE_V4SF_V4SF_PTEST:
33577 case INT_FTYPE_V2DI_V2DI_PTEST:
33578 case INT_FTYPE_V2DF_V2DF_PTEST:
33579 return ix86_expand_sse_ptest (d, exp, target);
33580 case FLOAT128_FTYPE_FLOAT128:
33581 case FLOAT_FTYPE_FLOAT:
33582 case INT_FTYPE_INT:
33583 case UINT64_FTYPE_INT:
33584 case UINT16_FTYPE_UINT16:
33585 case INT64_FTYPE_INT64:
33586 case INT64_FTYPE_V4SF:
33587 case INT64_FTYPE_V2DF:
33588 case INT_FTYPE_V16QI:
33589 case INT_FTYPE_V8QI:
33590 case INT_FTYPE_V8SF:
33591 case INT_FTYPE_V4DF:
33592 case INT_FTYPE_V4SF:
33593 case INT_FTYPE_V2DF:
33594 case INT_FTYPE_V32QI:
33595 case V16QI_FTYPE_V16QI:
33596 case V8SI_FTYPE_V8SF:
33597 case V8SI_FTYPE_V4SI:
33598 case V8HI_FTYPE_V8HI:
33599 case V8HI_FTYPE_V16QI:
33600 case V8QI_FTYPE_V8QI:
33601 case V8SF_FTYPE_V8SF:
33602 case V8SF_FTYPE_V8SI:
33603 case V8SF_FTYPE_V4SF:
33604 case V8SF_FTYPE_V8HI:
33605 case V4SI_FTYPE_V4SI:
33606 case V4SI_FTYPE_V16QI:
33607 case V4SI_FTYPE_V4SF:
33608 case V4SI_FTYPE_V8SI:
33609 case V4SI_FTYPE_V8HI:
33610 case V4SI_FTYPE_V4DF:
33611 case V4SI_FTYPE_V2DF:
33612 case V4HI_FTYPE_V4HI:
33613 case V4DF_FTYPE_V4DF:
33614 case V4DF_FTYPE_V4SI:
33615 case V4DF_FTYPE_V4SF:
33616 case V4DF_FTYPE_V2DF:
33617 case V4SF_FTYPE_V4SF:
33618 case V4SF_FTYPE_V4SI:
33619 case V4SF_FTYPE_V8SF:
33620 case V4SF_FTYPE_V4DF:
33621 case V4SF_FTYPE_V8HI:
33622 case V4SF_FTYPE_V2DF:
33623 case V2DI_FTYPE_V2DI:
33624 case V2DI_FTYPE_V16QI:
33625 case V2DI_FTYPE_V8HI:
33626 case V2DI_FTYPE_V4SI:
33627 case V2DF_FTYPE_V2DF:
33628 case V2DF_FTYPE_V4SI:
33629 case V2DF_FTYPE_V4DF:
33630 case V2DF_FTYPE_V4SF:
33631 case V2DF_FTYPE_V2SI:
33632 case V2SI_FTYPE_V2SI:
33633 case V2SI_FTYPE_V4SF:
33634 case V2SI_FTYPE_V2SF:
33635 case V2SI_FTYPE_V2DF:
33636 case V2SF_FTYPE_V2SF:
33637 case V2SF_FTYPE_V2SI:
33638 case V32QI_FTYPE_V32QI:
33639 case V32QI_FTYPE_V16QI:
33640 case V16HI_FTYPE_V16HI:
33641 case V16HI_FTYPE_V8HI:
33642 case V8SI_FTYPE_V8SI:
33643 case V16HI_FTYPE_V16QI:
33644 case V8SI_FTYPE_V16QI:
33645 case V4DI_FTYPE_V16QI:
33646 case V8SI_FTYPE_V8HI:
33647 case V4DI_FTYPE_V8HI:
33648 case V4DI_FTYPE_V4SI:
33649 case V4DI_FTYPE_V2DI:
33650 case HI_FTYPE_HI:
33651 case UINT_FTYPE_V2DF:
33652 case UINT_FTYPE_V4SF:
33653 case UINT64_FTYPE_V2DF:
33654 case UINT64_FTYPE_V4SF:
33655 case V16QI_FTYPE_V8DI:
33656 case V16HI_FTYPE_V16SI:
33657 case V16SI_FTYPE_HI:
33658 case V16SI_FTYPE_V16SI:
33659 case V16SI_FTYPE_INT:
33660 case V16SF_FTYPE_FLOAT:
33661 case V16SF_FTYPE_V8SF:
33662 case V16SI_FTYPE_V8SI:
33663 case V16SF_FTYPE_V4SF:
33664 case V16SI_FTYPE_V4SI:
33665 case V16SF_FTYPE_V16SF:
33666 case V8HI_FTYPE_V8DI:
33667 case V8UHI_FTYPE_V8UHI:
33668 case V8SI_FTYPE_V8DI:
33669 case V8USI_FTYPE_V8USI:
33670 case V8SF_FTYPE_V8DF:
33671 case V8DI_FTYPE_QI:
33672 case V8DI_FTYPE_INT64:
33673 case V8DI_FTYPE_V4DI:
33674 case V8DI_FTYPE_V8DI:
33675 case V8DF_FTYPE_DOUBLE:
33676 case V8DF_FTYPE_V4DF:
33677 case V8DF_FTYPE_V2DF:
33678 case V8DF_FTYPE_V8DF:
33679 case V8DF_FTYPE_V8SI:
33680 nargs = 1;
33681 break;
33682 case V4SF_FTYPE_V4SF_VEC_MERGE:
33683 case V2DF_FTYPE_V2DF_VEC_MERGE:
33684 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
33685 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
33686 case V16QI_FTYPE_V16QI_V16QI:
33687 case V16QI_FTYPE_V8HI_V8HI:
33688 case V16SI_FTYPE_V16SI_V16SI:
33689 case V16SF_FTYPE_V16SF_V16SF:
33690 case V16SF_FTYPE_V16SF_V16SI:
33691 case V8QI_FTYPE_V8QI_V8QI:
33692 case V8QI_FTYPE_V4HI_V4HI:
33693 case V8HI_FTYPE_V8HI_V8HI:
33694 case V8HI_FTYPE_V16QI_V16QI:
33695 case V8HI_FTYPE_V4SI_V4SI:
33696 case V8SF_FTYPE_V8SF_V8SF:
33697 case V8SF_FTYPE_V8SF_V8SI:
33698 case V8DI_FTYPE_V8DI_V8DI:
33699 case V8DF_FTYPE_V8DF_V8DF:
33700 case V8DF_FTYPE_V8DF_V8DI:
33701 case V4SI_FTYPE_V4SI_V4SI:
33702 case V4SI_FTYPE_V8HI_V8HI:
33703 case V4SI_FTYPE_V4SF_V4SF:
33704 case V4SI_FTYPE_V2DF_V2DF:
33705 case V4HI_FTYPE_V4HI_V4HI:
33706 case V4HI_FTYPE_V8QI_V8QI:
33707 case V4HI_FTYPE_V2SI_V2SI:
33708 case V4DF_FTYPE_V4DF_V4DF:
33709 case V4DF_FTYPE_V4DF_V4DI:
33710 case V4SF_FTYPE_V4SF_V4SF:
33711 case V4SF_FTYPE_V4SF_V4SI:
33712 case V4SF_FTYPE_V4SF_V2SI:
33713 case V4SF_FTYPE_V4SF_V2DF:
33714 case V4SF_FTYPE_V4SF_UINT:
33715 case V4SF_FTYPE_V4SF_UINT64:
33716 case V4SF_FTYPE_V4SF_DI:
33717 case V4SF_FTYPE_V4SF_SI:
33718 case V2DI_FTYPE_V2DI_V2DI:
33719 case V2DI_FTYPE_V16QI_V16QI:
33720 case V2DI_FTYPE_V4SI_V4SI:
33721 case V2UDI_FTYPE_V4USI_V4USI:
33722 case V2DI_FTYPE_V2DI_V16QI:
33723 case V2DI_FTYPE_V2DF_V2DF:
33724 case V2SI_FTYPE_V2SI_V2SI:
33725 case V2SI_FTYPE_V4HI_V4HI:
33726 case V2SI_FTYPE_V2SF_V2SF:
33727 case V2DF_FTYPE_V2DF_V2DF:
33728 case V2DF_FTYPE_V2DF_V4SF:
33729 case V2DF_FTYPE_V2DF_V2DI:
33730 case V2DF_FTYPE_V2DF_DI:
33731 case V2DF_FTYPE_V2DF_SI:
33732 case V2DF_FTYPE_V2DF_UINT:
33733 case V2DF_FTYPE_V2DF_UINT64:
33734 case V2SF_FTYPE_V2SF_V2SF:
33735 case V1DI_FTYPE_V1DI_V1DI:
33736 case V1DI_FTYPE_V8QI_V8QI:
33737 case V1DI_FTYPE_V2SI_V2SI:
33738 case V32QI_FTYPE_V16HI_V16HI:
33739 case V16HI_FTYPE_V8SI_V8SI:
33740 case V32QI_FTYPE_V32QI_V32QI:
33741 case V16HI_FTYPE_V32QI_V32QI:
33742 case V16HI_FTYPE_V16HI_V16HI:
33743 case V8SI_FTYPE_V4DF_V4DF:
33744 case V8SI_FTYPE_V8SI_V8SI:
33745 case V8SI_FTYPE_V16HI_V16HI:
33746 case V4DI_FTYPE_V4DI_V4DI:
33747 case V4DI_FTYPE_V8SI_V8SI:
33748 case V4UDI_FTYPE_V8USI_V8USI:
33749 case QI_FTYPE_V8DI_V8DI:
33750 case HI_FTYPE_V16SI_V16SI:
33751 if (comparison == UNKNOWN)
33752 return ix86_expand_binop_builtin (icode, exp, target);
33753 nargs = 2;
33754 break;
33755 case V4SF_FTYPE_V4SF_V4SF_SWAP:
33756 case V2DF_FTYPE_V2DF_V2DF_SWAP:
33757 gcc_assert (comparison != UNKNOWN);
33758 nargs = 2;
33759 swap = true;
33760 break;
33761 case V16HI_FTYPE_V16HI_V8HI_COUNT:
33762 case V16HI_FTYPE_V16HI_SI_COUNT:
33763 case V8SI_FTYPE_V8SI_V4SI_COUNT:
33764 case V8SI_FTYPE_V8SI_SI_COUNT:
33765 case V4DI_FTYPE_V4DI_V2DI_COUNT:
33766 case V4DI_FTYPE_V4DI_INT_COUNT:
33767 case V8HI_FTYPE_V8HI_V8HI_COUNT:
33768 case V8HI_FTYPE_V8HI_SI_COUNT:
33769 case V4SI_FTYPE_V4SI_V4SI_COUNT:
33770 case V4SI_FTYPE_V4SI_SI_COUNT:
33771 case V4HI_FTYPE_V4HI_V4HI_COUNT:
33772 case V4HI_FTYPE_V4HI_SI_COUNT:
33773 case V2DI_FTYPE_V2DI_V2DI_COUNT:
33774 case V2DI_FTYPE_V2DI_SI_COUNT:
33775 case V2SI_FTYPE_V2SI_V2SI_COUNT:
33776 case V2SI_FTYPE_V2SI_SI_COUNT:
33777 case V1DI_FTYPE_V1DI_V1DI_COUNT:
33778 case V1DI_FTYPE_V1DI_SI_COUNT:
33779 nargs = 2;
33780 last_arg_count = true;
33781 break;
33782 case UINT64_FTYPE_UINT64_UINT64:
33783 case UINT_FTYPE_UINT_UINT:
33784 case UINT_FTYPE_UINT_USHORT:
33785 case UINT_FTYPE_UINT_UCHAR:
33786 case UINT16_FTYPE_UINT16_INT:
33787 case UINT8_FTYPE_UINT8_INT:
33788 case HI_FTYPE_HI_HI:
33789 case V16SI_FTYPE_V8DF_V8DF:
33790 nargs = 2;
33791 break;
33792 case V2DI_FTYPE_V2DI_INT_CONVERT:
33793 nargs = 2;
33794 rmode = V1TImode;
33795 nargs_constant = 1;
33796 break;
33797 case V4DI_FTYPE_V4DI_INT_CONVERT:
33798 nargs = 2;
33799 rmode = V2TImode;
33800 nargs_constant = 1;
33801 break;
33802 case V8HI_FTYPE_V8HI_INT:
33803 case V8HI_FTYPE_V8SF_INT:
33804 case V16HI_FTYPE_V16SF_INT:
33805 case V8HI_FTYPE_V4SF_INT:
33806 case V8SF_FTYPE_V8SF_INT:
33807 case V4SF_FTYPE_V16SF_INT:
33808 case V16SF_FTYPE_V16SF_INT:
33809 case V4SI_FTYPE_V4SI_INT:
33810 case V4SI_FTYPE_V8SI_INT:
33811 case V4HI_FTYPE_V4HI_INT:
33812 case V4DF_FTYPE_V4DF_INT:
33813 case V4DF_FTYPE_V8DF_INT:
33814 case V4SF_FTYPE_V4SF_INT:
33815 case V4SF_FTYPE_V8SF_INT:
33816 case V2DI_FTYPE_V2DI_INT:
33817 case V2DF_FTYPE_V2DF_INT:
33818 case V2DF_FTYPE_V4DF_INT:
33819 case V16HI_FTYPE_V16HI_INT:
33820 case V8SI_FTYPE_V8SI_INT:
33821 case V16SI_FTYPE_V16SI_INT:
33822 case V4SI_FTYPE_V16SI_INT:
33823 case V4DI_FTYPE_V4DI_INT:
33824 case V2DI_FTYPE_V4DI_INT:
33825 case V4DI_FTYPE_V8DI_INT:
33826 case HI_FTYPE_HI_INT:
33827 nargs = 2;
33828 nargs_constant = 1;
33829 break;
33830 case V16QI_FTYPE_V16QI_V16QI_V16QI:
33831 case V8SF_FTYPE_V8SF_V8SF_V8SF:
33832 case V4DF_FTYPE_V4DF_V4DF_V4DF:
33833 case V4SF_FTYPE_V4SF_V4SF_V4SF:
33834 case V2DF_FTYPE_V2DF_V2DF_V2DF:
33835 case V32QI_FTYPE_V32QI_V32QI_V32QI:
33836 case HI_FTYPE_V16SI_V16SI_HI:
33837 case QI_FTYPE_V8DI_V8DI_QI:
33838 case V16HI_FTYPE_V16SI_V16HI_HI:
33839 case V16QI_FTYPE_V16SI_V16QI_HI:
33840 case V16QI_FTYPE_V8DI_V16QI_QI:
33841 case V16SF_FTYPE_V16SF_V16SF_HI:
33842 case V16SF_FTYPE_V16SF_V16SF_V16SF:
33843 case V16SF_FTYPE_V16SF_V16SI_V16SF:
33844 case V16SF_FTYPE_V16SI_V16SF_HI:
33845 case V16SF_FTYPE_V16SI_V16SF_V16SF:
33846 case V16SF_FTYPE_V4SF_V16SF_HI:
33847 case V16SI_FTYPE_SI_V16SI_HI:
33848 case V16SI_FTYPE_V16HI_V16SI_HI:
33849 case V16SI_FTYPE_V16QI_V16SI_HI:
33850 case V16SI_FTYPE_V16SF_V16SI_HI:
33851 case V16SI_FTYPE_V16SI_V16SI_HI:
33852 case V16SI_FTYPE_V16SI_V16SI_V16SI:
33853 case V16SI_FTYPE_V4SI_V16SI_HI:
33854 case V2DI_FTYPE_V2DI_V2DI_V2DI:
33855 case V4DI_FTYPE_V4DI_V4DI_V4DI:
33856 case V8DF_FTYPE_V2DF_V8DF_QI:
33857 case V8DF_FTYPE_V4DF_V8DF_QI:
33858 case V8DF_FTYPE_V8DF_V8DF_QI:
33859 case V8DF_FTYPE_V8DF_V8DF_V8DF:
33860 case V8DF_FTYPE_V8DF_V8DI_V8DF:
33861 case V8DF_FTYPE_V8DI_V8DF_V8DF:
33862 case V8DF_FTYPE_V8SF_V8DF_QI:
33863 case V8DF_FTYPE_V8SI_V8DF_QI:
33864 case V8DI_FTYPE_DI_V8DI_QI:
33865 case V8DI_FTYPE_V16QI_V8DI_QI:
33866 case V8DI_FTYPE_V2DI_V8DI_QI:
33867 case V8DI_FTYPE_V4DI_V8DI_QI:
33868 case V8DI_FTYPE_V8DI_V8DI_QI:
33869 case V8DI_FTYPE_V8DI_V8DI_V8DI:
33870 case V8DI_FTYPE_V8HI_V8DI_QI:
33871 case V8DI_FTYPE_V8SI_V8DI_QI:
33872 case V8HI_FTYPE_V8DI_V8HI_QI:
33873 case V8SF_FTYPE_V8DF_V8SF_QI:
33874 case V8SI_FTYPE_V8DF_V8SI_QI:
33875 case V8SI_FTYPE_V8DI_V8SI_QI:
33876 case V4SI_FTYPE_V4SI_V4SI_V4SI:
33877 nargs = 3;
33878 break;
33879 case V32QI_FTYPE_V32QI_V32QI_INT:
33880 case V16HI_FTYPE_V16HI_V16HI_INT:
33881 case V16QI_FTYPE_V16QI_V16QI_INT:
33882 case V4DI_FTYPE_V4DI_V4DI_INT:
33883 case V8HI_FTYPE_V8HI_V8HI_INT:
33884 case V8SI_FTYPE_V8SI_V8SI_INT:
33885 case V8SI_FTYPE_V8SI_V4SI_INT:
33886 case V8SF_FTYPE_V8SF_V8SF_INT:
33887 case V8SF_FTYPE_V8SF_V4SF_INT:
33888 case V4SI_FTYPE_V4SI_V4SI_INT:
33889 case V4DF_FTYPE_V4DF_V4DF_INT:
33890 case V16SF_FTYPE_V16SF_V16SF_INT:
33891 case V16SF_FTYPE_V16SF_V4SF_INT:
33892 case V16SI_FTYPE_V16SI_V4SI_INT:
33893 case V4DF_FTYPE_V4DF_V2DF_INT:
33894 case V4SF_FTYPE_V4SF_V4SF_INT:
33895 case V2DI_FTYPE_V2DI_V2DI_INT:
33896 case V4DI_FTYPE_V4DI_V2DI_INT:
33897 case V2DF_FTYPE_V2DF_V2DF_INT:
33898 case QI_FTYPE_V8DI_V8DI_INT:
33899 case QI_FTYPE_V8DF_V8DF_INT:
33900 case QI_FTYPE_V2DF_V2DF_INT:
33901 case QI_FTYPE_V4SF_V4SF_INT:
33902 case HI_FTYPE_V16SI_V16SI_INT:
33903 case HI_FTYPE_V16SF_V16SF_INT:
33904 nargs = 3;
33905 nargs_constant = 1;
33906 break;
33907 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
33908 nargs = 3;
33909 rmode = V4DImode;
33910 nargs_constant = 1;
33911 break;
33912 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
33913 nargs = 3;
33914 rmode = V2DImode;
33915 nargs_constant = 1;
33916 break;
33917 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
33918 nargs = 3;
33919 rmode = DImode;
33920 nargs_constant = 1;
33921 break;
33922 case V2DI_FTYPE_V2DI_UINT_UINT:
33923 nargs = 3;
33924 nargs_constant = 2;
33925 break;
33926 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI:
33927 case V16SF_FTYPE_V16SF_V16SI_V16SF_HI:
33928 case V16SF_FTYPE_V16SI_V16SF_V16SF_HI:
33929 case V16SI_FTYPE_V16SI_V16SI_V16SI_HI:
33930 case V16SI_FTYPE_V16SI_V4SI_V16SI_HI:
33931 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI:
33932 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI:
33933 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI:
33934 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI:
33935 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI:
33936 case V8DF_FTYPE_V8DF_V8DI_V8DF_QI:
33937 case V8DF_FTYPE_V8DI_V8DF_V8DF_QI:
33938 case V8DI_FTYPE_V16SI_V16SI_V8DI_QI:
33939 case V8DI_FTYPE_V8DI_SI_V8DI_V8DI:
33940 case V8DI_FTYPE_V8DI_V2DI_V8DI_QI:
33941 case V8DI_FTYPE_V8DI_V8DI_V8DI_QI:
33942 nargs = 4;
33943 break;
33944 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
33945 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
33946 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
33947 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
33948 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT:
33949 nargs = 4;
33950 nargs_constant = 1;
33951 break;
33952 case QI_FTYPE_V2DF_V2DF_INT_QI:
33953 case QI_FTYPE_V4SF_V4SF_INT_QI:
33954 nargs = 4;
33955 mask_pos = 1;
33956 nargs_constant = 1;
33957 break;
33958 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
33959 nargs = 4;
33960 nargs_constant = 2;
33961 break;
33962 case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED:
33963 case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG:
33964 nargs = 4;
33965 break;
33966 case QI_FTYPE_V8DI_V8DI_INT_QI:
33967 case HI_FTYPE_V16SI_V16SI_INT_HI:
33968 case QI_FTYPE_V8DF_V8DF_INT_QI:
33969 case HI_FTYPE_V16SF_V16SF_INT_HI:
33970 mask_pos = 1;
33971 nargs = 4;
33972 nargs_constant = 1;
33973 break;
33974 case V8DF_FTYPE_V8DF_INT_V8DF_QI:
33975 case V16SF_FTYPE_V16SF_INT_V16SF_HI:
33976 case V16HI_FTYPE_V16SF_INT_V16HI_HI:
33977 case V16SI_FTYPE_V16SI_INT_V16SI_HI:
33978 case V4SI_FTYPE_V16SI_INT_V4SI_QI:
33979 case V4DI_FTYPE_V8DI_INT_V4DI_QI:
33980 case V4DF_FTYPE_V8DF_INT_V4DF_QI:
33981 case V4SF_FTYPE_V16SF_INT_V4SF_QI:
33982 case V8DI_FTYPE_V8DI_INT_V8DI_QI:
33983 nargs = 4;
33984 mask_pos = 2;
33985 nargs_constant = 1;
33986 break;
33987 case V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI:
33988 case V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI:
33989 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI:
33990 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI:
33991 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI:
33992 case V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI:
33993 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI:
33994 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI:
33995 case V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI:
33996 case V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI:
33997 nargs = 5;
33998 mask_pos = 2;
33999 nargs_constant = 1;
34000 break;
34001 case V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI:
34002 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI:
34003 case V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI:
34004 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI:
34005 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI:
34006 nargs = 5;
34007 mask_pos = 1;
34008 nargs_constant = 1;
34009 break;
34010
34011 default:
34012 gcc_unreachable ();
34013 }
34014
34015 gcc_assert (nargs <= ARRAY_SIZE (args));
34016
34017 if (comparison != UNKNOWN)
34018 {
34019 gcc_assert (nargs == 2);
34020 return ix86_expand_sse_compare (d, exp, target, swap);
34021 }
34022
34023 if (rmode == VOIDmode || rmode == tmode)
34024 {
34025 if (optimize
34026 || target == 0
34027 || GET_MODE (target) != tmode
34028 || !insn_p->operand[0].predicate (target, tmode))
34029 target = gen_reg_rtx (tmode);
34030 real_target = target;
34031 }
34032 else
34033 {
34034 real_target = gen_reg_rtx (tmode);
34035 target = simplify_gen_subreg (rmode, real_target, tmode, 0);
34036 }
34037
34038 for (i = 0; i < nargs; i++)
34039 {
34040 tree arg = CALL_EXPR_ARG (exp, i);
34041 rtx op = expand_normal (arg);
34042 enum machine_mode mode = insn_p->operand[i + 1].mode;
34043 bool match = insn_p->operand[i + 1].predicate (op, mode);
34044
34045 if (last_arg_count && (i + 1) == nargs)
34046 {
34047 /* SIMD shift insns take either an 8-bit immediate or
34048 a register as the count.  But the builtin functions take an int as
34049 the count, so if it doesn't match, we put it in a register. */
34050 if (!match)
34051 {
34052 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
34053 if (!insn_p->operand[i + 1].predicate (op, mode))
34054 op = copy_to_reg (op);
34055 }
34056 }
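	  /* A constant (immediate) operand: without a trailing mask the
	     last NARGS_CONSTANT arguments must be immediates; with a mask
	     the position test is shifted past the MASK_POS trailing
	     operands.  */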
34057 	  else if ((mask_pos && (nargs - i - mask_pos) == nargs_constant)
34058 		   || (!mask_pos && (nargs - i) <= nargs_constant))
34059 {
34060 if (!match)
34061 switch (icode)
34062 {
34063 case CODE_FOR_avx_vinsertf128v4di:
34064 case CODE_FOR_avx_vextractf128v4di:
34065 		    error ("the last argument must be a 1-bit immediate");
34066 return const0_rtx;
34067
34068 case CODE_FOR_avx512f_cmpv8di3_mask:
34069 case CODE_FOR_avx512f_cmpv16si3_mask:
34070 case CODE_FOR_avx512f_ucmpv8di3_mask:
34071 case CODE_FOR_avx512f_ucmpv16si3_mask:
34072 error ("the last argument must be a 3-bit immediate");
34073 return const0_rtx;
34074
34075 case CODE_FOR_sse4_1_roundsd:
34076 case CODE_FOR_sse4_1_roundss:
34077
34078 case CODE_FOR_sse4_1_roundpd:
34079 case CODE_FOR_sse4_1_roundps:
34080 case CODE_FOR_avx_roundpd256:
34081 case CODE_FOR_avx_roundps256:
34082
34083 case CODE_FOR_sse4_1_roundpd_vec_pack_sfix:
34084 case CODE_FOR_sse4_1_roundps_sfix:
34085 case CODE_FOR_avx_roundpd_vec_pack_sfix256:
34086 case CODE_FOR_avx_roundps_sfix256:
34087
34088 case CODE_FOR_sse4_1_blendps:
34089 case CODE_FOR_avx_blendpd256:
34090 case CODE_FOR_avx_vpermilv4df:
34091 case CODE_FOR_avx512f_getmantv8df_mask:
34092 case CODE_FOR_avx512f_getmantv16sf_mask:
34093 case CODE_FOR_avx512vl_getmantv8sf_mask:
34094 case CODE_FOR_avx512vl_getmantv4df_mask:
34095 case CODE_FOR_avx512vl_getmantv4sf_mask:
34096 case CODE_FOR_avx512vl_getmantv2df_mask:
34097 case CODE_FOR_avx512dq_rangepv8df_mask_round:
34098 case CODE_FOR_avx512dq_rangepv16sf_mask_round:
34099 case CODE_FOR_avx512dq_rangepv4df_mask:
34100 case CODE_FOR_avx512dq_rangepv8sf_mask:
34101 case CODE_FOR_avx512dq_rangepv2df_mask:
34102 case CODE_FOR_avx512dq_rangepv4sf_mask:
34103 error ("the last argument must be a 4-bit immediate");
34104 return const0_rtx;
34105
34106 case CODE_FOR_sha1rnds4:
34107 case CODE_FOR_sse4_1_blendpd:
34108 case CODE_FOR_avx_vpermilv2df:
34109 case CODE_FOR_xop_vpermil2v2df3:
34110 case CODE_FOR_xop_vpermil2v4sf3:
34111 case CODE_FOR_xop_vpermil2v4df3:
34112 case CODE_FOR_xop_vpermil2v8sf3:
34113 case CODE_FOR_avx512f_vinsertf32x4_mask:
34114 case CODE_FOR_avx512f_vinserti32x4_mask:
34115 case CODE_FOR_avx512f_vextractf32x4_mask:
34116 case CODE_FOR_avx512f_vextracti32x4_mask:
34117 case CODE_FOR_sse2_shufpd:
34118 case CODE_FOR_sse2_shufpd_mask:
34119 case CODE_FOR_avx512dq_shuf_f64x2_mask:
34120 case CODE_FOR_avx512dq_shuf_i64x2_mask:
34121 case CODE_FOR_avx512vl_shuf_i32x4_mask:
34122 case CODE_FOR_avx512vl_shuf_f32x4_mask:
34123 error ("the last argument must be a 2-bit immediate");
34124 return const0_rtx;
34125
34126 case CODE_FOR_avx_vextractf128v4df:
34127 case CODE_FOR_avx_vextractf128v8sf:
34128 case CODE_FOR_avx_vextractf128v8si:
34129 case CODE_FOR_avx_vinsertf128v4df:
34130 case CODE_FOR_avx_vinsertf128v8sf:
34131 case CODE_FOR_avx_vinsertf128v8si:
34132 case CODE_FOR_avx512f_vinsertf64x4_mask:
34133 case CODE_FOR_avx512f_vinserti64x4_mask:
34134 case CODE_FOR_avx512f_vextractf64x4_mask:
34135 case CODE_FOR_avx512f_vextracti64x4_mask:
34136 case CODE_FOR_avx512dq_vinsertf32x8_mask:
34137 case CODE_FOR_avx512dq_vinserti32x8_mask:
34138 case CODE_FOR_avx512vl_vinsertv4df:
34139 case CODE_FOR_avx512vl_vinsertv4di:
34140 case CODE_FOR_avx512vl_vinsertv8sf:
34141 case CODE_FOR_avx512vl_vinsertv8si:
34142 error ("the last argument must be a 1-bit immediate");
34143 return const0_rtx;
34144
34145 case CODE_FOR_avx_vmcmpv2df3:
34146 case CODE_FOR_avx_vmcmpv4sf3:
34147 case CODE_FOR_avx_cmpv2df3:
34148 case CODE_FOR_avx_cmpv4sf3:
34149 case CODE_FOR_avx_cmpv4df3:
34150 case CODE_FOR_avx_cmpv8sf3:
34151 case CODE_FOR_avx512f_cmpv8df3_mask:
34152 case CODE_FOR_avx512f_cmpv16sf3_mask:
34153 case CODE_FOR_avx512f_vmcmpv2df3_mask:
34154 case CODE_FOR_avx512f_vmcmpv4sf3_mask:
34155 error ("the last argument must be a 5-bit immediate");
34156 return const0_rtx;
34157
34158 default:
34159 switch (nargs_constant)
34160 {
34161 case 2:
34162 		  if ((mask_pos && (nargs - i - mask_pos) == nargs_constant)
34163 		      || (!mask_pos && (nargs - i) == nargs_constant))
34164 		    {
34165 		      error ("the next to last argument must be an 8-bit immediate");
34166 		      break;
34167 		    }
		  /* FALLTHRU */
34168 case 1:
34169 error ("the last argument must be an 8-bit immediate");
34170 break;
34171 default:
34172 gcc_unreachable ();
34173 }
34174 return const0_rtx;
34175 }
34176 }
34177 else
34178 {
34179 if (VECTOR_MODE_P (mode))
34180 op = safe_vector_operand (op, mode);
34181
34182 /* If we aren't optimizing, only allow one memory operand to
34183 be generated. */
34184 if (memory_operand (op, mode))
34185 num_memory++;
34186
34187 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
34188 {
34189 if (optimize || !match || num_memory > 1)
34190 op = copy_to_mode_reg (mode, op);
34191 }
34192 else
34193 {
34194 op = copy_to_reg (op);
34195 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
34196 }
34197 }
34198
34199 args[i].op = op;
34200 args[i].mode = mode;
34201 }
34202
34203 switch (nargs)
34204 {
34205 case 1:
34206 pat = GEN_FCN (icode) (real_target, args[0].op);
34207 break;
34208 case 2:
34209 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
34210 break;
34211 case 3:
34212 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34213 args[2].op);
34214 break;
34215 case 4:
34216 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34217 args[2].op, args[3].op);
34218 break;
34219 case 5:
34220 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34221 				     args[2].op, args[3].op, args[4].op);
	      break;
34222 case 6:
34223 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34224 args[2].op, args[3].op, args[4].op,
34225 args[5].op);
34226 break;
34227 default:
34228 gcc_unreachable ();
34229 }
34230
34231 if (! pat)
34232 return 0;
34233
34234 emit_insn (pat);
34235 return target;
34236 }
34237
34238 /* Transform a pattern of the following layout:
34239    (parallel [
34240      (set (A B))
34241      (unspec [C] UNSPEC_EMBEDDED_ROUNDING)
34242    ])
34243 into:
34244 (set (A B))
34245
34246 Or:
34247 (parallel [ A B
34248 ...
34249 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)
34250 ...
34251 ])
34252 into:
34253 (parallel [ A B ... ]) */
34254
34255 static rtx
34256 ix86_erase_embedded_rounding (rtx pat)
34257 {
34258 if (GET_CODE (pat) == INSN)
34259 pat = PATTERN (pat);
34260
34261 gcc_assert (GET_CODE (pat) == PARALLEL);
34262
34263 if (XVECLEN (pat, 0) == 2)
34264 {
34265 rtx p0 = XVECEXP (pat, 0, 0);
34266 rtx p1 = XVECEXP (pat, 0, 1);
34267
34268 gcc_assert (GET_CODE (p0) == SET
34269 && GET_CODE (p1) == UNSPEC
34270 && XINT (p1, 1) == UNSPEC_EMBEDDED_ROUNDING);
34271
34272 return p0;
34273 }
34274 else
34275 {
34276 rtx *res = XALLOCAVEC (rtx, XVECLEN (pat, 0));
34277 int i = 0;
34278 int j = 0;
34279
34280 for (; i < XVECLEN (pat, 0); ++i)
34281 {
34282 rtx elem = XVECEXP (pat, 0, i);
34283 if (GET_CODE (elem) != UNSPEC
34284 || XINT (elem, 1) != UNSPEC_EMBEDDED_ROUNDING)
34285 res [j++] = elem;
34286 }
34287
34288       /* No more than 1 occurrence was removed.  */
34289 gcc_assert (j >= XVECLEN (pat, 0) - 1);
34290
34291 return gen_rtx_PARALLEL (GET_MODE (pat), gen_rtvec_v (j, res));
34292 }
34293 }
34294
34295 /* Subroutine of ix86_expand_round_builtin to take care of comi insns
34296 with rounding. */
34297 static rtx
34298 ix86_expand_sse_comi_round (const struct builtin_description *d,
34299 tree exp, rtx target)
34300 {
34301 rtx pat, set_dst;
34302 tree arg0 = CALL_EXPR_ARG (exp, 0);
34303 tree arg1 = CALL_EXPR_ARG (exp, 1);
34304 tree arg2 = CALL_EXPR_ARG (exp, 2);
34305 tree arg3 = CALL_EXPR_ARG (exp, 3);
34306 rtx op0 = expand_normal (arg0);
34307 rtx op1 = expand_normal (arg1);
34308 rtx op2 = expand_normal (arg2);
34309 rtx op3 = expand_normal (arg3);
34310 enum insn_code icode = d->icode;
34311 const struct insn_data_d *insn_p = &insn_data[icode];
34312 enum machine_mode mode0 = insn_p->operand[0].mode;
34313 enum machine_mode mode1 = insn_p->operand[1].mode;
34314 enum rtx_code comparison = UNEQ;
34315 bool need_ucomi = false;
34316
34317 /* See avxintrin.h for values. */
34318 enum rtx_code comi_comparisons[32] =
34319 {
34320 UNEQ, GT, GE, UNORDERED, LTGT, UNLE, UNLT, ORDERED, UNEQ, UNLT,
34321 UNLE, LT, LTGT, GE, GT, LT, UNEQ, GT, GE, UNORDERED, LTGT, UNLE,
34322 UNLT, ORDERED, UNEQ, UNLT, UNLE, LT, LTGT, GE, GT, LT
34323 };
34324 bool need_ucomi_values[32] =
34325 {
34326 true, false, false, true, true, false, false, true,
34327 true, false, false, true, true, false, false, true,
34328 false, true, true, false, false, true, true, false,
34329 false, true, true, false, false, true, true, false
34330 };
34331
34332 if (!CONST_INT_P (op2))
34333 {
34334       error ("the third argument must be a comparison constant");
34335 return const0_rtx;
34336 }
34337 if (INTVAL (op2) < 0 || INTVAL (op2) >= 32)
34338 {
34339       error ("incorrect comparison mode");
34340 return const0_rtx;
34341 }
34342
34343 if (!insn_p->operand[2].predicate (op3, SImode))
34344 {
34345 error ("incorrect rounding operand");
34346 return const0_rtx;
34347 }
34348
34349 comparison = comi_comparisons[INTVAL (op2)];
34350 need_ucomi = need_ucomi_values[INTVAL (op2)];
34351
34352 if (VECTOR_MODE_P (mode0))
34353 op0 = safe_vector_operand (op0, mode0);
34354 if (VECTOR_MODE_P (mode1))
34355 op1 = safe_vector_operand (op1, mode1);
34356
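  /* The comparison result is written into the low byte of a zeroed
     SImode register via the STRICT_LOW_PART set below, so the full
     register can be returned as a 0/1 value.  */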
34357 target = gen_reg_rtx (SImode);
34358 emit_move_insn (target, const0_rtx);
34359 target = gen_rtx_SUBREG (QImode, target, 0);
34360
34361 if ((optimize && !register_operand (op0, mode0))
34362 || !insn_p->operand[0].predicate (op0, mode0))
34363 op0 = copy_to_mode_reg (mode0, op0);
34364 if ((optimize && !register_operand (op1, mode1))
34365 || !insn_p->operand[1].predicate (op1, mode1))
34366 op1 = copy_to_mode_reg (mode1, op1);
34367
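  /* Predicates that must not raise an invalid-operand exception on
     quiet NaNs use the non-signaling (ucomi) form of the instruction.  */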
34368 if (need_ucomi)
34369 icode = icode == CODE_FOR_sse_comi_round
34370 ? CODE_FOR_sse_ucomi_round
34371 : CODE_FOR_sse2_ucomi_round;
34372
34373 pat = GEN_FCN (icode) (op0, op1, op3);
34374 if (! pat)
34375 return 0;
34376
34377 /* Rounding operand can be either NO_ROUND or ROUND_SAE at this point. */
34378 if (INTVAL (op3) == NO_ROUND)
34379 {
34380 pat = ix86_erase_embedded_rounding (pat);
34381 if (! pat)
34382 return 0;
34383
34384 set_dst = SET_DEST (pat);
34385 }
34386 else
34387 {
34388 gcc_assert (GET_CODE (XVECEXP (pat, 0, 0)) == SET);
34389 set_dst = SET_DEST (XVECEXP (pat, 0, 0));
34390 }
34391
34392 emit_insn (pat);
34393 emit_insn (gen_rtx_SET (VOIDmode,
34394 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
34395 gen_rtx_fmt_ee (comparison, QImode,
34396 set_dst,
34397 const0_rtx)));
34398
34399 return SUBREG_REG (target);
34400 }
34401
34402 static rtx
34403 ix86_expand_round_builtin (const struct builtin_description *d,
34404 tree exp, rtx target)
34405 {
34406 rtx pat;
34407 unsigned int i, nargs;
34408 struct
34409 {
34410 rtx op;
34411 enum machine_mode mode;
34412 } args[6];
34413 enum insn_code icode = d->icode;
34414 const struct insn_data_d *insn_p = &insn_data[icode];
34415 enum machine_mode tmode = insn_p->operand[0].mode;
34416 unsigned int nargs_constant = 0;
34417 unsigned int redundant_embed_rnd = 0;
34418
34419 switch ((enum ix86_builtin_func_type) d->flag)
34420 {
34421 case UINT64_FTYPE_V2DF_INT:
34422 case UINT64_FTYPE_V4SF_INT:
34423 case UINT_FTYPE_V2DF_INT:
34424 case UINT_FTYPE_V4SF_INT:
34425 case INT64_FTYPE_V2DF_INT:
34426 case INT64_FTYPE_V4SF_INT:
34427 case INT_FTYPE_V2DF_INT:
34428 case INT_FTYPE_V4SF_INT:
34429 nargs = 2;
34430 break;
34431 case V4SF_FTYPE_V4SF_UINT_INT:
34432 case V4SF_FTYPE_V4SF_UINT64_INT:
34433 case V2DF_FTYPE_V2DF_UINT64_INT:
34434 case V4SF_FTYPE_V4SF_INT_INT:
34435 case V4SF_FTYPE_V4SF_INT64_INT:
34436 case V2DF_FTYPE_V2DF_INT64_INT:
34437 case V4SF_FTYPE_V4SF_V4SF_INT:
34438 case V2DF_FTYPE_V2DF_V2DF_INT:
34439 case V4SF_FTYPE_V4SF_V2DF_INT:
34440 case V2DF_FTYPE_V2DF_V4SF_INT:
34441 nargs = 3;
34442 break;
34443 case V8SF_FTYPE_V8DF_V8SF_QI_INT:
34444 case V8DF_FTYPE_V8DF_V8DF_QI_INT:
34445 case V8SI_FTYPE_V8DF_V8SI_QI_INT:
34446 case V16SF_FTYPE_V16SF_V16SF_HI_INT:
34447 case V16SF_FTYPE_V16SI_V16SF_HI_INT:
34448 case V16SI_FTYPE_V16SF_V16SI_HI_INT:
34449 case V8DF_FTYPE_V8SF_V8DF_QI_INT:
34450 case V16SF_FTYPE_V16HI_V16SF_HI_INT:
34451 case V2DF_FTYPE_V2DF_V2DF_V2DF_INT:
34452 case V4SF_FTYPE_V4SF_V4SF_V4SF_INT:
34453 nargs = 4;
34454 break;
34455 case V4SF_FTYPE_V4SF_V4SF_INT_INT:
34456 case V2DF_FTYPE_V2DF_V2DF_INT_INT:
34457 nargs_constant = 2;
34458 nargs = 4;
34459 break;
34460 case INT_FTYPE_V4SF_V4SF_INT_INT:
34461 case INT_FTYPE_V2DF_V2DF_INT_INT:
34462 return ix86_expand_sse_comi_round (d, exp, target);
34463 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT:
34464 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT:
34465 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT:
34466 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI_INT:
34467 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT:
34468 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI_INT:
34469 nargs = 5;
34470 break;
34471 case V16SF_FTYPE_V16SF_INT_V16SF_HI_INT:
34472 case V8DF_FTYPE_V8DF_INT_V8DF_QI_INT:
34473 nargs_constant = 4;
34474 nargs = 5;
34475 break;
34476 case QI_FTYPE_V8DF_V8DF_INT_QI_INT:
34477 case QI_FTYPE_V2DF_V2DF_INT_QI_INT:
34478 case HI_FTYPE_V16SF_V16SF_INT_HI_INT:
34479 case QI_FTYPE_V4SF_V4SF_INT_QI_INT:
34480 nargs_constant = 3;
34481 nargs = 5;
34482 break;
34483 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI_INT:
34484 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI_INT:
34485 nargs = 6;
34486 nargs_constant = 4;
34487 break;
34488 case V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT:
34489 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT:
34490 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT:
34491 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT:
34492 nargs = 6;
34493 nargs_constant = 3;
34494 break;
34495 default:
34496 gcc_unreachable ();
34497 }
34498 gcc_assert (nargs <= ARRAY_SIZE (args));
34499
34500 if (optimize
34501 || target == 0
34502 || GET_MODE (target) != tmode
34503 || !insn_p->operand[0].predicate (target, tmode))
34504 target = gen_reg_rtx (tmode);
34505
34506 for (i = 0; i < nargs; i++)
34507 {
34508 tree arg = CALL_EXPR_ARG (exp, i);
34509 rtx op = expand_normal (arg);
34510 enum machine_mode mode = insn_p->operand[i + 1].mode;
34511 bool match = insn_p->operand[i + 1].predicate (op, mode);
34512
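      /* NARGS_CONSTANT gives the position of the immediate operand,
	 counted from the end of the argument list; the rounding operand
	 is always last.  */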
34513 if (i == nargs - nargs_constant)
34514 {
34515 if (!match)
34516 {
34517 switch (icode)
34518 {
34519 case CODE_FOR_avx512f_getmantv8df_mask_round:
34520 case CODE_FOR_avx512f_getmantv16sf_mask_round:
34521 case CODE_FOR_avx512f_vgetmantv2df_round:
34522 case CODE_FOR_avx512f_vgetmantv4sf_round:
34523 error ("the immediate argument must be a 4-bit immediate");
34524 return const0_rtx;
34525 case CODE_FOR_avx512f_cmpv8df3_mask_round:
34526 case CODE_FOR_avx512f_cmpv16sf3_mask_round:
34527 case CODE_FOR_avx512f_vmcmpv2df3_mask_round:
34528 case CODE_FOR_avx512f_vmcmpv4sf3_mask_round:
34529 error ("the immediate argument must be a 5-bit immediate");
34530 return const0_rtx;
34531 default:
34532 error ("the immediate argument must be an 8-bit immediate");
34533 return const0_rtx;
34534 }
34535 }
34536 }
34537       else if (i == nargs - 1)
34538 {
34539 if (!insn_p->operand[nargs].predicate (op, SImode))
34540 {
34541 error ("incorrect rounding operand");
34542 return const0_rtx;
34543 }
34544
34545 	  /* If there is no rounding, use the normal version of the pattern.  */
34546 if (INTVAL (op) == NO_ROUND)
34547 redundant_embed_rnd = 1;
34548 }
34549 else
34550 {
34551 if (VECTOR_MODE_P (mode))
34552 op = safe_vector_operand (op, mode);
34553
34554 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
34555 {
34556 if (optimize || !match)
34557 op = copy_to_mode_reg (mode, op);
34558 }
34559 else
34560 {
34561 op = copy_to_reg (op);
34562 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
34563 }
34564 }
34565
34566 args[i].op = op;
34567 args[i].mode = mode;
34568 }
34569
34570 switch (nargs)
34571 {
34572 case 1:
34573 pat = GEN_FCN (icode) (target, args[0].op);
34574 break;
34575 case 2:
34576 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
34577 break;
34578 case 3:
34579 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34580 args[2].op);
34581 break;
34582 case 4:
34583 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34584 args[2].op, args[3].op);
34585 break;
34586 case 5:
34587 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34588 				     args[2].op, args[3].op, args[4].op);
	      break;
34589 case 6:
34590 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34591 args[2].op, args[3].op, args[4].op,
34592 args[5].op);
34593 break;
34594 default:
34595 gcc_unreachable ();
34596 }
34597
34598 if (!pat)
34599 return 0;
34600
34601 if (redundant_embed_rnd)
34602 pat = ix86_erase_embedded_rounding (pat);
34603
34604 emit_insn (pat);
34605 return target;
34606 }
34607
34608 /* Subroutine of ix86_expand_builtin to take care of special insns
34609 with variable number of operands. */
34610
34611 static rtx
34612 ix86_expand_special_args_builtin (const struct builtin_description *d,
34613 tree exp, rtx target)
34614 {
34615 tree arg;
34616 rtx pat, op;
34617 unsigned int i, nargs, arg_adjust, memory;
34618 bool aligned_mem = false;
34619 struct
34620 {
34621 rtx op;
34622 enum machine_mode mode;
34623 } args[3];
34624 enum insn_code icode = d->icode;
34625 bool last_arg_constant = false;
34626 const struct insn_data_d *insn_p = &insn_data[icode];
34627 enum machine_mode tmode = insn_p->operand[0].mode;
34628 enum { load, store } klass;
34629
34630 switch ((enum ix86_builtin_func_type) d->flag)
34631 {
34632 case VOID_FTYPE_VOID:
34633 emit_insn (GEN_FCN (icode) (target));
34634 return 0;
34635 case VOID_FTYPE_UINT64:
34636 case VOID_FTYPE_UNSIGNED:
34637 nargs = 0;
34638 klass = store;
34639 memory = 0;
34640 break;
34641
34642 case INT_FTYPE_VOID:
34643 case USHORT_FTYPE_VOID:
34644 case UINT64_FTYPE_VOID:
34645 case UNSIGNED_FTYPE_VOID:
34646 nargs = 0;
34647 klass = load;
34648 memory = 0;
34649 break;
34650 case UINT64_FTYPE_PUNSIGNED:
34651 case V2DI_FTYPE_PV2DI:
34652 case V4DI_FTYPE_PV4DI:
34653 case V32QI_FTYPE_PCCHAR:
34654 case V16QI_FTYPE_PCCHAR:
34655 case V8SF_FTYPE_PCV4SF:
34656 case V8SF_FTYPE_PCFLOAT:
34657 case V4SF_FTYPE_PCFLOAT:
34658 case V4DF_FTYPE_PCV2DF:
34659 case V4DF_FTYPE_PCDOUBLE:
34660 case V2DF_FTYPE_PCDOUBLE:
34661 case VOID_FTYPE_PVOID:
34662 case V16SI_FTYPE_PV4SI:
34663 case V16SF_FTYPE_PV4SF:
34664 case V8DI_FTYPE_PV4DI:
34665 case V8DI_FTYPE_PV8DI:
34666 case V8DF_FTYPE_PV4DF:
34667 nargs = 1;
34668 klass = load;
34669 memory = 0;
34670 switch (icode)
34671 {
34672 case CODE_FOR_sse4_1_movntdqa:
34673 case CODE_FOR_avx2_movntdqa:
34674 case CODE_FOR_avx512f_movntdqa:
34675 aligned_mem = true;
34676 break;
34677 default:
34678 break;
34679 }
34680 break;
34681 case VOID_FTYPE_PV2SF_V4SF:
34682 case VOID_FTYPE_PV8DI_V8DI:
34683 case VOID_FTYPE_PV4DI_V4DI:
34684 case VOID_FTYPE_PV2DI_V2DI:
34685 case VOID_FTYPE_PCHAR_V32QI:
34686 case VOID_FTYPE_PCHAR_V16QI:
34687 case VOID_FTYPE_PFLOAT_V16SF:
34688 case VOID_FTYPE_PFLOAT_V8SF:
34689 case VOID_FTYPE_PFLOAT_V4SF:
34690 case VOID_FTYPE_PDOUBLE_V8DF:
34691 case VOID_FTYPE_PDOUBLE_V4DF:
34692 case VOID_FTYPE_PDOUBLE_V2DF:
34693 case VOID_FTYPE_PLONGLONG_LONGLONG:
34694 case VOID_FTYPE_PULONGLONG_ULONGLONG:
34695 case VOID_FTYPE_PINT_INT:
34696 nargs = 1;
34697 klass = store;
34698 /* Reserve memory operand for target. */
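	 Setting MEMORY past the argument array means none of the
	 remaining operands is treated as a memory operand; the store
	 destination itself is the memory reference.  */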
34699 memory = ARRAY_SIZE (args);
34700 switch (icode)
34701 {
34702 /* These builtins and instructions require the memory
34703 to be properly aligned. */
34704 case CODE_FOR_avx_movntv4di:
34705 case CODE_FOR_sse2_movntv2di:
34706 case CODE_FOR_avx_movntv8sf:
34707 case CODE_FOR_sse_movntv4sf:
34708 case CODE_FOR_sse4a_vmmovntv4sf:
34709 case CODE_FOR_avx_movntv4df:
34710 case CODE_FOR_sse2_movntv2df:
34711 case CODE_FOR_sse4a_vmmovntv2df:
34712 case CODE_FOR_sse2_movntidi:
34713 case CODE_FOR_sse_movntq:
34714 case CODE_FOR_sse2_movntisi:
34715 case CODE_FOR_avx512f_movntv16sf:
34716 case CODE_FOR_avx512f_movntv8df:
34717 case CODE_FOR_avx512f_movntv8di:
34718 aligned_mem = true;
34719 break;
34720 default:
34721 break;
34722 }
34723 break;
34724 case V4SF_FTYPE_V4SF_PCV2SF:
34725 case V2DF_FTYPE_V2DF_PCDOUBLE:
34726 nargs = 2;
34727 klass = load;
34728 memory = 1;
34729 break;
34730 case V8SF_FTYPE_PCV8SF_V8SI:
34731 case V4DF_FTYPE_PCV4DF_V4DI:
34732 case V4SF_FTYPE_PCV4SF_V4SI:
34733 case V2DF_FTYPE_PCV2DF_V2DI:
34734 case V8SI_FTYPE_PCV8SI_V8SI:
34735 case V4DI_FTYPE_PCV4DI_V4DI:
34736 case V4SI_FTYPE_PCV4SI_V4SI:
34737 case V2DI_FTYPE_PCV2DI_V2DI:
34738 nargs = 2;
34739 klass = load;
34740 memory = 0;
34741 break;
34742 case VOID_FTYPE_PV8DF_V8DF_QI:
34743 case VOID_FTYPE_PV16SF_V16SF_HI:
34744 case VOID_FTYPE_PV8DI_V8DI_QI:
34745 case VOID_FTYPE_PV16SI_V16SI_HI:
34746 switch (icode)
34747 {
34748 /* These builtins and instructions require the memory
34749 to be properly aligned. */
34750 case CODE_FOR_avx512f_storev16sf_mask:
34751 case CODE_FOR_avx512f_storev16si_mask:
34752 case CODE_FOR_avx512f_storev8df_mask:
34753 case CODE_FOR_avx512f_storev8di_mask:
34754 case CODE_FOR_avx512vl_storev8sf_mask:
34755 case CODE_FOR_avx512vl_storev8si_mask:
34756 case CODE_FOR_avx512vl_storev4df_mask:
34757 case CODE_FOR_avx512vl_storev4di_mask:
34758 case CODE_FOR_avx512vl_storev4sf_mask:
34759 case CODE_FOR_avx512vl_storev4si_mask:
34760 case CODE_FOR_avx512vl_storev2df_mask:
34761 case CODE_FOR_avx512vl_storev2di_mask:
34762 aligned_mem = true;
34763 break;
34764 default:
34765 break;
34766 }
34767 /* FALLTHRU */
34768 case VOID_FTYPE_PV8SF_V8SI_V8SF:
34769 case VOID_FTYPE_PV4DF_V4DI_V4DF:
34770 case VOID_FTYPE_PV4SF_V4SI_V4SF:
34771 case VOID_FTYPE_PV2DF_V2DI_V2DF:
34772 case VOID_FTYPE_PV8SI_V8SI_V8SI:
34773 case VOID_FTYPE_PV4DI_V4DI_V4DI:
34774 case VOID_FTYPE_PV4SI_V4SI_V4SI:
34775 case VOID_FTYPE_PV2DI_V2DI_V2DI:
34776 case VOID_FTYPE_PDOUBLE_V2DF_QI:
34777 case VOID_FTYPE_PFLOAT_V4SF_QI:
34778 case VOID_FTYPE_PV8SI_V8DI_QI:
34779 case VOID_FTYPE_PV8HI_V8DI_QI:
34780 case VOID_FTYPE_PV16HI_V16SI_HI:
34781 case VOID_FTYPE_PV16QI_V8DI_QI:
34782 case VOID_FTYPE_PV16QI_V16SI_HI:
34783 nargs = 2;
34784 klass = store;
34785 /* Reserve memory operand for target. */
34786 memory = ARRAY_SIZE (args);
34787 break;
34788 case V16SF_FTYPE_PCV16SF_V16SF_HI:
34789 case V16SI_FTYPE_PCV16SI_V16SI_HI:
34790 case V8DF_FTYPE_PCV8DF_V8DF_QI:
34791 case V8DI_FTYPE_PCV8DI_V8DI_QI:
34792 case V2DF_FTYPE_PCDOUBLE_V2DF_QI:
34793 case V4SF_FTYPE_PCFLOAT_V4SF_QI:
34794 nargs = 3;
34795 klass = load;
34796 memory = 0;
34797 switch (icode)
34798 {
34799 /* These builtins and instructions require the memory
34800 to be properly aligned. */
34801 case CODE_FOR_avx512f_loadv16sf_mask:
34802 case CODE_FOR_avx512f_loadv16si_mask:
34803 case CODE_FOR_avx512f_loadv8df_mask:
34804 case CODE_FOR_avx512f_loadv8di_mask:
34805 case CODE_FOR_avx512vl_loadv8sf_mask:
34806 case CODE_FOR_avx512vl_loadv8si_mask:
34807 case CODE_FOR_avx512vl_loadv4df_mask:
34808 case CODE_FOR_avx512vl_loadv4di_mask:
34809 case CODE_FOR_avx512vl_loadv4sf_mask:
34810 case CODE_FOR_avx512vl_loadv4si_mask:
34811 case CODE_FOR_avx512vl_loadv2df_mask:
34812 case CODE_FOR_avx512vl_loadv2di_mask:
34813 case CODE_FOR_avx512bw_loadv64qi_mask:
34814 case CODE_FOR_avx512vl_loadv32qi_mask:
34815 case CODE_FOR_avx512vl_loadv16qi_mask:
34816 case CODE_FOR_avx512bw_loadv32hi_mask:
34817 case CODE_FOR_avx512vl_loadv16hi_mask:
34818 case CODE_FOR_avx512vl_loadv8hi_mask:
34819 aligned_mem = true;
34820 break;
34821 default:
34822 break;
34823 }
34824 break;
34825 case VOID_FTYPE_UINT_UINT_UINT:
34826 case VOID_FTYPE_UINT64_UINT_UINT:
34827 case UCHAR_FTYPE_UINT_UINT_UINT:
34828 case UCHAR_FTYPE_UINT64_UINT_UINT:
34829 nargs = 3;
34830 klass = load;
34831 memory = ARRAY_SIZE (args);
34832 last_arg_constant = true;
34833 break;
34834 default:
34835 gcc_unreachable ();
34836 }
34837
34838 gcc_assert (nargs <= ARRAY_SIZE (args));
34839
34840 if (klass == store)
34841 {
34842 arg = CALL_EXPR_ARG (exp, 0);
34843 op = expand_normal (arg);
34844 gcc_assert (target == 0);
34845 if (memory)
34846 {
34847 op = ix86_zero_extend_to_Pmode (op);
34848 target = gen_rtx_MEM (tmode, op);
34849 /* target at this point has just BITS_PER_UNIT MEM_ALIGN
34850 on it. Try to improve it using get_pointer_alignment,
34851 and if the special builtin is one that requires strict
34852 	     mode alignment, also from its GET_MODE_ALIGNMENT.
34853 Failure to do so could lead to ix86_legitimate_combined_insn
34854 rejecting all changes to such insns. */
34855 unsigned int align = get_pointer_alignment (arg);
34856 if (aligned_mem && align < GET_MODE_ALIGNMENT (tmode))
34857 align = GET_MODE_ALIGNMENT (tmode);
34858 if (MEM_ALIGN (target) < align)
34859 set_mem_align (target, align);
34860 }
34861 else
34862 target = force_reg (tmode, op);
34863 arg_adjust = 1;
34864 }
34865 else
34866 {
34867 arg_adjust = 0;
34868 if (optimize
34869 || target == 0
34870 || !register_operand (target, tmode)
34871 || GET_MODE (target) != tmode)
34872 target = gen_reg_rtx (tmode);
34873 }
34874
34875 for (i = 0; i < nargs; i++)
34876 {
34877 enum machine_mode mode = insn_p->operand[i + 1].mode;
34878 bool match;
34879
34880 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
34881 op = expand_normal (arg);
34882 match = insn_p->operand[i + 1].predicate (op, mode);
34883
34884 if (last_arg_constant && (i + 1) == nargs)
34885 {
34886 if (!match)
34887 {
34888 if (icode == CODE_FOR_lwp_lwpvalsi3
34889 || icode == CODE_FOR_lwp_lwpinssi3
34890 || icode == CODE_FOR_lwp_lwpvaldi3
34891 || icode == CODE_FOR_lwp_lwpinsdi3)
34892 error ("the last argument must be a 32-bit immediate");
34893 else
34894 error ("the last argument must be an 8-bit immediate");
34895 return const0_rtx;
34896 }
34897 }
34898 else
34899 {
34900 if (i == memory)
34901 {
34902 /* This must be the memory operand. */
34903 op = ix86_zero_extend_to_Pmode (op);
34904 op = gen_rtx_MEM (mode, op);
34905 /* op at this point has just BITS_PER_UNIT MEM_ALIGN
34906 on it. Try to improve it using get_pointer_alignment,
34907 and if the special builtin is one that requires strict
34908 		 mode alignment, also from its GET_MODE_ALIGNMENT.
34909 Failure to do so could lead to ix86_legitimate_combined_insn
34910 rejecting all changes to such insns. */
34911 unsigned int align = get_pointer_alignment (arg);
34912 if (aligned_mem && align < GET_MODE_ALIGNMENT (mode))
34913 align = GET_MODE_ALIGNMENT (mode);
34914 if (MEM_ALIGN (op) < align)
34915 set_mem_align (op, align);
34916 }
34917 else
34918 {
34919 	      /* This must be a register.  */
34920 if (VECTOR_MODE_P (mode))
34921 op = safe_vector_operand (op, mode);
34922
34923 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
34924 op = copy_to_mode_reg (mode, op);
34925 else
34926 {
34927 op = copy_to_reg (op);
34928 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
34929 }
34930 }
34931 }
34932
34933 args[i].op = op;
34934 args[i].mode = mode;
34935 }
34936
34937 switch (nargs)
34938 {
34939 case 0:
34940 pat = GEN_FCN (icode) (target);
34941 break;
34942 case 1:
34943 pat = GEN_FCN (icode) (target, args[0].op);
34944 break;
34945 case 2:
34946 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
34947 break;
34948 case 3:
34949 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
34950 break;
34951 default:
34952 gcc_unreachable ();
34953 }
34954
34955 if (! pat)
34956 return 0;
34957 emit_insn (pat);
34958 return klass == store ? 0 : target;
34959 }
34960
34961 /* Return the integer constant in ARG. Constrain it to be in the range
34962 of the subparts of VEC_TYPE; issue an error if not. */
34963
34964 static int
34965 get_element_number (tree vec_type, tree arg)
34966 {
34967 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
34968
34969 if (!tree_fits_uhwi_p (arg)
34970 || (elt = tree_to_uhwi (arg), elt > max))
34971 {
34972 error ("selector must be an integer constant in the range 0..%wi", max);
34973 return 0;
34974 }
34975
34976 return elt;
34977 }
34978
34979 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
34980 ix86_expand_vector_init. We DO have language-level syntax for this, in
34981 the form of (type){ init-list }. Except that since we can't place emms
34982 instructions from inside the compiler, we can't allow the use of MMX
34983 registers unless the user explicitly asks for it. So we do *not* define
34984 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
34985    we have builtins invoked by mmintrin.h that give us license to emit
34986 these sorts of instructions. */
34987
34988 static rtx
34989 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
34990 {
34991 enum machine_mode tmode = TYPE_MODE (type);
34992 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
34993 int i, n_elt = GET_MODE_NUNITS (tmode);
34994 rtvec v = rtvec_alloc (n_elt);
34995
34996 gcc_assert (VECTOR_MODE_P (tmode));
34997 gcc_assert (call_expr_nargs (exp) == n_elt);
34998
34999 for (i = 0; i < n_elt; ++i)
35000 {
35001 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
35002 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
35003 }
35004
35005 if (!target || !register_operand (target, tmode))
35006 target = gen_reg_rtx (tmode);
35007
35008 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
35009 return target;
35010 }
35011
35012 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
35013 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
35014 had a language-level syntax for referencing vector elements. */
35015
35016 static rtx
35017 ix86_expand_vec_ext_builtin (tree exp, rtx target)
35018 {
35019 enum machine_mode tmode, mode0;
35020 tree arg0, arg1;
35021 int elt;
35022 rtx op0;
35023
35024 arg0 = CALL_EXPR_ARG (exp, 0);
35025 arg1 = CALL_EXPR_ARG (exp, 1);
35026
35027 op0 = expand_normal (arg0);
35028 elt = get_element_number (TREE_TYPE (arg0), arg1);
35029
35030 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
35031 mode0 = TYPE_MODE (TREE_TYPE (arg0));
35032 gcc_assert (VECTOR_MODE_P (mode0));
35033
35034 op0 = force_reg (mode0, op0);
35035
35036 if (optimize || !target || !register_operand (target, tmode))
35037 target = gen_reg_rtx (tmode);
35038
35039 ix86_expand_vector_extract (true, target, op0, elt);
35040
35041 return target;
35042 }
35043
35044 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
35045 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
35046 a language-level syntax for referencing vector elements. */
35047
35048 static rtx
35049 ix86_expand_vec_set_builtin (tree exp)
35050 {
35051 enum machine_mode tmode, mode1;
35052 tree arg0, arg1, arg2;
35053 int elt;
35054 rtx op0, op1, target;
35055
35056 arg0 = CALL_EXPR_ARG (exp, 0);
35057 arg1 = CALL_EXPR_ARG (exp, 1);
35058 arg2 = CALL_EXPR_ARG (exp, 2);
35059
35060 tmode = TYPE_MODE (TREE_TYPE (arg0));
35061 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
35062 gcc_assert (VECTOR_MODE_P (tmode));
35063
35064 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
35065 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
35066 elt = get_element_number (TREE_TYPE (arg0), arg2);
35067
35068 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
35069 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
35070
35071 op0 = force_reg (tmode, op0);
35072 op1 = force_reg (mode1, op1);
35073
35074 /* OP0 is the source of these builtin functions and shouldn't be
35075 modified. Create a copy, use it and return it as target. */
35076 target = gen_reg_rtx (tmode);
35077 emit_move_insn (target, op0);
35078 ix86_expand_vector_set (true, target, op1, elt);
35079
35080 return target;
35081 }
35082
35083 /* Expand an expression EXP that calls a built-in function,
35084 with result going to TARGET if that's convenient
35085 (and in mode MODE if that's convenient).
35086 SUBTARGET may be used as the target for computing one of EXP's operands.
35087 IGNORE is nonzero if the value is to be ignored. */
35088
35089 static rtx
35090 ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
35091 enum machine_mode mode, int ignore)
35092 {
35093 const struct builtin_description *d;
35094 size_t i;
35095 enum insn_code icode;
35096 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
35097 tree arg0, arg1, arg2, arg3, arg4;
35098 rtx op0, op1, op2, op3, op4, pat, insn;
35099 enum machine_mode mode0, mode1, mode2, mode3, mode4;
35100 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
35101
35102 /* For CPU builtins that can be folded, fold first and expand the fold. */
35103 switch (fcode)
35104 {
35105 case IX86_BUILTIN_CPU_INIT:
35106 {
35107 /* Make it call __cpu_indicator_init in libgcc. */
35108 tree call_expr, fndecl, type;
35109 type = build_function_type_list (integer_type_node, NULL_TREE);
35110 fndecl = build_fn_decl ("__cpu_indicator_init", type);
35111 call_expr = build_call_expr (fndecl, 0);
35112 return expand_expr (call_expr, target, mode, EXPAND_NORMAL);
35113 }
35114 case IX86_BUILTIN_CPU_IS:
35115 case IX86_BUILTIN_CPU_SUPPORTS:
35116 {
35117 tree arg0 = CALL_EXPR_ARG (exp, 0);
35118 tree fold_expr = fold_builtin_cpu (fndecl, &arg0);
35119 gcc_assert (fold_expr != NULL_TREE);
35120 return expand_expr (fold_expr, target, mode, EXPAND_NORMAL);
35121 }
35122 }
35123
35124 /* Determine whether the builtin function is available under the current ISA.
35125 Originally the builtin was not created if it wasn't applicable to the
35126 current ISA based on the command line switches. With function specific
35127 options, we need to check in the context of the function making the call
35128 whether it is supported. */
35129 if (ix86_builtins_isa[fcode].isa
35130 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
35131 {
35132 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
35133 NULL, (enum fpmath_unit) 0, false);
35134
35135 if (!opts)
35136 error ("%qE needs unknown isa option", fndecl);
35137 else
35138 {
35139 gcc_assert (opts != NULL);
35140 error ("%qE needs isa option %s", fndecl, opts);
35141 free (opts);
35142 }
35143 return const0_rtx;
35144 }
35145
35146 switch (fcode)
35147 {
35148 case IX86_BUILTIN_MASKMOVQ:
35149 case IX86_BUILTIN_MASKMOVDQU:
35150 icode = (fcode == IX86_BUILTIN_MASKMOVQ
35151 ? CODE_FOR_mmx_maskmovq
35152 : CODE_FOR_sse2_maskmovdqu);
35153 /* Note the arg order is different from the operand order. */
35154 arg1 = CALL_EXPR_ARG (exp, 0);
35155 arg2 = CALL_EXPR_ARG (exp, 1);
35156 arg0 = CALL_EXPR_ARG (exp, 2);
35157 op0 = expand_normal (arg0);
35158 op1 = expand_normal (arg1);
35159 op2 = expand_normal (arg2);
35160 mode0 = insn_data[icode].operand[0].mode;
35161 mode1 = insn_data[icode].operand[1].mode;
35162 mode2 = insn_data[icode].operand[2].mode;
35163
35164 op0 = ix86_zero_extend_to_Pmode (op0);
35165 op0 = gen_rtx_MEM (mode1, op0);
35166
35167 if (!insn_data[icode].operand[0].predicate (op0, mode0))
35168 op0 = copy_to_mode_reg (mode0, op0);
35169 if (!insn_data[icode].operand[1].predicate (op1, mode1))
35170 op1 = copy_to_mode_reg (mode1, op1);
35171 if (!insn_data[icode].operand[2].predicate (op2, mode2))
35172 op2 = copy_to_mode_reg (mode2, op2);
35173 pat = GEN_FCN (icode) (op0, op1, op2);
35174 if (! pat)
35175 return 0;
35176 emit_insn (pat);
35177 return 0;
35178
35179 case IX86_BUILTIN_LDMXCSR:
35180 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
35181 target = assign_386_stack_local (SImode, SLOT_TEMP);
35182 emit_move_insn (target, op0);
35183 emit_insn (gen_sse_ldmxcsr (target));
35184 return 0;
35185
35186 case IX86_BUILTIN_STMXCSR:
35187 target = assign_386_stack_local (SImode, SLOT_TEMP);
35188 emit_insn (gen_sse_stmxcsr (target));
35189 return copy_to_mode_reg (SImode, target);
35190
35191 case IX86_BUILTIN_CLFLUSH:
35192 arg0 = CALL_EXPR_ARG (exp, 0);
35193 op0 = expand_normal (arg0);
35194 icode = CODE_FOR_sse2_clflush;
35195 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
35196 op0 = ix86_zero_extend_to_Pmode (op0);
35197
35198 emit_insn (gen_sse2_clflush (op0));
35199 return 0;
35200
35201 case IX86_BUILTIN_CLFLUSHOPT:
35202 arg0 = CALL_EXPR_ARG (exp, 0);
35203 op0 = expand_normal (arg0);
35204 icode = CODE_FOR_clflushopt;
35205 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
35206 op0 = ix86_zero_extend_to_Pmode (op0);
35207
35208 emit_insn (gen_clflushopt (op0));
35209 return 0;
35210
35211 case IX86_BUILTIN_MONITOR:
35212 arg0 = CALL_EXPR_ARG (exp, 0);
35213 arg1 = CALL_EXPR_ARG (exp, 1);
35214 arg2 = CALL_EXPR_ARG (exp, 2);
35215 op0 = expand_normal (arg0);
35216 op1 = expand_normal (arg1);
35217 op2 = expand_normal (arg2);
35218 if (!REG_P (op0))
35219 op0 = ix86_zero_extend_to_Pmode (op0);
35220 if (!REG_P (op1))
35221 op1 = copy_to_mode_reg (SImode, op1);
35222 if (!REG_P (op2))
35223 op2 = copy_to_mode_reg (SImode, op2);
35224 emit_insn (ix86_gen_monitor (op0, op1, op2));
35225 return 0;
35226
35227 case IX86_BUILTIN_MWAIT:
35228 arg0 = CALL_EXPR_ARG (exp, 0);
35229 arg1 = CALL_EXPR_ARG (exp, 1);
35230 op0 = expand_normal (arg0);
35231 op1 = expand_normal (arg1);
35232 if (!REG_P (op0))
35233 op0 = copy_to_mode_reg (SImode, op0);
35234 if (!REG_P (op1))
35235 op1 = copy_to_mode_reg (SImode, op1);
35236 emit_insn (gen_sse3_mwait (op0, op1));
35237 return 0;
35238
35239 case IX86_BUILTIN_VEC_INIT_V2SI:
35240 case IX86_BUILTIN_VEC_INIT_V4HI:
35241 case IX86_BUILTIN_VEC_INIT_V8QI:
35242 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
35243
35244 case IX86_BUILTIN_VEC_EXT_V2DF:
35245 case IX86_BUILTIN_VEC_EXT_V2DI:
35246 case IX86_BUILTIN_VEC_EXT_V4SF:
35247 case IX86_BUILTIN_VEC_EXT_V4SI:
35248 case IX86_BUILTIN_VEC_EXT_V8HI:
35249 case IX86_BUILTIN_VEC_EXT_V2SI:
35250 case IX86_BUILTIN_VEC_EXT_V4HI:
35251 case IX86_BUILTIN_VEC_EXT_V16QI:
35252 return ix86_expand_vec_ext_builtin (exp, target);
35253
35254 case IX86_BUILTIN_VEC_SET_V2DI:
35255 case IX86_BUILTIN_VEC_SET_V4SF:
35256 case IX86_BUILTIN_VEC_SET_V4SI:
35257 case IX86_BUILTIN_VEC_SET_V8HI:
35258 case IX86_BUILTIN_VEC_SET_V4HI:
35259 case IX86_BUILTIN_VEC_SET_V16QI:
35260 return ix86_expand_vec_set_builtin (exp);
35261
35262 case IX86_BUILTIN_INFQ:
35263 case IX86_BUILTIN_HUGE_VALQ:
35264 {
35265 REAL_VALUE_TYPE inf;
35266 rtx tmp;
35267
35268 real_inf (&inf);
35269 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
35270
35271 tmp = validize_mem (force_const_mem (mode, tmp));
35272
35273 if (target == 0)
35274 target = gen_reg_rtx (mode);
35275
35276 emit_move_insn (target, tmp);
35277 return target;
35278 }
35279
35280 case IX86_BUILTIN_RDPMC:
35281 case IX86_BUILTIN_RDTSC:
35282 case IX86_BUILTIN_RDTSCP:
35283
35284 op0 = gen_reg_rtx (DImode);
35285 op1 = gen_reg_rtx (DImode);
35286
35287 if (fcode == IX86_BUILTIN_RDPMC)
35288 {
35289 arg0 = CALL_EXPR_ARG (exp, 0);
35290 op2 = expand_normal (arg0);
35291 if (!register_operand (op2, SImode))
35292 op2 = copy_to_mode_reg (SImode, op2);
35293
35294 insn = (TARGET_64BIT
35295 ? gen_rdpmc_rex64 (op0, op1, op2)
35296 : gen_rdpmc (op0, op2));
35297 emit_insn (insn);
35298 }
35299 else if (fcode == IX86_BUILTIN_RDTSC)
35300 {
35301 insn = (TARGET_64BIT
35302 ? gen_rdtsc_rex64 (op0, op1)
35303 : gen_rdtsc (op0));
35304 emit_insn (insn);
35305 }
35306 else
35307 {
35308 op2 = gen_reg_rtx (SImode);
35309
35310 insn = (TARGET_64BIT
35311 ? gen_rdtscp_rex64 (op0, op1, op2)
35312 : gen_rdtscp (op0, op2));
35313 emit_insn (insn);
35314
35315 arg0 = CALL_EXPR_ARG (exp, 0);
35316 op4 = expand_normal (arg0);
35317 if (!address_operand (op4, VOIDmode))
35318 {
35319 op4 = convert_memory_address (Pmode, op4);
35320 op4 = copy_addr_to_reg (op4);
35321 }
35322 emit_move_insn (gen_rtx_MEM (SImode, op4), op2);
35323 }
35324
35325 if (target == 0)
35326 {
35327 /* mode is VOIDmode if __builtin_rd* has been called
35328 without lhs. */
35329 if (mode == VOIDmode)
35330 return target;
35331 target = gen_reg_rtx (mode);
35332 }
35333
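  /* On 64-bit targets the counter comes back as two 32-bit halves in
     OP0 and OP1; shift the high half up and OR it in to form the full
     64-bit result.  */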
35334 if (TARGET_64BIT)
35335 {
35336 op1 = expand_simple_binop (DImode, ASHIFT, op1, GEN_INT (32),
35337 op1, 1, OPTAB_DIRECT);
35338 op0 = expand_simple_binop (DImode, IOR, op0, op1,
35339 op0, 1, OPTAB_DIRECT);
35340 }
35341
35342 emit_move_insn (target, op0);
35343 return target;
35344
35345 case IX86_BUILTIN_FXSAVE:
35346 case IX86_BUILTIN_FXRSTOR:
35347 case IX86_BUILTIN_FXSAVE64:
35348 case IX86_BUILTIN_FXRSTOR64:
35349 case IX86_BUILTIN_FNSTENV:
35350 case IX86_BUILTIN_FLDENV:
35351 mode0 = BLKmode;
35352 switch (fcode)
35353 {
35354 case IX86_BUILTIN_FXSAVE:
35355 icode = CODE_FOR_fxsave;
35356 break;
35357 case IX86_BUILTIN_FXRSTOR:
35358 icode = CODE_FOR_fxrstor;
35359 break;
35360 case IX86_BUILTIN_FXSAVE64:
35361 icode = CODE_FOR_fxsave64;
35362 break;
35363 case IX86_BUILTIN_FXRSTOR64:
35364 icode = CODE_FOR_fxrstor64;
35365 break;
35366 case IX86_BUILTIN_FNSTENV:
35367 icode = CODE_FOR_fnstenv;
35368 break;
35369 case IX86_BUILTIN_FLDENV:
35370 icode = CODE_FOR_fldenv;
35371 break;
35372 default:
35373 gcc_unreachable ();
35374 }
35375
35376 arg0 = CALL_EXPR_ARG (exp, 0);
35377 op0 = expand_normal (arg0);
35378
35379 if (!address_operand (op0, VOIDmode))
35380 {
35381 op0 = convert_memory_address (Pmode, op0);
35382 op0 = copy_addr_to_reg (op0);
35383 }
35384 op0 = gen_rtx_MEM (mode0, op0);
35385
35386 pat = GEN_FCN (icode) (op0);
35387 if (pat)
35388 emit_insn (pat);
35389 return 0;
35390
35391 case IX86_BUILTIN_XSAVE:
35392 case IX86_BUILTIN_XRSTOR:
35393 case IX86_BUILTIN_XSAVE64:
35394 case IX86_BUILTIN_XRSTOR64:
35395 case IX86_BUILTIN_XSAVEOPT:
35396 case IX86_BUILTIN_XSAVEOPT64:
35397 case IX86_BUILTIN_XSAVES:
35398 case IX86_BUILTIN_XRSTORS:
35399 case IX86_BUILTIN_XSAVES64:
35400 case IX86_BUILTIN_XRSTORS64:
35401 case IX86_BUILTIN_XSAVEC:
35402 case IX86_BUILTIN_XSAVEC64:
35403 arg0 = CALL_EXPR_ARG (exp, 0);
35404 arg1 = CALL_EXPR_ARG (exp, 1);
35405 op0 = expand_normal (arg0);
35406 op1 = expand_normal (arg1);
35407
35408 if (!address_operand (op0, VOIDmode))
35409 {
35410 op0 = convert_memory_address (Pmode, op0);
35411 op0 = copy_addr_to_reg (op0);
35412 }
35413 op0 = gen_rtx_MEM (BLKmode, op0);
35414
35415 op1 = force_reg (DImode, op1);
35416
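      /* The XSAVE family takes the 64-bit feature mask in EDX:EAX, so
	 on 64-bit targets split OP1 into two 32-bit halves for the
	 pattern.  */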
35417 if (TARGET_64BIT)
35418 {
35419 op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32),
35420 NULL, 1, OPTAB_DIRECT);
35421 switch (fcode)
35422 {
35423 case IX86_BUILTIN_XSAVE:
35424 icode = CODE_FOR_xsave_rex64;
35425 break;
35426 case IX86_BUILTIN_XRSTOR:
35427 icode = CODE_FOR_xrstor_rex64;
35428 break;
35429 case IX86_BUILTIN_XSAVE64:
35430 icode = CODE_FOR_xsave64;
35431 break;
35432 case IX86_BUILTIN_XRSTOR64:
35433 icode = CODE_FOR_xrstor64;
35434 break;
35435 case IX86_BUILTIN_XSAVEOPT:
35436 icode = CODE_FOR_xsaveopt_rex64;
35437 break;
35438 case IX86_BUILTIN_XSAVEOPT64:
35439 icode = CODE_FOR_xsaveopt64;
35440 break;
35441 case IX86_BUILTIN_XSAVES:
35442 icode = CODE_FOR_xsaves_rex64;
35443 break;
35444 case IX86_BUILTIN_XRSTORS:
35445 icode = CODE_FOR_xrstors_rex64;
35446 break;
35447 case IX86_BUILTIN_XSAVES64:
35448 icode = CODE_FOR_xsaves64;
35449 break;
35450 case IX86_BUILTIN_XRSTORS64:
35451 icode = CODE_FOR_xrstors64;
35452 break;
35453 case IX86_BUILTIN_XSAVEC:
35454 icode = CODE_FOR_xsavec_rex64;
35455 break;
35456 case IX86_BUILTIN_XSAVEC64:
35457 icode = CODE_FOR_xsavec64;
35458 break;
35459 default:
35460 gcc_unreachable ();
35461 }
35462
35463 op2 = gen_lowpart (SImode, op2);
35464 op1 = gen_lowpart (SImode, op1);
35465 pat = GEN_FCN (icode) (op0, op1, op2);
35466 }
35467 else
35468 {
35469 switch (fcode)
35470 {
35471 case IX86_BUILTIN_XSAVE:
35472 icode = CODE_FOR_xsave;
35473 break;
35474 case IX86_BUILTIN_XRSTOR:
35475 icode = CODE_FOR_xrstor;
35476 break;
35477 case IX86_BUILTIN_XSAVEOPT:
35478 icode = CODE_FOR_xsaveopt;
35479 break;
35480 case IX86_BUILTIN_XSAVES:
35481 icode = CODE_FOR_xsaves;
35482 break;
35483 case IX86_BUILTIN_XRSTORS:
35484 icode = CODE_FOR_xrstors;
35485 break;
35486 case IX86_BUILTIN_XSAVEC:
35487 icode = CODE_FOR_xsavec;
35488 break;
35489 default:
35490 gcc_unreachable ();
35491 }
35492 pat = GEN_FCN (icode) (op0, op1);
35493 }
35494
35495 if (pat)
35496 emit_insn (pat);
35497 return 0;
35498
35499 case IX86_BUILTIN_LLWPCB:
35500 arg0 = CALL_EXPR_ARG (exp, 0);
35501 op0 = expand_normal (arg0);
35502 icode = CODE_FOR_lwp_llwpcb;
35503 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
35504 op0 = ix86_zero_extend_to_Pmode (op0);
35505 emit_insn (gen_lwp_llwpcb (op0));
35506 return 0;
35507
35508 case IX86_BUILTIN_SLWPCB:
35509 icode = CODE_FOR_lwp_slwpcb;
35510 if (!target
35511 || !insn_data[icode].operand[0].predicate (target, Pmode))
35512 target = gen_reg_rtx (Pmode);
35513 emit_insn (gen_lwp_slwpcb (target));
35514 return target;
35515
35516 case IX86_BUILTIN_BEXTRI32:
35517 case IX86_BUILTIN_BEXTRI64:
35518 arg0 = CALL_EXPR_ARG (exp, 0);
35519 arg1 = CALL_EXPR_ARG (exp, 1);
35520 op0 = expand_normal (arg0);
35521 op1 = expand_normal (arg1);
35522 icode = (fcode == IX86_BUILTIN_BEXTRI32
35523 ? CODE_FOR_tbm_bextri_si
35524 : CODE_FOR_tbm_bextri_di);
35525 if (!CONST_INT_P (op1))
35526 {
35527 error ("last argument must be an immediate");
35528 return const0_rtx;
35529 }
35530 else
35531 {
35532 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
35533 unsigned char lsb_index = INTVAL (op1) & 0xFF;
35534 op1 = GEN_INT (length);
35535 op2 = GEN_INT (lsb_index);
35536 pat = GEN_FCN (icode) (target, op0, op1, op2);
35537 if (pat)
35538 emit_insn (pat);
35539 return target;
35540 }
35541
35542 case IX86_BUILTIN_RDRAND16_STEP:
35543 icode = CODE_FOR_rdrandhi_1;
35544 mode0 = HImode;
35545 goto rdrand_step;
35546
35547 case IX86_BUILTIN_RDRAND32_STEP:
35548 icode = CODE_FOR_rdrandsi_1;
35549 mode0 = SImode;
35550 goto rdrand_step;
35551
35552 case IX86_BUILTIN_RDRAND64_STEP:
35553 icode = CODE_FOR_rdranddi_1;
35554 mode0 = DImode;
35555
35556 rdrand_step:
35557 op0 = gen_reg_rtx (mode0);
35558 emit_insn (GEN_FCN (icode) (op0));
35559
35560 arg0 = CALL_EXPR_ARG (exp, 0);
35561 op1 = expand_normal (arg0);
35562 if (!address_operand (op1, VOIDmode))
35563 {
35564 op1 = convert_memory_address (Pmode, op1);
35565 op1 = copy_addr_to_reg (op1);
35566 }
35567 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
35568
35569 op1 = gen_reg_rtx (SImode);
35570 emit_move_insn (op1, CONST1_RTX (SImode));
35571
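      /* RDRAND clears CF and zeroes the destination register when no
	 random number is available, so selecting the destination value
	 when CF is clear and the constant 1 otherwise yields the
	 builtin's 0/1 success result.  */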
35572 /* Emit SImode conditional move. */
35573 if (mode0 == HImode)
35574 {
35575 op2 = gen_reg_rtx (SImode);
35576 emit_insn (gen_zero_extendhisi2 (op2, op0));
35577 }
35578 else if (mode0 == SImode)
35579 op2 = op0;
35580 else
35581 op2 = gen_rtx_SUBREG (SImode, op0, 0);
35582
35583 if (target == 0
35584 || !register_operand (target, SImode))
35585 target = gen_reg_rtx (SImode);
35586
35587 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
35588 const0_rtx);
35589 emit_insn (gen_rtx_SET (VOIDmode, target,
35590 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
35591 return target;
35592
35593 case IX86_BUILTIN_RDSEED16_STEP:
35594 icode = CODE_FOR_rdseedhi_1;
35595 mode0 = HImode;
35596 goto rdseed_step;
35597
35598 case IX86_BUILTIN_RDSEED32_STEP:
35599 icode = CODE_FOR_rdseedsi_1;
35600 mode0 = SImode;
35601 goto rdseed_step;
35602
35603 case IX86_BUILTIN_RDSEED64_STEP:
35604 icode = CODE_FOR_rdseeddi_1;
35605 mode0 = DImode;
35606
35607 rdseed_step:
35608 op0 = gen_reg_rtx (mode0);
35609 emit_insn (GEN_FCN (icode) (op0));
35610
35611 arg0 = CALL_EXPR_ARG (exp, 0);
35612 op1 = expand_normal (arg0);
35613 if (!address_operand (op1, VOIDmode))
35614 {
35615 op1 = convert_memory_address (Pmode, op1);
35616 op1 = copy_addr_to_reg (op1);
35617 }
35618 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
35619
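      /* RDSEED sets CF on success; materialize CF via an LTU test of
	 the carry flag and zero-extend it to form the 0/1 return value.  */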
35620 op2 = gen_reg_rtx (QImode);
35621
35622 pat = gen_rtx_LTU (QImode, gen_rtx_REG (CCCmode, FLAGS_REG),
35623 const0_rtx);
35624 emit_insn (gen_rtx_SET (VOIDmode, op2, pat));
35625
35626 if (target == 0
35627 || !register_operand (target, SImode))
35628 target = gen_reg_rtx (SImode);
35629
35630 emit_insn (gen_zero_extendqisi2 (target, op2));
35631 return target;
35632
35633 case IX86_BUILTIN_SBB32:
35634 icode = CODE_FOR_subsi3_carry;
35635 mode0 = SImode;
35636 goto addcarryx;
35637
35638 case IX86_BUILTIN_SBB64:
35639 icode = CODE_FOR_subdi3_carry;
35640 mode0 = DImode;
35641 goto addcarryx;
35642
35643 case IX86_BUILTIN_ADDCARRYX32:
35644 icode = TARGET_ADX ? CODE_FOR_adcxsi3 : CODE_FOR_addsi3_carry;
35645 mode0 = SImode;
35646 goto addcarryx;
35647
35648 case IX86_BUILTIN_ADDCARRYX64:
35649 icode = TARGET_ADX ? CODE_FOR_adcxdi3 : CODE_FOR_adddi3_carry;
35650 mode0 = DImode;
35651
35652 addcarryx:
35653 arg0 = CALL_EXPR_ARG (exp, 0); /* unsigned char c_in. */
35654 arg1 = CALL_EXPR_ARG (exp, 1); /* unsigned int src1. */
35655 arg2 = CALL_EXPR_ARG (exp, 2); /* unsigned int src2. */
35656 arg3 = CALL_EXPR_ARG (exp, 3); /* unsigned int *sum_out. */
35657
35658 op0 = gen_reg_rtx (QImode);
35659
35660 /* Generate CF from input operand. */
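      /* Adding all-ones to the carry-in byte sets the carry flag
	 exactly when that byte is nonzero.  */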
35661 op1 = expand_normal (arg0);
35662 op1 = copy_to_mode_reg (QImode, convert_to_mode (QImode, op1, 1));
35663 emit_insn (gen_addqi3_cc (op0, op1, constm1_rtx));
35664
35665       /* Generate the add-with-carry (or subtract-with-borrow for the
	 SBB builtins) instruction to compute X+Y+CF.  */
35666 op2 = expand_normal (arg1);
35667 op3 = expand_normal (arg2);
35668
35669 if (!REG_P (op2))
35670 op2 = copy_to_mode_reg (mode0, op2);
35671 if (!REG_P (op3))
35672 op3 = copy_to_mode_reg (mode0, op3);
35673
35674 op0 = gen_reg_rtx (mode0);
35675
35676 op4 = gen_rtx_REG (CCCmode, FLAGS_REG);
35677 pat = gen_rtx_LTU (VOIDmode, op4, const0_rtx);
35678 emit_insn (GEN_FCN (icode) (op0, op2, op3, op4, pat));
35679
35680 /* Store the result. */
35681 op4 = expand_normal (arg3);
35682 if (!address_operand (op4, VOIDmode))
35683 {
35684 op4 = convert_memory_address (Pmode, op4);
35685 op4 = copy_addr_to_reg (op4);
35686 }
35687 emit_move_insn (gen_rtx_MEM (mode0, op4), op0);
35688
35689 /* Return current CF value. */
35690 if (target == 0)
35691 target = gen_reg_rtx (QImode);
35692
35693 PUT_MODE (pat, QImode);
35694 emit_insn (gen_rtx_SET (VOIDmode, target, pat));
35695 return target;
35696
35697 case IX86_BUILTIN_READ_FLAGS:
35698 emit_insn (gen_push (gen_rtx_REG (word_mode, FLAGS_REG)));
35699
35700 if (optimize
35701 || target == NULL_RTX
35702 || !nonimmediate_operand (target, word_mode)
35703 || GET_MODE (target) != word_mode)
35704 target = gen_reg_rtx (word_mode);
35705
35706 emit_insn (gen_pop (target));
35707 return target;
35708
35709 case IX86_BUILTIN_WRITE_FLAGS:
35710
35711 arg0 = CALL_EXPR_ARG (exp, 0);
35712 op0 = expand_normal (arg0);
35713 if (!general_no_elim_operand (op0, word_mode))
35714 op0 = copy_to_mode_reg (word_mode, op0);
35715
35716 emit_insn (gen_push (op0));
35717 emit_insn (gen_pop (gen_rtx_REG (word_mode, FLAGS_REG)));
35718 return 0;
35719
35720 case IX86_BUILTIN_KORTESTC16:
35721 icode = CODE_FOR_kortestchi;
35722 mode0 = HImode;
35723 mode1 = CCCmode;
35724 goto kortest;
35725
35726 case IX86_BUILTIN_KORTESTZ16:
35727 icode = CODE_FOR_kortestzhi;
35728 mode0 = HImode;
35729 mode1 = CCZmode;
35730
35731 kortest:
35732 arg0 = CALL_EXPR_ARG (exp, 0); /* Mask reg src1. */
35733 arg1 = CALL_EXPR_ARG (exp, 1); /* Mask reg src2. */
35734 op0 = expand_normal (arg0);
35735 op1 = expand_normal (arg1);
35736
35737 op0 = copy_to_reg (op0);
35738 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
35739 op1 = copy_to_reg (op1);
35740 op1 = simplify_gen_subreg (mode0, op1, GET_MODE (op1), 0);
35741
35742 target = gen_reg_rtx (QImode);
35743 emit_insn (gen_rtx_SET (mode0, target, const0_rtx));
35744
35745 /* Emit kortest. */
35746 emit_insn (GEN_FCN (icode) (op0, op1));
35747 /* And use setcc to return result from flags. */
35748 ix86_expand_setcc (target, EQ,
35749 gen_rtx_REG (mode1, FLAGS_REG), const0_rtx);
35750 return target;
35751
35752 case IX86_BUILTIN_GATHERSIV2DF:
35753 icode = CODE_FOR_avx2_gathersiv2df;
35754 goto gather_gen;
35755 case IX86_BUILTIN_GATHERSIV4DF:
35756 icode = CODE_FOR_avx2_gathersiv4df;
35757 goto gather_gen;
35758 case IX86_BUILTIN_GATHERDIV2DF:
35759 icode = CODE_FOR_avx2_gatherdiv2df;
35760 goto gather_gen;
35761 case IX86_BUILTIN_GATHERDIV4DF:
35762 icode = CODE_FOR_avx2_gatherdiv4df;
35763 goto gather_gen;
35764 case IX86_BUILTIN_GATHERSIV4SF:
35765 icode = CODE_FOR_avx2_gathersiv4sf;
35766 goto gather_gen;
35767 case IX86_BUILTIN_GATHERSIV8SF:
35768 icode = CODE_FOR_avx2_gathersiv8sf;
35769 goto gather_gen;
35770 case IX86_BUILTIN_GATHERDIV4SF:
35771 icode = CODE_FOR_avx2_gatherdiv4sf;
35772 goto gather_gen;
35773 case IX86_BUILTIN_GATHERDIV8SF:
35774 icode = CODE_FOR_avx2_gatherdiv8sf;
35775 goto gather_gen;
35776 case IX86_BUILTIN_GATHERSIV2DI:
35777 icode = CODE_FOR_avx2_gathersiv2di;
35778 goto gather_gen;
35779 case IX86_BUILTIN_GATHERSIV4DI:
35780 icode = CODE_FOR_avx2_gathersiv4di;
35781 goto gather_gen;
35782 case IX86_BUILTIN_GATHERDIV2DI:
35783 icode = CODE_FOR_avx2_gatherdiv2di;
35784 goto gather_gen;
35785 case IX86_BUILTIN_GATHERDIV4DI:
35786 icode = CODE_FOR_avx2_gatherdiv4di;
35787 goto gather_gen;
35788 case IX86_BUILTIN_GATHERSIV4SI:
35789 icode = CODE_FOR_avx2_gathersiv4si;
35790 goto gather_gen;
35791 case IX86_BUILTIN_GATHERSIV8SI:
35792 icode = CODE_FOR_avx2_gathersiv8si;
35793 goto gather_gen;
35794 case IX86_BUILTIN_GATHERDIV4SI:
35795 icode = CODE_FOR_avx2_gatherdiv4si;
35796 goto gather_gen;
35797 case IX86_BUILTIN_GATHERDIV8SI:
35798 icode = CODE_FOR_avx2_gatherdiv8si;
35799 goto gather_gen;
35800 case IX86_BUILTIN_GATHERALTSIV4DF:
35801 icode = CODE_FOR_avx2_gathersiv4df;
35802 goto gather_gen;
35803 case IX86_BUILTIN_GATHERALTDIV8SF:
35804 icode = CODE_FOR_avx2_gatherdiv8sf;
35805 goto gather_gen;
35806 case IX86_BUILTIN_GATHERALTSIV4DI:
35807 icode = CODE_FOR_avx2_gathersiv4di;
35808 goto gather_gen;
35809 case IX86_BUILTIN_GATHERALTDIV8SI:
35810 icode = CODE_FOR_avx2_gatherdiv8si;
35811 goto gather_gen;
35812 case IX86_BUILTIN_GATHER3SIV16SF:
35813 icode = CODE_FOR_avx512f_gathersiv16sf;
35814 goto gather_gen;
35815 case IX86_BUILTIN_GATHER3SIV8DF:
35816 icode = CODE_FOR_avx512f_gathersiv8df;
35817 goto gather_gen;
35818 case IX86_BUILTIN_GATHER3DIV16SF:
35819 icode = CODE_FOR_avx512f_gatherdiv16sf;
35820 goto gather_gen;
35821 case IX86_BUILTIN_GATHER3DIV8DF:
35822 icode = CODE_FOR_avx512f_gatherdiv8df;
35823 goto gather_gen;
35824 case IX86_BUILTIN_GATHER3SIV16SI:
35825 icode = CODE_FOR_avx512f_gathersiv16si;
35826 goto gather_gen;
35827 case IX86_BUILTIN_GATHER3SIV8DI:
35828 icode = CODE_FOR_avx512f_gathersiv8di;
35829 goto gather_gen;
35830 case IX86_BUILTIN_GATHER3DIV16SI:
35831 icode = CODE_FOR_avx512f_gatherdiv16si;
35832 goto gather_gen;
35833 case IX86_BUILTIN_GATHER3DIV8DI:
35834 icode = CODE_FOR_avx512f_gatherdiv8di;
35835 goto gather_gen;
35836 case IX86_BUILTIN_GATHER3ALTSIV8DF:
35837 icode = CODE_FOR_avx512f_gathersiv8df;
35838 goto gather_gen;
35839 case IX86_BUILTIN_GATHER3ALTDIV16SF:
35840 icode = CODE_FOR_avx512f_gatherdiv16sf;
35841 goto gather_gen;
35842 case IX86_BUILTIN_GATHER3ALTSIV8DI:
35843 icode = CODE_FOR_avx512f_gathersiv8di;
35844 goto gather_gen;
35845 case IX86_BUILTIN_GATHER3ALTDIV16SI:
35846 icode = CODE_FOR_avx512f_gatherdiv16si;
35847 goto gather_gen;
35848 case IX86_BUILTIN_SCATTERSIV16SF:
35849 icode = CODE_FOR_avx512f_scattersiv16sf;
35850 goto scatter_gen;
35851 case IX86_BUILTIN_SCATTERSIV8DF:
35852 icode = CODE_FOR_avx512f_scattersiv8df;
35853 goto scatter_gen;
35854 case IX86_BUILTIN_SCATTERDIV16SF:
35855 icode = CODE_FOR_avx512f_scatterdiv16sf;
35856 goto scatter_gen;
35857 case IX86_BUILTIN_SCATTERDIV8DF:
35858 icode = CODE_FOR_avx512f_scatterdiv8df;
35859 goto scatter_gen;
35860 case IX86_BUILTIN_SCATTERSIV16SI:
35861 icode = CODE_FOR_avx512f_scattersiv16si;
35862 goto scatter_gen;
35863 case IX86_BUILTIN_SCATTERSIV8DI:
35864 icode = CODE_FOR_avx512f_scattersiv8di;
35865 goto scatter_gen;
35866 case IX86_BUILTIN_SCATTERDIV16SI:
35867 icode = CODE_FOR_avx512f_scatterdiv16si;
35868 goto scatter_gen;
35869 case IX86_BUILTIN_SCATTERDIV8DI:
35870 icode = CODE_FOR_avx512f_scatterdiv8di;
35871 goto scatter_gen;
35872
35873 case IX86_BUILTIN_GATHERPFDPD:
35874 icode = CODE_FOR_avx512pf_gatherpfv8sidf;
35875 goto vec_prefetch_gen;
35876 case IX86_BUILTIN_GATHERPFDPS:
35877 icode = CODE_FOR_avx512pf_gatherpfv16sisf;
35878 goto vec_prefetch_gen;
35879 case IX86_BUILTIN_GATHERPFQPD:
35880 icode = CODE_FOR_avx512pf_gatherpfv8didf;
35881 goto vec_prefetch_gen;
35882 case IX86_BUILTIN_GATHERPFQPS:
35883 icode = CODE_FOR_avx512pf_gatherpfv8disf;
35884 goto vec_prefetch_gen;
35885 case IX86_BUILTIN_SCATTERPFDPD:
35886 icode = CODE_FOR_avx512pf_scatterpfv8sidf;
35887 goto vec_prefetch_gen;
35888 case IX86_BUILTIN_SCATTERPFDPS:
35889 icode = CODE_FOR_avx512pf_scatterpfv16sisf;
35890 goto vec_prefetch_gen;
35891 case IX86_BUILTIN_SCATTERPFQPD:
35892 icode = CODE_FOR_avx512pf_scatterpfv8didf;
35893 goto vec_prefetch_gen;
35894 case IX86_BUILTIN_SCATTERPFQPS:
35895 icode = CODE_FOR_avx512pf_scatterpfv8disf;
35896 goto vec_prefetch_gen;
35897
35898 gather_gen:
35899 rtx half;
35900 rtx (*gen) (rtx, rtx);
35901
35902 arg0 = CALL_EXPR_ARG (exp, 0);
35903 arg1 = CALL_EXPR_ARG (exp, 1);
35904 arg2 = CALL_EXPR_ARG (exp, 2);
35905 arg3 = CALL_EXPR_ARG (exp, 3);
35906 arg4 = CALL_EXPR_ARG (exp, 4);
35907 op0 = expand_normal (arg0);
35908 op1 = expand_normal (arg1);
35909 op2 = expand_normal (arg2);
35910 op3 = expand_normal (arg3);
35911 op4 = expand_normal (arg4);
35912 /* Note the arg order is different from the operand order. */
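/* Roughly, as the operand checks below imply: op0 is the pass-through
   source, op1 the base address, op2 the index vector, op3 the mask and
   op4 the scale; they map to insn operands 1 through 5, operand 0 being
   the gather destination.  */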
35913 mode0 = insn_data[icode].operand[1].mode;
35914 mode2 = insn_data[icode].operand[3].mode;
35915 mode3 = insn_data[icode].operand[4].mode;
35916 mode4 = insn_data[icode].operand[5].mode;
35917
35918 if (target == NULL_RTX
35919 || GET_MODE (target) != insn_data[icode].operand[0].mode
35920 || !insn_data[icode].operand[0].predicate (target,
35921 GET_MODE (target)))
35922 subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
35923 else
35924 subtarget = target;
35925
35926 switch (fcode)
35927 {
35928 case IX86_BUILTIN_GATHER3ALTSIV8DF:
35929 case IX86_BUILTIN_GATHER3ALTSIV8DI:
35930 half = gen_reg_rtx (V8SImode);
35931 if (!nonimmediate_operand (op2, V16SImode))
35932 op2 = copy_to_mode_reg (V16SImode, op2);
35933 emit_insn (gen_vec_extract_lo_v16si (half, op2));
35934 op2 = half;
35935 break;
35936 case IX86_BUILTIN_GATHERALTSIV4DF:
35937 case IX86_BUILTIN_GATHERALTSIV4DI:
35938 half = gen_reg_rtx (V4SImode);
35939 if (!nonimmediate_operand (op2, V8SImode))
35940 op2 = copy_to_mode_reg (V8SImode, op2);
35941 emit_insn (gen_vec_extract_lo_v8si (half, op2));
35942 op2 = half;
35943 break;
35944 case IX86_BUILTIN_GATHER3ALTDIV16SF:
35945 case IX86_BUILTIN_GATHER3ALTDIV16SI:
35946 half = gen_reg_rtx (mode0);
35947 if (mode0 == V8SFmode)
35948 gen = gen_vec_extract_lo_v16sf;
35949 else
35950 gen = gen_vec_extract_lo_v16si;
35951 if (!nonimmediate_operand (op0, GET_MODE (op0)))
35952 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
35953 emit_insn (gen (half, op0));
35954 op0 = half;
35955 if (GET_MODE (op3) != VOIDmode)
35956 {
35957 if (!nonimmediate_operand (op3, GET_MODE (op3)))
35958 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
35959 emit_insn (gen (half, op3));
35960 op3 = half;
35961 }
35962 break;
35963 case IX86_BUILTIN_GATHERALTDIV8SF:
35964 case IX86_BUILTIN_GATHERALTDIV8SI:
35965 half = gen_reg_rtx (mode0);
35966 if (mode0 == V4SFmode)
35967 gen = gen_vec_extract_lo_v8sf;
35968 else
35969 gen = gen_vec_extract_lo_v8si;
35970 if (!nonimmediate_operand (op0, GET_MODE (op0)))
35971 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
35972 emit_insn (gen (half, op0));
35973 op0 = half;
35974 if (GET_MODE (op3) != VOIDmode)
35975 {
35976 if (!nonimmediate_operand (op3, GET_MODE (op3)))
35977 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
35978 emit_insn (gen (half, op3));
35979 op3 = half;
35980 }
35981 break;
35982 default:
35983 break;
35984 }
35985
35986 /* Force the memory operand to use only a base register here. But we
35987 don't want to do it on the memory operands of other builtin
35988 functions. */
35989 op1 = ix86_zero_extend_to_Pmode (op1);
35990
35991 if (!insn_data[icode].operand[1].predicate (op0, mode0))
35992 op0 = copy_to_mode_reg (mode0, op0);
35993 if (!insn_data[icode].operand[2].predicate (op1, Pmode))
35994 op1 = copy_to_mode_reg (Pmode, op1);
35995 if (!insn_data[icode].operand[3].predicate (op2, mode2))
35996 op2 = copy_to_mode_reg (mode2, op2);
35997 if (GET_MODE (op3) == mode3 || GET_MODE (op3) == VOIDmode)
35998 {
35999 if (!insn_data[icode].operand[4].predicate (op3, mode3))
36000 op3 = copy_to_mode_reg (mode3, op3);
36001 }
36002 else
36003 {
36004 op3 = copy_to_reg (op3);
36005 op3 = simplify_gen_subreg (mode3, op3, GET_MODE (op3), 0);
36006 }
36007 if (!insn_data[icode].operand[5].predicate (op4, mode4))
36008 {
36009 error ("the last argument must be scale 1, 2, 4, 8");
36010 return const0_rtx;
36011 }
36012
36013 /* Optimize. If mask is known to have all high bits set,
36014 replace op0 with pc_rtx to signal that the instruction
36015 overwrites the whole destination and doesn't use its
36016 previous contents. */
36017 if (optimize)
36018 {
36019 if (TREE_CODE (arg3) == INTEGER_CST)
36020 {
36021 if (integer_all_onesp (arg3))
36022 op0 = pc_rtx;
36023 }
36024 else if (TREE_CODE (arg3) == VECTOR_CST)
36025 {
36026 unsigned int negative = 0;
36027 for (i = 0; i < VECTOR_CST_NELTS (arg3); ++i)
36028 {
36029 tree cst = VECTOR_CST_ELT (arg3, i);
36030 if (TREE_CODE (cst) == INTEGER_CST
36031 && tree_int_cst_sign_bit (cst))
36032 negative++;
36033 else if (TREE_CODE (cst) == REAL_CST
36034 && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
36035 negative++;
36036 }
36037 if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
36038 op0 = pc_rtx;
36039 }
36040 else if (TREE_CODE (arg3) == SSA_NAME
36041 && TREE_CODE (TREE_TYPE (arg3)) == VECTOR_TYPE)
36042 {
36043 /* Recognize also when mask is like:
36044 __v2df src = _mm_setzero_pd ();
36045 __v2df mask = _mm_cmpeq_pd (src, src);
36046 or
36047 __v8sf src = _mm256_setzero_ps ();
36048 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
36049 as that is a cheaper way to load all ones into
36050 a register than having to load a constant from
36051 memory. */
36052 gimple def_stmt = SSA_NAME_DEF_STMT (arg3);
36053 if (is_gimple_call (def_stmt))
36054 {
36055 tree fndecl = gimple_call_fndecl (def_stmt);
36056 if (fndecl
36057 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
36058 switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
36059 {
36060 case IX86_BUILTIN_CMPPD:
36061 case IX86_BUILTIN_CMPPS:
36062 case IX86_BUILTIN_CMPPD256:
36063 case IX86_BUILTIN_CMPPS256:
36064 if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
36065 break;
36066 /* FALLTHRU */
36067 case IX86_BUILTIN_CMPEQPD:
36068 case IX86_BUILTIN_CMPEQPS:
36069 if (initializer_zerop (gimple_call_arg (def_stmt, 0))
36070 && initializer_zerop (gimple_call_arg (def_stmt,
36071 1)))
36072 op0 = pc_rtx;
36073 break;
36074 default:
36075 break;
36076 }
36077 }
36078 }
36079 }
36080
36081 pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
36082 if (! pat)
36083 return const0_rtx;
36084 emit_insn (pat);
36085
36086 switch (fcode)
36087 {
36088 case IX86_BUILTIN_GATHER3DIV16SF:
36089 if (target == NULL_RTX)
36090 target = gen_reg_rtx (V8SFmode);
36091 emit_insn (gen_vec_extract_lo_v16sf (target, subtarget));
36092 break;
36093 case IX86_BUILTIN_GATHER3DIV16SI:
36094 if (target == NULL_RTX)
36095 target = gen_reg_rtx (V8SImode);
36096 emit_insn (gen_vec_extract_lo_v16si (target, subtarget));
36097 break;
36098 case IX86_BUILTIN_GATHERDIV8SF:
36099 if (target == NULL_RTX)
36100 target = gen_reg_rtx (V4SFmode);
36101 emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
36102 break;
36103 case IX86_BUILTIN_GATHERDIV8SI:
36104 if (target == NULL_RTX)
36105 target = gen_reg_rtx (V4SImode);
36106 emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
36107 break;
36108 default:
36109 target = subtarget;
36110 break;
36111 }
36112 return target;
36113
36114 scatter_gen:
36115 arg0 = CALL_EXPR_ARG (exp, 0);
36116 arg1 = CALL_EXPR_ARG (exp, 1);
36117 arg2 = CALL_EXPR_ARG (exp, 2);
36118 arg3 = CALL_EXPR_ARG (exp, 3);
36119 arg4 = CALL_EXPR_ARG (exp, 4);
36120 op0 = expand_normal (arg0);
36121 op1 = expand_normal (arg1);
36122 op2 = expand_normal (arg2);
36123 op3 = expand_normal (arg3);
36124 op4 = expand_normal (arg4);
36125 mode1 = insn_data[icode].operand[1].mode;
36126 mode2 = insn_data[icode].operand[2].mode;
36127 mode3 = insn_data[icode].operand[3].mode;
36128 mode4 = insn_data[icode].operand[4].mode;
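/* Roughly, per the predicate checks below: op0 is the base address,
   op1 the write mask, op2 the index vector, op3 the source vector and
   op4 the scale, mapping to insn operands 0 through 4.  */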
36129
36130 /* Force the memory operand to use only a base register here. But we
36131 don't want to do it on the memory operands of other builtin
36132 functions. */
36133 op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1));
36134
36135 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
36136 op0 = copy_to_mode_reg (Pmode, op0);
36137
36138 if (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
36139 {
36140 if (!insn_data[icode].operand[1].predicate (op1, mode1))
36141 op1 = copy_to_mode_reg (mode1, op1);
36142 }
36143 else
36144 {
36145 op1 = copy_to_reg (op1);
36146 op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0);
36147 }
36148
36149 if (!insn_data[icode].operand[2].predicate (op2, mode2))
36150 op2 = copy_to_mode_reg (mode2, op2);
36151
36152 if (!insn_data[icode].operand[3].predicate (op3, mode3))
36153 op3 = copy_to_mode_reg (mode3, op3);
36154
36155 if (!insn_data[icode].operand[4].predicate (op4, mode4))
36156 {
36157 error ("the last argument must be scale 1, 2, 4, 8");
36158 return const0_rtx;
36159 }
36160
36161 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
36162 if (! pat)
36163 return const0_rtx;
36164
36165 emit_insn (pat);
36166 return 0;
36167
36168 vec_prefetch_gen:
36169 arg0 = CALL_EXPR_ARG (exp, 0);
36170 arg1 = CALL_EXPR_ARG (exp, 1);
36171 arg2 = CALL_EXPR_ARG (exp, 2);
36172 arg3 = CALL_EXPR_ARG (exp, 3);
36173 arg4 = CALL_EXPR_ARG (exp, 4);
36174 op0 = expand_normal (arg0);
36175 op1 = expand_normal (arg1);
36176 op2 = expand_normal (arg2);
36177 op3 = expand_normal (arg3);
36178 op4 = expand_normal (arg4);
36179 mode0 = insn_data[icode].operand[0].mode;
36180 mode1 = insn_data[icode].operand[1].mode;
36181 mode3 = insn_data[icode].operand[3].mode;
36182 mode4 = insn_data[icode].operand[4].mode;
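/* Roughly, per the checks below: op0 is the mask, op1 the index vector,
   op2 the base address, op3 the scale and op4 the prefetch hint, mapping
   to insn operands 0 through 4.  */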
36183
36184 if (GET_MODE (op0) == mode0
36185 || (GET_MODE (op0) == VOIDmode && op0 != constm1_rtx))
36186 {
36187 if (!insn_data[icode].operand[0].predicate (op0, mode0))
36188 op0 = copy_to_mode_reg (mode0, op0);
36189 }
36190 else if (op0 != constm1_rtx)
36191 {
36192 op0 = copy_to_reg (op0);
36193 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
36194 }
36195
36196 if (!insn_data[icode].operand[1].predicate (op1, mode1))
36197 op1 = copy_to_mode_reg (mode1, op1);
36198
36199 /* Force the memory operand to use only a base register here. But we
36200 don't want to do it on the memory operands of other builtin
36201 functions. */
36202 op2 = force_reg (Pmode, convert_to_mode (Pmode, op2, 1));
36203
36204 if (!insn_data[icode].operand[2].predicate (op2, Pmode))
36205 op2 = copy_to_mode_reg (Pmode, op2);
36206
36207 if (!insn_data[icode].operand[3].predicate (op3, mode3))
36208 {
36209 error ("the fourth argument must be scale 1, 2, 4, 8");
36210 return const0_rtx;
36211 }
36212
36213 if (!insn_data[icode].operand[4].predicate (op4, mode4))
36214 {
36215 error ("incorrect hint operand");
36216 return const0_rtx;
36217 }
36218
36219 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
36220 if (! pat)
36221 return const0_rtx;
36222
36223 emit_insn (pat);
36224
36225 return 0;
36226
36227 case IX86_BUILTIN_XABORT:
36228 icode = CODE_FOR_xabort;
36229 arg0 = CALL_EXPR_ARG (exp, 0);
36230 op0 = expand_normal (arg0);
36231 mode0 = insn_data[icode].operand[0].mode;
36232 if (!insn_data[icode].operand[0].predicate (op0, mode0))
36233 {
36234 error ("the xabort's argument must be an 8-bit immediate");
36235 return const0_rtx;
36236 }
36237 emit_insn (gen_xabort (op0));
36238 return 0;
36239
36240 default:
36241 break;
36242 }
36243
36244 for (i = 0, d = bdesc_special_args;
36245 i < ARRAY_SIZE (bdesc_special_args);
36246 i++, d++)
36247 if (d->code == fcode)
36248 return ix86_expand_special_args_builtin (d, exp, target);
36249
36250 for (i = 0, d = bdesc_args;
36251 i < ARRAY_SIZE (bdesc_args);
36252 i++, d++)
36253 if (d->code == fcode)
36254 switch (fcode)
36255 {
36256 case IX86_BUILTIN_FABSQ:
36257 case IX86_BUILTIN_COPYSIGNQ:
36258 if (!TARGET_SSE)
36259 /* Emit a normal call if SSE isn't available. */
36260 return expand_call (exp, target, ignore);
36261 default:
36262 return ix86_expand_args_builtin (d, exp, target);
36263 }
36264
36265 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
36266 if (d->code == fcode)
36267 return ix86_expand_sse_comi (d, exp, target);
36268
36269 for (i = 0, d = bdesc_round_args; i < ARRAY_SIZE (bdesc_round_args); i++, d++)
36270 if (d->code == fcode)
36271 return ix86_expand_round_builtin (d, exp, target);
36272
36273 for (i = 0, d = bdesc_pcmpestr;
36274 i < ARRAY_SIZE (bdesc_pcmpestr);
36275 i++, d++)
36276 if (d->code == fcode)
36277 return ix86_expand_sse_pcmpestr (d, exp, target);
36278
36279 for (i = 0, d = bdesc_pcmpistr;
36280 i < ARRAY_SIZE (bdesc_pcmpistr);
36281 i++, d++)
36282 if (d->code == fcode)
36283 return ix86_expand_sse_pcmpistr (d, exp, target);
36284
36285 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
36286 if (d->code == fcode)
36287 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
36288 (enum ix86_builtin_func_type)
36289 d->flag, d->comparison);
36290
36291 gcc_unreachable ();
36292 }
36293
36294 /* This returns the target-specific builtin with code CODE if
36295 current_function_decl has visibility on this builtin, which is checked
36296 using isa flags. Returns NULL_TREE otherwise. */
36297
36298 static tree ix86_get_builtin (enum ix86_builtins code)
36299 {
36300 struct cl_target_option *opts;
36301 tree target_tree = NULL_TREE;
36302
36303 /* Determine the isa flags of current_function_decl. */
36304
36305 if (current_function_decl)
36306 target_tree = DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl);
36307
36308 if (target_tree == NULL)
36309 target_tree = target_option_default_node;
36310
36311 opts = TREE_TARGET_OPTION (target_tree);
36312
36313 if (ix86_builtins_isa[(int) code].isa & opts->x_ix86_isa_flags)
36314 return ix86_builtin_decl (code, true);
36315 else
36316 return NULL_TREE;
36317 }
36318
36319 /* Returns a function decl for a vectorized version of the builtin function
36320 FNDECL, with output vector type TYPE_OUT and input vector type TYPE_IN,
36321 or NULL_TREE if it is not available. */
36322
36323 static tree
36324 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
36325 tree type_in)
36326 {
36327 enum machine_mode in_mode, out_mode;
36328 int in_n, out_n;
36329 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
36330
36331 if (TREE_CODE (type_out) != VECTOR_TYPE
36332 || TREE_CODE (type_in) != VECTOR_TYPE
36333 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
36334 return NULL_TREE;
36335
36336 out_mode = TYPE_MODE (TREE_TYPE (type_out));
36337 out_n = TYPE_VECTOR_SUBPARTS (type_out);
36338 in_mode = TYPE_MODE (TREE_TYPE (type_in));
36339 in_n = TYPE_VECTOR_SUBPARTS (type_in);
36340
36341 switch (fn)
36342 {
36343 case BUILT_IN_SQRT:
36344 if (out_mode == DFmode && in_mode == DFmode)
36345 {
36346 if (out_n == 2 && in_n == 2)
36347 return ix86_get_builtin (IX86_BUILTIN_SQRTPD);
36348 else if (out_n == 4 && in_n == 4)
36349 return ix86_get_builtin (IX86_BUILTIN_SQRTPD256);
36350 else if (out_n == 8 && in_n == 8)
36351 return ix86_get_builtin (IX86_BUILTIN_SQRTPD512);
36352 }
36353 break;
36354
36355 case BUILT_IN_EXP2F:
36356 if (out_mode == SFmode && in_mode == SFmode)
36357 {
36358 if (out_n == 16 && in_n == 16)
36359 return ix86_get_builtin (IX86_BUILTIN_EXP2PS);
36360 }
36361 break;
36362
36363 case BUILT_IN_SQRTF:
36364 if (out_mode == SFmode && in_mode == SFmode)
36365 {
36366 if (out_n == 4 && in_n == 4)
36367 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR);
36368 else if (out_n == 8 && in_n == 8)
36369 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR256);
36370 else if (out_n == 16 && in_n == 16)
36371 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR512);
36372 }
36373 break;
36374
36375 case BUILT_IN_IFLOOR:
36376 case BUILT_IN_LFLOOR:
36377 case BUILT_IN_LLFLOOR:
36378 /* The round insn does not trap on denormals. */
36379 if (flag_trapping_math || !TARGET_ROUND)
36380 break;
36381
36382 if (out_mode == SImode && in_mode == DFmode)
36383 {
36384 if (out_n == 4 && in_n == 2)
36385 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX);
36386 else if (out_n == 8 && in_n == 4)
36387 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256);
36388 else if (out_n == 16 && in_n == 8)
36389 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512);
36390 }
36391 break;
36392
36393 case BUILT_IN_IFLOORF:
36394 case BUILT_IN_LFLOORF:
36395 case BUILT_IN_LLFLOORF:
36396 /* The round insn does not trap on denormals. */
36397 if (flag_trapping_math || !TARGET_ROUND)
36398 break;
36399
36400 if (out_mode == SImode && in_mode == SFmode)
36401 {
36402 if (out_n == 4 && in_n == 4)
36403 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX);
36404 else if (out_n == 8 && in_n == 8)
36405 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX256);
36406 }
36407 break;
36408
36409 case BUILT_IN_ICEIL:
36410 case BUILT_IN_LCEIL:
36411 case BUILT_IN_LLCEIL:
36412 /* The round insn does not trap on denormals. */
36413 if (flag_trapping_math || !TARGET_ROUND)
36414 break;
36415
36416 if (out_mode == SImode && in_mode == DFmode)
36417 {
36418 if (out_n == 4 && in_n == 2)
36419 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX);
36420 else if (out_n == 8 && in_n == 4)
36421 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256);
36422 else if (out_n == 16 && in_n == 8)
36423 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512);
36424 }
36425 break;
36426
36427 case BUILT_IN_ICEILF:
36428 case BUILT_IN_LCEILF:
36429 case BUILT_IN_LLCEILF:
36430 /* The round insn does not trap on denormals. */
36431 if (flag_trapping_math || !TARGET_ROUND)
36432 break;
36433
36434 if (out_mode == SImode && in_mode == SFmode)
36435 {
36436 if (out_n == 4 && in_n == 4)
36437 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX);
36438 else if (out_n == 8 && in_n == 8)
36439 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX256);
36440 }
36441 break;
36442
36443 case BUILT_IN_IRINT:
36444 case BUILT_IN_LRINT:
36445 case BUILT_IN_LLRINT:
36446 if (out_mode == SImode && in_mode == DFmode)
36447 {
36448 if (out_n == 4 && in_n == 2)
36449 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX);
36450 else if (out_n == 8 && in_n == 4)
36451 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX256);
36452 }
36453 break;
36454
36455 case BUILT_IN_IRINTF:
36456 case BUILT_IN_LRINTF:
36457 case BUILT_IN_LLRINTF:
36458 if (out_mode == SImode && in_mode == SFmode)
36459 {
36460 if (out_n == 4 && in_n == 4)
36461 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ);
36462 else if (out_n == 8 && in_n == 8)
36463 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ256);
36464 }
36465 break;
36466
36467 case BUILT_IN_IROUND:
36468 case BUILT_IN_LROUND:
36469 case BUILT_IN_LLROUND:
36470 /* The round insn does not trap on denormals. */
36471 if (flag_trapping_math || !TARGET_ROUND)
36472 break;
36473
36474 if (out_mode == SImode && in_mode == DFmode)
36475 {
36476 if (out_n == 4 && in_n == 2)
36477 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX);
36478 else if (out_n == 8 && in_n == 4)
36479 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256);
36480 else if (out_n == 16 && in_n == 8)
36481 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512);
36482 }
36483 break;
36484
36485 case BUILT_IN_IROUNDF:
36486 case BUILT_IN_LROUNDF:
36487 case BUILT_IN_LLROUNDF:
36488 /* The round insn does not trap on denormals. */
36489 if (flag_trapping_math || !TARGET_ROUND)
36490 break;
36491
36492 if (out_mode == SImode && in_mode == SFmode)
36493 {
36494 if (out_n == 4 && in_n == 4)
36495 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX);
36496 else if (out_n == 8 && in_n == 8)
36497 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX256);
36498 }
36499 break;
36500
36501 case BUILT_IN_COPYSIGN:
36502 if (out_mode == DFmode && in_mode == DFmode)
36503 {
36504 if (out_n == 2 && in_n == 2)
36505 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD);
36506 else if (out_n == 4 && in_n == 4)
36507 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD256);
36508 else if (out_n == 8 && in_n == 8)
36509 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD512);
36510 }
36511 break;
36512
36513 case BUILT_IN_COPYSIGNF:
36514 if (out_mode == SFmode && in_mode == SFmode)
36515 {
36516 if (out_n == 4 && in_n == 4)
36517 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS);
36518 else if (out_n == 8 && in_n == 8)
36519 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS256);
36520 else if (out_n == 16 && in_n == 16)
36521 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS512);
36522 }
36523 break;
36524
36525 case BUILT_IN_FLOOR:
36526 /* The round insn does not trap on denormals. */
36527 if (flag_trapping_math || !TARGET_ROUND)
36528 break;
36529
36530 if (out_mode == DFmode && in_mode == DFmode)
36531 {
36532 if (out_n == 2 && in_n == 2)
36533 return ix86_get_builtin (IX86_BUILTIN_FLOORPD);
36534 else if (out_n == 4 && in_n == 4)
36535 return ix86_get_builtin (IX86_BUILTIN_FLOORPD256);
36536 }
36537 break;
36538
36539 case BUILT_IN_FLOORF:
36540 /* The round insn does not trap on denormals. */
36541 if (flag_trapping_math || !TARGET_ROUND)
36542 break;
36543
36544 if (out_mode == SFmode && in_mode == SFmode)
36545 {
36546 if (out_n == 4 && in_n == 4)
36547 return ix86_get_builtin (IX86_BUILTIN_FLOORPS);
36548 else if (out_n == 8 && in_n == 8)
36549 return ix86_get_builtin (IX86_BUILTIN_FLOORPS256);
36550 }
36551 break;
36552
36553 case BUILT_IN_CEIL:
36554 /* The round insn does not trap on denormals. */
36555 if (flag_trapping_math || !TARGET_ROUND)
36556 break;
36557
36558 if (out_mode == DFmode && in_mode == DFmode)
36559 {
36560 if (out_n == 2 && in_n == 2)
36561 return ix86_get_builtin (IX86_BUILTIN_CEILPD);
36562 else if (out_n == 4 && in_n == 4)
36563 return ix86_get_builtin (IX86_BUILTIN_CEILPD256);
36564 }
36565 break;
36566
36567 case BUILT_IN_CEILF:
36568 /* The round insn does not trap on denormals. */
36569 if (flag_trapping_math || !TARGET_ROUND)
36570 break;
36571
36572 if (out_mode == SFmode && in_mode == SFmode)
36573 {
36574 if (out_n == 4 && in_n == 4)
36575 return ix86_get_builtin (IX86_BUILTIN_CEILPS);
36576 else if (out_n == 8 && in_n == 8)
36577 return ix86_get_builtin (IX86_BUILTIN_CEILPS256);
36578 }
36579 break;
36580
36581 case BUILT_IN_TRUNC:
36582 /* The round insn does not trap on denormals. */
36583 if (flag_trapping_math || !TARGET_ROUND)
36584 break;
36585
36586 if (out_mode == DFmode && in_mode == DFmode)
36587 {
36588 if (out_n == 2 && in_n == 2)
36589 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD);
36590 else if (out_n == 4 && in_n == 4)
36591 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD256);
36592 }
36593 break;
36594
36595 case BUILT_IN_TRUNCF:
36596 /* The round insn does not trap on denormals. */
36597 if (flag_trapping_math || !TARGET_ROUND)
36598 break;
36599
36600 if (out_mode == SFmode && in_mode == SFmode)
36601 {
36602 if (out_n == 4 && in_n == 4)
36603 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS);
36604 else if (out_n == 8 && in_n == 8)
36605 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS256);
36606 }
36607 break;
36608
36609 case BUILT_IN_RINT:
36610 /* The round insn does not trap on denormals. */
36611 if (flag_trapping_math || !TARGET_ROUND)
36612 break;
36613
36614 if (out_mode == DFmode && in_mode == DFmode)
36615 {
36616 if (out_n == 2 && in_n == 2)
36617 return ix86_get_builtin (IX86_BUILTIN_RINTPD);
36618 else if (out_n == 4 && in_n == 4)
36619 return ix86_get_builtin (IX86_BUILTIN_RINTPD256);
36620 }
36621 break;
36622
36623 case BUILT_IN_RINTF:
36624 /* The round insn does not trap on denormals. */
36625 if (flag_trapping_math || !TARGET_ROUND)
36626 break;
36627
36628 if (out_mode == SFmode && in_mode == SFmode)
36629 {
36630 if (out_n == 4 && in_n == 4)
36631 return ix86_get_builtin (IX86_BUILTIN_RINTPS);
36632 else if (out_n == 8 && in_n == 8)
36633 return ix86_get_builtin (IX86_BUILTIN_RINTPS256);
36634 }
36635 break;
36636
36637 case BUILT_IN_ROUND:
36638 /* The round insn does not trap on denormals. */
36639 if (flag_trapping_math || !TARGET_ROUND)
36640 break;
36641
36642 if (out_mode == DFmode && in_mode == DFmode)
36643 {
36644 if (out_n == 2 && in_n == 2)
36645 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ);
36646 else if (out_n == 4 && in_n == 4)
36647 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ256);
36648 }
36649 break;
36650
36651 case BUILT_IN_ROUNDF:
36652 /* The round insn does not trap on denormals. */
36653 if (flag_trapping_math || !TARGET_ROUND)
36654 break;
36655
36656 if (out_mode == SFmode && in_mode == SFmode)
36657 {
36658 if (out_n == 4 && in_n == 4)
36659 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ);
36660 else if (out_n == 8 && in_n == 8)
36661 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ256);
36662 }
36663 break;
36664
36665 case BUILT_IN_FMA:
36666 if (out_mode == DFmode && in_mode == DFmode)
36667 {
36668 if (out_n == 2 && in_n == 2)
36669 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD);
36670 if (out_n == 4 && in_n == 4)
36671 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD256);
36672 }
36673 break;
36674
36675 case BUILT_IN_FMAF:
36676 if (out_mode == SFmode && in_mode == SFmode)
36677 {
36678 if (out_n == 4 && in_n == 4)
36679 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS);
36680 if (out_n == 8 && in_n == 8)
36681 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS256);
36682 }
36683 break;
36684
36685 default:
36686 break;
36687 }
36688
36689 /* Dispatch to a handler for a vectorization library. */
36690 if (ix86_veclib_handler)
36691 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
36692 type_in);
36693
36694 return NULL_TREE;
36695 }
36696
36697 /* Handler for an SVML-style interface to
36698 a library with vectorized intrinsics. */
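/* As a worked example of the name mangling below: BUILT_IN_SINF on a
   4-element SFmode vector yields "vmlsSin4", while BUILT_IN_SIN on a
   2-element DFmode vector yields "vmldSin2".  */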
36699
36700 static tree
36701 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
36702 {
36703 char name[20];
36704 tree fntype, new_fndecl, args;
36705 unsigned arity;
36706 const char *bname;
36707 enum machine_mode el_mode, in_mode;
36708 int n, in_n;
36709
36710 /* SVML is suitable for unsafe math only. */
36711 if (!flag_unsafe_math_optimizations)
36712 return NULL_TREE;
36713
36714 el_mode = TYPE_MODE (TREE_TYPE (type_out));
36715 n = TYPE_VECTOR_SUBPARTS (type_out);
36716 in_mode = TYPE_MODE (TREE_TYPE (type_in));
36717 in_n = TYPE_VECTOR_SUBPARTS (type_in);
36718 if (el_mode != in_mode
36719 || n != in_n)
36720 return NULL_TREE;
36721
36722 switch (fn)
36723 {
36724 case BUILT_IN_EXP:
36725 case BUILT_IN_LOG:
36726 case BUILT_IN_LOG10:
36727 case BUILT_IN_POW:
36728 case BUILT_IN_TANH:
36729 case BUILT_IN_TAN:
36730 case BUILT_IN_ATAN:
36731 case BUILT_IN_ATAN2:
36732 case BUILT_IN_ATANH:
36733 case BUILT_IN_CBRT:
36734 case BUILT_IN_SINH:
36735 case BUILT_IN_SIN:
36736 case BUILT_IN_ASINH:
36737 case BUILT_IN_ASIN:
36738 case BUILT_IN_COSH:
36739 case BUILT_IN_COS:
36740 case BUILT_IN_ACOSH:
36741 case BUILT_IN_ACOS:
36742 if (el_mode != DFmode || n != 2)
36743 return NULL_TREE;
36744 break;
36745
36746 case BUILT_IN_EXPF:
36747 case BUILT_IN_LOGF:
36748 case BUILT_IN_LOG10F:
36749 case BUILT_IN_POWF:
36750 case BUILT_IN_TANHF:
36751 case BUILT_IN_TANF:
36752 case BUILT_IN_ATANF:
36753 case BUILT_IN_ATAN2F:
36754 case BUILT_IN_ATANHF:
36755 case BUILT_IN_CBRTF:
36756 case BUILT_IN_SINHF:
36757 case BUILT_IN_SINF:
36758 case BUILT_IN_ASINHF:
36759 case BUILT_IN_ASINF:
36760 case BUILT_IN_COSHF:
36761 case BUILT_IN_COSF:
36762 case BUILT_IN_ACOSHF:
36763 case BUILT_IN_ACOSF:
36764 if (el_mode != SFmode || n != 4)
36765 return NULL_TREE;
36766 break;
36767
36768 default:
36769 return NULL_TREE;
36770 }
36771
36772 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
36773
36774 if (fn == BUILT_IN_LOGF)
36775 strcpy (name, "vmlsLn4");
36776 else if (fn == BUILT_IN_LOG)
36777 strcpy (name, "vmldLn2");
36778 else if (n == 4)
36779 {
36780 sprintf (name, "vmls%s", bname+10);
36781 name[strlen (name)-1] = '4';
36782 }
36783 else
36784 sprintf (name, "vmld%s2", bname+10);
36785
36786 /* Convert to uppercase. */
36787 name[4] &= ~0x20;
36788
36789 arity = 0;
36790 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
36791 args;
36792 args = TREE_CHAIN (args))
36793 arity++;
36794
36795 if (arity == 1)
36796 fntype = build_function_type_list (type_out, type_in, NULL);
36797 else
36798 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
36799
36800 /* Build a function declaration for the vectorized function. */
36801 new_fndecl = build_decl (BUILTINS_LOCATION,
36802 FUNCTION_DECL, get_identifier (name), fntype);
36803 TREE_PUBLIC (new_fndecl) = 1;
36804 DECL_EXTERNAL (new_fndecl) = 1;
36805 DECL_IS_NOVOPS (new_fndecl) = 1;
36806 TREE_READONLY (new_fndecl) = 1;
36807
36808 return new_fndecl;
36809 }
36810
36811 /* Handler for an ACML-style interface to
36812 a library with vectorized intrinsics. */
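/* As a worked example of the mangling below: BUILT_IN_SIN on a 2-element
   DFmode vector yields "__vrd2_sin", and BUILT_IN_COSF on a 4-element
   SFmode vector yields "__vrs4_cosf".  */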
36813
36814 static tree
36815 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
36816 {
36817 char name[20] = "__vr.._";
36818 tree fntype, new_fndecl, args;
36819 unsigned arity;
36820 const char *bname;
36821 enum machine_mode el_mode, in_mode;
36822 int n, in_n;
36823
36824 /* ACML is 64-bit only and suitable for unsafe math only, as it does
36825 not correctly support parts of IEEE, such as denormals, with the
36826 required precision. */
36827 if (!TARGET_64BIT
36828 || !flag_unsafe_math_optimizations)
36829 return NULL_TREE;
36830
36831 el_mode = TYPE_MODE (TREE_TYPE (type_out));
36832 n = TYPE_VECTOR_SUBPARTS (type_out);
36833 in_mode = TYPE_MODE (TREE_TYPE (type_in));
36834 in_n = TYPE_VECTOR_SUBPARTS (type_in);
36835 if (el_mode != in_mode
36836 || n != in_n)
36837 return NULL_TREE;
36838
36839 switch (fn)
36840 {
36841 case BUILT_IN_SIN:
36842 case BUILT_IN_COS:
36843 case BUILT_IN_EXP:
36844 case BUILT_IN_LOG:
36845 case BUILT_IN_LOG2:
36846 case BUILT_IN_LOG10:
36847 name[4] = 'd';
36848 name[5] = '2';
36849 if (el_mode != DFmode
36850 || n != 2)
36851 return NULL_TREE;
36852 break;
36853
36854 case BUILT_IN_SINF:
36855 case BUILT_IN_COSF:
36856 case BUILT_IN_EXPF:
36857 case BUILT_IN_POWF:
36858 case BUILT_IN_LOGF:
36859 case BUILT_IN_LOG2F:
36860 case BUILT_IN_LOG10F:
36861 name[4] = 's';
36862 name[5] = '4';
36863 if (el_mode != SFmode
36864 || n != 4)
36865 return NULL_TREE;
36866 break;
36867
36868 default:
36869 return NULL_TREE;
36870 }
36871
36872 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
36873 sprintf (name + 7, "%s", bname+10);
36874
36875 arity = 0;
36876 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
36877 args;
36878 args = TREE_CHAIN (args))
36879 arity++;
36880
36881 if (arity == 1)
36882 fntype = build_function_type_list (type_out, type_in, NULL);
36883 else
36884 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
36885
36886 /* Build a function declaration for the vectorized function. */
36887 new_fndecl = build_decl (BUILTINS_LOCATION,
36888 FUNCTION_DECL, get_identifier (name), fntype);
36889 TREE_PUBLIC (new_fndecl) = 1;
36890 DECL_EXTERNAL (new_fndecl) = 1;
36891 DECL_IS_NOVOPS (new_fndecl) = 1;
36892 TREE_READONLY (new_fndecl) = 1;
36893
36894 return new_fndecl;
36895 }
36896
36897 /* Returns a decl of a function that implements a gather load with
36898 memory vector type MEM_VECTYPE, index type INDEX_TYPE and scale SCALE,
36899 or NULL_TREE if it is not available. */
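/* For instance, reading the switch below: a V4SF memory vector with an
   SImode index selects IX86_BUILTIN_GATHERSIV4SF, while a V16SF memory
   vector with a DImode index requires AVX-512 and selects
   IX86_BUILTIN_GATHER3ALTDIV16SF; SCALE must be 1, 2, 4 or 8.  */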
36900
36901 static tree
36902 ix86_vectorize_builtin_gather (const_tree mem_vectype,
36903 const_tree index_type, int scale)
36904 {
36905 bool si;
36906 enum ix86_builtins code;
36907
36908 if (! TARGET_AVX2)
36909 return NULL_TREE;
36910
36911 if ((TREE_CODE (index_type) != INTEGER_TYPE
36912 && !POINTER_TYPE_P (index_type))
36913 || (TYPE_MODE (index_type) != SImode
36914 && TYPE_MODE (index_type) != DImode))
36915 return NULL_TREE;
36916
36917 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
36918 return NULL_TREE;
36919
36920 /* v*gather* insn sign extends index to pointer mode. */
36921 if (TYPE_PRECISION (index_type) < POINTER_SIZE
36922 && TYPE_UNSIGNED (index_type))
36923 return NULL_TREE;
36924
36925 if (scale <= 0
36926 || scale > 8
36927 || (scale & (scale - 1)) != 0)
36928 return NULL_TREE;
36929
36930 si = TYPE_MODE (index_type) == SImode;
36931 switch (TYPE_MODE (mem_vectype))
36932 {
36933 case V2DFmode:
36934 code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
36935 break;
36936 case V4DFmode:
36937 code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
36938 break;
36939 case V2DImode:
36940 code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
36941 break;
36942 case V4DImode:
36943 code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
36944 break;
36945 case V4SFmode:
36946 code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
36947 break;
36948 case V8SFmode:
36949 code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
36950 break;
36951 case V4SImode:
36952 code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
36953 break;
36954 case V8SImode:
36955 code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
36956 break;
36957 case V8DFmode:
36958 if (TARGET_AVX512F)
36959 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DF : IX86_BUILTIN_GATHER3DIV8DF;
36960 else
36961 return NULL_TREE;
36962 break;
36963 case V8DImode:
36964 if (TARGET_AVX512F)
36965 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DI : IX86_BUILTIN_GATHER3DIV8DI;
36966 else
36967 return NULL_TREE;
36968 break;
36969 case V16SFmode:
36970 if (TARGET_AVX512F)
36971 code = si ? IX86_BUILTIN_GATHER3SIV16SF : IX86_BUILTIN_GATHER3ALTDIV16SF;
36972 else
36973 return NULL_TREE;
36974 break;
36975 case V16SImode:
36976 if (TARGET_AVX512F)
36977 code = si ? IX86_BUILTIN_GATHER3SIV16SI : IX86_BUILTIN_GATHER3ALTDIV16SI;
36978 else
36979 return NULL_TREE;
36980 break;
36981 default:
36982 return NULL_TREE;
36983 }
36984
36985 return ix86_get_builtin (code);
36986 }
36987
36988 /* Returns a decl of a target-specific builtin that implements the
36989 reciprocal of the function FN, or NULL_TREE if not available. */
36990
36991 static tree
36992 ix86_builtin_reciprocal (unsigned int fn, bool md_fn, bool)
36993 {
36994 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
36995 && flag_finite_math_only && !flag_trapping_math
36996 && flag_unsafe_math_optimizations))
36997 return NULL_TREE;
36998
36999 if (md_fn)
37000 /* Machine dependent builtins. */
37001 switch (fn)
37002 {
37003 /* Vectorized version of sqrt to rsqrt conversion. */
37004 case IX86_BUILTIN_SQRTPS_NR:
37005 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR);
37006
37007 case IX86_BUILTIN_SQRTPS_NR256:
37008 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR256);
37009
37010 default:
37011 return NULL_TREE;
37012 }
37013 else
37014 /* Normal builtins. */
37015 switch (fn)
37016 {
37017 /* Sqrt to rsqrt conversion. */
37018 case BUILT_IN_SQRTF:
37019 return ix86_get_builtin (IX86_BUILTIN_RSQRTF);
37020
37021 default:
37022 return NULL_TREE;
37023 }
37024 }
37025 \f
37026 /* Helper for avx_vpermilps256_operand et al. This is also used by
37027 the expansion functions to turn the parallel back into a mask.
37028 The return value is 0 for no match and the imm8+1 for a match. */
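/* An illustrative example of the V4SFmode branch below: the parallel
   [3 2 1 0] encodes each element index in two bits, giving
   imm8 = 3 | (2 << 2) | (1 << 4) | (0 << 6) = 0x1b, so the function
   returns 0x1b + 1 = 0x1c.  */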
37029
37030 int
37031 avx_vpermilp_parallel (rtx par, enum machine_mode mode)
37032 {
37033 unsigned i, nelt = GET_MODE_NUNITS (mode);
37034 unsigned mask = 0;
37035 unsigned char ipar[16] = {}; /* Silence -Wuninitialized warning. */
37036
37037 if (XVECLEN (par, 0) != (int) nelt)
37038 return 0;
37039
37040 /* Validate that all of the elements are constants, and not totally
37041 out of range. Copy the data into an integral array to make the
37042 subsequent checks easier. */
37043 for (i = 0; i < nelt; ++i)
37044 {
37045 rtx er = XVECEXP (par, 0, i);
37046 unsigned HOST_WIDE_INT ei;
37047
37048 if (!CONST_INT_P (er))
37049 return 0;
37050 ei = INTVAL (er);
37051 if (ei >= nelt)
37052 return 0;
37053 ipar[i] = ei;
37054 }
37055
37056 switch (mode)
37057 {
37058 case V8DFmode:
37059 /* In the 512-bit DFmode case, we can only move elements within
37060 a 128-bit lane. First fill the second part of the mask,
37061 then fallthru. */
37062 for (i = 4; i < 6; ++i)
37063 {
37064 if (ipar[i] < 4 || ipar[i] >= 6)
37065 return 0;
37066 mask |= (ipar[i] - 4) << i;
37067 }
37068 for (i = 6; i < 8; ++i)
37069 {
37070 if (ipar[i] < 6)
37071 return 0;
37072 mask |= (ipar[i] - 6) << i;
37073 }
37074 /* FALLTHRU */
37075
37076 case V4DFmode:
37077 /* In the 256-bit DFmode case, we can only move elements within
37078 a 128-bit lane. */
37079 for (i = 0; i < 2; ++i)
37080 {
37081 if (ipar[i] >= 2)
37082 return 0;
37083 mask |= ipar[i] << i;
37084 }
37085 for (i = 2; i < 4; ++i)
37086 {
37087 if (ipar[i] < 2)
37088 return 0;
37089 mask |= (ipar[i] - 2) << i;
37090 }
37091 break;
37092
37093 case V16SFmode:
37094 /* In the 512-bit SFmode case, the permutation in the upper 256 bits
37095 must mirror the permutation in the lower 256 bits. */
37096 for (i = 0; i < 8; ++i)
37097 if (ipar[i] + 8 != ipar[i + 8])
37098 return 0;
37099 /* FALLTHRU */
37100
37101 case V8SFmode:
37102 /* In the 256-bit SFmode case, we have full freedom of
37103 movement within the low 128-bit lane, but the high 128-bit
37104 lane must mirror the exact same pattern. */
37105 for (i = 0; i < 4; ++i)
37106 if (ipar[i] + 4 != ipar[i + 4])
37107 return 0;
37108 nelt = 4;
37109 /* FALLTHRU */
37110
37111 case V2DFmode:
37112 case V4SFmode:
37113 /* In the 128-bit case, we've full freedom in the placement of
37114 the elements from the source operand. */
37115 for (i = 0; i < nelt; ++i)
37116 mask |= ipar[i] << (i * (nelt / 2));
37117 break;
37118
37119 default:
37120 gcc_unreachable ();
37121 }
37122
37123 /* Make sure success has a non-zero value by adding one. */
37124 return mask + 1;
37125 }
37126
37127 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
37128 the expansion functions to turn the parallel back into a mask.
37129 The return value is 0 for no match and the imm8+1 for a match. */
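/* An illustrative example: for V4DFmode the parallel [0 1 6 7] selects the
   low half of the first operand and the high half of the second, giving
   imm8 = 0 | (3 << 4) = 0x30, so the function returns 0x31.  */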
37130
37131 int
37132 avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
37133 {
37134 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
37135 unsigned mask = 0;
37136 unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */
37137
37138 if (XVECLEN (par, 0) != (int) nelt)
37139 return 0;
37140
37141 /* Validate that all of the elements are constants, and not totally
37142 out of range. Copy the data into an integral array to make the
37143 subsequent checks easier. */
37144 for (i = 0; i < nelt; ++i)
37145 {
37146 rtx er = XVECEXP (par, 0, i);
37147 unsigned HOST_WIDE_INT ei;
37148
37149 if (!CONST_INT_P (er))
37150 return 0;
37151 ei = INTVAL (er);
37152 if (ei >= 2 * nelt)
37153 return 0;
37154 ipar[i] = ei;
37155 }
37156
37157 /* Validate that the halves of the permute are halves. */
37158 for (i = 0; i < nelt2 - 1; ++i)
37159 if (ipar[i] + 1 != ipar[i + 1])
37160 return 0;
37161 for (i = nelt2; i < nelt - 1; ++i)
37162 if (ipar[i] + 1 != ipar[i + 1])
37163 return 0;
37164
37165 /* Reconstruct the mask. */
37166 for (i = 0; i < 2; ++i)
37167 {
37168 unsigned e = ipar[i * nelt2];
37169 if (e % nelt2)
37170 return 0;
37171 e /= nelt2;
37172 mask |= e << (i * 4);
37173 }
37174
37175 /* Make sure success has a non-zero value by adding one. */
37176 return mask + 1;
37177 }
37178 \f
37179 /* Return a register priority for hard reg REGNO. */
37180 static int
37181 ix86_register_priority (int hard_regno)
37182 {
37183 /* ebp and r13 as the base always want a displacement, and r12 as the
37184 base always wants an index. So discourage their use in an
37185 address. */
37186 if (hard_regno == R12_REG || hard_regno == R13_REG)
37187 return 0;
37188 if (hard_regno == BP_REG)
37189 return 1;
37190 /* New x86-64 int registers result in bigger code size. Discourage
37191 them. */
37192 if (FIRST_REX_INT_REG <= hard_regno && hard_regno <= LAST_REX_INT_REG)
37193 return 2;
37194 /* New x86-64 SSE registers result in bigger code size. Discourage
37195 them. */
37196 if (FIRST_REX_SSE_REG <= hard_regno && hard_regno <= LAST_REX_SSE_REG)
37197 return 2;
37198 /* Usage of AX register results in smaller code. Prefer it. */
37199 if (hard_regno == 0)
37200 return 4;
37201 return 3;
37202 }
37203
37204 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
37205
37206 Put float CONST_DOUBLE in the constant pool instead of fp regs.
37207 QImode must go into class Q_REGS.
37208 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
37209 movdf to do mem-to-mem moves through integer regs. */
37210
37211 static reg_class_t
37212 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
37213 {
37214 enum machine_mode mode = GET_MODE (x);
37215
37216 /* We're only allowed to return a subclass of CLASS. Many of the
37217 following checks fail for NO_REGS, so eliminate that early. */
37218 if (regclass == NO_REGS)
37219 return NO_REGS;
37220
37221 /* All classes can load zeros. */
37222 if (x == CONST0_RTX (mode))
37223 return regclass;
37224
37225 /* Force constants into memory if we are loading a (nonzero) constant into
37226 an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
37227 instructions to load from a constant. */
37228 if (CONSTANT_P (x)
37229 && (MAYBE_MMX_CLASS_P (regclass)
37230 || MAYBE_SSE_CLASS_P (regclass)
37231 || MAYBE_MASK_CLASS_P (regclass)))
37232 return NO_REGS;
37233
37234 /* Prefer SSE regs only, if we can use them for math. */
37235 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
37236 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
37237
37238 /* Floating-point constants need more complex checks. */
37239 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
37240 {
37241 /* General regs can load everything. */
37242 if (reg_class_subset_p (regclass, GENERAL_REGS))
37243 return regclass;
37244
37245 /* Floats can load 0 and 1 plus some others. Note that we eliminated
37246 zero above. We only want to wind up preferring 80387 registers if
37247 we plan on doing computation with them. */
37248 if (TARGET_80387
37249 && standard_80387_constant_p (x) > 0)
37250 {
37251 /* Limit class to non-sse. */
37252 if (regclass == FLOAT_SSE_REGS)
37253 return FLOAT_REGS;
37254 if (regclass == FP_TOP_SSE_REGS)
37255 return FP_TOP_REG;
37256 if (regclass == FP_SECOND_SSE_REGS)
37257 return FP_SECOND_REG;
37258 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
37259 return regclass;
37260 }
37261
37262 return NO_REGS;
37263 }
37264
37265 /* Generally when we see PLUS here, it's the function invariant
37266 (plus soft-fp const_int). Which can only be computed into general
37267 regs. */
37268 if (GET_CODE (x) == PLUS)
37269 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
37270
37271 /* QImode constants are easy to load, but non-constant QImode data
37272 must go into Q_REGS. */
37273 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
37274 {
37275 if (reg_class_subset_p (regclass, Q_REGS))
37276 return regclass;
37277 if (reg_class_subset_p (Q_REGS, regclass))
37278 return Q_REGS;
37279 return NO_REGS;
37280 }
37281
37282 return regclass;
37283 }
37284
37285 /* Discourage putting floating-point values in SSE registers unless
37286 SSE math is being used, and likewise for the 387 registers. */
37287 static reg_class_t
37288 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
37289 {
37290 enum machine_mode mode = GET_MODE (x);
37291
37292 /* Restrict the output reload class to the register bank that we are doing
37293 math on. If we would like not to return a subset of CLASS, reject this
37294 alternative: if reload cannot do this, it will still use its choice. */
37295 mode = GET_MODE (x);
37296 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
37297 return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
37298
37299 if (X87_FLOAT_MODE_P (mode))
37300 {
37301 if (regclass == FP_TOP_SSE_REGS)
37302 return FP_TOP_REG;
37303 else if (regclass == FP_SECOND_SSE_REGS)
37304 return FP_SECOND_REG;
37305 else
37306 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
37307 }
37308
37309 return regclass;
37310 }
37311
37312 static reg_class_t
37313 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
37314 enum machine_mode mode, secondary_reload_info *sri)
37315 {
37316 /* Double-word spills from general registers to non-offsettable memory
37317 references (zero-extended addresses) require special handling. */
37318 if (TARGET_64BIT
37319 && MEM_P (x)
37320 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
37321 && INTEGER_CLASS_P (rclass)
37322 && !offsettable_memref_p (x))
37323 {
37324 sri->icode = (in_p
37325 ? CODE_FOR_reload_noff_load
37326 : CODE_FOR_reload_noff_store);
37327 /* Add the cost of moving address to a temporary. */
37328 sri->extra_cost = 1;
37329
37330 return NO_REGS;
37331 }
37332
37333 /* QImode spills from non-QI registers require an
37334 intermediate register on 32-bit targets. */
37335 if (mode == QImode
37336 && (MAYBE_MASK_CLASS_P (rclass)
37337 || (!TARGET_64BIT && !in_p
37338 && INTEGER_CLASS_P (rclass)
37339 && MAYBE_NON_Q_CLASS_P (rclass))))
37340 {
37341 int regno;
37342
37343 if (REG_P (x))
37344 regno = REGNO (x);
37345 else
37346 regno = -1;
37347
37348 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
37349 regno = true_regnum (x);
37350
37351 /* Return Q_REGS if the operand is in memory. */
37352 if (regno == -1)
37353 return Q_REGS;
37354 }
37355
37356 /* This condition handles corner case where an expression involving
37357 pointers gets vectorized. We're trying to use the address of a
37358 stack slot as a vector initializer.
37359
37360 (set (reg:V2DI 74 [ vect_cst_.2 ])
37361 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
37362
37363 Eventually frame gets turned into sp+offset like this:
37364
37365 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37366 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
37367 (const_int 392 [0x188]))))
37368
37369 That later gets turned into:
37370
37371 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37372 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
37373 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
37374
37375 We'll have the following reload recorded:
37376
37377 Reload 0: reload_in (DI) =
37378 (plus:DI (reg/f:DI 7 sp)
37379 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
37380 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37381 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
37382 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
37383 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37384 reload_reg_rtx: (reg:V2DI 22 xmm1)
37385
37386 Which isn't going to work since SSE instructions can't handle scalar
37387 additions. Returning GENERAL_REGS forces the addition into integer
37388 register and reload can handle subsequent reloads without problems. */
37389
37390 if (in_p && GET_CODE (x) == PLUS
37391 && SSE_CLASS_P (rclass)
37392 && SCALAR_INT_MODE_P (mode))
37393 return GENERAL_REGS;
37394
37395 return NO_REGS;
37396 }
37397
37398 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
37399
37400 static bool
37401 ix86_class_likely_spilled_p (reg_class_t rclass)
37402 {
37403 switch (rclass)
37404 {
37405 case AREG:
37406 case DREG:
37407 case CREG:
37408 case BREG:
37409 case AD_REGS:
37410 case SIREG:
37411 case DIREG:
37412 case SSE_FIRST_REG:
37413 case FP_TOP_REG:
37414 case FP_SECOND_REG:
37415 return true;
37416
37417 default:
37418 break;
37419 }
37420
37421 return false;
37422 }
37423
37424 /* If we are copying between general and FP registers, we need a memory
37425 location. The same is true for SSE and MMX registers.
37426
37427 To optimize register_move_cost performance, allow inline variant.
37428
37429 The macro can't work reliably when one of the CLASSES is a class containing
37430 registers from multiple units (SSE, MMX, integer). We avoid this by never
37431 combining those units in a single alternative in the machine description.
37432 Ensure that this constraint holds to avoid unexpected surprises.
37433
37434 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
37435 enforce these sanity checks. */
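/* For instance, per the checks below: on a 32-bit target a DImode move
   between GENERAL_REGS and SSE_REGS needs secondary memory, since moves
   between SSE and general registers are limited to word size there.  */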
37436
37437 static inline bool
37438 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
37439 enum machine_mode mode, int strict)
37440 {
37441 if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
37442 return false;
37443 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
37444 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
37445 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
37446 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
37447 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
37448 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
37449 {
37450 gcc_assert (!strict || lra_in_progress);
37451 return true;
37452 }
37453
37454 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
37455 return true;
37456
37457 /* Between mask and general, we have moves no larger than word size. */
37458 if ((MAYBE_MASK_CLASS_P (class1) != MAYBE_MASK_CLASS_P (class2))
37459 && (GET_MODE_SIZE (mode) > UNITS_PER_WORD))
37460 return true;
37461
37462 /* ??? This is a lie. We do have moves between mmx/general, and for
37463 mmx/sse2. But by saying we need secondary memory we discourage the
37464 register allocator from using the mmx registers unless needed. */
37465 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
37466 return true;
37467
37468 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
37469 {
37470 /* SSE1 doesn't have any direct moves from other classes. */
37471 if (!TARGET_SSE2)
37472 return true;
37473
37474 /* If the target says that inter-unit moves are more expensive
37475 than moving through memory, then don't generate them. */
37476 if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
37477 || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
37478 return true;
37479
37480 /* Between SSE and general, we have moves no larger than word size. */
37481 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
37482 return true;
37483 }
37484
37485 return false;
37486 }
37487
37488 bool
37489 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
37490 enum machine_mode mode, int strict)
37491 {
37492 return inline_secondary_memory_needed (class1, class2, mode, strict);
37493 }
37494
37495 /* Implement the TARGET_CLASS_MAX_NREGS hook.
37496
37497 On the 80386, this is the size of MODE in words,
37498 except in the FP regs, where a single reg is always enough. */
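/* For example, per the code below: on a 32-bit target an integer class
   needs 2 registers for DImode and 3 for XFmode, while the FP and SSE
   classes need a single register for any scalar mode and 2 for a
   complex mode.  */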
37499
37500 static unsigned char
37501 ix86_class_max_nregs (reg_class_t rclass, enum machine_mode mode)
37502 {
37503 if (MAYBE_INTEGER_CLASS_P (rclass))
37504 {
37505 if (mode == XFmode)
37506 return (TARGET_64BIT ? 2 : 3);
37507 else if (mode == XCmode)
37508 return (TARGET_64BIT ? 4 : 6);
37509 else
37510 return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
37511 }
37512 else
37513 {
37514 if (COMPLEX_MODE_P (mode))
37515 return 2;
37516 else
37517 return 1;
37518 }
37519 }
37520
37521 /* Return true if the registers in CLASS cannot represent the change from
37522 modes FROM to TO. */
37523
37524 bool
37525 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
37526 enum reg_class regclass)
37527 {
37528 if (from == to)
37529 return false;
37530
37531 /* x87 registers can't do subreg at all, as all values are reformatted
37532 to extended precision. */
37533 if (MAYBE_FLOAT_CLASS_P (regclass))
37534 return true;
37535
37536 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
37537 {
37538 /* Vector registers do not support QI or HImode loads. If we don't
37539 disallow a change to these modes, reload will assume it's ok to
37540 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
37541 the vec_dupv4hi pattern. */
37542 if (GET_MODE_SIZE (from) < 4)
37543 return true;
37544 }
37545
37546 return false;
37547 }
37548
37549 /* Return the cost of moving data of mode M between a
37550 register and memory. A value of 2 is the default; this cost is
37551 relative to those in `REGISTER_MOVE_COST'.
37552
37553 This function is used extensively by register_move_cost, which is used to
37554 build tables at startup, so keep it inline.
37555 When IN is 2, return the maximum of the in and out move costs.
37556
37557 If moving between registers and memory is more expensive than
37558 between two registers, you should define this macro to express the
37559 relative cost.
37560
37561 Also model the increased cost of moving QImode registers in non-Q_REGS
37562 classes.
37563 */
37564 static inline int
37565 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
37566 int in)
37567 {
37568 int cost;
37569 if (FLOAT_CLASS_P (regclass))
37570 {
37571 int index;
37572 switch (mode)
37573 {
37574 case SFmode:
37575 index = 0;
37576 break;
37577 case DFmode:
37578 index = 1;
37579 break;
37580 case XFmode:
37581 index = 2;
37582 break;
37583 default:
37584 return 100;
37585 }
37586 if (in == 2)
37587 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
37588 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
37589 }
37590 if (SSE_CLASS_P (regclass))
37591 {
37592 int index;
37593 switch (GET_MODE_SIZE (mode))
37594 {
37595 case 4:
37596 index = 0;
37597 break;
37598 case 8:
37599 index = 1;
37600 break;
37601 case 16:
37602 index = 2;
37603 break;
37604 default:
37605 return 100;
37606 }
37607 if (in == 2)
37608 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
37609 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
37610 }
37611 if (MMX_CLASS_P (regclass))
37612 {
37613 int index;
37614 switch (GET_MODE_SIZE (mode))
37615 {
37616 case 4:
37617 index = 0;
37618 break;
37619 case 8:
37620 index = 1;
37621 break;
37622 default:
37623 return 100;
37624 }
37625 if (in == 2)
37626 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
37627 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
37628 }
37629 switch (GET_MODE_SIZE (mode))
37630 {
37631 case 1:
37632 if (Q_CLASS_P (regclass) || TARGET_64BIT)
37633 {
37634 if (!in)
37635 return ix86_cost->int_store[0];
37636 if (TARGET_PARTIAL_REG_DEPENDENCY
37637 && optimize_function_for_speed_p (cfun))
37638 cost = ix86_cost->movzbl_load;
37639 else
37640 cost = ix86_cost->int_load[0];
37641 if (in == 2)
37642 return MAX (cost, ix86_cost->int_store[0]);
37643 return cost;
37644 }
37645 else
37646 {
37647 if (in == 2)
37648 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
37649 if (in)
37650 return ix86_cost->movzbl_load;
37651 else
37652 return ix86_cost->int_store[0] + 4;
37653 }
37654 break;
37655 case 2:
37656 if (in == 2)
37657 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
37658 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
37659 default:
37660 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
37661 if (mode == TFmode)
37662 mode = XFmode;
37663 if (in == 2)
37664 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
37665 else if (in)
37666 cost = ix86_cost->int_load[2];
37667 else
37668 cost = ix86_cost->int_store[2];
37669 return (cost * (((int) GET_MODE_SIZE (mode)
37670 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
37671 }
37672 }
37673
37674 static int
37675 ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
37676 bool in)
37677 {
37678 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
37679 }
37680
37681
37682 /* Return the cost of moving data from a register in class CLASS1 to
37683 one in class CLASS2.
37684
37685 It is not required that the cost always equal 2 when FROM is the same as TO;
37686 on some machines it is expensive to move between registers if they are not
37687 general registers. */
37688
37689 static int
37690 ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
37691 reg_class_t class2_i)
37692 {
37693 enum reg_class class1 = (enum reg_class) class1_i;
37694 enum reg_class class2 = (enum reg_class) class2_i;
37695
37696 /* In case we require secondary memory, compute the cost of the store
37697 followed by the load. To avoid bad register allocation choices, we need
37698 this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
37699
37700 if (inline_secondary_memory_needed (class1, class2, mode, 0))
37701 {
37702 int cost = 1;
37703
37704 cost += inline_memory_move_cost (mode, class1, 2);
37705 cost += inline_memory_move_cost (mode, class2, 2);
37706
37707 /* When copying from a general purpose register we may emit multiple
37708 stores followed by a single load, causing a memory size mismatch stall.
37709 Count this as an arbitrarily high cost of 20. */
37710 if (targetm.class_max_nregs (class1, mode)
37711 > targetm.class_max_nregs (class2, mode))
37712 cost += 20;
37713
37714 /* In the case of FP/MMX moves, the registers actually overlap, and we
37715 have to switch modes in order to treat them differently. */
37716 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
37717 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
37718 cost += 20;
37719
37720 return cost;
37721 }
37722
37723 /* Moves between SSE/MMX and integer unit are expensive. */
37724 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
37725 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
37726
37727 /* ??? By keeping returned value relatively high, we limit the number
37728 of moves between integer and MMX/SSE registers for all targets.
37729 Additionally, high value prevents problem with x86_modes_tieable_p(),
37730 where integer modes in MMX/SSE registers are not tieable
37731 because of missing QImode and HImode moves to, from or between
37732 MMX/SSE registers. */
37733 return MAX (8, ix86_cost->mmxsse_to_integer);
37734
37735 if (MAYBE_FLOAT_CLASS_P (class1))
37736 return ix86_cost->fp_move;
37737 if (MAYBE_SSE_CLASS_P (class1))
37738 return ix86_cost->sse_move;
37739 if (MAYBE_MMX_CLASS_P (class1))
37740 return ix86_cost->mmx_move;
37741 return 2;
37742 }
37743
37744 /* Return TRUE if hard register REGNO can hold a value of machine-mode
37745 MODE. */
37746
37747 bool
37748 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
37749 {
37750 /* The flags register can hold only CCmode values, and only it can hold them. */
37751 if (CC_REGNO_P (regno))
37752 return GET_MODE_CLASS (mode) == MODE_CC;
37753 if (GET_MODE_CLASS (mode) == MODE_CC
37754 || GET_MODE_CLASS (mode) == MODE_RANDOM
37755 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
37756 return false;
37757 if (STACK_REGNO_P (regno))
37758 return VALID_FP_MODE_P (mode);
37759 if (MASK_REGNO_P (regno))
37760 return (VALID_MASK_REG_MODE (mode)
37761 || (TARGET_AVX512BW && VALID_MASK_AVX512BW_MODE (mode)));
37762 if (SSE_REGNO_P (regno))
37763 {
37764 /* We implement the move patterns for all vector modes into and
37765 out of SSE registers, even when no operation instructions
37766 are available. */
37767
37768 /* For AVX-512 we allow, regardless of regno:
37769 - XI mode
37770 - any of 512-bit wide vector mode
37771 - any scalar mode. */
37772 if (TARGET_AVX512F
37773 && (mode == XImode
37774 || VALID_AVX512F_REG_MODE (mode)
37775 || VALID_AVX512F_SCALAR_MODE (mode)))
37776 return true;
37777
37778 /* TODO check for QI/HI scalars. */
37779 /* AVX512VL allows SSE regs 16+ for 128/256-bit modes. */
37780 if (TARGET_AVX512VL
37781 && (mode == OImode
37782 || mode == TImode
37783 || VALID_AVX256_REG_MODE (mode)
37784 || VALID_AVX512VL_128_REG_MODE (mode)))
37785 return true;
37786
37787 /* xmm16-xmm31 are only available for AVX-512. */
37788 if (EXT_REX_SSE_REGNO_P (regno))
37789 return false;
37790
37791 /* OImode and AVX modes are available only when AVX is enabled. */
37792 return ((TARGET_AVX
37793 && VALID_AVX256_REG_OR_OI_MODE (mode))
37794 || VALID_SSE_REG_MODE (mode)
37795 || VALID_SSE2_REG_MODE (mode)
37796 || VALID_MMX_REG_MODE (mode)
37797 || VALID_MMX_REG_MODE_3DNOW (mode));
37798 }
37799 if (MMX_REGNO_P (regno))
37800 {
37801 /* We implement the move patterns for 3DNOW modes even in MMX mode,
37802 so if the register is available at all, then we can move data of
37803 the given mode into or out of it. */
37804 return (VALID_MMX_REG_MODE (mode)
37805 || VALID_MMX_REG_MODE_3DNOW (mode));
37806 }
37807
37808 if (mode == QImode)
37809 {
37810 /* Take care for QImode values - they can be in non-QI regs,
37811 but then they do cause partial register stalls. */
37812 if (ANY_QI_REGNO_P (regno))
37813 return true;
37814 if (!TARGET_PARTIAL_REG_STALL)
37815 return true;
37816 /* LRA checks if the hard register is OK for the given mode.
37817 QImode values can live in non-QI regs, so we allow all
37818 registers here. */
37819 if (lra_in_progress)
37820 return true;
37821 return !can_create_pseudo_p ();
37822 }
37823 /* We handle both integer and floats in the general purpose registers. */
37824 else if (VALID_INT_MODE_P (mode))
37825 return true;
37826 else if (VALID_FP_MODE_P (mode))
37827 return true;
37828 else if (VALID_DFP_MODE_P (mode))
37829 return true;
37830 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
37831 on to use that value in smaller contexts, this can easily force a
37832 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
37833 supporting DImode, allow it. */
37834 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
37835 return true;
37836
37837 return false;
37838 }
37839
37840 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
37841 tieable integer mode. */
37842
37843 static bool
37844 ix86_tieable_integer_mode_p (enum machine_mode mode)
37845 {
37846 switch (mode)
37847 {
37848 case HImode:
37849 case SImode:
37850 return true;
37851
37852 case QImode:
37853 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
37854
37855 case DImode:
37856 return TARGET_64BIT;
37857
37858 default:
37859 return false;
37860 }
37861 }
37862
37863 /* Return true if MODE1 is accessible in a register that can hold MODE2
37864 without copying. That is, all register classes that can hold MODE2
37865 can also hold MODE1. */
37866
37867 bool
37868 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
37869 {
37870 if (mode1 == mode2)
37871 return true;
37872
37873 if (ix86_tieable_integer_mode_p (mode1)
37874 && ix86_tieable_integer_mode_p (mode2))
37875 return true;
37876
37877 /* MODE2 being XFmode implies fp stack or general regs, which means we
37878 can tie any smaller floating point modes to it. Note that we do not
37879 tie this with TFmode. */
37880 if (mode2 == XFmode)
37881 return mode1 == SFmode || mode1 == DFmode;
37882
37883 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
37884 that we can tie it with SFmode. */
37885 if (mode2 == DFmode)
37886 return mode1 == SFmode;
37887
37888 /* If MODE2 is only appropriate for an SSE register, then tie with
37889 any other mode acceptable to SSE registers. */
37890 if (GET_MODE_SIZE (mode2) == 32
37891 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
37892 return (GET_MODE_SIZE (mode1) == 32
37893 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
37894 if (GET_MODE_SIZE (mode2) == 16
37895 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
37896 return (GET_MODE_SIZE (mode1) == 16
37897 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
37898
37899 /* If MODE2 is appropriate for an MMX register, then tie
37900 with any other mode acceptable to MMX registers. */
37901 if (GET_MODE_SIZE (mode2) == 8
37902 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
37903 return (GET_MODE_SIZE (mode1) == 8
37904 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
37905
37906 return false;
37907 }
37908
37909 /* Return the cost of moving between two registers of mode MODE. */
37910
37911 static int
37912 ix86_set_reg_reg_cost (enum machine_mode mode)
37913 {
37914 unsigned int units = UNITS_PER_WORD;
37915
37916 switch (GET_MODE_CLASS (mode))
37917 {
37918 default:
37919 break;
37920
37921 case MODE_CC:
37922 units = GET_MODE_SIZE (CCmode);
37923 break;
37924
37925 case MODE_FLOAT:
37926 if ((TARGET_SSE && mode == TFmode)
37927 || (TARGET_80387 && mode == XFmode)
37928 || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
37929 || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
37930 units = GET_MODE_SIZE (mode);
37931 break;
37932
37933 case MODE_COMPLEX_FLOAT:
37934 if ((TARGET_SSE && mode == TCmode)
37935 || (TARGET_80387 && mode == XCmode)
37936 || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
37937 || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
37938 units = GET_MODE_SIZE (mode);
37939 break;
37940
37941 case MODE_VECTOR_INT:
37942 case MODE_VECTOR_FLOAT:
37943 if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
37944 || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
37945 || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
37946 || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
37947 || (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
37948 units = GET_MODE_SIZE (mode);
37949 }
37950
37951 /* Return the cost of moving between two registers of mode MODE,
37952 assuming that the move will be in pieces of at most UNITS bytes. */
37953 return COSTS_N_INSNS ((GET_MODE_SIZE (mode) + units - 1) / units);
37954 }
37955
37956 /* Compute a (partial) cost for rtx X. Return true if the complete
37957 cost has been computed, and false if subexpressions should be
37958 scanned. In either case, *TOTAL contains the cost result. */
37959
37960 static bool
37961 ix86_rtx_costs (rtx x, int code_i, int outer_code_i, int opno, int *total,
37962 bool speed)
37963 {
37964 rtx mask;
37965 enum rtx_code code = (enum rtx_code) code_i;
37966 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
37967 enum machine_mode mode = GET_MODE (x);
37968 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
37969
37970 switch (code)
37971 {
37972 case SET:
37973 if (register_operand (SET_DEST (x), VOIDmode)
37974 && reg_or_0_operand (SET_SRC (x), VOIDmode))
37975 {
37976 *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
37977 return true;
37978 }
37979 return false;
37980
37981 case CONST_INT:
37982 case CONST:
37983 case LABEL_REF:
37984 case SYMBOL_REF:
37985 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
37986 *total = 3;
37987 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
37988 *total = 2;
37989 else if (flag_pic && SYMBOLIC_CONST (x)
37990 && !(TARGET_64BIT
37991 && (GET_CODE (x) == LABEL_REF
37992 || (GET_CODE (x) == SYMBOL_REF
37993 && SYMBOL_REF_LOCAL_P (x)))))
37994 *total = 1;
37995 else
37996 *total = 0;
37997 return true;
37998
37999 case CONST_DOUBLE:
38000 if (mode == VOIDmode)
38001 {
38002 *total = 0;
38003 return true;
38004 }
38005 switch (standard_80387_constant_p (x))
38006 {
38007 case 1: /* 0.0 */
38008 *total = 1;
38009 return true;
38010 default: /* Other constants */
38011 *total = 2;
38012 return true;
38013 case 0:
38014 case -1:
38015 break;
38016 }
38017 if (SSE_FLOAT_MODE_P (mode))
38018 {
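/* Note: CONST_VECTOR jumps directly to the case label below, so vector
   constants share the standard_sse_constant_p costing with SSE scalar
   float constants.  */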
38019 case CONST_VECTOR:
38020 switch (standard_sse_constant_p (x))
38021 {
38022 case 0:
38023 break;
38024 case 1: /* 0: xor eliminates false dependency */
38025 *total = 0;
38026 return true;
38027 default: /* -1: cmp contains false dependency */
38028 *total = 1;
38029 return true;
38030 }
38031 }
38032 /* Fall back to (MEM (SYMBOL_REF)), since that's where
38033 it'll probably end up. Add a penalty for size. */
38034 *total = (COSTS_N_INSNS (1)
38035 + (flag_pic != 0 && !TARGET_64BIT)
38036 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
38037 return true;
38038
38039 case ZERO_EXTEND:
38040 /* The zero extension is often completely free on x86_64, so make
38041 it as cheap as possible. */
38042 if (TARGET_64BIT && mode == DImode
38043 && GET_MODE (XEXP (x, 0)) == SImode)
38044 *total = 1;
38045 else if (TARGET_ZERO_EXTEND_WITH_AND)
38046 *total = cost->add;
38047 else
38048 *total = cost->movzx;
38049 return false;
38050
38051 case SIGN_EXTEND:
38052 *total = cost->movsx;
38053 return false;
38054
38055 case ASHIFT:
38056 if (SCALAR_INT_MODE_P (mode)
38057 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
38058 && CONST_INT_P (XEXP (x, 1)))
38059 {
38060 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
38061 if (value == 1)
38062 {
38063 *total = cost->add;
38064 return false;
38065 }
38066 if ((value == 2 || value == 3)
38067 && cost->lea <= cost->shift_const)
38068 {
38069 *total = cost->lea;
38070 return false;
38071 }
38072 }
38073 /* FALLTHRU */
38074
38075 case ROTATE:
38076 case ASHIFTRT:
38077 case LSHIFTRT:
38078 case ROTATERT:
38079 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
38080 {
38081 /* ??? Should be SSE vector operation cost. */
38082 /* At least for published AMD latencies, this really is the same
38083 as the latency for a simple fpu operation like fabs. */
38084 /* V*QImode is emulated with 1-11 insns. */
38085 if (mode == V16QImode || mode == V32QImode)
38086 {
38087 int count = 11;
38088 if (TARGET_XOP && mode == V16QImode)
38089 {
38090 /* For XOP we use vpshab, which requires a broadcast of the
38091 value to the variable shift insn. For constants this
38092 means a V16Q const in mem; even when we can perform the
38093 shift with one insn set the cost to prefer paddb. */
38094 if (CONSTANT_P (XEXP (x, 1)))
38095 {
38096 *total = (cost->fabs
38097 + rtx_cost (XEXP (x, 0), code, 0, speed)
38098 + (speed ? 2 : COSTS_N_BYTES (16)));
38099 return true;
38100 }
38101 count = 3;
38102 }
38103 else if (TARGET_SSSE3)
38104 count = 7;
38105 *total = cost->fabs * count;
38106 }
38107 else
38108 *total = cost->fabs;
38109 }
38110 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
38111 {
38112 if (CONST_INT_P (XEXP (x, 1)))
38113 {
38114 if (INTVAL (XEXP (x, 1)) > 32)
38115 *total = cost->shift_const + COSTS_N_INSNS (2);
38116 else
38117 *total = cost->shift_const * 2;
38118 }
38119 else
38120 {
38121 if (GET_CODE (XEXP (x, 1)) == AND)
38122 *total = cost->shift_var * 2;
38123 else
38124 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
38125 }
38126 }
38127 else
38128 {
38129 if (CONST_INT_P (XEXP (x, 1)))
38130 *total = cost->shift_const;
38131 else if (GET_CODE (XEXP (x, 1)) == SUBREG
38132 && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND)
38133 {
38134 /* Return the cost after shift-and truncation. */
38135 *total = cost->shift_var;
38136 return true;
38137 }
38138 else
38139 *total = cost->shift_var;
38140 }
38141 return false;
38142
38143 case FMA:
38144 {
38145 rtx sub;
38146
38147 gcc_assert (FLOAT_MODE_P (mode));
38148 gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
38149
38150 /* ??? SSE scalar/vector cost should be used here. */
38151 /* ??? Bald assumption that fma has the same cost as fmul. */
38152 *total = cost->fmul;
38153 *total += rtx_cost (XEXP (x, 1), FMA, 1, speed);
38154
38155 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
38156 sub = XEXP (x, 0);
38157 if (GET_CODE (sub) == NEG)
38158 sub = XEXP (sub, 0);
38159 *total += rtx_cost (sub, FMA, 0, speed);
38160
38161 sub = XEXP (x, 2);
38162 if (GET_CODE (sub) == NEG)
38163 sub = XEXP (sub, 0);
38164 *total += rtx_cost (sub, FMA, 2, speed);
38165 return true;
38166 }
38167
38168 case MULT:
38169 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38170 {
38171 /* ??? SSE scalar cost should be used here. */
38172 *total = cost->fmul;
38173 return false;
38174 }
38175 else if (X87_FLOAT_MODE_P (mode))
38176 {
38177 *total = cost->fmul;
38178 return false;
38179 }
38180 else if (FLOAT_MODE_P (mode))
38181 {
38182 /* ??? SSE vector cost should be used here. */
38183 *total = cost->fmul;
38184 return false;
38185 }
38186 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
38187 {
38188 /* V*QImode is emulated with 7-13 insns. */
38189 if (mode == V16QImode || mode == V32QImode)
38190 {
38191 int extra = 11;
38192 if (TARGET_XOP && mode == V16QImode)
38193 extra = 5;
38194 else if (TARGET_SSSE3)
38195 extra = 6;
38196 *total = cost->fmul * 2 + cost->fabs * extra;
38197 }
38198 /* V*DImode is emulated with 5-8 insns. */
38199 else if (mode == V2DImode || mode == V4DImode)
38200 {
38201 if (TARGET_XOP && mode == V2DImode)
38202 *total = cost->fmul * 2 + cost->fabs * 3;
38203 else
38204 *total = cost->fmul * 3 + cost->fabs * 5;
38205 }
38206 /* Without sse4.1, we don't have PMULLD; it's emulated with 7
38207 insns, including two PMULUDQ. */
38208 else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX))
38209 *total = cost->fmul * 2 + cost->fabs * 5;
38210 else
38211 *total = cost->fmul;
38212 return false;
38213 }
38214 else
38215 {
38216 rtx op0 = XEXP (x, 0);
38217 rtx op1 = XEXP (x, 1);
38218 int nbits;
38219 if (CONST_INT_P (XEXP (x, 1)))
38220 {
38221 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
38222 for (nbits = 0; value != 0; value &= value - 1)
38223 nbits++;
38224 }
38225 else
38226 /* This is arbitrary. */
38227 nbits = 7;
38228
38229 /* Compute costs correctly for widening multiplication. */
38230 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
38231 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
38232 == GET_MODE_SIZE (mode))
38233 {
38234 int is_mulwiden = 0;
38235 enum machine_mode inner_mode = GET_MODE (op0);
38236
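/* If both operands are extended the same way (or the constant operand
   fits the inner mode), this is a single widening multiply, so cost it
   in the narrower mode.  */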
38237 if (GET_CODE (op0) == GET_CODE (op1))
38238 is_mulwiden = 1, op1 = XEXP (op1, 0);
38239 else if (CONST_INT_P (op1))
38240 {
38241 if (GET_CODE (op0) == SIGN_EXTEND)
38242 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
38243 == INTVAL (op1);
38244 else
38245 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
38246 }
38247
38248 if (is_mulwiden)
38249 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
38250 }
38251
38252 *total = (cost->mult_init[MODE_INDEX (mode)]
38253 + nbits * cost->mult_bit
38254 + rtx_cost (op0, outer_code, opno, speed)
38255 + rtx_cost (op1, outer_code, opno, speed));
38256
38257 return true;
38258 }
38259
38260 case DIV:
38261 case UDIV:
38262 case MOD:
38263 case UMOD:
38264 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38265 /* ??? SSE cost should be used here. */
38266 *total = cost->fdiv;
38267 else if (X87_FLOAT_MODE_P (mode))
38268 *total = cost->fdiv;
38269 else if (FLOAT_MODE_P (mode))
38270 /* ??? SSE vector cost should be used here. */
38271 *total = cost->fdiv;
38272 else
38273 *total = cost->divide[MODE_INDEX (mode)];
38274 return false;
38275
38276 case PLUS:
38277 if (GET_MODE_CLASS (mode) == MODE_INT
38278 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
38279 {
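/* (plus (plus (mult X 2|4|8) Y) CONST) maps onto a single
   lea CONST(Y,X,scale) instruction.  */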
38280 if (GET_CODE (XEXP (x, 0)) == PLUS
38281 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
38282 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
38283 && CONSTANT_P (XEXP (x, 1)))
38284 {
38285 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
38286 if (val == 2 || val == 4 || val == 8)
38287 {
38288 *total = cost->lea;
38289 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
38290 outer_code, opno, speed);
38291 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
38292 outer_code, opno, speed);
38293 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
38294 return true;
38295 }
38296 }
38297 else if (GET_CODE (XEXP (x, 0)) == MULT
38298 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
38299 {
38300 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
38301 if (val == 2 || val == 4 || val == 8)
38302 {
38303 *total = cost->lea;
38304 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
38305 outer_code, opno, speed);
38306 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
38307 return true;
38308 }
38309 }
38310 else if (GET_CODE (XEXP (x, 0)) == PLUS)
38311 {
38312 *total = cost->lea;
38313 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
38314 outer_code, opno, speed);
38315 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
38316 outer_code, opno, speed);
38317 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
38318 return true;
38319 }
38320 }
38321 /* FALLTHRU */
38322
38323 case MINUS:
38324 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38325 {
38326 /* ??? SSE cost should be used here. */
38327 *total = cost->fadd;
38328 return false;
38329 }
38330 else if (X87_FLOAT_MODE_P (mode))
38331 {
38332 *total = cost->fadd;
38333 return false;
38334 }
38335 else if (FLOAT_MODE_P (mode))
38336 {
38337 /* ??? SSE vector cost should be used here. */
38338 *total = cost->fadd;
38339 return false;
38340 }
38341 /* FALLTHRU */
38342
38343 case AND:
38344 case IOR:
38345 case XOR:
38346 if (GET_MODE_CLASS (mode) == MODE_INT
38347 && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
38348 {
38349 *total = (cost->add * 2
38350 + (rtx_cost (XEXP (x, 0), outer_code, opno, speed)
38351 << (GET_MODE (XEXP (x, 0)) != DImode))
38352 + (rtx_cost (XEXP (x, 1), outer_code, opno, speed)
38353 << (GET_MODE (XEXP (x, 1)) != DImode)));
38354 return true;
38355 }
38356 /* FALLTHRU */
38357
38358 case NEG:
38359 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38360 {
38361 /* ??? SSE cost should be used here. */
38362 *total = cost->fchs;
38363 return false;
38364 }
38365 else if (X87_FLOAT_MODE_P (mode))
38366 {
38367 *total = cost->fchs;
38368 return false;
38369 }
38370 else if (FLOAT_MODE_P (mode))
38371 {
38372 /* ??? SSE vector cost should be used here. */
38373 *total = cost->fchs;
38374 return false;
38375 }
38376 /* FALLTHRU */
38377
38378 case NOT:
38379 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
38380 {
38381 /* ??? Should be SSE vector operation cost. */
38382 /* At least for published AMD latencies, this really is the same
38383 as the latency for a simple fpu operation like fabs. */
38384 *total = cost->fabs;
38385 }
38386 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
38387 *total = cost->add * 2;
38388 else
38389 *total = cost->add;
38390 return false;
38391
38392 case COMPARE:
38393 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
38394 && XEXP (XEXP (x, 0), 1) == const1_rtx
38395 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
38396 && XEXP (x, 1) == const0_rtx)
38397 {
38398 /* This kind of construct is implemented using test[bwl].
38399 Treat it as if we had an AND. */
38400 *total = (cost->add
38401 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, opno, speed)
38402 + rtx_cost (const1_rtx, outer_code, opno, speed));
38403 return true;
38404 }
38405 return false;
38406
38407 case FLOAT_EXTEND:
38408 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
38409 *total = 0;
38410 return false;
38411
38412 case ABS:
38413 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38414 /* ??? SSE cost should be used here. */
38415 *total = cost->fabs;
38416 else if (X87_FLOAT_MODE_P (mode))
38417 *total = cost->fabs;
38418 else if (FLOAT_MODE_P (mode))
38419 /* ??? SSE vector cost should be used here. */
38420 *total = cost->fabs;
38421 return false;
38422
38423 case SQRT:
38424 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38425 /* ??? SSE cost should be used here. */
38426 *total = cost->fsqrt;
38427 else if (X87_FLOAT_MODE_P (mode))
38428 *total = cost->fsqrt;
38429 else if (FLOAT_MODE_P (mode))
38430 /* ??? SSE vector cost should be used here. */
38431 *total = cost->fsqrt;
38432 return false;
38433
38434 case UNSPEC:
38435 if (XINT (x, 1) == UNSPEC_TP)
38436 *total = 0;
38437 return false;
38438
38439 case VEC_SELECT:
38440 case VEC_CONCAT:
38441 case VEC_DUPLICATE:
38442 /* ??? Assume all of these vector manipulation patterns are
38443 recognizable. In which case they all pretty much have the
38444 same cost. */
38445 *total = cost->fabs;
38446 return true;
38447 case VEC_MERGE:
38448 mask = XEXP (x, 2);
38449 /* This is a masked instruction; assume the same cost
38450 as the nonmasked variant. */
38451 if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
38452 *total = rtx_cost (XEXP (x, 0), outer_code, opno, speed);
38453 else
38454 *total = cost->fabs;
38455 return true;
38456
38457 default:
38458 return false;
38459 }
38460 }
38461
38462 #if TARGET_MACHO
38463
38464 static int current_machopic_label_num;
38465
38466 /* Given a symbol name and its associated stub, write out the
38467 definition of the stub. */
38468
38469 void
38470 machopic_output_stub (FILE *file, const char *symb, const char *stub)
38471 {
38472 unsigned int length;
38473 char *binder_name, *symbol_name, lazy_ptr_name[32];
38474 int label = ++current_machopic_label_num;
38475
38476 /* For 64-bit we shouldn't get here. */
38477 gcc_assert (!TARGET_64BIT);
38478
38479 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
38480 symb = targetm.strip_name_encoding (symb);
38481
38482 length = strlen (stub);
38483 binder_name = XALLOCAVEC (char, length + 32);
38484 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
38485
38486 length = strlen (symb);
38487 symbol_name = XALLOCAVEC (char, length + 32);
38488 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
38489
38490 sprintf (lazy_ptr_name, "L%d$lz", label);
38491
38492 if (MACHOPIC_ATT_STUB)
38493 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
38494 else if (MACHOPIC_PURE)
38495 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
38496 else
38497 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
38498
38499 fprintf (file, "%s:\n", stub);
38500 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
38501
38502 if (MACHOPIC_ATT_STUB)
38503 {
38504 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
38505 }
38506 else if (MACHOPIC_PURE)
38507 {
38508 /* PIC stub. */
38509 /* 25-byte PIC stub using "CALL get_pc_thunk". */
38510 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
38511 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
38512 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
38513 label, lazy_ptr_name, label);
38514 fprintf (file, "\tjmp\t*%%ecx\n");
38515 }
38516 else
38517 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
38518
38519 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
38520 it needs no stub-binding-helper. */
38521 if (MACHOPIC_ATT_STUB)
38522 return;
38523
38524 fprintf (file, "%s:\n", binder_name);
38525
38526 if (MACHOPIC_PURE)
38527 {
38528 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
38529 fprintf (file, "\tpushl\t%%ecx\n");
38530 }
38531 else
38532 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
38533
38534 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
38535
38536 /* N.B. Keep the correspondence of these
38537 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
38538 old-pic/new-pic/non-pic stubs; altering this will break
38539 compatibility with existing dylibs. */
38540 if (MACHOPIC_PURE)
38541 {
38542 /* 25-byte PIC stub using "CALL get_pc_thunk". */
38543 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
38544 }
38545 else
38546 /* 16-byte -mdynamic-no-pic stub. */
38547 switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
38548
38549 fprintf (file, "%s:\n", lazy_ptr_name);
38550 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
38551 fprintf (file, ASM_LONG "%s\n", binder_name);
38552 }
38553 #endif /* TARGET_MACHO */
38554
38555 /* Order the registers for the register allocator. */
38556
38557 void
38558 x86_order_regs_for_local_alloc (void)
38559 {
38560 int pos = 0;
38561 int i;
38562
38563 /* First allocate the local general purpose registers. */
38564 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
38565 if (GENERAL_REGNO_P (i) && call_used_regs[i])
38566 reg_alloc_order [pos++] = i;
38567
38568 /* Global general purpose registers. */
38569 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
38570 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
38571 reg_alloc_order [pos++] = i;
38572
38573 /* x87 registers come first in case we are doing FP math
38574 using them. */
38575 if (!TARGET_SSE_MATH)
38576 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
38577 reg_alloc_order [pos++] = i;
38578
38579 /* SSE registers. */
38580 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
38581 reg_alloc_order [pos++] = i;
38582 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
38583 reg_alloc_order [pos++] = i;
38584
38585 /* Extended REX SSE registers. */
38586 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
38587 reg_alloc_order [pos++] = i;
38588
38589 /* Mask register. */
38590 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
38591 reg_alloc_order [pos++] = i;
38592
38593 /* x87 registers. */
38594 if (TARGET_SSE_MATH)
38595 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
38596 reg_alloc_order [pos++] = i;
38597
38598 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
38599 reg_alloc_order [pos++] = i;
38600
38601 /* Initialize the rest of array as we do not allocate some registers
38602 at all. */
38603 while (pos < FIRST_PSEUDO_REGISTER)
38604 reg_alloc_order [pos++] = 0;
38605 }
38606
38607 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
38608 in struct attribute_spec.handler. */
38609 static tree
38610 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
38611 tree args,
38612 int,
38613 bool *no_add_attrs)
38614 {
38615 if (TREE_CODE (*node) != FUNCTION_TYPE
38616 && TREE_CODE (*node) != METHOD_TYPE
38617 && TREE_CODE (*node) != FIELD_DECL
38618 && TREE_CODE (*node) != TYPE_DECL)
38619 {
38620 warning (OPT_Wattributes, "%qE attribute only applies to functions",
38621 name);
38622 *no_add_attrs = true;
38623 return NULL_TREE;
38624 }
38625 if (TARGET_64BIT)
38626 {
38627 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
38628 name);
38629 *no_add_attrs = true;
38630 return NULL_TREE;
38631 }
38632 if (is_attribute_p ("callee_pop_aggregate_return", name))
38633 {
38634 tree cst;
38635
38636 cst = TREE_VALUE (args);
38637 if (TREE_CODE (cst) != INTEGER_CST)
38638 {
38639 warning (OPT_Wattributes,
38640 "%qE attribute requires an integer constant argument",
38641 name);
38642 *no_add_attrs = true;
38643 }
38644 else if (compare_tree_int (cst, 0) != 0
38645 && compare_tree_int (cst, 1) != 0)
38646 {
38647 warning (OPT_Wattributes,
38648 "argument to %qE attribute is neither zero, nor one",
38649 name);
38650 *no_add_attrs = true;
38651 }
38652
38653 return NULL_TREE;
38654 }
38655
38656 return NULL_TREE;
38657 }
38658
38659 /* Handle a "ms_abi" or "sysv_abi" attribute; arguments as in
38660 struct attribute_spec.handler. */
38661 static tree
38662 ix86_handle_abi_attribute (tree *node, tree name, tree, int,
38663 bool *no_add_attrs)
38664 {
38665 if (TREE_CODE (*node) != FUNCTION_TYPE
38666 && TREE_CODE (*node) != METHOD_TYPE
38667 && TREE_CODE (*node) != FIELD_DECL
38668 && TREE_CODE (*node) != TYPE_DECL)
38669 {
38670 warning (OPT_Wattributes, "%qE attribute only applies to functions",
38671 name);
38672 *no_add_attrs = true;
38673 return NULL_TREE;
38674 }
38675
38676 /* Can combine regparm with all attributes but fastcall. */
38677 if (is_attribute_p ("ms_abi", name))
38678 {
38679 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
38680 {
38681 error ("ms_abi and sysv_abi attributes are not compatible");
38682 }
38683
38684 return NULL_TREE;
38685 }
38686 else if (is_attribute_p ("sysv_abi", name))
38687 {
38688 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
38689 {
38690 error ("ms_abi and sysv_abi attributes are not compatible");
38691 }
38692
38693 return NULL_TREE;
38694 }
38695
38696 return NULL_TREE;
38697 }
38698
38699 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
38700 struct attribute_spec.handler. */
38701 static tree
38702 ix86_handle_struct_attribute (tree *node, tree name, tree, int,
38703 bool *no_add_attrs)
38704 {
38705 tree *type = NULL;
38706 if (DECL_P (*node))
38707 {
38708 if (TREE_CODE (*node) == TYPE_DECL)
38709 type = &TREE_TYPE (*node);
38710 }
38711 else
38712 type = node;
38713
38714 if (!(type && RECORD_OR_UNION_TYPE_P (*type)))
38715 {
38716 warning (OPT_Wattributes, "%qE attribute ignored",
38717 name);
38718 *no_add_attrs = true;
38719 }
38720
38721 else if ((is_attribute_p ("ms_struct", name)
38722 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
38723 || ((is_attribute_p ("gcc_struct", name)
38724 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
38725 {
38726 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
38727 name);
38728 *no_add_attrs = true;
38729 }
38730
38731 return NULL_TREE;
38732 }
38733
38734 static tree
38735 ix86_handle_fndecl_attribute (tree *node, tree name, tree, int,
38736 bool *no_add_attrs)
38737 {
38738 if (TREE_CODE (*node) != FUNCTION_DECL)
38739 {
38740 warning (OPT_Wattributes, "%qE attribute only applies to functions",
38741 name);
38742 *no_add_attrs = true;
38743 }
38744 return NULL_TREE;
38745 }
38746
38747 static bool
38748 ix86_ms_bitfield_layout_p (const_tree record_type)
38749 {
38750 return ((TARGET_MS_BITFIELD_LAYOUT
38751 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
38752 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
38753 }
38754
38755 /* Returns an expression indicating where the this parameter is
38756 located on entry to the FUNCTION. */
38757
38758 static rtx
38759 x86_this_parameter (tree function)
38760 {
38761 tree type = TREE_TYPE (function);
38762 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
38763 int nregs;
38764
38765 if (TARGET_64BIT)
38766 {
38767 const int *parm_regs;
38768
38769 if (ix86_function_type_abi (type) == MS_ABI)
38770 parm_regs = x86_64_ms_abi_int_parameter_registers;
38771 else
38772 parm_regs = x86_64_int_parameter_registers;
38773 return gen_rtx_REG (Pmode, parm_regs[aggr]);
38774 }
38775
38776 nregs = ix86_function_regparm (type, function);
38777
38778 if (nregs > 0 && !stdarg_p (type))
38779 {
38780 int regno;
38781 unsigned int ccvt = ix86_get_callcvt (type);
38782
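/* With fastcall the hidden aggregate-return pointer takes %ecx, pushing
   THIS into %edx; with thiscall THIS stays in %ecx and the hidden
   pointer goes on the stack.  */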
38783 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
38784 regno = aggr ? DX_REG : CX_REG;
38785 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
38786 {
38787 regno = CX_REG;
38788 if (aggr)
38789 return gen_rtx_MEM (SImode,
38790 plus_constant (Pmode, stack_pointer_rtx, 4));
38791 }
38792 else
38793 {
38794 regno = AX_REG;
38795 if (aggr)
38796 {
38797 regno = DX_REG;
38798 if (nregs == 1)
38799 return gen_rtx_MEM (SImode,
38800 plus_constant (Pmode,
38801 stack_pointer_rtx, 4));
38802 }
38803 }
38804 return gen_rtx_REG (SImode, regno);
38805 }
38806
38807 return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
38808 aggr ? 8 : 4));
38809 }
38810
38811 /* Determine whether x86_output_mi_thunk can succeed. */
38812
38813 static bool
38814 x86_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
38815 const_tree function)
38816 {
38817 /* 64-bit can handle anything. */
38818 if (TARGET_64BIT)
38819 return true;
38820
38821 /* For 32-bit, everything's fine if we have one free register. */
38822 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
38823 return true;
38824
38825 /* Need a free register for vcall_offset. */
38826 if (vcall_offset)
38827 return false;
38828
38829 /* Need a free register for GOT references. */
38830 if (flag_pic && !targetm.binds_local_p (function))
38831 return false;
38832
38833 /* Otherwise ok. */
38834 return true;
38835 }
38836
38837 /* Output the assembler code for a thunk function. THUNK_DECL is the
38838 declaration for the thunk function itself, FUNCTION is the decl for
38839 the target function. DELTA is an immediate constant offset to be
38840 added to THIS. If VCALL_OFFSET is nonzero, the word at
38841 *(*this + vcall_offset) should be added to THIS. */
38842
38843 static void
38844 x86_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
38845 HOST_WIDE_INT vcall_offset, tree function)
38846 {
38847 rtx this_param = x86_this_parameter (function);
38848 rtx this_reg, tmp, fnaddr;
38849 unsigned int tmp_regno;
38850 rtx_insn *insn;
38851
38852 if (TARGET_64BIT)
38853 tmp_regno = R10_REG;
38854 else
38855 {
38856 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
38857 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
38858 tmp_regno = AX_REG;
38859 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
38860 tmp_regno = DX_REG;
38861 else
38862 tmp_regno = CX_REG;
38863 }
38864
38865 emit_note (NOTE_INSN_PROLOGUE_END);
38866
38867 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
38868 pull it in now and let DELTA benefit. */
38869 if (REG_P (this_param))
38870 this_reg = this_param;
38871 else if (vcall_offset)
38872 {
38873 /* Put the this parameter into %eax. */
38874 this_reg = gen_rtx_REG (Pmode, AX_REG);
38875 emit_move_insn (this_reg, this_param);
38876 }
38877 else
38878 this_reg = NULL_RTX;
38879
38880 /* Adjust the this parameter by a fixed constant. */
38881 if (delta)
38882 {
38883 rtx delta_rtx = GEN_INT (delta);
38884 rtx delta_dst = this_reg ? this_reg : this_param;
38885
38886 if (TARGET_64BIT)
38887 {
38888 if (!x86_64_general_operand (delta_rtx, Pmode))
38889 {
38890 tmp = gen_rtx_REG (Pmode, tmp_regno);
38891 emit_move_insn (tmp, delta_rtx);
38892 delta_rtx = tmp;
38893 }
38894 }
38895
38896 ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
38897 }
38898
38899 /* Adjust the this parameter by a value stored in the vtable. */
38900 if (vcall_offset)
38901 {
38902 rtx vcall_addr, vcall_mem, this_mem;
38903
38904 tmp = gen_rtx_REG (Pmode, tmp_regno);
38905
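/* Pmode and ptr_mode differ for x32, where pointers are 32 bits wide
   but Pmode is DImode; zero-extend the loaded pointer in that case.  */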
38906 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
38907 if (Pmode != ptr_mode)
38908 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
38909 emit_move_insn (tmp, this_mem);
38910
38911 /* Adjust the this parameter. */
38912 vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
38913 if (TARGET_64BIT
38914 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
38915 {
38916 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
38917 emit_move_insn (tmp2, GEN_INT (vcall_offset));
38918 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
38919 }
38920
38921 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
38922 if (Pmode != ptr_mode)
38923 emit_insn (gen_addsi_1_zext (this_reg,
38924 gen_rtx_REG (ptr_mode,
38925 REGNO (this_reg)),
38926 vcall_mem));
38927 else
38928 ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
38929 }
38930
38931 /* If necessary, drop THIS back to its stack slot. */
38932 if (this_reg && this_reg != this_param)
38933 emit_move_insn (this_param, this_reg);
38934
38935 fnaddr = XEXP (DECL_RTL (function), 0);
38936 if (TARGET_64BIT)
38937 {
38938 if (!flag_pic || targetm.binds_local_p (function)
38939 || TARGET_PECOFF)
38940 ;
38941 else
38942 {
38943 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
38944 tmp = gen_rtx_CONST (Pmode, tmp);
38945 fnaddr = gen_const_mem (Pmode, tmp);
38946 }
38947 }
38948 else
38949 {
38950 if (!flag_pic || targetm.binds_local_p (function))
38951 ;
38952 #if TARGET_MACHO
38953 else if (TARGET_MACHO)
38954 {
38955 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
38956 fnaddr = XEXP (fnaddr, 0);
38957 }
38958 #endif /* TARGET_MACHO */
38959 else
38960 {
38961 tmp = gen_rtx_REG (Pmode, CX_REG);
38962 output_set_got (tmp, NULL_RTX);
38963
38964 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
38965 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
38966 fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
38967 fnaddr = gen_const_mem (Pmode, fnaddr);
38968 }
38969 }
38970
38971 /* Our sibling call patterns do not allow memories, because we have no
38972 predicate that can distinguish between frame and non-frame memory.
38973 For our purposes here, we can get away with (ab)using a jump pattern,
38974 because we're going to do no optimization. */
38975 if (MEM_P (fnaddr))
38976 {
38977 if (sibcall_insn_operand (fnaddr, word_mode))
38978 {
38979 tmp = gen_rtx_CALL (VOIDmode, fnaddr, const0_rtx);
38980 tmp = emit_call_insn (tmp);
38981 SIBLING_CALL_P (tmp) = 1;
38982 }
38983 else
38984 emit_jump_insn (gen_indirect_jump (fnaddr));
38985 }
38986 else
38987 {
38988 if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
38989 fnaddr = legitimize_pic_address (fnaddr,
38990 gen_rtx_REG (Pmode, tmp_regno));
38991
38992 if (!sibcall_insn_operand (fnaddr, word_mode))
38993 {
38994 tmp = gen_rtx_REG (word_mode, tmp_regno);
38995 if (GET_MODE (fnaddr) != word_mode)
38996 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
38997 emit_move_insn (tmp, fnaddr);
38998 fnaddr = tmp;
38999 }
39000
39001 tmp = gen_rtx_MEM (QImode, fnaddr);
39002 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
39003 tmp = emit_call_insn (tmp);
39004 SIBLING_CALL_P (tmp) = 1;
39005 }
39006 emit_barrier ();
39007
39008 /* Emit just enough of rest_of_compilation to get the insns emitted.
39009 Note that use_thunk calls assemble_start_function et al. */
39010 insn = get_insns ();
39011 shorten_branches (insn);
39012 final_start_function (insn, file, 1);
39013 final (insn, file, 1);
39014 final_end_function ();
39015 }
39016
39017 static void
39018 x86_file_start (void)
39019 {
39020 default_file_start ();
39021 if (TARGET_16BIT)
39022 fputs ("\t.code16gcc\n", asm_out_file);
39023 #if TARGET_MACHO
39024 darwin_file_start ();
39025 #endif
39026 if (X86_FILE_START_VERSION_DIRECTIVE)
39027 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
39028 if (X86_FILE_START_FLTUSED)
39029 fputs ("\t.global\t__fltused\n", asm_out_file);
39030 if (ix86_asm_dialect == ASM_INTEL)
39031 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
39032 }
39033
39034 int
39035 x86_field_alignment (tree field, int computed)
39036 {
39037 enum machine_mode mode;
39038 tree type = TREE_TYPE (field);
39039
39040 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
39041 return computed;
39042 mode = TYPE_MODE (strip_array_types (type));
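/* The 32-bit psABI aligns double and 64-bit integer fields to only
   4 bytes inside structs; -malign-double (or 64-bit mode) lifts the cap.  */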
39043 if (mode == DFmode || mode == DCmode
39044 || GET_MODE_CLASS (mode) == MODE_INT
39045 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
39046 return MIN (32, computed);
39047 return computed;
39048 }
39049
39050 /* Print call to TARGET to FILE. */
39051
39052 static void
39053 x86_print_call_or_nop (FILE *file, const char *target)
39054 {
39055 if (flag_nop_mcount)
39056 fprintf (file, "1:\tnopl 0x00(%%eax,%%eax,1)\n"); /* 5 byte nop. */
39057 else
39058 fprintf (file, "1:\tcall\t%s\n", target);
39059 }
39060
39061 /* Output assembler code to FILE to increment profiler label # LABELNO
39062 for profiling a function entry. */
39063 void
39064 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
39065 {
39066 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
39067 : MCOUNT_NAME);
39068 if (TARGET_64BIT)
39069 {
39070 #ifndef NO_PROFILE_COUNTERS
39071 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
39072 #endif
39073
39074 if (!TARGET_PECOFF && flag_pic)
39075 fprintf (file, "1:\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
39076 else
39077 x86_print_call_or_nop (file, mcount_name);
39078 }
39079 else if (flag_pic)
39080 {
39081 #ifndef NO_PROFILE_COUNTERS
39082 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
39083 LPREFIX, labelno);
39084 #endif
39085 fprintf (file, "1:\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
39086 }
39087 else
39088 {
39089 #ifndef NO_PROFILE_COUNTERS
39090 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
39091 LPREFIX, labelno);
39092 #endif
39093 x86_print_call_or_nop (file, mcount_name);
39094 }
39095
39096 if (flag_record_mcount)
39097 {
39098 fprintf (file, "\t.section __mcount_loc, \"a\",@progbits\n");
39099 fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
39100 fprintf (file, "\t.previous\n");
39101 }
39102 }
39103
39104 /* We don't have exact information about the insn sizes, but we may assume
39105 quite safely that we are informed about all 1 byte insns and memory
39106 address sizes. This is enough to eliminate unnecessary padding in
39107 99% of cases. */
39108
39109 static int
39110 min_insn_size (rtx_insn *insn)
39111 {
39112 int l = 0, len;
39113
39114 if (!INSN_P (insn) || !active_insn_p (insn))
39115 return 0;
39116
39117 /* Discard alignments we've emitted, and jump instructions. */
39118 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
39119 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
39120 return 0;
39121
39122 /* Important case - calls are always 5 bytes.
39123 It is common to have many calls in a row. */
39124 if (CALL_P (insn)
39125 && symbolic_reference_mentioned_p (PATTERN (insn))
39126 && !SIBLING_CALL_P (insn))
39127 return 5;
39128 len = get_attr_length (insn);
39129 if (len <= 1)
39130 return 1;
39131
39132 /* For normal instructions we rely on get_attr_length being exact,
39133 with a few exceptions. */
39134 if (!JUMP_P (insn))
39135 {
39136 enum attr_type type = get_attr_type (insn);
39137
39138 switch (type)
39139 {
39140 case TYPE_MULTI:
39141 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
39142 || asm_noperands (PATTERN (insn)) >= 0)
39143 return 0;
39144 break;
39145 case TYPE_OTHER:
39146 case TYPE_FCMP:
39147 break;
39148 default:
39149 /* Otherwise trust get_attr_length. */
39150 return len;
39151 }
39152
39153 l = get_attr_length_address (insn);
39154 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
39155 l = 4;
39156 }
39157 if (l)
39158 return 1+l;
39159 else
39160 return 2;
39161 }
39162
39163 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
39164
39165 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
39166 window. */
39167
39168 static void
39169 ix86_avoid_jump_mispredicts (void)
39170 {
39171 rtx_insn *insn, *start = get_insns ();
39172 int nbytes = 0, njumps = 0;
39173 int isjump = 0;
39174
39175 /* Look for all minimal intervals of instructions containing 4 jumps.
39176 The intervals are bounded by START and INSN. NBYTES is the total
39177 size of instructions in the interval including INSN and not including
39178 START. When the NBYTES is smaller than 16 bytes, it is possible
39179 that the end of START and INSN ends up in the same 16byte page.
39180
39181 The smallest offset in the page INSN can start is the case where START
39182 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
39183 We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).
39184
39185 Don't consider asm goto as jump, while it can contain a jump, it doesn't
39186 have to, control transfer to label(s) can be performed through other
39187 means, and also we estimate minimum length of all asm stmts as 0. */
39188 for (insn = start; insn; insn = NEXT_INSN (insn))
39189 {
39190 int min_size;
39191
39192 if (LABEL_P (insn))
39193 {
39194 int align = label_to_alignment (insn);
39195 int max_skip = label_to_max_skip (insn);
39196
39197 if (max_skip > 15)
39198 max_skip = 15;
39199 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
39200 already in the current 16 byte page, because otherwise
39201 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
39202 bytes to reach 16 byte boundary. */
39203 if (align <= 0
39204 || (align <= 3 && max_skip != (1 << align) - 1))
39205 max_skip = 0;
39206 if (dump_file)
39207 fprintf (dump_file, "Label %i with max_skip %i\n",
39208 INSN_UID (insn), max_skip);
39209 if (max_skip)
39210 {
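/* The label's alignment padding can place up to MAX_SKIP bytes before
   the next insn, so drop insns from the start of the window until the
   padded window fits in 16 bytes again.  */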
39211 while (nbytes + max_skip >= 16)
39212 {
39213 start = NEXT_INSN (start);
39214 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
39215 || CALL_P (start))
39216 njumps--, isjump = 1;
39217 else
39218 isjump = 0;
39219 nbytes -= min_insn_size (start);
39220 }
39221 }
39222 continue;
39223 }
39224
39225 min_size = min_insn_size (insn);
39226 nbytes += min_size;
39227 if (dump_file)
39228 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
39229 INSN_UID (insn), min_size);
39230 if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
39231 || CALL_P (insn))
39232 njumps++;
39233 else
39234 continue;
39235
39236 while (njumps > 3)
39237 {
39238 start = NEXT_INSN (start);
39239 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
39240 || CALL_P (start))
39241 njumps--, isjump = 1;
39242 else
39243 isjump = 0;
39244 nbytes -= min_insn_size (start);
39245 }
39246 gcc_assert (njumps >= 0);
39247 if (dump_file)
39248 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
39249 INSN_UID (start), INSN_UID (insn), nbytes);
39250
39251 if (njumps == 3 && isjump && nbytes < 16)
39252 {
39253 int padsize = 15 - nbytes + min_insn_size (insn);
39254
39255 if (dump_file)
39256 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
39257 INSN_UID (insn), padsize);
39258 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
39259 }
39260 }
39261 }
39262 #endif
39263
39264 /* AMD Athlon works faster
39265 when RET is not the destination of a conditional jump or directly preceded
39266 by another jump instruction. We avoid the penalty by inserting a NOP just
39267 before the RET instruction in such cases. */
39268 static void
39269 ix86_pad_returns (void)
39270 {
39271 edge e;
39272 edge_iterator ei;
39273
39274 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
39275 {
39276 basic_block bb = e->src;
39277 rtx_insn *ret = BB_END (bb);
39278 rtx_insn *prev;
39279 bool replace = false;
39280
39281 if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
39282 || optimize_bb_for_size_p (bb))
39283 continue;
39284 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
39285 if (active_insn_p (prev) || LABEL_P (prev))
39286 break;
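/* A label right before the return means the RET can be reached by a
   jump; pad it if any predecessor enters through a non-fallthru edge.  */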
39287 if (prev && LABEL_P (prev))
39288 {
39289 edge e;
39290 edge_iterator ei;
39291
39292 FOR_EACH_EDGE (e, ei, bb->preds)
39293 if (EDGE_FREQUENCY (e) && e->src->index >= 0
39294 && !(e->flags & EDGE_FALLTHRU))
39295 {
39296 replace = true;
39297 break;
39298 }
39299 }
39300 if (!replace)
39301 {
39302 prev = prev_active_insn (ret);
39303 if (prev
39304 && ((JUMP_P (prev) && any_condjump_p (prev))
39305 || CALL_P (prev)))
39306 replace = true;
39307 /* Empty functions get branch mispredict even when
39308 the jump destination is not visible to us. */
39309 if (!prev && !optimize_function_for_size_p (cfun))
39310 replace = true;
39311 }
39312 if (replace)
39313 {
39314 emit_jump_insn_before (gen_simple_return_internal_long (), ret);
39315 delete_insn (ret);
39316 }
39317 }
39318 }
39319
39320 /* Count the minimum number of instructions in BB. Return 4 if the
39321 number of instructions >= 4. */
39322
39323 static int
39324 ix86_count_insn_bb (basic_block bb)
39325 {
39326 rtx_insn *insn;
39327 int insn_count = 0;
39328
39329 /* Count number of instructions in this block. Return 4 if the number
39330 of instructions >= 4. */
39331 FOR_BB_INSNS (bb, insn)
39332 {
39333 /* This only happens in exit blocks. */
39334 if (JUMP_P (insn)
39335 && ANY_RETURN_P (PATTERN (insn)))
39336 break;
39337
39338 if (NONDEBUG_INSN_P (insn)
39339 && GET_CODE (PATTERN (insn)) != USE
39340 && GET_CODE (PATTERN (insn)) != CLOBBER)
39341 {
39342 insn_count++;
39343 if (insn_count >= 4)
39344 return insn_count;
39345 }
39346 }
39347
39348 return insn_count;
39349 }
39350
39351
39352 /* Count the minimum number of instructions in code path in BB.
39353 Return 4 if the number of instructions >= 4. */
39354
39355 static int
39356 ix86_count_insn (basic_block bb)
39357 {
39358 edge e;
39359 edge_iterator ei;
39360 int min_prev_count;
39361
39362 /* Only bother counting instructions along paths with no
39363 more than 2 basic blocks between entry and exit. Given
39364 that BB has an edge to exit, determine if a predecessor
39365 of BB has an edge from entry. If so, compute the number
39366 of instructions in the predecessor block. If there
39367 happen to be multiple such blocks, compute the minimum. */
39368 min_prev_count = 4;
39369 FOR_EACH_EDGE (e, ei, bb->preds)
39370 {
39371 edge prev_e;
39372 edge_iterator prev_ei;
39373
39374 if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
39375 {
39376 min_prev_count = 0;
39377 break;
39378 }
39379 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
39380 {
39381 if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
39382 {
39383 int count = ix86_count_insn_bb (e->src);
39384 if (count < min_prev_count)
39385 min_prev_count = count;
39386 break;
39387 }
39388 }
39389 }
39390
39391 if (min_prev_count < 4)
39392 min_prev_count += ix86_count_insn_bb (bb);
39393
39394 return min_prev_count;
39395 }
39396
39397 /* Pad short function to 4 instructions. */
39398
39399 static void
39400 ix86_pad_short_function (void)
39401 {
39402 edge e;
39403 edge_iterator ei;
39404
39405 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
39406 {
39407 rtx_insn *ret = BB_END (e->src);
39408 if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
39409 {
39410 int insn_count = ix86_count_insn (e->src);
39411
39412 /* Pad short function. */
39413 if (insn_count < 4)
39414 {
39415 rtx_insn *insn = ret;
39416
39417 /* Find epilogue. */
39418 while (insn
39419 && (!NOTE_P (insn)
39420 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
39421 insn = PREV_INSN (insn);
39422
39423 if (!insn)
39424 insn = ret;
39425
39426 /* Two NOPs count as one instruction. */
39427 insn_count = 2 * (4 - insn_count);
39428 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
39429 }
39430 }
39431 }
39432 }
39433
39434 /* Fix up a Windows system unwinder issue. If an EH region falls through into
39435 the epilogue, the Windows system unwinder will apply epilogue logic and
39436 produce incorrect offsets. This can be avoided by adding a nop between
39437 the last insn that can throw and the first insn of the epilogue. */
39438
39439 static void
39440 ix86_seh_fixup_eh_fallthru (void)
39441 {
39442 edge e;
39443 edge_iterator ei;
39444
39445 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
39446 {
39447 rtx_insn *insn, *next;
39448
39449 /* Find the beginning of the epilogue. */
39450 for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
39451 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
39452 break;
39453 if (insn == NULL)
39454 continue;
39455
39456 /* We only care about preceding insns that can throw. */
39457 insn = prev_active_insn (insn);
39458 if (insn == NULL || !can_throw_internal (insn))
39459 continue;
39460
39461 /* Do not separate calls from their debug information. */
39462 for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
39463 if (NOTE_P (next)
39464 && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
39465 || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION))
39466 insn = next;
39467 else
39468 break;
39469
39470 emit_insn_after (gen_nops (const1_rtx), insn);
39471 }
39472 }
39473
39474 /* Implement machine specific optimizations. We implement padding of returns
39475 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
39476 static void
39477 ix86_reorg (void)
39478 {
39479 /* We are freeing block_for_insn in the toplev to keep compatibility
39480 with old MDEP_REORGS that are not CFG based. Recompute it now. */
39481 compute_bb_for_insn ();
39482
39483 if (TARGET_SEH && current_function_has_exception_handlers ())
39484 ix86_seh_fixup_eh_fallthru ();
39485
39486 if (optimize && optimize_function_for_speed_p (cfun))
39487 {
39488 if (TARGET_PAD_SHORT_FUNCTION)
39489 ix86_pad_short_function ();
39490 else if (TARGET_PAD_RETURNS)
39491 ix86_pad_returns ();
39492 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
39493 if (TARGET_FOUR_JUMP_LIMIT)
39494 ix86_avoid_jump_mispredicts ();
39495 #endif
39496 }
39497 }
39498
39499 /* Return nonzero when a QImode register that must be represented via a REX
39500 prefix is used. */
39501 bool
39502 x86_extended_QIreg_mentioned_p (rtx_insn *insn)
39503 {
39504 int i;
39505 extract_insn_cached (insn);
39506 for (i = 0; i < recog_data.n_operands; i++)
39507 if (GENERAL_REG_P (recog_data.operand[i])
39508 && !QI_REGNO_P (REGNO (recog_data.operand[i])))
39509 return true;
39510 return false;
39511 }
39512
39513 /* Return nonzero when P points to register encoded via REX prefix.
39514 Called via for_each_rtx. */
39515 static int
39516 extended_reg_mentioned_1 (rtx *p, void *)
39517 {
39518 unsigned int regno;
39519 if (!REG_P (*p))
39520 return 0;
39521 regno = REGNO (*p);
39522 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
39523 }
39524
39525 /* Return true when INSN mentions register that must be encoded using REX
39526 prefix. */
39527 bool
39528 x86_extended_reg_mentioned_p (rtx insn)
39529 {
39530 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
39531 extended_reg_mentioned_1, NULL);
39532 }
39533
39534 /* If profitable, negate (without causing overflow) integer constant
39535 of mode MODE at location LOC. Return true in this case. */
39536 bool
39537 x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
39538 {
39539 HOST_WIDE_INT val;
39540
39541 if (!CONST_INT_P (*loc))
39542 return false;
39543
39544 switch (mode)
39545 {
39546 case DImode:
39547 /* DImode x86_64 constants must fit in 32 bits. */
39548 gcc_assert (x86_64_immediate_operand (*loc, mode));
39549
39550 mode = SImode;
39551 break;
39552
39553 case SImode:
39554 case HImode:
39555 case QImode:
39556 break;
39557
39558 default:
39559 gcc_unreachable ();
39560 }
39561
39562 /* Avoid overflows. */
39563 if (mode_signbit_p (mode, *loc))
39564 return false;
39565
39566 val = INTVAL (*loc);
39567
39568 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
39569 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
39570 if ((val < 0 && val != -128)
39571 || val == 128)
39572 {
39573 *loc = GEN_INT (-val);
39574 return true;
39575 }
39576
39577 return false;
39578 }
39579
39580 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
39581 optabs would emit if we didn't have TFmode patterns. */
39582
39583 void
39584 x86_emit_floatuns (rtx operands[2])
39585 {
39586 rtx_code_label *neglab, *donelab;
39587 rtx i0, i1, f0, in, out;
39588 enum machine_mode mode, inmode;
39589
39590 inmode = GET_MODE (operands[1]);
39591 gcc_assert (inmode == SImode || inmode == DImode);
39592
39593 out = operands[0];
39594 in = force_reg (inmode, operands[1]);
39595 mode = GET_MODE (out);
39596 neglab = gen_label_rtx ();
39597 donelab = gen_label_rtx ();
39598 f0 = gen_reg_rtx (mode);
39599
39600 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
39601
39602 expand_float (out, in, 0);
39603
39604 emit_jump_insn (gen_jump (donelab));
39605 emit_barrier ();
39606
39607 emit_label (neglab);
39608
39609 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
39610 1, OPTAB_DIRECT);
39611 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
39612 1, OPTAB_DIRECT);
39613 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
39614
39615 expand_float (f0, i0, 0);
39616
39617 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
39618
39619 emit_label (donelab);
39620 }
39621 \f
39622 /* AVX512F does support 64-byte integer vector operations,
39623 thus the longest vector we are faced with is V64QImode. */
39624 #define MAX_VECT_LEN 64
39625
39626 struct expand_vec_perm_d
39627 {
39628 rtx target, op0, op1;
39629 unsigned char perm[MAX_VECT_LEN];
39630 enum machine_mode vmode;
39631 unsigned char nelt;
39632 bool one_operand_p;
39633 bool testing_p;
39634 };
39635
39636 static bool canonicalize_perm (struct expand_vec_perm_d *d);
39637 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
39638 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
39639
39640 /* Get a vector mode of the same size as the original but with elements
39641 twice as wide. This is only guaranteed to apply to integral vectors. */
39642
39643 static inline enum machine_mode
39644 get_mode_wider_vector (enum machine_mode o)
39645 {
39646 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
39647 enum machine_mode n = GET_MODE_WIDER_MODE (o);
39648 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
39649 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
39650 return n;
39651 }
39652
39653 /* A subroutine of ix86_expand_vector_init_duplicate. Tries to
39654 fill target with val via vec_duplicate. */
39655
39656 static bool
39657 ix86_vector_duplicate_value (enum machine_mode mode, rtx target, rtx val)
39658 {
39659 bool ok;
39660 rtx_insn *insn;
39661 rtx dup;
39662
39663 /* First attempt to recognize VAL as-is. */
39664 dup = gen_rtx_VEC_DUPLICATE (mode, val);
39665 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
39666 if (recog_memoized (insn) < 0)
39667 {
39668 rtx_insn *seq;
39669 /* If that fails, force VAL into a register. */
39670
39671 start_sequence ();
39672 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
39673 seq = get_insns ();
39674 end_sequence ();
39675 if (seq)
39676 emit_insn_before (seq, insn);
39677
39678 ok = recog_memoized (insn) >= 0;
39679 gcc_assert (ok);
39680 }
39681 return true;
39682 }
39683
39684 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
39685 with all elements equal to VAR. Return true if successful. */
39686
39687 static bool
39688 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
39689 rtx target, rtx val)
39690 {
39691 bool ok;
39692
39693 switch (mode)
39694 {
39695 case V2SImode:
39696 case V2SFmode:
39697 if (!mmx_ok)
39698 return false;
39699 /* FALLTHRU */
39700
39701 case V4DFmode:
39702 case V4DImode:
39703 case V8SFmode:
39704 case V8SImode:
39705 case V2DFmode:
39706 case V2DImode:
39707 case V4SFmode:
39708 case V4SImode:
39709 case V16SImode:
39710 case V8DImode:
39711 case V16SFmode:
39712 case V8DFmode:
39713 return ix86_vector_duplicate_value (mode, target, val);
39714
39715 case V4HImode:
39716 if (!mmx_ok)
39717 return false;
39718 if (TARGET_SSE || TARGET_3DNOW_A)
39719 {
39720 rtx x;
39721
39722 val = gen_lowpart (SImode, val);
39723 x = gen_rtx_TRUNCATE (HImode, val);
39724 x = gen_rtx_VEC_DUPLICATE (mode, x);
39725 emit_insn (gen_rtx_SET (VOIDmode, target, x));
39726 return true;
39727 }
39728 goto widen;
39729
39730 case V8QImode:
39731 if (!mmx_ok)
39732 return false;
39733 goto widen;
39734
39735 case V8HImode:
39736 if (TARGET_SSE2)
39737 {
39738 struct expand_vec_perm_d dperm;
39739 rtx tmp1, tmp2;
39740
39741 permute:
39742 memset (&dperm, 0, sizeof (dperm));
39743 dperm.target = target;
39744 dperm.vmode = mode;
39745 dperm.nelt = GET_MODE_NUNITS (mode);
39746 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
39747 dperm.one_operand_p = true;
39748
39749 /* Extend to SImode using a paradoxical SUBREG. */
39750 tmp1 = gen_reg_rtx (SImode);
39751 emit_move_insn (tmp1, gen_lowpart (SImode, val));
39752
39753 /* Insert the SImode value as low element of a V4SImode vector. */
39754 tmp2 = gen_reg_rtx (V4SImode);
39755 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
39756 emit_move_insn (dperm.op0, gen_lowpart (mode, tmp2));
39757
39758 ok = (expand_vec_perm_1 (&dperm)
39759 || expand_vec_perm_broadcast_1 (&dperm));
39760 gcc_assert (ok);
39761 return ok;
39762 }
39763 goto widen;
39764
39765 case V16QImode:
39766 if (TARGET_SSE2)
39767 goto permute;
39768 goto widen;
39769
39770 widen:
39771 /* Replicate the value once into the next wider mode and recurse. */
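/* Illustrative example (editorial note): broadcasting the QImode value
   0xAB into V16QImode first forms 0xABAB in HImode via the shift/IOR
   below, recurses to duplicate that into V8HImode, and finally views the
   result as V16QImode through gen_lowpart.  */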
39772 {
39773 enum machine_mode smode, wsmode, wvmode;
39774 rtx x;
39775
39776 smode = GET_MODE_INNER (mode);
39777 wvmode = get_mode_wider_vector (mode);
39778 wsmode = GET_MODE_INNER (wvmode);
39779
39780 val = convert_modes (wsmode, smode, val, true);
39781 x = expand_simple_binop (wsmode, ASHIFT, val,
39782 GEN_INT (GET_MODE_BITSIZE (smode)),
39783 NULL_RTX, 1, OPTAB_LIB_WIDEN);
39784 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
39785
39786 x = gen_reg_rtx (wvmode);
39787 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
39788 gcc_assert (ok);
39789 emit_move_insn (target, gen_lowpart (GET_MODE (target), x));
39790 return ok;
39791 }
39792
39793 case V16HImode:
39794 case V32QImode:
39795 {
39796 enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
39797 rtx x = gen_reg_rtx (hvmode);
39798
39799 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
39800 gcc_assert (ok);
39801
39802 x = gen_rtx_VEC_CONCAT (mode, x, x);
39803 emit_insn (gen_rtx_SET (VOIDmode, target, x));
39804 }
39805 return true;
39806
39807 default:
39808 return false;
39809 }
39810 }
39811
39812 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
39813 whose ONE_VAR element is VAR, and other elements are zero. Return true
39814 if successful. */
39815
39816 static bool
39817 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
39818 rtx target, rtx var, int one_var)
39819 {
39820 enum machine_mode vsimode;
39821 rtx new_target;
39822 rtx x, tmp;
39823 bool use_vector_set = false;
39824
39825 switch (mode)
39826 {
39827 case V2DImode:
39828 /* For SSE4.1, we normally use vector set. But if the second
39829 element is zero and inter-unit moves are OK, we use movq
39830 instead. */
39831 use_vector_set = (TARGET_64BIT && TARGET_SSE4_1
39832 && !(TARGET_INTER_UNIT_MOVES_TO_VEC
39833 && one_var == 0));
39834 break;
39835 case V16QImode:
39836 case V4SImode:
39837 case V4SFmode:
39838 use_vector_set = TARGET_SSE4_1;
39839 break;
39840 case V8HImode:
39841 use_vector_set = TARGET_SSE2;
39842 break;
39843 case V4HImode:
39844 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
39845 break;
39846 case V32QImode:
39847 case V16HImode:
39848 case V8SImode:
39849 case V8SFmode:
39850 case V4DFmode:
39851 use_vector_set = TARGET_AVX;
39852 break;
39853 case V4DImode:
39854 /* Use ix86_expand_vector_set in 64bit mode only. */
39855 use_vector_set = TARGET_AVX && TARGET_64BIT;
39856 break;
39857 default:
39858 break;
39859 }
39860
39861 if (use_vector_set)
39862 {
39863 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
39864 var = force_reg (GET_MODE_INNER (mode), var);
39865 ix86_expand_vector_set (mmx_ok, target, var, one_var);
39866 return true;
39867 }
39868
39869 switch (mode)
39870 {
39871 case V2SFmode:
39872 case V2SImode:
39873 if (!mmx_ok)
39874 return false;
39875 /* FALLTHRU */
39876
39877 case V2DFmode:
39878 case V2DImode:
39879 if (one_var != 0)
39880 return false;
39881 var = force_reg (GET_MODE_INNER (mode), var);
39882 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
39883 emit_insn (gen_rtx_SET (VOIDmode, target, x));
39884 return true;
39885
39886 case V4SFmode:
39887 case V4SImode:
39888 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
39889 new_target = gen_reg_rtx (mode);
39890 else
39891 new_target = target;
39892 var = force_reg (GET_MODE_INNER (mode), var);
39893 x = gen_rtx_VEC_DUPLICATE (mode, var);
39894 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
39895 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
39896 if (one_var != 0)
39897 {
39898 /* We need to shuffle the value to the correct position, so
39899 create a new pseudo to store the intermediate result. */
39900
39901 /* With SSE2, we can use the integer shuffle insns. */
39902 if (mode != V4SFmode && TARGET_SSE2)
39903 {
39904 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
39905 const1_rtx,
39906 GEN_INT (one_var == 1 ? 0 : 1),
39907 GEN_INT (one_var == 2 ? 0 : 1),
39908 GEN_INT (one_var == 3 ? 0 : 1)));
39909 if (target != new_target)
39910 emit_move_insn (target, new_target);
39911 return true;
39912 }
39913
39914 /* Otherwise convert the intermediate result to V4SFmode and
39915 use the SSE1 shuffle instructions. */
39916 if (mode != V4SFmode)
39917 {
39918 tmp = gen_reg_rtx (V4SFmode);
39919 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
39920 }
39921 else
39922 tmp = new_target;
39923
39924 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
39925 const1_rtx,
39926 GEN_INT (one_var == 1 ? 0 : 1),
39927 GEN_INT (one_var == 2 ? 0+4 : 1+4),
39928 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
39929
39930 if (mode != V4SFmode)
39931 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
39932 else if (tmp != target)
39933 emit_move_insn (target, tmp);
39934 }
39935 else if (target != new_target)
39936 emit_move_insn (target, new_target);
39937 return true;
39938
39939 case V8HImode:
39940 case V16QImode:
39941 vsimode = V4SImode;
39942 goto widen;
39943 case V4HImode:
39944 case V8QImode:
39945 if (!mmx_ok)
39946 return false;
39947 vsimode = V2SImode;
39948 goto widen;
39949 widen:
39950 if (one_var != 0)
39951 return false;
39952
39953 /* Zero extend the variable element to SImode and recurse. */
39954 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
39955
39956 x = gen_reg_rtx (vsimode);
39957 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
39958 var, one_var))
39959 gcc_unreachable ();
39960
39961 emit_move_insn (target, gen_lowpart (mode, x));
39962 return true;
39963
39964 default:
39965 return false;
39966 }
39967 }
39968
39969 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
39970 consisting of the values in VALS. It is known that all elements
39971 except ONE_VAR are constants. Return true if successful. */
39972
39973 static bool
39974 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
39975 rtx target, rtx vals, int one_var)
39976 {
39977 rtx var = XVECEXP (vals, 0, one_var);
39978 enum machine_mode wmode;
39979 rtx const_vec, x;
39980
39981 const_vec = copy_rtx (vals);
39982 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
39983 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
39984
39985 switch (mode)
39986 {
39987 case V2DFmode:
39988 case V2DImode:
39989 case V2SFmode:
39990 case V2SImode:
39991 /* For the two element vectors, it's just as easy to use
39992 the general case. */
39993 return false;
39994
39995 case V4DImode:
39996 /* Use ix86_expand_vector_set in 64bit mode only. */
39997 if (!TARGET_64BIT)
39998 return false;
39999 case V4DFmode:
40000 case V8SFmode:
40001 case V8SImode:
40002 case V16HImode:
40003 case V32QImode:
40004 case V4SFmode:
40005 case V4SImode:
40006 case V8HImode:
40007 case V4HImode:
40008 break;
40009
40010 case V16QImode:
40011 if (TARGET_SSE4_1)
40012 break;
40013 wmode = V8HImode;
40014 goto widen;
40015 case V8QImode:
40016 wmode = V4HImode;
40017 goto widen;
40018 widen:
40019 /* There's no way to set one QImode entry easily. Combine
40020 the variable value with its adjacent constant value, and
40021 promote to an HImode set. */
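/* Illustrative example (editorial note): for V16QImode with the variable
   element at index 5, the constant at index 4 supplies the low byte and
   the variable the high byte of one HImode word (little endian); that
   word is then stored as element 2 of the V8HImode view of the vector.  */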
40022 x = XVECEXP (vals, 0, one_var ^ 1);
40023 if (one_var & 1)
40024 {
40025 var = convert_modes (HImode, QImode, var, true);
40026 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
40027 NULL_RTX, 1, OPTAB_LIB_WIDEN);
40028 x = GEN_INT (INTVAL (x) & 0xff);
40029 }
40030 else
40031 {
40032 var = convert_modes (HImode, QImode, var, true);
40033 x = gen_int_mode (INTVAL (x) << 8, HImode);
40034 }
40035 if (x != const0_rtx)
40036 var = expand_simple_binop (HImode, IOR, var, x, var,
40037 1, OPTAB_LIB_WIDEN);
40038
40039 x = gen_reg_rtx (wmode);
40040 emit_move_insn (x, gen_lowpart (wmode, const_vec));
40041 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
40042
40043 emit_move_insn (target, gen_lowpart (mode, x));
40044 return true;
40045
40046 default:
40047 return false;
40048 }
40049
40050 emit_move_insn (target, const_vec);
40051 ix86_expand_vector_set (mmx_ok, target, var, one_var);
40052 return true;
40053 }
40054
40055 /* A subroutine of ix86_expand_vector_init_general. Use vector
40056 concatenate to handle the most general case: all values variable,
40057 and none identical. */
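/* Illustrative example (editorial note): with N == 8 and V8SFmode, the
   eight scalar operands are first combined pairwise into four V2SFmode
   registers, those into two V4SFmode halves, and a final VEC_CONCAT of
   the two halves fills the V8SFmode target.  */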
40058
40059 static void
40060 ix86_expand_vector_init_concat (enum machine_mode mode,
40061 rtx target, rtx *ops, int n)
40062 {
40063 enum machine_mode cmode, hmode = VOIDmode, gmode = VOIDmode;
40064 rtx first[16], second[8], third[4];
40065 rtvec v;
40066 int i, j;
40067
40068 switch (n)
40069 {
40070 case 2:
40071 switch (mode)
40072 {
40073 case V16SImode:
40074 cmode = V8SImode;
40075 break;
40076 case V16SFmode:
40077 cmode = V8SFmode;
40078 break;
40079 case V8DImode:
40080 cmode = V4DImode;
40081 break;
40082 case V8DFmode:
40083 cmode = V4DFmode;
40084 break;
40085 case V8SImode:
40086 cmode = V4SImode;
40087 break;
40088 case V8SFmode:
40089 cmode = V4SFmode;
40090 break;
40091 case V4DImode:
40092 cmode = V2DImode;
40093 break;
40094 case V4DFmode:
40095 cmode = V2DFmode;
40096 break;
40097 case V4SImode:
40098 cmode = V2SImode;
40099 break;
40100 case V4SFmode:
40101 cmode = V2SFmode;
40102 break;
40103 case V2DImode:
40104 cmode = DImode;
40105 break;
40106 case V2SImode:
40107 cmode = SImode;
40108 break;
40109 case V2DFmode:
40110 cmode = DFmode;
40111 break;
40112 case V2SFmode:
40113 cmode = SFmode;
40114 break;
40115 default:
40116 gcc_unreachable ();
40117 }
40118
40119 if (!register_operand (ops[1], cmode))
40120 ops[1] = force_reg (cmode, ops[1]);
40121 if (!register_operand (ops[0], cmode))
40122 ops[0] = force_reg (cmode, ops[0]);
40123 emit_insn (gen_rtx_SET (VOIDmode, target,
40124 gen_rtx_VEC_CONCAT (mode, ops[0],
40125 ops[1])));
40126 break;
40127
40128 case 4:
40129 switch (mode)
40130 {
40131 case V4DImode:
40132 cmode = V2DImode;
40133 break;
40134 case V4DFmode:
40135 cmode = V2DFmode;
40136 break;
40137 case V4SImode:
40138 cmode = V2SImode;
40139 break;
40140 case V4SFmode:
40141 cmode = V2SFmode;
40142 break;
40143 default:
40144 gcc_unreachable ();
40145 }
40146 goto half;
40147
40148 case 8:
40149 switch (mode)
40150 {
40151 case V8DImode:
40152 cmode = V2DImode;
40153 hmode = V4DImode;
40154 break;
40155 case V8DFmode:
40156 cmode = V2DFmode;
40157 hmode = V4DFmode;
40158 break;
40159 case V8SImode:
40160 cmode = V2SImode;
40161 hmode = V4SImode;
40162 break;
40163 case V8SFmode:
40164 cmode = V2SFmode;
40165 hmode = V4SFmode;
40166 break;
40167 default:
40168 gcc_unreachable ();
40169 }
40170 goto half;
40171
40172 case 16:
40173 switch (mode)
40174 {
40175 case V16SImode:
40176 cmode = V2SImode;
40177 hmode = V4SImode;
40178 gmode = V8SImode;
40179 break;
40180 case V16SFmode:
40181 cmode = V2SFmode;
40182 hmode = V4SFmode;
40183 gmode = V8SFmode;
40184 break;
40185 default:
40186 gcc_unreachable ();
40187 }
40188 goto half;
40189
40190 half:
40191 /* FIXME: We process inputs backward to help RA. PR 36222. */
40192 i = n - 1;
40193 j = (n >> 1) - 1;
40194 for (; i > 0; i -= 2, j--)
40195 {
40196 first[j] = gen_reg_rtx (cmode);
40197 v = gen_rtvec (2, ops[i - 1], ops[i]);
40198 ix86_expand_vector_init (false, first[j],
40199 gen_rtx_PARALLEL (cmode, v));
40200 }
40201
40202 n >>= 1;
40203 if (n > 4)
40204 {
40205 gcc_assert (hmode != VOIDmode);
40206 gcc_assert (gmode != VOIDmode);
40207 for (i = j = 0; i < n; i += 2, j++)
40208 {
40209 second[j] = gen_reg_rtx (hmode);
40210 ix86_expand_vector_init_concat (hmode, second [j],
40211 &first [i], 2);
40212 }
40213 n >>= 1;
40214 for (i = j = 0; i < n; i += 2, j++)
40215 {
40216 third[j] = gen_reg_rtx (gmode);
40217 ix86_expand_vector_init_concat (gmode, third[j],
40218 &second[i], 2);
40219 }
40220 n >>= 1;
40221 ix86_expand_vector_init_concat (mode, target, third, n);
40222 }
40223 else if (n > 2)
40224 {
40225 gcc_assert (hmode != VOIDmode);
40226 for (i = j = 0; i < n; i += 2, j++)
40227 {
40228 second[j] = gen_reg_rtx (hmode);
40229 ix86_expand_vector_init_concat (hmode, second [j],
40230 &first [i], 2);
40231 }
40232 n >>= 1;
40233 ix86_expand_vector_init_concat (mode, target, second, n);
40234 }
40235 else
40236 ix86_expand_vector_init_concat (mode, target, first, n);
40237 break;
40238
40239 default:
40240 gcc_unreachable ();
40241 }
40242 }
40243
40244 /* A subroutine of ix86_expand_vector_init_general. Use vector
40245 interleave to handle the most general case: all values variable,
40246 and none identical. */
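/* Illustrative example (editorial note): for V8HImode, each of the four
   (even, odd) element pairs is packed into the low lanes of its own
   vector; V4SImode low interleaves then merge those pair vectors two at
   a time, and a final V2DImode low interleave of the two survivors
   yields the complete vector.  */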
40247
40248 static void
40249 ix86_expand_vector_init_interleave (enum machine_mode mode,
40250 rtx target, rtx *ops, int n)
40251 {
40252 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
40253 int i, j;
40254 rtx op0, op1;
40255 rtx (*gen_load_even) (rtx, rtx, rtx);
40256 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
40257 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
40258
40259 switch (mode)
40260 {
40261 case V8HImode:
40262 gen_load_even = gen_vec_setv8hi;
40263 gen_interleave_first_low = gen_vec_interleave_lowv4si;
40264 gen_interleave_second_low = gen_vec_interleave_lowv2di;
40265 inner_mode = HImode;
40266 first_imode = V4SImode;
40267 second_imode = V2DImode;
40268 third_imode = VOIDmode;
40269 break;
40270 case V16QImode:
40271 gen_load_even = gen_vec_setv16qi;
40272 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
40273 gen_interleave_second_low = gen_vec_interleave_lowv4si;
40274 inner_mode = QImode;
40275 first_imode = V8HImode;
40276 second_imode = V4SImode;
40277 third_imode = V2DImode;
40278 break;
40279 default:
40280 gcc_unreachable ();
40281 }
40282
40283 for (i = 0; i < n; i++)
40284 {
40285 /* Extend the odd element to SImode using a paradoxical SUBREG. */
40286 op0 = gen_reg_rtx (SImode);
40287 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
40288
40289 /* Insert the SImode value as low element of V4SImode vector. */
40290 op1 = gen_reg_rtx (V4SImode);
40291 op0 = gen_rtx_VEC_MERGE (V4SImode,
40292 gen_rtx_VEC_DUPLICATE (V4SImode,
40293 op0),
40294 CONST0_RTX (V4SImode),
40295 const1_rtx);
40296 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
40297
40298 /* Cast the V4SImode vector back to a vector in the original mode. */
40299 op0 = gen_reg_rtx (mode);
40300 emit_move_insn (op0, gen_lowpart (mode, op1));
40301
40302 /* Load even elements into the second position. */
40303 emit_insn (gen_load_even (op0,
40304 force_reg (inner_mode,
40305 ops [i + i + 1]),
40306 const1_rtx));
40307
40308 /* Cast vector to FIRST_IMODE vector. */
40309 ops[i] = gen_reg_rtx (first_imode);
40310 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
40311 }
40312
40313 /* Interleave low FIRST_IMODE vectors. */
40314 for (i = j = 0; i < n; i += 2, j++)
40315 {
40316 op0 = gen_reg_rtx (first_imode);
40317 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
40318
40319 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
40320 ops[j] = gen_reg_rtx (second_imode);
40321 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
40322 }
40323
40324 /* Interleave low SECOND_IMODE vectors. */
40325 switch (second_imode)
40326 {
40327 case V4SImode:
40328 for (i = j = 0; i < n / 2; i += 2, j++)
40329 {
40330 op0 = gen_reg_rtx (second_imode);
40331 emit_insn (gen_interleave_second_low (op0, ops[i],
40332 ops[i + 1]));
40333
40334 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
40335 vector. */
40336 ops[j] = gen_reg_rtx (third_imode);
40337 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
40338 }
40339 second_imode = V2DImode;
40340 gen_interleave_second_low = gen_vec_interleave_lowv2di;
40341 /* FALLTHRU */
40342
40343 case V2DImode:
40344 op0 = gen_reg_rtx (second_imode);
40345 emit_insn (gen_interleave_second_low (op0, ops[0],
40346 ops[1]));
40347
40348 /* Cast the SECOND_IMODE vector back to a vector in the original
40349 mode. */
40350 emit_insn (gen_rtx_SET (VOIDmode, target,
40351 gen_lowpart (mode, op0)));
40352 break;
40353
40354 default:
40355 gcc_unreachable ();
40356 }
40357 }
40358
40359 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
40360 all values variable, and none identical. */
40361
40362 static void
40363 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
40364 rtx target, rtx vals)
40365 {
40366 rtx ops[64], op0, op1;
40367 enum machine_mode half_mode = VOIDmode;
40368 int n, i;
40369
40370 switch (mode)
40371 {
40372 case V2SFmode:
40373 case V2SImode:
40374 if (!mmx_ok && !TARGET_SSE)
40375 break;
40376 /* FALLTHRU */
40377
40378 case V16SImode:
40379 case V16SFmode:
40380 case V8DFmode:
40381 case V8DImode:
40382 case V8SFmode:
40383 case V8SImode:
40384 case V4DFmode:
40385 case V4DImode:
40386 case V4SFmode:
40387 case V4SImode:
40388 case V2DFmode:
40389 case V2DImode:
40390 n = GET_MODE_NUNITS (mode);
40391 for (i = 0; i < n; i++)
40392 ops[i] = XVECEXP (vals, 0, i);
40393 ix86_expand_vector_init_concat (mode, target, ops, n);
40394 return;
40395
40396 case V32QImode:
40397 half_mode = V16QImode;
40398 goto half;
40399
40400 case V16HImode:
40401 half_mode = V8HImode;
40402 goto half;
40403
40404 half:
40405 n = GET_MODE_NUNITS (mode);
40406 for (i = 0; i < n; i++)
40407 ops[i] = XVECEXP (vals, 0, i);
40408 op0 = gen_reg_rtx (half_mode);
40409 op1 = gen_reg_rtx (half_mode);
40410 ix86_expand_vector_init_interleave (half_mode, op0, ops,
40411 n >> 2);
40412 ix86_expand_vector_init_interleave (half_mode, op1,
40413 &ops [n >> 1], n >> 2);
40414 emit_insn (gen_rtx_SET (VOIDmode, target,
40415 gen_rtx_VEC_CONCAT (mode, op0, op1)));
40416 return;
40417
40418 case V16QImode:
40419 if (!TARGET_SSE4_1)
40420 break;
40421 /* FALLTHRU */
40422
40423 case V8HImode:
40424 if (!TARGET_SSE2)
40425 break;
40426
40427 /* Don't use ix86_expand_vector_init_interleave if we can't
40428 move from GPR to SSE register directly. */
40429 if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
40430 break;
40431
40432 n = GET_MODE_NUNITS (mode);
40433 for (i = 0; i < n; i++)
40434 ops[i] = XVECEXP (vals, 0, i);
40435 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
40436 return;
40437
40438 case V4HImode:
40439 case V8QImode:
40440 break;
40441
40442 default:
40443 gcc_unreachable ();
40444 }
40445
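/* Fall back to building the vector in word_mode integers: each word
   packs n_elt_per_word elements with shifts and IORs, highest-indexed
   element first, so that on this little-endian target the lowest-indexed
   element ends up in the least significant bits; the words are then
   assembled into the vector representation.  */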
40446 {
40447 int i, j, n_elts, n_words, n_elt_per_word;
40448 enum machine_mode inner_mode;
40449 rtx words[4], shift;
40450
40451 inner_mode = GET_MODE_INNER (mode);
40452 n_elts = GET_MODE_NUNITS (mode);
40453 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
40454 n_elt_per_word = n_elts / n_words;
40455 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
40456
40457 for (i = 0; i < n_words; ++i)
40458 {
40459 rtx word = NULL_RTX;
40460
40461 for (j = 0; j < n_elt_per_word; ++j)
40462 {
40463 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
40464 elt = convert_modes (word_mode, inner_mode, elt, true);
40465
40466 if (j == 0)
40467 word = elt;
40468 else
40469 {
40470 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
40471 word, 1, OPTAB_LIB_WIDEN);
40472 word = expand_simple_binop (word_mode, IOR, word, elt,
40473 word, 1, OPTAB_LIB_WIDEN);
40474 }
40475 }
40476
40477 words[i] = word;
40478 }
40479
40480 if (n_words == 1)
40481 emit_move_insn (target, gen_lowpart (mode, words[0]));
40482 else if (n_words == 2)
40483 {
40484 rtx tmp = gen_reg_rtx (mode);
40485 emit_clobber (tmp);
40486 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
40487 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
40488 emit_move_insn (target, tmp);
40489 }
40490 else if (n_words == 4)
40491 {
40492 rtx tmp = gen_reg_rtx (V4SImode);
40493 gcc_assert (word_mode == SImode);
40494 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
40495 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
40496 emit_move_insn (target, gen_lowpart (mode, tmp));
40497 }
40498 else
40499 gcc_unreachable ();
40500 }
40501 }
40502
40503 /* Initialize vector TARGET via VALS. Suppress the use of MMX
40504 instructions unless MMX_OK is true. */
40505
40506 void
40507 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
40508 {
40509 enum machine_mode mode = GET_MODE (target);
40510 enum machine_mode inner_mode = GET_MODE_INNER (mode);
40511 int n_elts = GET_MODE_NUNITS (mode);
40512 int n_var = 0, one_var = -1;
40513 bool all_same = true, all_const_zero = true;
40514 int i;
40515 rtx x;
40516
40517 for (i = 0; i < n_elts; ++i)
40518 {
40519 x = XVECEXP (vals, 0, i);
40520 if (!(CONST_INT_P (x)
40521 || GET_CODE (x) == CONST_DOUBLE
40522 || GET_CODE (x) == CONST_FIXED))
40523 n_var++, one_var = i;
40524 else if (x != CONST0_RTX (inner_mode))
40525 all_const_zero = false;
40526 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
40527 all_same = false;
40528 }
40529
40530 /* Constants are best loaded from the constant pool. */
40531 if (n_var == 0)
40532 {
40533 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
40534 return;
40535 }
40536
40537 /* If all values are identical, broadcast the value. */
40538 if (all_same
40539 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
40540 XVECEXP (vals, 0, 0)))
40541 return;
40542
40543 /* Values where only one field is non-constant are best loaded from
40544 the pool and overwritten via move later. */
40545 if (n_var == 1)
40546 {
40547 if (all_const_zero
40548 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
40549 XVECEXP (vals, 0, one_var),
40550 one_var))
40551 return;
40552
40553 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
40554 return;
40555 }
40556
40557 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
40558 }
40559
40560 void
40561 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
40562 {
40563 enum machine_mode mode = GET_MODE (target);
40564 enum machine_mode inner_mode = GET_MODE_INNER (mode);
40565 enum machine_mode half_mode;
40566 bool use_vec_merge = false;
40567 rtx tmp;
40568 static rtx (*gen_extract[6][2]) (rtx, rtx)
40569 = {
40570 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
40571 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
40572 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
40573 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
40574 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
40575 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
40576 };
40577 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
40578 = {
40579 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
40580 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
40581 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
40582 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
40583 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
40584 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
40585 };
40586 int i, j, n;
40587
40588 switch (mode)
40589 {
40590 case V2SFmode:
40591 case V2SImode:
40592 if (mmx_ok)
40593 {
40594 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
40595 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
40596 if (elt == 0)
40597 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
40598 else
40599 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
40600 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40601 return;
40602 }
40603 break;
40604
40605 case V2DImode:
40606 use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
40607 if (use_vec_merge)
40608 break;
40609
40610 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
40611 ix86_expand_vector_extract (false, tmp, target, 1 - elt);
40612 if (elt == 0)
40613 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
40614 else
40615 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
40616 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40617 return;
40618
40619 case V2DFmode:
40620 {
40621 rtx op0, op1;
40622
40623 /* For the two element vectors, we implement a VEC_CONCAT with
40624 the extraction of the other element. */
40625
40626 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
40627 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
40628
40629 if (elt == 0)
40630 op0 = val, op1 = tmp;
40631 else
40632 op0 = tmp, op1 = val;
40633
40634 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
40635 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40636 }
40637 return;
40638
40639 case V4SFmode:
40640 use_vec_merge = TARGET_SSE4_1;
40641 if (use_vec_merge)
40642 break;
40643
40644 switch (elt)
40645 {
40646 case 0:
40647 use_vec_merge = true;
40648 break;
40649
40650 case 1:
40651 /* tmp = target = A B C D */
40652 tmp = copy_to_reg (target);
40653 /* target = A A B B */
40654 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
40655 /* target = X A B B */
40656 ix86_expand_vector_set (false, target, val, 0);
40657 /* target = A X C D */
40658 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
40659 const1_rtx, const0_rtx,
40660 GEN_INT (2+4), GEN_INT (3+4)));
40661 return;
40662
40663 case 2:
40664 /* tmp = target = A B C D */
40665 tmp = copy_to_reg (target);
40666 /* tmp = X B C D */
40667 ix86_expand_vector_set (false, tmp, val, 0);
40668 /* target = A B X D */
40669 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
40670 const0_rtx, const1_rtx,
40671 GEN_INT (0+4), GEN_INT (3+4)));
40672 return;
40673
40674 case 3:
40675 /* tmp = target = A B C D */
40676 tmp = copy_to_reg (target);
40677 /* tmp = X B C D */
40678 ix86_expand_vector_set (false, tmp, val, 0);
40679 /* target = A B C X */
40680 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
40681 const0_rtx, const1_rtx,
40682 GEN_INT (2+4), GEN_INT (0+4)));
40683 return;
40684
40685 default:
40686 gcc_unreachable ();
40687 }
40688 break;
40689
40690 case V4SImode:
40691 use_vec_merge = TARGET_SSE4_1;
40692 if (use_vec_merge)
40693 break;
40694
40695 /* Element 0 handled by vec_merge below. */
40696 if (elt == 0)
40697 {
40698 use_vec_merge = true;
40699 break;
40700 }
40701
40702 if (TARGET_SSE2)
40703 {
40704 /* With SSE2, use integer shuffles to swap element 0 and ELT,
40705 store into element 0, then shuffle them back. */
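/* Illustrative example (editorial note): for ELT == 2 the order becomes
   {2, 1, 0, 3}, so the first pshufd swaps elements 0 and 2, the
   recursive call stores VAL into element 0, and the second, identical
   pshufd swaps them back, leaving VAL in element 2.  */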
40706
40707 rtx order[4];
40708
40709 order[0] = GEN_INT (elt);
40710 order[1] = const1_rtx;
40711 order[2] = const2_rtx;
40712 order[3] = GEN_INT (3);
40713 order[elt] = const0_rtx;
40714
40715 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
40716 order[1], order[2], order[3]));
40717
40718 ix86_expand_vector_set (false, target, val, 0);
40719
40720 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
40721 order[1], order[2], order[3]));
40722 }
40723 else
40724 {
40725 /* For SSE1, we have to reuse the V4SF code. */
40726 rtx t = gen_reg_rtx (V4SFmode);
40727 ix86_expand_vector_set (false, t, gen_lowpart (SFmode, val), elt);
40728 emit_move_insn (target, gen_lowpart (mode, t));
40729 }
40730 return;
40731
40732 case V8HImode:
40733 use_vec_merge = TARGET_SSE2;
40734 break;
40735 case V4HImode:
40736 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
40737 break;
40738
40739 case V16QImode:
40740 use_vec_merge = TARGET_SSE4_1;
40741 break;
40742
40743 case V8QImode:
40744 break;
40745
40746 case V32QImode:
40747 half_mode = V16QImode;
40748 j = 0;
40749 n = 16;
40750 goto half;
40751
40752 case V16HImode:
40753 half_mode = V8HImode;
40754 j = 1;
40755 n = 8;
40756 goto half;
40757
40758 case V8SImode:
40759 half_mode = V4SImode;
40760 j = 2;
40761 n = 4;
40762 goto half;
40763
40764 case V4DImode:
40765 half_mode = V2DImode;
40766 j = 3;
40767 n = 2;
40768 goto half;
40769
40770 case V8SFmode:
40771 half_mode = V4SFmode;
40772 j = 4;
40773 n = 4;
40774 goto half;
40775
40776 case V4DFmode:
40777 half_mode = V2DFmode;
40778 j = 5;
40779 n = 2;
40780 goto half;
40781
40782 half:
40783 /* Compute offset. */
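/* Illustrative example (editorial note): for V8SImode and ELT == 5,
   n is 4, so i becomes 1 (the high 128-bit half) and ELT becomes 1; the
   high half is extracted into a V4SImode temporary, element 1 of that
   temporary is set, and the half is inserted back.  */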
40784 i = elt / n;
40785 elt %= n;
40786
40787 gcc_assert (i <= 1);
40788
40789 /* Extract the half. */
40790 tmp = gen_reg_rtx (half_mode);
40791 emit_insn (gen_extract[j][i] (tmp, target));
40792
40793 /* Put val in tmp at elt. */
40794 ix86_expand_vector_set (false, tmp, val, elt);
40795
40796 /* Put it back. */
40797 emit_insn (gen_insert[j][i] (target, target, tmp));
40798 return;
40799
40800 default:
40801 break;
40802 }
40803
40804 if (use_vec_merge)
40805 {
40806 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
40807 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
40808 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40809 }
40810 else
40811 {
40812 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
40813
40814 emit_move_insn (mem, target);
40815
40816 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
40817 emit_move_insn (tmp, val);
40818
40819 emit_move_insn (target, mem);
40820 }
40821 }
40822
40823 void
40824 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
40825 {
40826 enum machine_mode mode = GET_MODE (vec);
40827 enum machine_mode inner_mode = GET_MODE_INNER (mode);
40828 bool use_vec_extr = false;
40829 rtx tmp;
40830
40831 switch (mode)
40832 {
40833 case V2SImode:
40834 case V2SFmode:
40835 if (!mmx_ok)
40836 break;
40837 /* FALLTHRU */
40838
40839 case V2DFmode:
40840 case V2DImode:
40841 use_vec_extr = true;
40842 break;
40843
40844 case V4SFmode:
40845 use_vec_extr = TARGET_SSE4_1;
40846 if (use_vec_extr)
40847 break;
40848
40849 switch (elt)
40850 {
40851 case 0:
40852 tmp = vec;
40853 break;
40854
40855 case 1:
40856 case 3:
40857 tmp = gen_reg_rtx (mode);
40858 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
40859 GEN_INT (elt), GEN_INT (elt),
40860 GEN_INT (elt+4), GEN_INT (elt+4)));
40861 break;
40862
40863 case 2:
40864 tmp = gen_reg_rtx (mode);
40865 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
40866 break;
40867
40868 default:
40869 gcc_unreachable ();
40870 }
40871 vec = tmp;
40872 use_vec_extr = true;
40873 elt = 0;
40874 break;
40875
40876 case V4SImode:
40877 use_vec_extr = TARGET_SSE4_1;
40878 if (use_vec_extr)
40879 break;
40880
40881 if (TARGET_SSE2)
40882 {
40883 switch (elt)
40884 {
40885 case 0:
40886 tmp = vec;
40887 break;
40888
40889 case 1:
40890 case 3:
40891 tmp = gen_reg_rtx (mode);
40892 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
40893 GEN_INT (elt), GEN_INT (elt),
40894 GEN_INT (elt), GEN_INT (elt)));
40895 break;
40896
40897 case 2:
40898 tmp = gen_reg_rtx (mode);
40899 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
40900 break;
40901
40902 default:
40903 gcc_unreachable ();
40904 }
40905 vec = tmp;
40906 use_vec_extr = true;
40907 elt = 0;
40908 }
40909 else
40910 {
40911 /* For SSE1, we have to reuse the V4SF code. */
40912 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
40913 gen_lowpart (V4SFmode, vec), elt);
40914 return;
40915 }
40916 break;
40917
40918 case V8HImode:
40919 use_vec_extr = TARGET_SSE2;
40920 break;
40921 case V4HImode:
40922 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
40923 break;
40924
40925 case V16QImode:
40926 use_vec_extr = TARGET_SSE4_1;
40927 break;
40928
40929 case V8SFmode:
40930 if (TARGET_AVX)
40931 {
40932 tmp = gen_reg_rtx (V4SFmode);
40933 if (elt < 4)
40934 emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
40935 else
40936 emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
40937 ix86_expand_vector_extract (false, target, tmp, elt & 3);
40938 return;
40939 }
40940 break;
40941
40942 case V4DFmode:
40943 if (TARGET_AVX)
40944 {
40945 tmp = gen_reg_rtx (V2DFmode);
40946 if (elt < 2)
40947 emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
40948 else
40949 emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
40950 ix86_expand_vector_extract (false, target, tmp, elt & 1);
40951 return;
40952 }
40953 break;
40954
40955 case V32QImode:
40956 if (TARGET_AVX)
40957 {
40958 tmp = gen_reg_rtx (V16QImode);
40959 if (elt < 16)
40960 emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
40961 else
40962 emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
40963 ix86_expand_vector_extract (false, target, tmp, elt & 15);
40964 return;
40965 }
40966 break;
40967
40968 case V16HImode:
40969 if (TARGET_AVX)
40970 {
40971 tmp = gen_reg_rtx (V8HImode);
40972 if (elt < 8)
40973 emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
40974 else
40975 emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
40976 ix86_expand_vector_extract (false, target, tmp, elt & 7);
40977 return;
40978 }
40979 break;
40980
40981 case V8SImode:
40982 if (TARGET_AVX)
40983 {
40984 tmp = gen_reg_rtx (V4SImode);
40985 if (elt < 4)
40986 emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
40987 else
40988 emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
40989 ix86_expand_vector_extract (false, target, tmp, elt & 3);
40990 return;
40991 }
40992 break;
40993
40994 case V4DImode:
40995 if (TARGET_AVX)
40996 {
40997 tmp = gen_reg_rtx (V2DImode);
40998 if (elt < 2)
40999 emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
41000 else
41001 emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
41002 ix86_expand_vector_extract (false, target, tmp, elt & 1);
41003 return;
41004 }
41005 break;
41006
41007 case V32HImode:
41008 if (TARGET_AVX512BW)
41009 {
41010 tmp = gen_reg_rtx (V16HImode);
41011 if (elt < 16)
41012 emit_insn (gen_vec_extract_lo_v32hi (tmp, vec));
41013 else
41014 emit_insn (gen_vec_extract_hi_v32hi (tmp, vec));
41015 ix86_expand_vector_extract (false, target, tmp, elt & 15);
41016 return;
41017 }
41018 break;
41019
41020 case V64QImode:
41021 if (TARGET_AVX512BW)
41022 {
41023 tmp = gen_reg_rtx (V32QImode);
41024 if (elt < 32)
41025 emit_insn (gen_vec_extract_lo_v64qi (tmp, vec));
41026 else
41027 emit_insn (gen_vec_extract_hi_v64qi (tmp, vec));
41028 ix86_expand_vector_extract (false, target, tmp, elt & 31);
41029 return;
41030 }
41031 break;
41032
41033 case V16SFmode:
41034 tmp = gen_reg_rtx (V8SFmode);
41035 if (elt < 8)
41036 emit_insn (gen_vec_extract_lo_v16sf (tmp, vec));
41037 else
41038 emit_insn (gen_vec_extract_hi_v16sf (tmp, vec));
41039 ix86_expand_vector_extract (false, target, tmp, elt & 7);
41040 return;
41041
41042 case V8DFmode:
41043 tmp = gen_reg_rtx (V4DFmode);
41044 if (elt < 4)
41045 emit_insn (gen_vec_extract_lo_v8df (tmp, vec));
41046 else
41047 emit_insn (gen_vec_extract_hi_v8df (tmp, vec));
41048 ix86_expand_vector_extract (false, target, tmp, elt & 3);
41049 return;
41050
41051 case V16SImode:
41052 tmp = gen_reg_rtx (V8SImode);
41053 if (elt < 8)
41054 emit_insn (gen_vec_extract_lo_v16si (tmp, vec));
41055 else
41056 emit_insn (gen_vec_extract_hi_v16si (tmp, vec));
41057 ix86_expand_vector_extract (false, target, tmp, elt & 7);
41058 return;
41059
41060 case V8DImode:
41061 tmp = gen_reg_rtx (V4DImode);
41062 if (elt < 4)
41063 emit_insn (gen_vec_extract_lo_v8di (tmp, vec));
41064 else
41065 emit_insn (gen_vec_extract_hi_v8di (tmp, vec));
41066 ix86_expand_vector_extract (false, target, tmp, elt & 3);
41067 return;
41068
41069 case V8QImode:
41070 /* ??? Could extract the appropriate HImode element and shift. */
41071 default:
41072 break;
41073 }
41074
41075 if (use_vec_extr)
41076 {
41077 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
41078 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
41079
41080 /* Let the rtl optimizers know about the zero extension performed. */
41081 if (inner_mode == QImode || inner_mode == HImode)
41082 {
41083 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
41084 target = gen_lowpart (SImode, target);
41085 }
41086
41087 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
41088 }
41089 else
41090 {
41091 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
41092
41093 emit_move_insn (mem, vec);
41094
41095 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
41096 emit_move_insn (target, tmp);
41097 }
41098 }
41099
41100 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
41101 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
41102 The upper bits of DEST are undefined, though they shouldn't cause
41103 exceptions (some bits from src or all zeros are ok). */
41104
41105 static void
41106 emit_reduc_half (rtx dest, rtx src, int i)
41107 {
41108 rtx tem, d = dest;
41109 switch (GET_MODE (src))
41110 {
41111 case V4SFmode:
41112 if (i == 128)
41113 tem = gen_sse_movhlps (dest, src, src);
41114 else
41115 tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx,
41116 GEN_INT (1 + 4), GEN_INT (1 + 4));
41117 break;
41118 case V2DFmode:
41119 tem = gen_vec_interleave_highv2df (dest, src, src);
41120 break;
41121 case V16QImode:
41122 case V8HImode:
41123 case V4SImode:
41124 case V2DImode:
41125 d = gen_reg_rtx (V1TImode);
41126 tem = gen_sse2_lshrv1ti3 (d, gen_lowpart (V1TImode, src),
41127 GEN_INT (i / 2));
41128 break;
41129 case V8SFmode:
41130 if (i == 256)
41131 tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx);
41132 else
41133 tem = gen_avx_shufps256 (dest, src, src,
41134 GEN_INT (i == 128 ? 2 + (3 << 2) : 1));
41135 break;
41136 case V4DFmode:
41137 if (i == 256)
41138 tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx);
41139 else
41140 tem = gen_avx_shufpd256 (dest, src, src, const1_rtx);
41141 break;
41142 case V32QImode:
41143 case V16HImode:
41144 case V8SImode:
41145 case V4DImode:
41146 if (i == 256)
41147 {
41148 if (GET_MODE (dest) != V4DImode)
41149 d = gen_reg_rtx (V4DImode);
41150 tem = gen_avx2_permv2ti (d, gen_lowpart (V4DImode, src),
41151 gen_lowpart (V4DImode, src),
41152 const1_rtx);
41153 }
41154 else
41155 {
41156 d = gen_reg_rtx (V2TImode);
41157 tem = gen_avx2_lshrv2ti3 (d, gen_lowpart (V2TImode, src),
41158 GEN_INT (i / 2));
41159 }
41160 break;
41161 case V16SImode:
41162 case V16SFmode:
41163 case V8DImode:
41164 case V8DFmode:
41165 if (i > 128)
41166 tem = gen_avx512f_shuf_i32x4_1 (gen_lowpart (V16SImode, dest),
41167 gen_lowpart (V16SImode, src),
41168 gen_lowpart (V16SImode, src),
41169 GEN_INT (0x4 + (i == 512 ? 4 : 0)),
41170 GEN_INT (0x5 + (i == 512 ? 4 : 0)),
41171 GEN_INT (0x6 + (i == 512 ? 4 : 0)),
41172 GEN_INT (0x7 + (i == 512 ? 4 : 0)),
41173 GEN_INT (0xC), GEN_INT (0xD),
41174 GEN_INT (0xE), GEN_INT (0xF),
41175 GEN_INT (0x10), GEN_INT (0x11),
41176 GEN_INT (0x12), GEN_INT (0x13),
41177 GEN_INT (0x14), GEN_INT (0x15),
41178 GEN_INT (0x16), GEN_INT (0x17));
41179 else
41180 tem = gen_avx512f_pshufd_1 (gen_lowpart (V16SImode, dest),
41181 gen_lowpart (V16SImode, src),
41182 GEN_INT (i == 128 ? 0x2 : 0x1),
41183 GEN_INT (0x3),
41184 GEN_INT (0x3),
41185 GEN_INT (0x3),
41186 GEN_INT (i == 128 ? 0x6 : 0x5),
41187 GEN_INT (0x7),
41188 GEN_INT (0x7),
41189 GEN_INT (0x7),
41190 GEN_INT (i == 128 ? 0xA : 0x9),
41191 GEN_INT (0xB),
41192 GEN_INT (0xB),
41193 GEN_INT (0xB),
41194 GEN_INT (i == 128 ? 0xE : 0xD),
41195 GEN_INT (0xF),
41196 GEN_INT (0xF),
41197 GEN_INT (0xF));
41198 break;
41199 default:
41200 gcc_unreachable ();
41201 }
41202 emit_insn (tem);
41203 if (d != dest)
41204 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
41205 }
41206
41207 /* Expand a vector reduction. FN is the binary pattern to reduce;
41208 DEST is the destination; IN is the input vector. */
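/* Illustrative example (editorial note): for a V8HImode input and an add
   pattern, the loop below runs with i = 128, 64 and 32; each step moves
   the upper half of the running vector down by i/2 bits via
   emit_reduc_half and combines it with FN, so the complete reduction
   ends up in the low element of DEST (the other elements are not
   meaningful).  */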
41209
41210 void
41211 ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
41212 {
41213 rtx half, dst, vec = in;
41214 enum machine_mode mode = GET_MODE (in);
41215 int i;
41216
41217 /* SSE4.1 has a special instruction (phminposuw) for V8HImode UMIN reduction. */
41218 if (TARGET_SSE4_1
41219 && mode == V8HImode
41220 && fn == gen_uminv8hi3)
41221 {
41222 emit_insn (gen_sse4_1_phminposuw (dest, in));
41223 return;
41224 }
41225
41226 for (i = GET_MODE_BITSIZE (mode);
41227 i > GET_MODE_BITSIZE (GET_MODE_INNER (mode));
41228 i >>= 1)
41229 {
41230 half = gen_reg_rtx (mode);
41231 emit_reduc_half (half, vec, i);
41232 if (i == GET_MODE_BITSIZE (GET_MODE_INNER (mode)) * 2)
41233 dst = dest;
41234 else
41235 dst = gen_reg_rtx (mode);
41236 emit_insn (fn (dst, half, vec));
41237 vec = dst;
41238 }
41239 }
41240 \f
41241 /* Target hook for scalar_mode_supported_p. */
41242 static bool
41243 ix86_scalar_mode_supported_p (enum machine_mode mode)
41244 {
41245 if (DECIMAL_FLOAT_MODE_P (mode))
41246 return default_decimal_float_supported_p ();
41247 else if (mode == TFmode)
41248 return true;
41249 else
41250 return default_scalar_mode_supported_p (mode);
41251 }
41252
41253 /* Implements target hook vector_mode_supported_p. */
41254 static bool
41255 ix86_vector_mode_supported_p (enum machine_mode mode)
41256 {
41257 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
41258 return true;
41259 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
41260 return true;
41261 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
41262 return true;
41263 if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
41264 return true;
41265 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
41266 return true;
41267 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
41268 return true;
41269 return false;
41270 }
41271
41272 /* Implement target hook libgcc_floating_mode_supported_p. */
41273 static bool
41274 ix86_libgcc_floating_mode_supported_p (enum machine_mode mode)
41275 {
41276 switch (mode)
41277 {
41278 case SFmode:
41279 case DFmode:
41280 case XFmode:
41281 return true;
41282
41283 case TFmode:
41284 #ifdef IX86_NO_LIBGCC_TFMODE
41285 return false;
41286 #elif defined IX86_MAYBE_NO_LIBGCC_TFMODE
41287 return TARGET_LONG_DOUBLE_128;
41288 #else
41289 return true;
41290 #endif
41291
41292 default:
41293 return false;
41294 }
41295 }
41296
41297 /* Target hook for c_mode_for_suffix. */
41298 static enum machine_mode
41299 ix86_c_mode_for_suffix (char suffix)
41300 {
41301 if (suffix == 'q')
41302 return TFmode;
41303 if (suffix == 'w')
41304 return XFmode;
41305
41306 return VOIDmode;
41307 }
41308
41309 /* Worker function for TARGET_MD_ASM_CLOBBERS.
41310
41311 We do this in the new i386 backend to maintain source compatibility
41312 with the old cc0-based compiler. */
41313
41314 static tree
41315 ix86_md_asm_clobbers (tree, tree, tree clobbers)
41316 {
41317 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
41318 clobbers);
41319 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
41320 clobbers);
41321 return clobbers;
41322 }
41323
41324 /* Implements the target hook targetm.asm.encode_section_info. */
41325
41326 static void ATTRIBUTE_UNUSED
41327 ix86_encode_section_info (tree decl, rtx rtl, int first)
41328 {
41329 default_encode_section_info (decl, rtl, first);
41330
41331 if (TREE_CODE (decl) == VAR_DECL
41332 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
41333 && ix86_in_large_data_p (decl))
41334 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
41335 }
41336
41337 /* Worker function for REVERSE_CONDITION. */
41338
41339 enum rtx_code
41340 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
41341 {
41342 return (mode != CCFPmode && mode != CCFPUmode
41343 ? reverse_condition (code)
41344 : reverse_condition_maybe_unordered (code));
41345 }
41346
41347 /* Output code to perform an x87 FP register move, from OPERANDS[1]
41348 to OPERANDS[0]. */
41349
41350 const char *
41351 output_387_reg_move (rtx insn, rtx *operands)
41352 {
41353 if (REG_P (operands[0]))
41354 {
41355 if (REG_P (operands[1])
41356 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
41357 {
41358 if (REGNO (operands[0]) == FIRST_STACK_REG)
41359 return output_387_ffreep (operands, 0);
41360 return "fstp\t%y0";
41361 }
41362 if (STACK_TOP_P (operands[0]))
41363 return "fld%Z1\t%y1";
41364 return "fst\t%y0";
41365 }
41366 else if (MEM_P (operands[0]))
41367 {
41368 gcc_assert (REG_P (operands[1]));
41369 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
41370 return "fstp%Z0\t%y0";
41371 else
41372 {
41373 /* There is no non-popping store to memory for XFmode.
41374 So if we need one, follow the store with a load. */
41375 if (GET_MODE (operands[0]) == XFmode)
41376 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
41377 else
41378 return "fst%Z0\t%y0";
41379 }
41380 }
41381 else
41382 gcc_unreachable();
41383 }
41384
41385 /* Output code to perform a conditional jump to LABEL if the C2 flag in
41386 the FP status register is set. */
41387
41388 void
41389 ix86_emit_fp_unordered_jump (rtx label)
41390 {
41391 rtx reg = gen_reg_rtx (HImode);
41392 rtx temp;
41393
41394 emit_insn (gen_x86_fnstsw_1 (reg));
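/* Editorial note: C2 is bit 10 of the FPU status word, i.e. mask 0x04 in
   the high byte just stored into REG.  With sahf that byte is copied
   into EFLAGS, where C2 lands in PF, hence the UNORDERED test; otherwise
   the bit is tested directly via the 0x04 mask below.  */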
41395
41396 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
41397 {
41398 emit_insn (gen_x86_sahf_1 (reg));
41399
41400 temp = gen_rtx_REG (CCmode, FLAGS_REG);
41401 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
41402 }
41403 else
41404 {
41405 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
41406
41407 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
41408 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
41409 }
41410
41411 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
41412 gen_rtx_LABEL_REF (VOIDmode, label),
41413 pc_rtx);
41414 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
41415
41416 emit_jump_insn (temp);
41417 predict_jump (REG_BR_PROB_BASE * 10 / 100);
41418 }
41419
41420 /* Output code to perform a log1p XFmode calculation. */
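/* Editorial note: the threshold constant used below is 1 - sqrt(2)/2
   (about 0.2928932), which matches the argument range documented for the
   x87 fyl2xp1 instruction.  Below it, ln(1 + op1) is computed directly
   as ln(2) * log2(1 + op1) with fyl2xp1, avoiding the cancellation that
   forming 1 + op1 would cause for small op1; above it, 1 + op1 is formed
   explicitly and fyl2x is used instead.  */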
41421
41422 void ix86_emit_i387_log1p (rtx op0, rtx op1)
41423 {
41424 rtx_code_label *label1 = gen_label_rtx ();
41425 rtx_code_label *label2 = gen_label_rtx ();
41426
41427 rtx tmp = gen_reg_rtx (XFmode);
41428 rtx tmp2 = gen_reg_rtx (XFmode);
41429 rtx test;
41430
41431 emit_insn (gen_absxf2 (tmp, op1));
41432 test = gen_rtx_GE (VOIDmode, tmp,
41433 CONST_DOUBLE_FROM_REAL_VALUE (
41434 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
41435 XFmode));
41436 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
41437
41438 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
41439 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
41440 emit_jump (label2);
41441
41442 emit_label (label1);
41443 emit_move_insn (tmp, CONST1_RTX (XFmode));
41444 emit_insn (gen_addxf3 (tmp, op1, tmp));
41445 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
41446 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
41447
41448 emit_label (label2);
41449 }
41450
41451 /* Emit x87 code to round OP1 to the nearest integer (halfway cases away from zero) and store the result in OP0. */
41452 void ix86_emit_i387_round (rtx op0, rtx op1)
41453 {
41454 enum machine_mode inmode = GET_MODE (op1);
41455 enum machine_mode outmode = GET_MODE (op0);
41456 rtx e1, e2, res, tmp, tmp1, half;
41457 rtx scratch = gen_reg_rtx (HImode);
41458 rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
41459 rtx_code_label *jump_label = gen_label_rtx ();
41460 rtx insn;
41461 rtx (*gen_abs) (rtx, rtx);
41462 rtx (*gen_neg) (rtx, rtx);
41463
41464 switch (inmode)
41465 {
41466 case SFmode:
41467 gen_abs = gen_abssf2;
41468 break;
41469 case DFmode:
41470 gen_abs = gen_absdf2;
41471 break;
41472 case XFmode:
41473 gen_abs = gen_absxf2;
41474 break;
41475 default:
41476 gcc_unreachable ();
41477 }
41478
41479 switch (outmode)
41480 {
41481 case SFmode:
41482 gen_neg = gen_negsf2;
41483 break;
41484 case DFmode:
41485 gen_neg = gen_negdf2;
41486 break;
41487 case XFmode:
41488 gen_neg = gen_negxf2;
41489 break;
41490 case HImode:
41491 gen_neg = gen_neghi2;
41492 break;
41493 case SImode:
41494 gen_neg = gen_negsi2;
41495 break;
41496 case DImode:
41497 gen_neg = gen_negdi2;
41498 break;
41499 default:
41500 gcc_unreachable ();
41501 }
41502
41503 e1 = gen_reg_rtx (inmode);
41504 e2 = gen_reg_rtx (inmode);
41505 res = gen_reg_rtx (outmode);
41506
41507 half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, inmode);
41508
41509 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
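/* Worked example (editorial note): for op1 = -2.5 this computes
   -1 * floor (2.5 + 0.5) = -3, i.e. halfway cases are rounded away from
   zero, unlike the x87 default round-to-nearest-even.  */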
41510
41511 /* scratch = fxam(op1) */
41512 emit_insn (gen_rtx_SET (VOIDmode, scratch,
41513 gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
41514 UNSPEC_FXAM)));
41515 /* e1 = fabs(op1) */
41516 emit_insn (gen_abs (e1, op1));
41517
41518 /* e2 = e1 + 0.5 */
41519 half = force_reg (inmode, half);
41520 emit_insn (gen_rtx_SET (VOIDmode, e2,
41521 gen_rtx_PLUS (inmode, e1, half)));
41522
41523 /* res = floor(e2) */
41524 if (inmode != XFmode)
41525 {
41526 tmp1 = gen_reg_rtx (XFmode);
41527
41528 emit_insn (gen_rtx_SET (VOIDmode, tmp1,
41529 gen_rtx_FLOAT_EXTEND (XFmode, e2)));
41530 }
41531 else
41532 tmp1 = e2;
41533
41534 switch (outmode)
41535 {
41536 case SFmode:
41537 case DFmode:
41538 {
41539 rtx tmp0 = gen_reg_rtx (XFmode);
41540
41541 emit_insn (gen_frndintxf2_floor (tmp0, tmp1));
41542
41543 emit_insn (gen_rtx_SET (VOIDmode, res,
41544 gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
41545 UNSPEC_TRUNC_NOOP)));
41546 }
41547 break;
41548 case XFmode:
41549 emit_insn (gen_frndintxf2_floor (res, tmp1));
41550 break;
41551 case HImode:
41552 emit_insn (gen_lfloorxfhi2 (res, tmp1));
41553 break;
41554 case SImode:
41555 emit_insn (gen_lfloorxfsi2 (res, tmp1));
41556 break;
41557 case DImode:
41558 emit_insn (gen_lfloorxfdi2 (res, tmp1));
41559 break;
41560 default:
41561 gcc_unreachable ();
41562 }
41563
41564 /* flags = signbit(a) */
41565 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02)));
41566
41567 /* if (flags) then res = -res */
41568 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
41569 gen_rtx_EQ (VOIDmode, flags, const0_rtx),
41570 gen_rtx_LABEL_REF (VOIDmode, jump_label),
41571 pc_rtx);
41572 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
41573 predict_jump (REG_BR_PROB_BASE * 50 / 100);
41574 JUMP_LABEL (insn) = jump_label;
41575
41576 emit_insn (gen_neg (res, res));
41577
41578 emit_label (jump_label);
41579 LABEL_NUSES (jump_label) = 1;
41580
41581 emit_move_insn (op0, res);
41582 }
41583
41584 /* Output code to perform a Newton-Raphson approximation of a single precision
41585 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
41586
41587 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
41588 {
41589 rtx x0, x1, e0, e1;
41590
41591 x0 = gen_reg_rtx (mode);
41592 e0 = gen_reg_rtx (mode);
41593 e1 = gen_reg_rtx (mode);
41594 x1 = gen_reg_rtx (mode);
41595
41596 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp(b))) */
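/* Editorial note: with x0 = rcp(b) and relative error e = 1 - b*x0, the
   sequence below computes x1 = 2*x0 - b*x0*x0 = x0*(2 - b*x0), for which
   1 - b*x1 = e*e; a single step therefore roughly doubles the number of
   correct bits in the low-precision hardware estimate (about 12 bits for
   rcpps, 14 for rcp14).  */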
41597
41598 b = force_reg (mode, b);
41599
41600 /* x0 = rcp(b) estimate */
41601 if (mode == V16SFmode || mode == V8DFmode)
41602 emit_insn (gen_rtx_SET (VOIDmode, x0,
41603 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
41604 UNSPEC_RCP14)));
41605 else
41606 emit_insn (gen_rtx_SET (VOIDmode, x0,
41607 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
41608 UNSPEC_RCP)));
41609
41610 /* e0 = x0 * b */
41611 emit_insn (gen_rtx_SET (VOIDmode, e0,
41612 gen_rtx_MULT (mode, x0, b)));
41613
41614 /* e0 = x0 * e0 */
41615 emit_insn (gen_rtx_SET (VOIDmode, e0,
41616 gen_rtx_MULT (mode, x0, e0)));
41617
41618 /* e1 = x0 + x0 */
41619 emit_insn (gen_rtx_SET (VOIDmode, e1,
41620 gen_rtx_PLUS (mode, x0, x0)));
41621
41622 /* x1 = e1 - e0 */
41623 emit_insn (gen_rtx_SET (VOIDmode, x1,
41624 gen_rtx_MINUS (mode, e1, e0)));
41625
41626 /* res = a * x1 */
41627 emit_insn (gen_rtx_SET (VOIDmode, res,
41628 gen_rtx_MULT (mode, a, x1)));
41629 }
41630
41631 /* Output code to perform a Newton-Raphson approximation of a
41632 single precision floating point [reciprocal] square root. */
41633
41634 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
41635 bool recip)
41636 {
41637 rtx x0, e0, e1, e2, e3, mthree, mhalf;
41638 REAL_VALUE_TYPE r;
41639 int unspec;
41640
41641 x0 = gen_reg_rtx (mode);
41642 e0 = gen_reg_rtx (mode);
41643 e1 = gen_reg_rtx (mode);
41644 e2 = gen_reg_rtx (mode);
41645 e3 = gen_reg_rtx (mode);
41646
41647 real_from_integer (&r, VOIDmode, -3, SIGNED);
41648 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
41649
41650 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
41651 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
41652 unspec = UNSPEC_RSQRT;
41653
41654 if (VECTOR_MODE_P (mode))
41655 {
41656 mthree = ix86_build_const_vector (mode, true, mthree);
41657 mhalf = ix86_build_const_vector (mode, true, mhalf);
41658 /* There is no 512-bit rsqrt. There is however rsqrt14. */
41659 if (GET_MODE_SIZE (mode) == 64)
41660 unspec = UNSPEC_RSQRT14;
41661 }
41662
41663 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
41664 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
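/* Editorial note: the formulas above are one Newton-Raphson step for
   f(x) = 1/(x*x) - a, i.e. x1 = x0 * (3 - a*x0*x0) / 2, written with
   negated factors so they map onto the e0..e3 temporaries below; the
   relative error of the rsqrtss/rsqrt14 estimate is roughly squared.
   sqrt(a) is obtained as a * rsqrt(a) by reusing e0 = a*x0 in place of
   x0 in the final multiply.  */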
41665
41666 a = force_reg (mode, a);
41667
41668 /* x0 = rsqrt(a) estimate */
41669 emit_insn (gen_rtx_SET (VOIDmode, x0,
41670 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
41671 unspec)));
41672
41673 /* If a == 0.0, filter out the infinite rsqrt estimate to prevent a NaN result for sqrt (0.0). */
41674 if (!recip)
41675 {
41676 rtx zero, mask;
41677
41678 zero = gen_reg_rtx (mode);
41679 mask = gen_reg_rtx (mode);
41680
41681 zero = force_reg (mode, CONST0_RTX(mode));
41682
41683 /* Handle masked compare. */
41684 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
41685 {
41686 mask = gen_reg_rtx (HImode);
41687 /* Imm value 0x4 corresponds to not-equal comparison. */
41688 emit_insn (gen_avx512f_cmpv16sf3 (mask, zero, a, GEN_INT (0x4)));
41689 emit_insn (gen_avx512f_blendmv16sf (x0, zero, x0, mask));
41690 }
41691 else
41692 {
41693 emit_insn (gen_rtx_SET (VOIDmode, mask,
41694 gen_rtx_NE (mode, zero, a)));
41695
41696 emit_insn (gen_rtx_SET (VOIDmode, x0,
41697 gen_rtx_AND (mode, x0, mask)));
41698 }
41699 }
41700
41701 /* e0 = x0 * a */
41702 emit_insn (gen_rtx_SET (VOIDmode, e0,
41703 gen_rtx_MULT (mode, x0, a)));
41704 /* e1 = e0 * x0 */
41705 emit_insn (gen_rtx_SET (VOIDmode, e1,
41706 gen_rtx_MULT (mode, e0, x0)));
41707
41708 /* e2 = e1 - 3. */
41709 mthree = force_reg (mode, mthree);
41710 emit_insn (gen_rtx_SET (VOIDmode, e2,
41711 gen_rtx_PLUS (mode, e1, mthree)));
41712
41713 mhalf = force_reg (mode, mhalf);
41714 if (recip)
41715 /* e3 = -.5 * x0 */
41716 emit_insn (gen_rtx_SET (VOIDmode, e3,
41717 gen_rtx_MULT (mode, x0, mhalf)));
41718 else
41719 /* e3 = -.5 * e0 */
41720 emit_insn (gen_rtx_SET (VOIDmode, e3,
41721 gen_rtx_MULT (mode, e0, mhalf)));
41722 /* ret = e2 * e3 */
41723 emit_insn (gen_rtx_SET (VOIDmode, res,
41724 gen_rtx_MULT (mode, e2, e3)));
41725 }
41726
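/* Illustrative sketch (not compiled; hypothetical helper): the scalar C
   equivalent of the sequence emitted above, assuming RSQRT models the
   hardware rsqrtss/rsqrt14 estimate of 1/sqrt(a).  The zero filtering
   done above for the non-reciprocal case is omitted here.  */
#if 0
static float
swsqrt_sketch (float a, float (*rsqrt) (float), int recip)
{
  float x0, e0, e1, e2, e3;

  x0 = rsqrt (a);		/* x0 = rsqrt(a) estimate */
  e0 = x0 * a;			/* e0 = x0 * a */
  e1 = e0 * x0;			/* e1 = a * x0 * x0 */
  e2 = e1 - 3.0f;		/* e2 = a * x0 * x0 - 3 */
  e3 = (recip ? x0 : e0) * -0.5f;
  return e2 * e3;		/* sqrt(a), or 1/sqrt(a) if recip */
}
#endif
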
41727 #ifdef TARGET_SOLARIS
41728 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
41729
41730 static void
41731 i386_solaris_elf_named_section (const char *name, unsigned int flags,
41732 tree decl)
41733 {
41734 /* With Binutils 2.15, the "@unwind" marker must be specified on
41735 every occurrence of the ".eh_frame" section, not just the first
41736 one. */
41737 if (TARGET_64BIT
41738 && strcmp (name, ".eh_frame") == 0)
41739 {
41740 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
41741 flags & SECTION_WRITE ? "aw" : "a");
41742 return;
41743 }
41744
41745 #ifndef USE_GAS
41746 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
41747 {
41748 solaris_elf_asm_comdat_section (name, flags, decl);
41749 return;
41750 }
41751 #endif
41752
41753 default_elf_asm_named_section (name, flags, decl);
41754 }
41755 #endif /* TARGET_SOLARIS */
41756
41757 /* Return the mangling of TYPE if it is an extended fundamental type. */
41758
41759 static const char *
41760 ix86_mangle_type (const_tree type)
41761 {
41762 type = TYPE_MAIN_VARIANT (type);
41763
41764 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
41765 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
41766 return NULL;
41767
41768 switch (TYPE_MODE (type))
41769 {
41770 case TFmode:
41771 /* __float128 is "g". */
41772 return "g";
41773 case XFmode:
41774 /* "long double" or __float80 is "e". */
41775 return "e";
41776 default:
41777 return NULL;
41778 }
41779 }
41780
41781 /* For 32-bit code we can save PIC register setup by using
41782 the hidden __stack_chk_fail_local function instead of calling
41783 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
41784 register, so it is better to call __stack_chk_fail directly. */
41785
41786 static tree ATTRIBUTE_UNUSED
41787 ix86_stack_protect_fail (void)
41788 {
41789 return TARGET_64BIT
41790 ? default_external_stack_protect_fail ()
41791 : default_hidden_stack_protect_fail ();
41792 }
41793
41794 /* Select a format to encode pointers in exception handling data. CODE
41795 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
41796 true if the symbol may be affected by dynamic relocations.
41797
41798 ??? All x86 object file formats are capable of representing this.
41799 After all, the relocation needed is the same as for the call insn.
41800 Whether or not a particular assembler allows us to enter such, I
41801 guess we'll have to see. */
41802 int
41803 asm_preferred_eh_data_format (int code, int global)
41804 {
41805 if (flag_pic)
41806 {
41807 int type = DW_EH_PE_sdata8;
41808 if (!TARGET_64BIT
41809 || ix86_cmodel == CM_SMALL_PIC
41810 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
41811 type = DW_EH_PE_sdata4;
41812 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
41813 }
41814 if (ix86_cmodel == CM_SMALL
41815 || (ix86_cmodel == CM_MEDIUM && code))
41816 return DW_EH_PE_udata4;
41817 return DW_EH_PE_absptr;
41818 }
41819 \f
41820 /* Expand copysign from SIGN to the positive value ABS_VALUE
41821 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
41822 the sign-bit. */
41823 static void
41824 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
41825 {
41826 enum machine_mode mode = GET_MODE (sign);
41827 rtx sgn = gen_reg_rtx (mode);
41828 if (mask == NULL_RTX)
41829 {
41830 enum machine_mode vmode;
41831
41832 if (mode == SFmode)
41833 vmode = V4SFmode;
41834 else if (mode == DFmode)
41835 vmode = V2DFmode;
41836 else
41837 vmode = mode;
41838
41839 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
41840 if (!VECTOR_MODE_P (mode))
41841 {
41842 /* We need to generate a scalar mode mask in this case. */
41843 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
41844 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
41845 mask = gen_reg_rtx (mode);
41846 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
41847 }
41848 }
41849 else
41850 mask = gen_rtx_NOT (mode, mask);
41851 emit_insn (gen_rtx_SET (VOIDmode, sgn,
41852 gen_rtx_AND (mode, mask, sign)));
41853 emit_insn (gen_rtx_SET (VOIDmode, result,
41854 gen_rtx_IOR (mode, abs_value, sgn)));
41855 }
41856
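/* Illustrative sketch (not compiled): the bit-level operation the two
   insns above perform for scalar DFmode, assuming ABS_VALUE is already
   known to be non-negative so OR-ing in the extracted sign bit is
   enough.  */
#if 0
#include <stdint.h>
#include <string.h>

static double
copysign_to_positive_sketch (double abs_value, double sign)
{
  uint64_t a, s, signbit = (uint64_t) 1 << 63;

  memcpy (&a, &abs_value, sizeof a);
  memcpy (&s, &sign, sizeof s);
  a |= s & signbit;		/* sgn = sign & mask; result = abs_value | sgn */
  memcpy (&abs_value, &a, sizeof a);
  return abs_value;
}
#endif
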
41857 /* Expand fabs (OP0) and return a new rtx that holds the result. The
41858 mask for masking out the sign-bit is stored in *SMASK, if that is
41859 non-null. */
41860 static rtx
41861 ix86_expand_sse_fabs (rtx op0, rtx *smask)
41862 {
41863 enum machine_mode vmode, mode = GET_MODE (op0);
41864 rtx xa, mask;
41865
41866 xa = gen_reg_rtx (mode);
41867 if (mode == SFmode)
41868 vmode = V4SFmode;
41869 else if (mode == DFmode)
41870 vmode = V2DFmode;
41871 else
41872 vmode = mode;
41873 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
41874 if (!VECTOR_MODE_P (mode))
41875 {
41876 /* We need to generate a scalar mode mask in this case. */
41877 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
41878 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
41879 mask = gen_reg_rtx (mode);
41880 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
41881 }
41882 emit_insn (gen_rtx_SET (VOIDmode, xa,
41883 gen_rtx_AND (mode, op0, mask)));
41884
41885 if (smask)
41886 *smask = mask;
41887
41888 return xa;
41889 }
41890
41891 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
41892 swapping the operands if SWAP_OPERANDS is true. The expanded
41893 code is a forward jump to a newly created label in case the
41894 comparison is true. The generated label rtx is returned. */
41895 static rtx_code_label *
41896 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
41897 bool swap_operands)
41898 {
41899 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
41900 rtx_code_label *label;
41901 rtx tmp;
41902
41903 if (swap_operands)
41904 {
41905 tmp = op0;
41906 op0 = op1;
41907 op1 = tmp;
41908 }
41909
41910 label = gen_label_rtx ();
41911 tmp = gen_rtx_REG (fpcmp_mode, FLAGS_REG);
41912 emit_insn (gen_rtx_SET (VOIDmode, tmp,
41913 gen_rtx_COMPARE (fpcmp_mode, op0, op1)));
41914 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
41915 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
41916 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
41917 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
41918 JUMP_LABEL (tmp) = label;
41919
41920 return label;
41921 }
41922
41923 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
41924 using comparison code CODE. Operands are swapped for the comparison if
41925 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
41926 static rtx
41927 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
41928 bool swap_operands)
41929 {
41930 rtx (*insn)(rtx, rtx, rtx, rtx);
41931 enum machine_mode mode = GET_MODE (op0);
41932 rtx mask = gen_reg_rtx (mode);
41933
41934 if (swap_operands)
41935 {
41936 rtx tmp = op0;
41937 op0 = op1;
41938 op1 = tmp;
41939 }
41940
41941 insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
41942
41943 emit_insn (insn (mask, op0, op1,
41944 gen_rtx_fmt_ee (code, mode, op0, op1)));
41945 return mask;
41946 }
41947
41948 /* Generate and return a rtx of mode MODE for 2**n where n is the number
41949 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
41950 static rtx
41951 ix86_gen_TWO52 (enum machine_mode mode)
41952 {
41953 REAL_VALUE_TYPE TWO52r;
41954 rtx TWO52;
41955
41956 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
41957 TWO52 = const_double_from_real_value (TWO52r, mode);
41958 TWO52 = force_reg (mode, TWO52);
41959
41960 return TWO52;
41961 }
41962
41963 /* Expand SSE sequence for computing lround from OP1 storing
41964 into OP0. */
41965 void
41966 ix86_expand_lround (rtx op0, rtx op1)
41967 {
41968 /* C code for the stuff we're doing below:
41969 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
41970 return (long)tmp;
41971 */
41972 enum machine_mode mode = GET_MODE (op1);
41973 const struct real_format *fmt;
41974 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
41975 rtx adj;
41976
41977 /* load nextafter (0.5, 0.0) */
41978 fmt = REAL_MODE_FORMAT (mode);
41979 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
41980 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
41981
41982 /* adj = copysign (0.5, op1) */
41983 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
41984 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
41985
41986 /* adj = op1 + adj */
41987 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
41988
41989 /* op0 = (imode)adj */
41990 expand_fix (op0, adj, 0);
41991 }
41992
41993 /* Expand SSE2 sequence for computing lfloor or lceil from OP1 storing
41994 into OP0. */
41995 void
41996 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
41997 {
41998 /* C code for the stuff we're doing below (for do_floor):
41999 xi = (long)op1;
42000 xi -= (double)xi > op1 ? 1 : 0;
42001 return xi;
42002 */
42003 enum machine_mode fmode = GET_MODE (op1);
42004 enum machine_mode imode = GET_MODE (op0);
42005 rtx ireg, freg, tmp;
42006 rtx_code_label *label;
42007
42008 /* reg = (long)op1 */
42009 ireg = gen_reg_rtx (imode);
42010 expand_fix (ireg, op1, 0);
42011
42012 /* freg = (double)reg */
42013 freg = gen_reg_rtx (fmode);
42014 expand_float (freg, ireg, 0);
42015
42016 /* ireg = (freg > op1) ? ireg - 1 : ireg */
42017 label = ix86_expand_sse_compare_and_jump (UNLE,
42018 freg, op1, !do_floor);
42019 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
42020 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
42021 emit_move_insn (ireg, tmp);
42022
42023 emit_label (label);
42024 LABEL_NUSES (label) = 1;
42025
42026 emit_move_insn (op0, ireg);
42027 }
42028
42029 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
42030 result in OPERAND0. */
42031 void
42032 ix86_expand_rint (rtx operand0, rtx operand1)
42033 {
42034 /* C code for the stuff we're doing below:
42035 xa = fabs (operand1);
42036 if (!isless (xa, 2**52))
42037 return operand1;
42038 xa = xa + 2**52 - 2**52;
42039 return copysign (xa, operand1);
42040 */
42041 enum machine_mode mode = GET_MODE (operand0);
42042 rtx res, xa, TWO52, mask;
42043 rtx_code_label *label;
42044
42045 res = gen_reg_rtx (mode);
42046 emit_move_insn (res, operand1);
42047
42048 /* xa = abs (operand1) */
42049 xa = ix86_expand_sse_fabs (res, &mask);
42050
42051 /* if (!isless (xa, TWO52)) goto label; */
42052 TWO52 = ix86_gen_TWO52 (mode);
42053 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42054
42055 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
42056 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
42057
42058 ix86_sse_copysign_to_positive (res, xa, res, mask);
42059
42060 emit_label (label);
42061 LABEL_NUSES (label) = 1;
42062
42063 emit_move_insn (operand0, res);
42064 }
42065
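/* Worked example of the 2**52 trick used above (a sketch assuming the
   default round-to-nearest mode): for 0 <= xa < 2**52 the sum
   xa + 2**52 leaves no mantissa bits for a fraction, so the addition
   itself rounds xa to the nearest integer, and subtracting 2**52
   recovers it.  E.g. xa = 2.3 gives 2.3 + 2**52 -> 2**52 + 2.0, minus
   2**52 -> 2.0.  The hypothetical helper below mirrors this; volatile
   only keeps a compiler from folding the two operations away.  */
#if 0
static double
rint_sketch (double xa)		/* assumes 0.0 <= xa < 2**52 */
{
  volatile double t;

  t = xa + 4503599627370496.0;		/* xa + 2**52 */
  return t - 4503599627370496.0;	/* - 2**52 */
}
#endif
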
42066 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
42067 into OPERAND0. */
42068 void
42069 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
42070 {
42071 /* C code for the stuff we expand below.
42072 double xa = fabs (x), x2;
42073 if (!isless (xa, TWO52))
42074 return x;
42075 xa = xa + TWO52 - TWO52;
42076 x2 = copysign (xa, x);
42077 Compensate. Floor:
42078 if (x2 > x)
42079 x2 -= 1;
42080 Compensate. Ceil:
42081 if (x2 < x)
42082 x2 -= -1;
42083 return x2;
42084 */
42085 enum machine_mode mode = GET_MODE (operand0);
42086 rtx xa, TWO52, tmp, one, res, mask;
42087 rtx_code_label *label;
42088
42089 TWO52 = ix86_gen_TWO52 (mode);
42090
42091 /* Temporary for holding the result, initialized to the input
42092 operand to ease control flow. */
42093 res = gen_reg_rtx (mode);
42094 emit_move_insn (res, operand1);
42095
42096 /* xa = abs (operand1) */
42097 xa = ix86_expand_sse_fabs (res, &mask);
42098
42099 /* if (!isless (xa, TWO52)) goto label; */
42100 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42101
42102 /* xa = xa + TWO52 - TWO52; */
42103 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
42104 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
42105
42106 /* xa = copysign (xa, operand1) */
42107 ix86_sse_copysign_to_positive (xa, xa, res, mask);
42108
42109 /* generate 1.0 or -1.0 */
42110 one = force_reg (mode,
42111 const_double_from_real_value (do_floor
42112 ? dconst1 : dconstm1, mode));
42113
42114 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
42115 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
42116 emit_insn (gen_rtx_SET (VOIDmode, tmp,
42117 gen_rtx_AND (mode, one, tmp)));
42118 /* We always need to subtract here to preserve signed zero. */
42119 tmp = expand_simple_binop (mode, MINUS,
42120 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
42121 emit_move_insn (res, tmp);
42122
42123 emit_label (label);
42124 LABEL_NUSES (label) = 1;
42125
42126 emit_move_insn (operand0, res);
42127 }
42128
42129 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
42130 into OPERAND0. */
42131 void
42132 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
42133 {
42134 /* C code for the stuff we expand below.
42135 double xa = fabs (x), x2;
42136 if (!isless (xa, TWO52))
42137 return x;
42138 x2 = (double)(long)x;
42139 Compensate. Floor:
42140 if (x2 > x)
42141 x2 -= 1;
42142 Compensate. Ceil:
42143 if (x2 < x)
42144 x2 += 1;
42145 if (HONOR_SIGNED_ZEROS (mode))
42146 return copysign (x2, x);
42147 return x2;
42148 */
42149 enum machine_mode mode = GET_MODE (operand0);
42150 rtx xa, xi, TWO52, tmp, one, res, mask;
42151 rtx_code_label *label;
42152
42153 TWO52 = ix86_gen_TWO52 (mode);
42154
42155 /* Temporary for holding the result, initialized to the input
42156 operand to ease control flow. */
42157 res = gen_reg_rtx (mode);
42158 emit_move_insn (res, operand1);
42159
42160 /* xa = abs (operand1) */
42161 xa = ix86_expand_sse_fabs (res, &mask);
42162
42163 /* if (!isless (xa, TWO52)) goto label; */
42164 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42165
42166 /* xa = (double)(long)x */
42167 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
42168 expand_fix (xi, res, 0);
42169 expand_float (xa, xi, 0);
42170
42171 /* generate 1.0 */
42172 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
42173
42174 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
42175 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
42176 emit_insn (gen_rtx_SET (VOIDmode, tmp,
42177 gen_rtx_AND (mode, one, tmp)));
42178 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
42179 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
42180 emit_move_insn (res, tmp);
42181
42182 if (HONOR_SIGNED_ZEROS (mode))
42183 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
42184
42185 emit_label (label);
42186 LABEL_NUSES (label) = 1;
42187
42188 emit_move_insn (operand0, res);
42189 }
42190
42191 /* Expand SSE sequence for computing round from OPERAND1 storing
42192 into OPERAND0, using a sequence that works without relying on DImode
42193 truncation via cvttsd2siq, which is only available on 64-bit targets. */
42194 void
42195 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
42196 {
42197 /* C code for the stuff we expand below.
42198 double xa = fabs (x), xa2, x2;
42199 if (!isless (xa, TWO52))
42200 return x;
42201 Using the absolute value and copying back sign makes
42202 -0.0 -> -0.0 correct.
42203 xa2 = xa + TWO52 - TWO52;
42204 Compensate.
42205 dxa = xa2 - xa;
42206 if (dxa <= -0.5)
42207 xa2 += 1;
42208 else if (dxa > 0.5)
42209 xa2 -= 1;
42210 x2 = copysign (xa2, x);
42211 return x2;
42212 */
42213 enum machine_mode mode = GET_MODE (operand0);
42214 rtx xa, xa2, dxa, TWO52, tmp, half, mhalf, one, res, mask;
42215 rtx_code_label *label;
42216
42217 TWO52 = ix86_gen_TWO52 (mode);
42218
42219 /* Temporary for holding the result, initialized to the input
42220 operand to ease control flow. */
42221 res = gen_reg_rtx (mode);
42222 emit_move_insn (res, operand1);
42223
42224 /* xa = abs (operand1) */
42225 xa = ix86_expand_sse_fabs (res, &mask);
42226
42227 /* if (!isless (xa, TWO52)) goto label; */
42228 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42229
42230 /* xa2 = xa + TWO52 - TWO52; */
42231 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
42232 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
42233
42234 /* dxa = xa2 - xa; */
42235 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
42236
42237 /* generate 0.5, 1.0 and -0.5 */
42238 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
42239 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
42240 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
42241 0, OPTAB_DIRECT);
42242
42243 /* Compensate. */
42244 tmp = gen_reg_rtx (mode);
42245 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
42246 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
42247 emit_insn (gen_rtx_SET (VOIDmode, tmp,
42248 gen_rtx_AND (mode, one, tmp)));
42249 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
42250 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
42251 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
42252 emit_insn (gen_rtx_SET (VOIDmode, tmp,
42253 gen_rtx_AND (mode, one, tmp)));
42254 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
42255
42256 /* res = copysign (xa2, operand1) */
42257 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
42258
42259 emit_label (label);
42260 LABEL_NUSES (label) = 1;
42261
42262 emit_move_insn (operand0, res);
42263 }
42264
42265 /* Expand SSE sequence for computing trunc from OPERAND1 storing
42266 into OPERAND0. */
42267 void
42268 ix86_expand_trunc (rtx operand0, rtx operand1)
42269 {
42270 /* C code for SSE variant we expand below.
42271 double xa = fabs (x), x2;
42272 if (!isless (xa, TWO52))
42273 return x;
42274 x2 = (double)(long)x;
42275 if (HONOR_SIGNED_ZEROS (mode))
42276 return copysign (x2, x);
42277 return x2;
42278 */
42279 enum machine_mode mode = GET_MODE (operand0);
42280 rtx xa, xi, TWO52, res, mask;
42281 rtx_code_label *label;
42282
42283 TWO52 = ix86_gen_TWO52 (mode);
42284
42285 /* Temporary for holding the result, initialized to the input
42286 operand to ease control flow. */
42287 res = gen_reg_rtx (mode);
42288 emit_move_insn (res, operand1);
42289
42290 /* xa = abs (operand1) */
42291 xa = ix86_expand_sse_fabs (res, &mask);
42292
42293 /* if (!isless (xa, TWO52)) goto label; */
42294 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42295
42296 /* x = (double)(long)x */
42297 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
42298 expand_fix (xi, res, 0);
42299 expand_float (res, xi, 0);
42300
42301 if (HONOR_SIGNED_ZEROS (mode))
42302 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
42303
42304 emit_label (label);
42305 LABEL_NUSES (label) = 1;
42306
42307 emit_move_insn (operand0, res);
42308 }
42309
42310 /* Expand SSE sequence for computing trunc from OPERAND1 storing
42311 into OPERAND0, without relying on DImode truncation (32-bit variant). */
42312 void
42313 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
42314 {
42315 enum machine_mode mode = GET_MODE (operand0);
42316 rtx xa, mask, TWO52, one, res, smask, tmp;
42317 rtx_code_label *label;
42318
42319 /* C code for SSE variant we expand below.
42320 double xa = fabs (x), x2;
42321 if (!isless (xa, TWO52))
42322 return x;
42323 xa2 = xa + TWO52 - TWO52;
42324 Compensate:
42325 if (xa2 > xa)
42326 xa2 -= 1.0;
42327 x2 = copysign (xa2, x);
42328 return x2;
42329 */
42330
42331 TWO52 = ix86_gen_TWO52 (mode);
42332
42333 /* Temporary for holding the result, initialized to the input
42334 operand to ease control flow. */
42335 res = gen_reg_rtx (mode);
42336 emit_move_insn (res, operand1);
42337
42338 /* xa = abs (operand1) */
42339 xa = ix86_expand_sse_fabs (res, &smask);
42340
42341 /* if (!isless (xa, TWO52)) goto label; */
42342 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42343
42344 /* res = xa + TWO52 - TWO52; */
42345 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
42346 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
42347 emit_move_insn (res, tmp);
42348
42349 /* generate 1.0 */
42350 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
42351
42352 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
42353 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
42354 emit_insn (gen_rtx_SET (VOIDmode, mask,
42355 gen_rtx_AND (mode, mask, one)));
42356 tmp = expand_simple_binop (mode, MINUS,
42357 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
42358 emit_move_insn (res, tmp);
42359
42360 /* res = copysign (res, operand1) */
42361 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
42362
42363 emit_label (label);
42364 LABEL_NUSES (label) = 1;
42365
42366 emit_move_insn (operand0, res);
42367 }
42368
42369 /* Expand SSE sequence for computing round from OPERAND1 storing
42370 into OPERAND0. */
42371 void
42372 ix86_expand_round (rtx operand0, rtx operand1)
42373 {
42374 /* C code for the stuff we're doing below:
42375 double xa = fabs (x);
42376 if (!isless (xa, TWO52))
42377 return x;
42378 xa = (double)(long)(xa + nextafter (0.5, 0.0));
42379 return copysign (xa, x);
42380 */
42381 enum machine_mode mode = GET_MODE (operand0);
42382 rtx res, TWO52, xa, xi, half, mask;
42383 rtx_code_label *label;
42384 const struct real_format *fmt;
42385 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
42386
42387 /* Temporary for holding the result, initialized to the input
42388 operand to ease control flow. */
42389 res = gen_reg_rtx (mode);
42390 emit_move_insn (res, operand1);
42391
42392 TWO52 = ix86_gen_TWO52 (mode);
42393 xa = ix86_expand_sse_fabs (res, &mask);
42394 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42395
42396 /* load nextafter (0.5, 0.0) */
42397 fmt = REAL_MODE_FORMAT (mode);
42398 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
42399 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
42400
42401 /* xa = xa + 0.5 */
42402 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
42403 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
42404
42405 /* xa = (double)(int64_t)xa */
42406 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
42407 expand_fix (xi, xa, 0);
42408 expand_float (xa, xi, 0);
42409
42410 /* res = copysign (xa, operand1) */
42411 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
42412
42413 emit_label (label);
42414 LABEL_NUSES (label) = 1;
42415
42416 emit_move_insn (operand0, res);
42417 }
42418
42419 /* Expand SSE sequence for computing round
42420 from OP1 storing into OP0 using sse4 round insn. */
42421 void
42422 ix86_expand_round_sse4 (rtx op0, rtx op1)
42423 {
42424 enum machine_mode mode = GET_MODE (op0);
42425 rtx e1, e2, res, half;
42426 const struct real_format *fmt;
42427 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
42428 rtx (*gen_copysign) (rtx, rtx, rtx);
42429 rtx (*gen_round) (rtx, rtx, rtx);
42430
42431 switch (mode)
42432 {
42433 case SFmode:
42434 gen_copysign = gen_copysignsf3;
42435 gen_round = gen_sse4_1_roundsf2;
42436 break;
42437 case DFmode:
42438 gen_copysign = gen_copysigndf3;
42439 gen_round = gen_sse4_1_rounddf2;
42440 break;
42441 default:
42442 gcc_unreachable ();
42443 }
42444
42445 /* round (a) = trunc (a + copysign (0.5, a)) */
42446
42447 /* load nextafter (0.5, 0.0) */
42448 fmt = REAL_MODE_FORMAT (mode);
42449 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
42450 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
42451 half = const_double_from_real_value (pred_half, mode);
42452
42453 /* e1 = copysign (0.5, op1) */
42454 e1 = gen_reg_rtx (mode);
42455 emit_insn (gen_copysign (e1, half, op1));
42456
42457 /* e2 = op1 + e1 */
42458 e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);
42459
42460 /* res = trunc (e2) */
42461 res = gen_reg_rtx (mode);
42462 emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));
42463
42464 emit_move_insn (op0, res);
42465 }
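
/* Illustrative sketch (not compiled): the libm-level equivalent of the
   sequence above.  Adding nextafter (0.5, 0.0) instead of 0.5 keeps the
   largest double below 0.5 from being rounded up to 1.0 by the addition
   itself.  */
#if 0
#include <math.h>

static double
round_sse4_sketch (double a)
{
  double half, e1;

  half = nextafter (0.5, 0.0);	/* load nextafter (0.5, 0.0) */
  e1 = copysign (half, a);	/* e1 = copysign (0.5-ulp, a) */
  return trunc (a + e1);	/* res = trunc (a + e1) */
}
#endif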
42466 \f
42467
42468 /* Table of valid machine attributes. */
42469 static const struct attribute_spec ix86_attribute_table[] =
42470 {
42471 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
42472 affects_type_identity } */
42473 /* Stdcall attribute says callee is responsible for popping arguments
42474 if they are not variable. */
42475 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42476 true },
42477 /* Fastcall attribute says callee is responsible for popping arguments
42478 if they are not variable. */
42479 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42480 true },
42481 /* Thiscall attribute says callee is responsible for popping arguments
42482 if they are not variable. */
42483 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42484 true },
42485 /* Cdecl attribute says the callee is a normal C declaration */
42486 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42487 true },
42488 /* Regparm attribute specifies how many integer arguments are to be
42489 passed in registers. */
42490 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute,
42491 true },
42492 /* Sseregparm attribute says we are using x86_64 calling conventions
42493 for FP arguments. */
42494 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42495 true },
42496 /* The transactional memory builtins are implicitly regparm or fastcall
42497 depending on the ABI. Override the generic do-nothing attribute that
42498 these builtins were declared with. */
42499 { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute,
42500 true },
42501 /* force_align_arg_pointer says this function realigns the stack at entry. */
42502 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
42503 false, true, true, ix86_handle_cconv_attribute, false },
42504 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
42505 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
42506 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
42507 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute,
42508 false },
42509 #endif
42510 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
42511 false },
42512 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
42513 false },
42514 #ifdef SUBTARGET_ATTRIBUTE_TABLE
42515 SUBTARGET_ATTRIBUTE_TABLE,
42516 #endif
42517 /* ms_abi and sysv_abi calling convention function attributes. */
42518 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
42519 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
42520 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
42521 false },
42522 { "callee_pop_aggregate_return", 1, 1, false, true, true,
42523 ix86_handle_callee_pop_aggregate_return, true },
42524 /* End element. */
42525 { NULL, 0, 0, false, false, false, NULL, false }
42526 };
42527
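/* Illustrative examples (not compiled; hypothetical declarations) of
   how a few of the attributes in the table above are spelled in user
   source code.  */
#if 0
/* Callee pops its own stack arguments (32-bit only).  */
int __attribute__ ((stdcall)) win_callback (int a, int b);

/* Pass the first three integer arguments in registers.  */
int __attribute__ ((regparm (3))) fast_helper (int a, int b, int c);

/* Use the Microsoft x86-64 calling convention for this function.  */
void __attribute__ ((ms_abi)) win64_entry (void);

/* Lay out bit-fields following the MS rules rather than the GCC ones.  */
struct __attribute__ ((ms_struct)) msbits { int a : 3; char b : 2; };
#endif
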
42528 /* Implement targetm.vectorize.builtin_vectorization_cost. */
42529 static int
42530 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
42531 tree vectype, int)
42532 {
42533 unsigned elements;
42534
42535 switch (type_of_cost)
42536 {
42537 case scalar_stmt:
42538 return ix86_cost->scalar_stmt_cost;
42539
42540 case scalar_load:
42541 return ix86_cost->scalar_load_cost;
42542
42543 case scalar_store:
42544 return ix86_cost->scalar_store_cost;
42545
42546 case vector_stmt:
42547 return ix86_cost->vec_stmt_cost;
42548
42549 case vector_load:
42550 return ix86_cost->vec_align_load_cost;
42551
42552 case vector_store:
42553 return ix86_cost->vec_store_cost;
42554
42555 case vec_to_scalar:
42556 return ix86_cost->vec_to_scalar_cost;
42557
42558 case scalar_to_vec:
42559 return ix86_cost->scalar_to_vec_cost;
42560
42561 case unaligned_load:
42562 case unaligned_store:
42563 return ix86_cost->vec_unalign_load_cost;
42564
42565 case cond_branch_taken:
42566 return ix86_cost->cond_taken_branch_cost;
42567
42568 case cond_branch_not_taken:
42569 return ix86_cost->cond_not_taken_branch_cost;
42570
42571 case vec_perm:
42572 case vec_promote_demote:
42573 return ix86_cost->vec_stmt_cost;
42574
42575 case vec_construct:
42576 elements = TYPE_VECTOR_SUBPARTS (vectype);
42577 return elements / 2 + 1;
42578
42579 default:
42580 gcc_unreachable ();
42581 }
42582 }
42583
42584 /* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
42585 insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
42586 insn every time. */
42587
42588 static GTY(()) rtx_insn *vselect_insn;
42589
42590 /* Initialize vselect_insn. */
42591
42592 static void
42593 init_vselect_insn (void)
42594 {
42595 unsigned i;
42596 rtx x;
42597
42598 x = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (MAX_VECT_LEN));
42599 for (i = 0; i < MAX_VECT_LEN; ++i)
42600 XVECEXP (x, 0, i) = const0_rtx;
42601 x = gen_rtx_VEC_SELECT (V2DFmode, gen_rtx_VEC_CONCAT (V4DFmode, const0_rtx,
42602 const0_rtx), x);
42603 x = gen_rtx_SET (VOIDmode, const0_rtx, x);
42604 start_sequence ();
42605 vselect_insn = emit_insn (x);
42606 end_sequence ();
42607 }
42608
42609 /* Construct (set target (vec_select op0 (parallel perm))) and
42610 return true if that's a valid instruction in the active ISA. */
42611
42612 static bool
42613 expand_vselect (rtx target, rtx op0, const unsigned char *perm,
42614 unsigned nelt, bool testing_p)
42615 {
42616 unsigned int i;
42617 rtx x, save_vconcat;
42618 int icode;
42619
42620 if (vselect_insn == NULL_RTX)
42621 init_vselect_insn ();
42622
42623 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 1);
42624 PUT_NUM_ELEM (XVEC (x, 0), nelt);
42625 for (i = 0; i < nelt; ++i)
42626 XVECEXP (x, 0, i) = GEN_INT (perm[i]);
42627 save_vconcat = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
42628 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = op0;
42629 PUT_MODE (SET_SRC (PATTERN (vselect_insn)), GET_MODE (target));
42630 SET_DEST (PATTERN (vselect_insn)) = target;
42631 icode = recog_memoized (vselect_insn);
42632
42633 if (icode >= 0 && !testing_p)
42634 emit_insn (copy_rtx (PATTERN (vselect_insn)));
42635
42636 SET_DEST (PATTERN (vselect_insn)) = const0_rtx;
42637 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = save_vconcat;
42638 INSN_CODE (vselect_insn) = -1;
42639
42640 return icode >= 0;
42641 }
42642
42643 /* Similar, but generate a vec_concat from op0 and op1 as well. */
42644
42645 static bool
42646 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
42647 const unsigned char *perm, unsigned nelt,
42648 bool testing_p)
42649 {
42650 enum machine_mode v2mode;
42651 rtx x;
42652 bool ok;
42653
42654 if (vselect_insn == NULL_RTX)
42655 init_vselect_insn ();
42656
42657 v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
42658 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
42659 PUT_MODE (x, v2mode);
42660 XEXP (x, 0) = op0;
42661 XEXP (x, 1) = op1;
42662 ok = expand_vselect (target, x, perm, nelt, testing_p);
42663 XEXP (x, 0) = const0_rtx;
42664 XEXP (x, 1) = const0_rtx;
42665 return ok;
42666 }
42667
42668 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
42669 in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
42670
42671 static bool
42672 expand_vec_perm_blend (struct expand_vec_perm_d *d)
42673 {
42674 enum machine_mode vmode = d->vmode;
42675 unsigned i, mask, nelt = d->nelt;
42676 rtx target, op0, op1, x;
42677 rtx rperm[32], vperm;
42678
42679 if (d->one_operand_p)
42680 return false;
42681 if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
42682 ;
42683 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
42684 ;
42685 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
42686 ;
42687 else
42688 return false;
42689
42690 /* This is a blend, not a permute. Elements must stay in their
42691 respective lanes. */
42692 for (i = 0; i < nelt; ++i)
42693 {
42694 unsigned e = d->perm[i];
42695 if (!(e == i || e == i + nelt))
42696 return false;
42697 }
42698
42699 if (d->testing_p)
42700 return true;
42701
42702 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
42703 decision should be extracted elsewhere, so that we only try that
42704 sequence once all budget==3 options have been tried. */
42705 target = d->target;
42706 op0 = d->op0;
42707 op1 = d->op1;
42708 mask = 0;
42709
42710 switch (vmode)
42711 {
42712 case V4DFmode:
42713 case V8SFmode:
42714 case V2DFmode:
42715 case V4SFmode:
42716 case V8HImode:
42717 case V8SImode:
42718 for (i = 0; i < nelt; ++i)
42719 mask |= (d->perm[i] >= nelt) << i;
42720 break;
42721
42722 case V2DImode:
42723 for (i = 0; i < 2; ++i)
42724 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
42725 vmode = V8HImode;
42726 goto do_subreg;
42727
42728 case V4SImode:
42729 for (i = 0; i < 4; ++i)
42730 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
42731 vmode = V8HImode;
42732 goto do_subreg;
42733
42734 case V16QImode:
42735 /* See if bytes move in pairs so we can use pblendw with
42736 an immediate argument, rather than pblendvb with a vector
42737 argument. */
42738 for (i = 0; i < 16; i += 2)
42739 if (d->perm[i] + 1 != d->perm[i + 1])
42740 {
42741 use_pblendvb:
42742 for (i = 0; i < nelt; ++i)
42743 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
42744
42745 finish_pblendvb:
42746 vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
42747 vperm = force_reg (vmode, vperm);
42748
42749 if (GET_MODE_SIZE (vmode) == 16)
42750 emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
42751 else
42752 emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
42753 if (target != d->target)
42754 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
42755 return true;
42756 }
42757
42758 for (i = 0; i < 8; ++i)
42759 mask |= (d->perm[i * 2] >= 16) << i;
42760 vmode = V8HImode;
42761 /* FALLTHRU */
42762
42763 do_subreg:
42764 target = gen_reg_rtx (vmode);
42765 op0 = gen_lowpart (vmode, op0);
42766 op1 = gen_lowpart (vmode, op1);
42767 break;
42768
42769 case V32QImode:
42770 /* See if bytes move in pairs. If not, vpblendvb must be used. */
42771 for (i = 0; i < 32; i += 2)
42772 if (d->perm[i] + 1 != d->perm[i + 1])
42773 goto use_pblendvb;
42774 /* See if bytes move in quadruplets. If yes, vpblendd
42775 with immediate can be used. */
42776 for (i = 0; i < 32; i += 4)
42777 if (d->perm[i] + 2 != d->perm[i + 2])
42778 break;
42779 if (i < 32)
42780 {
42781 /* See if bytes move the same in both lanes. If yes,
42782 vpblendw with immediate can be used. */
42783 for (i = 0; i < 16; i += 2)
42784 if (d->perm[i] + 16 != d->perm[i + 16])
42785 goto use_pblendvb;
42786
42787 /* Use vpblendw. */
42788 for (i = 0; i < 16; ++i)
42789 mask |= (d->perm[i * 2] >= 32) << i;
42790 vmode = V16HImode;
42791 goto do_subreg;
42792 }
42793
42794 /* Use vpblendd. */
42795 for (i = 0; i < 8; ++i)
42796 mask |= (d->perm[i * 4] >= 32) << i;
42797 vmode = V8SImode;
42798 goto do_subreg;
42799
42800 case V16HImode:
42801 /* See if words move in pairs. If yes, vpblendd can be used. */
42802 for (i = 0; i < 16; i += 2)
42803 if (d->perm[i] + 1 != d->perm[i + 1])
42804 break;
42805 if (i < 16)
42806 {
42807 /* See if words move the same in both lanes. If not,
42808 vpblendvb must be used. */
42809 for (i = 0; i < 8; i++)
42810 if (d->perm[i] + 8 != d->perm[i + 8])
42811 {
42812 /* Use vpblendvb. */
42813 for (i = 0; i < 32; ++i)
42814 rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx);
42815
42816 vmode = V32QImode;
42817 nelt = 32;
42818 target = gen_reg_rtx (vmode);
42819 op0 = gen_lowpart (vmode, op0);
42820 op1 = gen_lowpart (vmode, op1);
42821 goto finish_pblendvb;
42822 }
42823
42824 /* Use vpblendw. */
42825 for (i = 0; i < 16; ++i)
42826 mask |= (d->perm[i] >= 16) << i;
42827 break;
42828 }
42829
42830 /* Use vpblendd. */
42831 for (i = 0; i < 8; ++i)
42832 mask |= (d->perm[i * 2] >= 16) << i;
42833 vmode = V8SImode;
42834 goto do_subreg;
42835
42836 case V4DImode:
42837 /* Use vpblendd. */
42838 for (i = 0; i < 4; ++i)
42839 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
42840 vmode = V8SImode;
42841 goto do_subreg;
42842
42843 default:
42844 gcc_unreachable ();
42845 }
42846
42847 /* This matches five different patterns with the different modes. */
42848 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
42849 x = gen_rtx_SET (VOIDmode, target, x);
42850 emit_insn (x);
42851 if (target != d->target)
42852 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
42853
42854 return true;
42855 }
42856
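/* Worked example (a sketch) of the immediate-mask computation in
   expand_vec_perm_blend above: for V8HImode and the two-operand
   permutation {0, 9, 2, 11, 4, 13, 6, 15}, elements 1, 3, 5 and 7 come
   from op1 (index >= nelt), so the pblendw immediate is
   0b10101010 = 0xaa.  */
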
42857 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
42858 in terms of the variable form of vpermilps.
42859
42860 Note that we will have already failed the immediate input vpermilps,
42861 which requires that the high and low part shuffle be identical; the
42862 variable form doesn't require that. */
42863
42864 static bool
42865 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
42866 {
42867 rtx rperm[8], vperm;
42868 unsigned i;
42869
42870 if (!TARGET_AVX || d->vmode != V8SFmode || !d->one_operand_p)
42871 return false;
42872
42873 /* We can only permute within the 128-bit lane. */
42874 for (i = 0; i < 8; ++i)
42875 {
42876 unsigned e = d->perm[i];
42877 if (i < 4 ? e >= 4 : e < 4)
42878 return false;
42879 }
42880
42881 if (d->testing_p)
42882 return true;
42883
42884 for (i = 0; i < 8; ++i)
42885 {
42886 unsigned e = d->perm[i];
42887
42888 /* Within each 128-bit lane, the elements of op0 are numbered
42889 from 0 and the elements of op1 are numbered from 4. */
42890 if (e >= 8 + 4)
42891 e -= 8;
42892 else if (e >= 4)
42893 e -= 4;
42894
42895 rperm[i] = GEN_INT (e);
42896 }
42897
42898 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
42899 vperm = force_reg (V8SImode, vperm);
42900 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
42901
42902 return true;
42903 }
42904
42905 /* Return true if permutation D can be performed as a VMODE permutation
42906 instead. */
42907
42908 static bool
42909 valid_perm_using_mode_p (enum machine_mode vmode, struct expand_vec_perm_d *d)
42910 {
42911 unsigned int i, j, chunk;
42912
42913 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
42914 || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
42915 || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))
42916 return false;
42917
42918 if (GET_MODE_NUNITS (vmode) >= d->nelt)
42919 return true;
42920
42921 chunk = d->nelt / GET_MODE_NUNITS (vmode);
42922 for (i = 0; i < d->nelt; i += chunk)
42923 if (d->perm[i] & (chunk - 1))
42924 return false;
42925 else
42926 for (j = 1; j < chunk; ++j)
42927 if (d->perm[i] + j != d->perm[i + j])
42928 return false;
42929
42930 return true;
42931 }
42932
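/* Worked example (a sketch) for valid_perm_using_mode_p: the V16QImode
   permutation {4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11} has chunk = 4,
   every group starts at a multiple of 4 and is consecutive, so the same
   shuffle can be performed as the V4SImode permutation {1, 0, 3, 2}.
   A byte permutation starting {1, 0, ...} would instead fail the
   (d->perm[i] & (chunk - 1)) test.  */
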
42933 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
42934 in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128. */
42935
42936 static bool
42937 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
42938 {
42939 unsigned i, nelt, eltsz, mask;
42940 unsigned char perm[32];
42941 enum machine_mode vmode = V16QImode;
42942 rtx rperm[32], vperm, target, op0, op1;
42943
42944 nelt = d->nelt;
42945
42946 if (!d->one_operand_p)
42947 {
42948 if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
42949 {
42950 if (TARGET_AVX2
42951 && valid_perm_using_mode_p (V2TImode, d))
42952 {
42953 if (d->testing_p)
42954 return true;
42955
42956 /* Use vperm2i128 insn. The pattern uses
42957 V4DImode instead of V2TImode. */
42958 target = d->target;
42959 if (d->vmode != V4DImode)
42960 target = gen_reg_rtx (V4DImode);
42961 op0 = gen_lowpart (V4DImode, d->op0);
42962 op1 = gen_lowpart (V4DImode, d->op1);
42963 rperm[0]
42964 = GEN_INT ((d->perm[0] / (nelt / 2))
42965 | ((d->perm[nelt / 2] / (nelt / 2)) * 16));
42966 emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
42967 if (target != d->target)
42968 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
42969 return true;
42970 }
42971 return false;
42972 }
42973 }
42974 else
42975 {
42976 if (GET_MODE_SIZE (d->vmode) == 16)
42977 {
42978 if (!TARGET_SSSE3)
42979 return false;
42980 }
42981 else if (GET_MODE_SIZE (d->vmode) == 32)
42982 {
42983 if (!TARGET_AVX2)
42984 return false;
42985
42986 /* V4DImode should already have been handled through
42987 expand_vselect by the vpermq instruction. */
42988 gcc_assert (d->vmode != V4DImode);
42989
42990 vmode = V32QImode;
42991 if (d->vmode == V8SImode
42992 || d->vmode == V16HImode
42993 || d->vmode == V32QImode)
42994 {
42995 /* First see if vpermq can be used for
42996 V8SImode/V16HImode/V32QImode. */
42997 if (valid_perm_using_mode_p (V4DImode, d))
42998 {
42999 for (i = 0; i < 4; i++)
43000 perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
43001 if (d->testing_p)
43002 return true;
43003 target = gen_reg_rtx (V4DImode);
43004 if (expand_vselect (target, gen_lowpart (V4DImode, d->op0),
43005 perm, 4, false))
43006 {
43007 emit_move_insn (d->target,
43008 gen_lowpart (d->vmode, target));
43009 return true;
43010 }
43011 return false;
43012 }
43013
43014 /* Next see if vpermd can be used. */
43015 if (valid_perm_using_mode_p (V8SImode, d))
43016 vmode = V8SImode;
43017 }
43018 /* Or if vpermps can be used. */
43019 else if (d->vmode == V8SFmode)
43020 vmode = V8SImode;
43021
43022 if (vmode == V32QImode)
43023 {
43024 /* vpshufb only works intra-lane; it is not
43025 possible to shuffle bytes between the lanes. */
43026 for (i = 0; i < nelt; ++i)
43027 if ((d->perm[i] ^ i) & (nelt / 2))
43028 return false;
43029 }
43030 }
43031 else
43032 return false;
43033 }
43034
43035 if (d->testing_p)
43036 return true;
43037
43038 if (vmode == V8SImode)
43039 for (i = 0; i < 8; ++i)
43040 rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
43041 else
43042 {
43043 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
43044 if (!d->one_operand_p)
43045 mask = 2 * nelt - 1;
43046 else if (vmode == V16QImode)
43047 mask = nelt - 1;
43048 else
43049 mask = nelt / 2 - 1;
43050
43051 for (i = 0; i < nelt; ++i)
43052 {
43053 unsigned j, e = d->perm[i] & mask;
43054 for (j = 0; j < eltsz; ++j)
43055 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
43056 }
43057 }
43058
43059 vperm = gen_rtx_CONST_VECTOR (vmode,
43060 gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
43061 vperm = force_reg (vmode, vperm);
43062
43063 target = d->target;
43064 if (d->vmode != vmode)
43065 target = gen_reg_rtx (vmode);
43066 op0 = gen_lowpart (vmode, d->op0);
43067 if (d->one_operand_p)
43068 {
43069 if (vmode == V16QImode)
43070 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
43071 else if (vmode == V32QImode)
43072 emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
43073 else if (vmode == V8SFmode)
43074 emit_insn (gen_avx2_permvarv8sf (target, op0, vperm));
43075 else
43076 emit_insn (gen_avx2_permvarv8si (target, op0, vperm));
43077 }
43078 else
43079 {
43080 op1 = gen_lowpart (vmode, d->op1);
43081 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
43082 }
43083 if (target != d->target)
43084 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
43085
43086 return true;
43087 }
43088
43089 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
43090 in a single instruction. */
43091
43092 static bool
43093 expand_vec_perm_1 (struct expand_vec_perm_d *d)
43094 {
43095 unsigned i, nelt = d->nelt;
43096 unsigned char perm2[MAX_VECT_LEN];
43097
43098 /* Check plain VEC_SELECT first, because AVX has instructions that could
43099 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
43100 input where SEL+CONCAT may not. */
43101 if (d->one_operand_p)
43102 {
43103 int mask = nelt - 1;
43104 bool identity_perm = true;
43105 bool broadcast_perm = true;
43106
43107 for (i = 0; i < nelt; i++)
43108 {
43109 perm2[i] = d->perm[i] & mask;
43110 if (perm2[i] != i)
43111 identity_perm = false;
43112 if (perm2[i])
43113 broadcast_perm = false;
43114 }
43115
43116 if (identity_perm)
43117 {
43118 if (!d->testing_p)
43119 emit_move_insn (d->target, d->op0);
43120 return true;
43121 }
43122 else if (broadcast_perm && TARGET_AVX2)
43123 {
43124 /* Use vpbroadcast{b,w,d}. */
43125 rtx (*gen) (rtx, rtx) = NULL;
43126 switch (d->vmode)
43127 {
43128 case V32QImode:
43129 gen = gen_avx2_pbroadcastv32qi_1;
43130 break;
43131 case V16HImode:
43132 gen = gen_avx2_pbroadcastv16hi_1;
43133 break;
43134 case V8SImode:
43135 gen = gen_avx2_pbroadcastv8si_1;
43136 break;
43137 case V16QImode:
43138 gen = gen_avx2_pbroadcastv16qi;
43139 break;
43140 case V8HImode:
43141 gen = gen_avx2_pbroadcastv8hi;
43142 break;
43143 case V8SFmode:
43144 gen = gen_avx2_vec_dupv8sf_1;
43145 break;
43146 /* For other modes prefer other shuffles this function creates. */
43147 default: break;
43148 }
43149 if (gen != NULL)
43150 {
43151 if (!d->testing_p)
43152 emit_insn (gen (d->target, d->op0));
43153 return true;
43154 }
43155 }
43156
43157 if (expand_vselect (d->target, d->op0, perm2, nelt, d->testing_p))
43158 return true;
43159
43160 /* There are plenty of patterns in sse.md that are written for
43161 SEL+CONCAT and are not replicated for a single op. Perhaps
43162 that should be changed, to avoid the nastiness here. */
43163
43164 /* Recognize interleave style patterns, which means incrementing
43165 every other permutation operand. */
43166 for (i = 0; i < nelt; i += 2)
43167 {
43168 perm2[i] = d->perm[i] & mask;
43169 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
43170 }
43171 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
43172 d->testing_p))
43173 return true;
43174
43175 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
43176 if (nelt >= 4)
43177 {
43178 for (i = 0; i < nelt; i += 4)
43179 {
43180 perm2[i + 0] = d->perm[i + 0] & mask;
43181 perm2[i + 1] = d->perm[i + 1] & mask;
43182 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
43183 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
43184 }
43185
43186 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
43187 d->testing_p))
43188 return true;
43189 }
43190 }
43191
43192 /* Finally, try the fully general two operand permute. */
43193 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt,
43194 d->testing_p))
43195 return true;
43196
43197 /* Recognize interleave style patterns with reversed operands. */
43198 if (!d->one_operand_p)
43199 {
43200 for (i = 0; i < nelt; ++i)
43201 {
43202 unsigned e = d->perm[i];
43203 if (e >= nelt)
43204 e -= nelt;
43205 else
43206 e += nelt;
43207 perm2[i] = e;
43208 }
43209
43210 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt,
43211 d->testing_p))
43212 return true;
43213 }
43214
43215 /* Try the SSE4.1 blend variable merge instructions. */
43216 if (expand_vec_perm_blend (d))
43217 return true;
43218
43219 /* Try one of the AVX vpermil variable permutations. */
43220 if (expand_vec_perm_vpermil (d))
43221 return true;
43222
43223 /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
43224 vpshufb, vpermd, vpermps or vpermq variable permutation. */
43225 if (expand_vec_perm_pshufb (d))
43226 return true;
43227
43228 /* Try the AVX512F vpermi2 instructions. */
43229 rtx vec[64];
43230 enum machine_mode mode = d->vmode;
43231 if (mode == V8DFmode)
43232 mode = V8DImode;
43233 else if (mode == V16SFmode)
43234 mode = V16SImode;
43235 for (i = 0; i < nelt; ++i)
43236 vec[i] = GEN_INT (d->perm[i]);
43237 rtx mask = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt, vec));
43238 if (ix86_expand_vec_perm_vpermi2 (d->target, d->op0, mask, d->op1))
43239 return true;
43240
43241 return false;
43242 }
43243
43244 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
43245 in terms of a pair of pshuflw + pshufhw instructions. */
43246
43247 static bool
43248 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
43249 {
43250 unsigned char perm2[MAX_VECT_LEN];
43251 unsigned i;
43252 bool ok;
43253
43254 if (d->vmode != V8HImode || !d->one_operand_p)
43255 return false;
43256
43257 /* The two permutations only operate in 64-bit lanes. */
43258 for (i = 0; i < 4; ++i)
43259 if (d->perm[i] >= 4)
43260 return false;
43261 for (i = 4; i < 8; ++i)
43262 if (d->perm[i] < 4)
43263 return false;
43264
43265 if (d->testing_p)
43266 return true;
43267
43268 /* Emit the pshuflw. */
43269 memcpy (perm2, d->perm, 4);
43270 for (i = 4; i < 8; ++i)
43271 perm2[i] = i;
43272 ok = expand_vselect (d->target, d->op0, perm2, 8, d->testing_p);
43273 gcc_assert (ok);
43274
43275 /* Emit the pshufhw. */
43276 memcpy (perm2 + 4, d->perm + 4, 4);
43277 for (i = 0; i < 4; ++i)
43278 perm2[i] = i;
43279 ok = expand_vselect (d->target, d->target, perm2, 8, d->testing_p);
43280 gcc_assert (ok);
43281
43282 return true;
43283 }
43284
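/* Worked example (a sketch) for the pshuflw + pshufhw path above: the
   V8HImode permutation {2, 0, 3, 1, 7, 5, 6, 4} qualifies because its
   first four indices are all < 4 and its last four are all >= 4; it is
   emitted as pshuflw {2,0,3,1, 4,5,6,7} followed by
   pshufhw {0,1,2,3, 7,5,6,4}.  */
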
43285 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
43286 the permutation using the SSSE3 palignr instruction. This succeeds
43287 when all of the elements in PERM fit within one vector and we merely
43288 need to shift them down so that a single vector permutation has a
43289 chance to succeed. */
43290
43291 static bool
43292 expand_vec_perm_palignr (struct expand_vec_perm_d *d)
43293 {
43294 unsigned i, nelt = d->nelt;
43295 unsigned min, max;
43296 bool in_order, ok;
43297 rtx shift, target;
43298 struct expand_vec_perm_d dcopy;
43299
43300 /* Even with AVX, palignr only operates on 128-bit vectors. */
43301 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
43302 return false;
43303
43304 min = nelt, max = 0;
43305 for (i = 0; i < nelt; ++i)
43306 {
43307 unsigned e = d->perm[i];
43308 if (e < min)
43309 min = e;
43310 if (e > max)
43311 max = e;
43312 }
43313 if (min == 0 || max - min >= nelt)
43314 return false;
43315
43316 /* Given that we have SSSE3, we know we'll be able to implement the
43317 single operand permutation after the palignr with pshufb. */
43318 if (d->testing_p)
43319 return true;
43320
43321 dcopy = *d;
43322 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
43323 target = gen_reg_rtx (TImode);
43324 emit_insn (gen_ssse3_palignrti (target, gen_lowpart (TImode, d->op1),
43325 gen_lowpart (TImode, d->op0), shift));
43326
43327 dcopy.op0 = dcopy.op1 = gen_lowpart (d->vmode, target);
43328 dcopy.one_operand_p = true;
43329
43330 in_order = true;
43331 for (i = 0; i < nelt; ++i)
43332 {
43333 unsigned e = dcopy.perm[i] - min;
43334 if (e != i)
43335 in_order = false;
43336 dcopy.perm[i] = e;
43337 }
43338
43339 /* Test for the degenerate case where the alignment by itself
43340 produces the desired permutation. */
43341 if (in_order)
43342 {
43343 emit_move_insn (d->target, dcopy.op0);
43344 return true;
43345 }
43346
43347 ok = expand_vec_perm_1 (&dcopy);
43348 gcc_assert (ok);
43349
43350 return ok;
43351 }
43352
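/* Worked example (a sketch) for the palignr path above: for the
   two-operand V8HImode permutation {3, 4, 5, 6, 7, 8, 9, 10}, min is 3
   and max - min is 7 < nelt, so a palignr by 3 elements lines
   everything up and the remaining permutation is the identity (the
   in_order case); no follow-up pshufb is needed.  */
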
43353 /* A subroutine of ix86_expand_vec_perm_const_1. Try to simplify
43354 the permutation using the SSE4_1 pblendv instruction. Potentially
43355 reduces the permutation from 2 pshufb (plus an or) to 1 pshufb and 1 pblendv. */
43356
43357 static bool
43358 expand_vec_perm_pblendv (struct expand_vec_perm_d *d)
43359 {
43360 unsigned i, which, nelt = d->nelt;
43361 struct expand_vec_perm_d dcopy, dcopy1;
43362 enum machine_mode vmode = d->vmode;
43363 bool ok;
43364
43365 /* Use the same checks as in expand_vec_perm_blend, but skipping
43366 AVX and AVX2 as they require more than 2 instructions. */
43367 if (d->one_operand_p)
43368 return false;
43369 if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
43370 ;
43371 else
43372 return false;
43373
43374 /* Figure out which permutation elements do not stay in their
43375 respective lanes. */
43376 for (i = 0, which = 0; i < nelt; ++i)
43377 {
43378 unsigned e = d->perm[i];
43379 if (e != i)
43380 which |= (e < nelt ? 1 : 2);
43381 }
43382 /* We can pblend the elements that do not stay in their
43383 respective lanes only when they all come from the same half
43384 of the permutation.
43385 {0 1 8 3 4 5 9 7} is ok: 8 and 9 are not in their respective
43386 lanes, but both are >= 8.
43387 {0 1 8 3 4 5 2 7} is not ok: 2 and 8 are not in their
43388 respective lanes, and 8 >= 8 but 2 is not. */
43389 if (which != 1 && which != 2)
43390 return false;
43391 if (d->testing_p)
43392 return true;
43393
43394 /* First apply a one-operand permutation to the elements that do
43395 not stay in their respective lanes. */
43396 dcopy = *d;
43397 if (which == 2)
43398 dcopy.op0 = dcopy.op1 = d->op1;
43399 else
43400 dcopy.op0 = dcopy.op1 = d->op0;
43401 dcopy.one_operand_p = true;
43402
43403 for (i = 0; i < nelt; ++i)
43404 dcopy.perm[i] = d->perm[i] & (nelt - 1);
43405
43406 ok = expand_vec_perm_1 (&dcopy);
43407 gcc_assert (ok);
43408
43409 /* Next we put permuted elements into their positions. */
43410 dcopy1 = *d;
43411 if (which == 2)
43412 dcopy1.op1 = dcopy.target;
43413 else
43414 dcopy1.op0 = dcopy.target;
43415
43416 for (i = 0; i < nelt; ++i)
43417 dcopy1.perm[i] = ((d->perm[i] >= nelt) ? (nelt + i) : i);
43418
43419 ok = expand_vec_perm_blend (&dcopy1);
43420 gcc_assert (ok);
43421
43422 return true;
43423 }
43424
43425 static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);
43426
43427 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
43428 a two vector permutation into a single vector permutation by using
43429 an interleave operation to merge the vectors. */
43430
43431 static bool
43432 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
43433 {
43434 struct expand_vec_perm_d dremap, dfinal;
43435 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
43436 unsigned HOST_WIDE_INT contents;
43437 unsigned char remap[2 * MAX_VECT_LEN];
43438 rtx_insn *seq;
43439 bool ok, same_halves = false;
43440
43441 if (GET_MODE_SIZE (d->vmode) == 16)
43442 {
43443 if (d->one_operand_p)
43444 return false;
43445 }
43446 else if (GET_MODE_SIZE (d->vmode) == 32)
43447 {
43448 if (!TARGET_AVX)
43449 return false;
43450 /* For 32-byte modes allow even d->one_operand_p.
43451 The lack of cross-lane shuffling in some instructions
43452 might prevent a single insn shuffle. */
43453 dfinal = *d;
43454 dfinal.testing_p = true;
43455 /* If expand_vec_perm_interleave3 can expand this into
43456 a 3-insn sequence, give up and let it be expanded that
43457 way. While that is one insn longer, it doesn't need a
43458 memory operand, and in the common case where both the
43459 interleave-low and interleave-high permutations with the
43460 same operands are adjacent, the pair needs only 4 insns
43461 after CSE. */
43462 if (expand_vec_perm_interleave3 (&dfinal))
43463 return false;
43464 }
43465 else
43466 return false;
43467
43468 /* Examine from whence the elements come. */
43469 contents = 0;
43470 for (i = 0; i < nelt; ++i)
43471 contents |= ((unsigned HOST_WIDE_INT) 1) << d->perm[i];
43472
43473 memset (remap, 0xff, sizeof (remap));
43474 dremap = *d;
43475
43476 if (GET_MODE_SIZE (d->vmode) == 16)
43477 {
43478 unsigned HOST_WIDE_INT h1, h2, h3, h4;
43479
43480 /* Split the two input vectors into 4 halves. */
43481 h1 = (((unsigned HOST_WIDE_INT) 1) << nelt2) - 1;
43482 h2 = h1 << nelt2;
43483 h3 = h2 << nelt2;
43484 h4 = h3 << nelt2;
43485
43486       /* If the elements come from the low halves, use interleave low; similarly
43487 	 for interleave high.  If the elements come from mis-matched halves, we
43488 	 can use shufps for V4SF/V4SI or do a DImode shuffle.  */
43489 if ((contents & (h1 | h3)) == contents)
43490 {
43491 /* punpckl* */
43492 for (i = 0; i < nelt2; ++i)
43493 {
43494 remap[i] = i * 2;
43495 remap[i + nelt] = i * 2 + 1;
43496 dremap.perm[i * 2] = i;
43497 dremap.perm[i * 2 + 1] = i + nelt;
43498 }
43499 if (!TARGET_SSE2 && d->vmode == V4SImode)
43500 dremap.vmode = V4SFmode;
43501 }
43502 else if ((contents & (h2 | h4)) == contents)
43503 {
43504 /* punpckh* */
43505 for (i = 0; i < nelt2; ++i)
43506 {
43507 remap[i + nelt2] = i * 2;
43508 remap[i + nelt + nelt2] = i * 2 + 1;
43509 dremap.perm[i * 2] = i + nelt2;
43510 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
43511 }
43512 if (!TARGET_SSE2 && d->vmode == V4SImode)
43513 dremap.vmode = V4SFmode;
43514 }
43515 else if ((contents & (h1 | h4)) == contents)
43516 {
43517 /* shufps */
43518 for (i = 0; i < nelt2; ++i)
43519 {
43520 remap[i] = i;
43521 remap[i + nelt + nelt2] = i + nelt2;
43522 dremap.perm[i] = i;
43523 dremap.perm[i + nelt2] = i + nelt + nelt2;
43524 }
43525 if (nelt != 4)
43526 {
43527 /* shufpd */
43528 dremap.vmode = V2DImode;
43529 dremap.nelt = 2;
43530 dremap.perm[0] = 0;
43531 dremap.perm[1] = 3;
43532 }
43533 }
43534 else if ((contents & (h2 | h3)) == contents)
43535 {
43536 /* shufps */
43537 for (i = 0; i < nelt2; ++i)
43538 {
43539 remap[i + nelt2] = i;
43540 remap[i + nelt] = i + nelt2;
43541 dremap.perm[i] = i + nelt2;
43542 dremap.perm[i + nelt2] = i + nelt;
43543 }
43544 if (nelt != 4)
43545 {
43546 /* shufpd */
43547 dremap.vmode = V2DImode;
43548 dremap.nelt = 2;
43549 dremap.perm[0] = 1;
43550 dremap.perm[1] = 2;
43551 }
43552 }
43553 else
43554 return false;
43555 }
43556 else
43557 {
43558 unsigned int nelt4 = nelt / 4, nzcnt = 0;
43559 unsigned HOST_WIDE_INT q[8];
43560 unsigned int nonzero_halves[4];
43561
43562 /* Split the two input vectors into 8 quarters. */
43563 q[0] = (((unsigned HOST_WIDE_INT) 1) << nelt4) - 1;
43564 for (i = 1; i < 8; ++i)
43565 q[i] = q[0] << (nelt4 * i);
43566 for (i = 0; i < 4; ++i)
43567 if (((q[2 * i] | q[2 * i + 1]) & contents) != 0)
43568 {
43569 nonzero_halves[nzcnt] = i;
43570 ++nzcnt;
43571 }
43572
43573 if (nzcnt == 1)
43574 {
43575 gcc_assert (d->one_operand_p);
43576 nonzero_halves[1] = nonzero_halves[0];
43577 same_halves = true;
43578 }
43579 else if (d->one_operand_p)
43580 {
43581 gcc_assert (nonzero_halves[0] == 0);
43582 gcc_assert (nonzero_halves[1] == 1);
43583 }
43584
43585 if (nzcnt <= 2)
43586 {
43587 if (d->perm[0] / nelt2 == nonzero_halves[1])
43588 {
43589 	      /* Attempt to increase the likelihood that the dfinal
43590 		 shuffle will be intra-lane.  */
43591 char tmph = nonzero_halves[0];
43592 nonzero_halves[0] = nonzero_halves[1];
43593 nonzero_halves[1] = tmph;
43594 }
43595
43596 /* vperm2f128 or vperm2i128. */
43597 for (i = 0; i < nelt2; ++i)
43598 {
43599 remap[i + nonzero_halves[1] * nelt2] = i + nelt2;
43600 remap[i + nonzero_halves[0] * nelt2] = i;
43601 dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2;
43602 dremap.perm[i] = i + nonzero_halves[0] * nelt2;
43603 }
43604
43605 if (d->vmode != V8SFmode
43606 && d->vmode != V4DFmode
43607 && d->vmode != V8SImode)
43608 {
43609 dremap.vmode = V8SImode;
43610 dremap.nelt = 8;
43611 for (i = 0; i < 4; ++i)
43612 {
43613 dremap.perm[i] = i + nonzero_halves[0] * 4;
43614 dremap.perm[i + 4] = i + nonzero_halves[1] * 4;
43615 }
43616 }
43617 }
43618 else if (d->one_operand_p)
43619 return false;
43620 else if (TARGET_AVX2
43621 && (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
43622 {
43623 /* vpunpckl* */
43624 for (i = 0; i < nelt4; ++i)
43625 {
43626 remap[i] = i * 2;
43627 remap[i + nelt] = i * 2 + 1;
43628 remap[i + nelt2] = i * 2 + nelt2;
43629 remap[i + nelt + nelt2] = i * 2 + nelt2 + 1;
43630 dremap.perm[i * 2] = i;
43631 dremap.perm[i * 2 + 1] = i + nelt;
43632 dremap.perm[i * 2 + nelt2] = i + nelt2;
43633 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2;
43634 }
43635 }
43636 else if (TARGET_AVX2
43637 && (contents & (q[1] | q[3] | q[5] | q[7])) == contents)
43638 {
43639 /* vpunpckh* */
43640 for (i = 0; i < nelt4; ++i)
43641 {
43642 remap[i + nelt4] = i * 2;
43643 remap[i + nelt + nelt4] = i * 2 + 1;
43644 remap[i + nelt2 + nelt4] = i * 2 + nelt2;
43645 remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1;
43646 dremap.perm[i * 2] = i + nelt4;
43647 dremap.perm[i * 2 + 1] = i + nelt + nelt4;
43648 dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4;
43649 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4;
43650 }
43651 }
43652 else
43653 return false;
43654 }
43655
43656 /* Use the remapping array set up above to move the elements from their
43657 swizzled locations into their final destinations. */
43658 dfinal = *d;
43659 for (i = 0; i < nelt; ++i)
43660 {
43661 unsigned e = remap[d->perm[i]];
43662 gcc_assert (e < nelt);
43663 /* If same_halves is true, both halves of the remapped vector are the
43664 same. Avoid cross-lane accesses if possible. */
43665 if (same_halves && i >= nelt2)
43666 {
43667 gcc_assert (e < nelt2);
43668 dfinal.perm[i] = e + nelt2;
43669 }
43670 else
43671 dfinal.perm[i] = e;
43672 }
43673 if (!d->testing_p)
43674 {
43675 dremap.target = gen_reg_rtx (dremap.vmode);
43676 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
43677 }
43678 dfinal.op1 = dfinal.op0;
43679 dfinal.one_operand_p = true;
43680
43681 /* Test if the final remap can be done with a single insn. For V4SFmode or
43682 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
43683 start_sequence ();
43684 ok = expand_vec_perm_1 (&dfinal);
43685 seq = get_insns ();
43686 end_sequence ();
43687
43688 if (!ok)
43689 return false;
43690
43691 if (d->testing_p)
43692 return true;
43693
43694 if (dremap.vmode != dfinal.vmode)
43695 {
43696 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
43697 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
43698 }
43699
43700 ok = expand_vec_perm_1 (&dremap);
43701 gcc_assert (ok);
43702
43703 emit_insn (seq);
43704 return true;
43705 }
43706
43707 /* A subroutine of ix86_expand_vec_perm_const_1.  Try to simplify
43708 a single vector cross-lane permutation into vpermq followed
43709 by any of the single insn permutations. */
43710
43711 static bool
43712 expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
43713 {
43714 struct expand_vec_perm_d dremap, dfinal;
43715 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4;
43716 unsigned contents[2];
43717 bool ok;
43718
43719 if (!(TARGET_AVX2
43720 && (d->vmode == V32QImode || d->vmode == V16HImode)
43721 && d->one_operand_p))
43722 return false;
43723
43724 contents[0] = 0;
43725 contents[1] = 0;
43726 for (i = 0; i < nelt2; ++i)
43727 {
43728 contents[0] |= 1u << (d->perm[i] / nelt4);
43729 contents[1] |= 1u << (d->perm[i + nelt2] / nelt4);
43730 }
43731
43732 for (i = 0; i < 2; ++i)
43733 {
43734 unsigned int cnt = 0;
43735 for (j = 0; j < 4; ++j)
43736 if ((contents[i] & (1u << j)) != 0 && ++cnt > 2)
43737 return false;
43738 }
43739
43740 if (d->testing_p)
43741 return true;
43742
43743 dremap = *d;
43744 dremap.vmode = V4DImode;
43745 dremap.nelt = 4;
43746 dremap.target = gen_reg_rtx (V4DImode);
43747 dremap.op0 = gen_lowpart (V4DImode, d->op0);
43748 dremap.op1 = dremap.op0;
43749 dremap.one_operand_p = true;
43750 for (i = 0; i < 2; ++i)
43751 {
43752 unsigned int cnt = 0;
43753 for (j = 0; j < 4; ++j)
43754 if ((contents[i] & (1u << j)) != 0)
43755 dremap.perm[2 * i + cnt++] = j;
43756 for (; cnt < 2; ++cnt)
43757 dremap.perm[2 * i + cnt] = 0;
43758 }
43759
43760 dfinal = *d;
43761 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
43762 dfinal.op1 = dfinal.op0;
43763 dfinal.one_operand_p = true;
43764 for (i = 0, j = 0; i < nelt; ++i)
43765 {
43766 if (i == nelt2)
43767 j = 2;
43768 dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0);
43769 if ((d->perm[i] / nelt4) == dremap.perm[j])
43770 ;
43771 else if ((d->perm[i] / nelt4) == dremap.perm[j + 1])
43772 dfinal.perm[i] |= nelt4;
43773 else
43774 gcc_unreachable ();
43775 }
43776
43777 ok = expand_vec_perm_1 (&dremap);
43778 gcc_assert (ok);
43779
43780 ok = expand_vec_perm_1 (&dfinal);
43781 gcc_assert (ok);
43782
43783 return true;
43784 }
43785
43786 /* A subroutine of ix86_expand_vec_perm_const_1.  Try to expand
43787 a vector permutation using two instructions, vperm2f128 resp.
43788 vperm2i128 followed by any single in-lane permutation. */
43789
43790 static bool
43791 expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d)
43792 {
43793 struct expand_vec_perm_d dfirst, dsecond;
43794 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, perm;
43795 bool ok;
43796
43797 if (!TARGET_AVX
43798 || GET_MODE_SIZE (d->vmode) != 32
43799 || (d->vmode != V8SFmode && d->vmode != V4DFmode && !TARGET_AVX2))
43800 return false;
43801
43802 dsecond = *d;
43803 dsecond.one_operand_p = false;
43804 dsecond.testing_p = true;
43805
43806 /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
43807 immediate. For perm < 16 the second permutation uses
43808 d->op0 as first operand, for perm >= 16 it uses d->op1
43809 as first operand. The second operand is the result of
43810 vperm2[fi]128. */
43811 for (perm = 0; perm < 32; perm++)
43812 {
43813 /* Ignore permutations which do not move anything cross-lane. */
43814 if (perm < 16)
43815 {
43816 /* The second shuffle for e.g. V4DFmode has
43817 0123 and ABCD operands.
43818 Ignore AB23, as 23 is already in the second lane
43819 of the first operand. */
43820 if ((perm & 0xc) == (1 << 2)) continue;
43821 /* And 01CD, as 01 is in the first lane of the first
43822 operand. */
43823 if ((perm & 3) == 0) continue;
43824 /* And 4567, as then the vperm2[fi]128 doesn't change
43825 anything on the original 4567 second operand. */
43826 if ((perm & 0xf) == ((3 << 2) | 2)) continue;
43827 }
43828 else
43829 {
43830 /* The second shuffle for e.g. V4DFmode has
43831 4567 and ABCD operands.
43832 Ignore AB67, as 67 is already in the second lane
43833 of the first operand. */
43834 if ((perm & 0xc) == (3 << 2)) continue;
43835 /* And 45CD, as 45 is in the first lane of the first
43836 operand. */
43837 if ((perm & 3) == 2) continue;
43838 /* And 0123, as then the vperm2[fi]128 doesn't change
43839 anything on the original 0123 first operand. */
43840 if ((perm & 0xf) == (1 << 2)) continue;
43841 }
43842
43843 for (i = 0; i < nelt; i++)
43844 {
43845 j = d->perm[i] / nelt2;
43846 if (j == ((perm >> (2 * (i >= nelt2))) & 3))
43847 dsecond.perm[i] = nelt + (i & nelt2) + (d->perm[i] & (nelt2 - 1));
43848 else if (j == (unsigned) (i >= nelt2) + 2 * (perm >= 16))
43849 dsecond.perm[i] = d->perm[i] & (nelt - 1);
43850 else
43851 break;
43852 }
43853
43854 if (i == nelt)
43855 {
43856 start_sequence ();
43857 ok = expand_vec_perm_1 (&dsecond);
43858 end_sequence ();
43859 }
43860 else
43861 ok = false;
43862
43863 if (ok)
43864 {
43865 if (d->testing_p)
43866 return true;
43867
43868 /* Found a usable second shuffle. dfirst will be
43869 vperm2f128 on d->op0 and d->op1. */
43870 dsecond.testing_p = false;
43871 dfirst = *d;
43872 dfirst.target = gen_reg_rtx (d->vmode);
43873 for (i = 0; i < nelt; i++)
43874 dfirst.perm[i] = (i & (nelt2 - 1))
43875 + ((perm >> (2 * (i >= nelt2))) & 3) * nelt2;
43876
43877 canonicalize_perm (&dfirst);
43878 ok = expand_vec_perm_1 (&dfirst);
43879 gcc_assert (ok);
43880
43881 /* And dsecond is some single insn shuffle, taking
43882 d->op0 and result of vperm2f128 (if perm < 16) or
43883 d->op1 and result of vperm2f128 (otherwise). */
43884 if (perm >= 16)
43885 dsecond.op0 = dsecond.op1;
43886 dsecond.op1 = dfirst.target;
43887
43888 ok = expand_vec_perm_1 (&dsecond);
43889 gcc_assert (ok);
43890
43891 return true;
43892 }
43893
43894 /* For one operand, the only useful vperm2f128 permutation is 0x10. */
43895 if (d->one_operand_p)
43896 return false;
43897 }
43898
43899 return false;
43900 }
43901
43902 /* A subroutine of ix86_expand_vec_perm_const_1.  Try to simplify
43903 a two vector permutation using 2 intra-lane interleave insns
43904 and cross-lane shuffle for 32-byte vectors. */
43905
43906 static bool
43907 expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
43908 {
43909 unsigned i, nelt;
43910 rtx (*gen) (rtx, rtx, rtx);
43911
43912 if (d->one_operand_p)
43913 return false;
43914 if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
43915 ;
43916 else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
43917 ;
43918 else
43919 return false;
43920
43921 nelt = d->nelt;
43922 if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
43923 return false;
43924 for (i = 0; i < nelt; i += 2)
43925 if (d->perm[i] != d->perm[0] + i / 2
43926 || d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
43927 return false;
43928
43929 if (d->testing_p)
43930 return true;
43931
43932 switch (d->vmode)
43933 {
43934 case V32QImode:
43935 if (d->perm[0])
43936 gen = gen_vec_interleave_highv32qi;
43937 else
43938 gen = gen_vec_interleave_lowv32qi;
43939 break;
43940 case V16HImode:
43941 if (d->perm[0])
43942 gen = gen_vec_interleave_highv16hi;
43943 else
43944 gen = gen_vec_interleave_lowv16hi;
43945 break;
43946 case V8SImode:
43947 if (d->perm[0])
43948 gen = gen_vec_interleave_highv8si;
43949 else
43950 gen = gen_vec_interleave_lowv8si;
43951 break;
43952 case V4DImode:
43953 if (d->perm[0])
43954 gen = gen_vec_interleave_highv4di;
43955 else
43956 gen = gen_vec_interleave_lowv4di;
43957 break;
43958 case V8SFmode:
43959 if (d->perm[0])
43960 gen = gen_vec_interleave_highv8sf;
43961 else
43962 gen = gen_vec_interleave_lowv8sf;
43963 break;
43964 case V4DFmode:
43965 if (d->perm[0])
43966 gen = gen_vec_interleave_highv4df;
43967 else
43968 gen = gen_vec_interleave_lowv4df;
43969 break;
43970 default:
43971 gcc_unreachable ();
43972 }
43973
43974 emit_insn (gen (d->target, d->op0, d->op1));
43975 return true;
43976 }
43977
43978 /* A subroutine of ix86_expand_vec_perm_const_1.  Try to implement
43979 a single vector permutation using a single intra-lane vector
43980 permutation, vperm2f128 swapping the lanes and vblend* insn blending
43981 the non-swapped and swapped vectors together. */
43982
43983 static bool
43984 expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)
43985 {
43986 struct expand_vec_perm_d dfirst, dsecond;
43987 unsigned i, j, msk, nelt = d->nelt, nelt2 = nelt / 2;
43988 rtx_insn *seq;
43989 bool ok;
43990 rtx (*blend) (rtx, rtx, rtx, rtx) = NULL;
43991
43992 if (!TARGET_AVX
43993 || TARGET_AVX2
43994 || (d->vmode != V8SFmode && d->vmode != V4DFmode)
43995 || !d->one_operand_p)
43996 return false;
43997
43998 dfirst = *d;
43999 for (i = 0; i < nelt; i++)
44000 dfirst.perm[i] = 0xff;
44001 for (i = 0, msk = 0; i < nelt; i++)
44002 {
44003 j = (d->perm[i] & nelt2) ? i | nelt2 : i & ~nelt2;
44004 if (dfirst.perm[j] != 0xff && dfirst.perm[j] != d->perm[i])
44005 return false;
44006 dfirst.perm[j] = d->perm[i];
44007 if (j != i)
44008 msk |= (1 << i);
44009 }
44010 for (i = 0; i < nelt; i++)
44011 if (dfirst.perm[i] == 0xff)
44012 dfirst.perm[i] = i;
44013
44014 if (!d->testing_p)
44015 dfirst.target = gen_reg_rtx (dfirst.vmode);
44016
44017 start_sequence ();
44018 ok = expand_vec_perm_1 (&dfirst);
44019 seq = get_insns ();
44020 end_sequence ();
44021
44022 if (!ok)
44023 return false;
44024
44025 if (d->testing_p)
44026 return true;
44027
44028 emit_insn (seq);
44029
44030 dsecond = *d;
44031 dsecond.op0 = dfirst.target;
44032 dsecond.op1 = dfirst.target;
44033 dsecond.one_operand_p = true;
44034 dsecond.target = gen_reg_rtx (dsecond.vmode);
44035 for (i = 0; i < nelt; i++)
44036 dsecond.perm[i] = i ^ nelt2;
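  /* DSECOND is simply DFIRST's result with its two 128-bit lanes
     swapped; the vblend* below then picks, per the bits of MSK,
     whichever of the two copies has the required element in place.  */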
44037
44038 ok = expand_vec_perm_1 (&dsecond);
44039 gcc_assert (ok);
44040
44041 blend = d->vmode == V8SFmode ? gen_avx_blendps256 : gen_avx_blendpd256;
44042 emit_insn (blend (d->target, dfirst.target, dsecond.target, GEN_INT (msk)));
44043 return true;
44044 }
44045
44046 /* A subroutine of ix86_expand_vec_perm_const_1.  Implement a V4DF
44047 permutation using two vperm2f128, followed by a vshufpd insn blending
44048 the two vectors together. */
44049
44050 static bool
44051 expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d *d)
44052 {
44053 struct expand_vec_perm_d dfirst, dsecond, dthird;
44054 bool ok;
44055
44056 if (!TARGET_AVX || (d->vmode != V4DFmode))
44057 return false;
44058
44059 if (d->testing_p)
44060 return true;
44061
44062 dfirst = *d;
44063 dsecond = *d;
44064 dthird = *d;
44065
44066 dfirst.perm[0] = (d->perm[0] & ~1);
44067 dfirst.perm[1] = (d->perm[0] & ~1) + 1;
44068 dfirst.perm[2] = (d->perm[2] & ~1);
44069 dfirst.perm[3] = (d->perm[2] & ~1) + 1;
44070 dsecond.perm[0] = (d->perm[1] & ~1);
44071 dsecond.perm[1] = (d->perm[1] & ~1) + 1;
44072 dsecond.perm[2] = (d->perm[3] & ~1);
44073 dsecond.perm[3] = (d->perm[3] & ~1) + 1;
44074 dthird.perm[0] = (d->perm[0] % 2);
44075 dthird.perm[1] = (d->perm[1] % 2) + 4;
44076 dthird.perm[2] = (d->perm[2] % 2) + 2;
44077 dthird.perm[3] = (d->perm[3] % 2) + 6;
44078
44079 dfirst.target = gen_reg_rtx (dfirst.vmode);
44080 dsecond.target = gen_reg_rtx (dsecond.vmode);
44081 dthird.op0 = dfirst.target;
44082 dthird.op1 = dsecond.target;
44083 dthird.one_operand_p = false;
44084
44085 canonicalize_perm (&dfirst);
44086 canonicalize_perm (&dsecond);
44087
44088 ok = expand_vec_perm_1 (&dfirst)
44089 && expand_vec_perm_1 (&dsecond)
44090 && expand_vec_perm_1 (&dthird);
44091
44092 gcc_assert (ok);
44093
44094 return true;
44095 }
44096
44097 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
44098 permutation with two pshufb insns and an ior. We should have already
44099 failed all two instruction sequences. */
44100
44101 static bool
44102 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
44103 {
44104 rtx rperm[2][16], vperm, l, h, op, m128;
44105 unsigned int i, nelt, eltsz;
44106
44107 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
44108 return false;
44109 gcc_assert (!d->one_operand_p);
44110
44111 if (d->testing_p)
44112 return true;
44113
44114 nelt = d->nelt;
44115 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
44116
44117 /* Generate two permutation masks. If the required element is within
44118 the given vector it is shuffled into the proper lane. If the required
44119 element is in the other vector, force a zero into the lane by setting
44120 bit 7 in the permutation mask. */
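  /* For instance, a V8HImode extract-even { 0 2 4 6 8 10 12 14 } yields
     a first byte mask of { 0 1 4 5 8 9 12 13 } followed by eight -128s,
     and a second mask of eight -128s followed by { 0 1 4 5 8 9 12 13 }.  */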
44121 m128 = GEN_INT (-128);
44122 for (i = 0; i < nelt; ++i)
44123 {
44124 unsigned j, e = d->perm[i];
44125 unsigned which = (e >= nelt);
44126 if (e >= nelt)
44127 e -= nelt;
44128
44129 for (j = 0; j < eltsz; ++j)
44130 {
44131 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
44132 rperm[1-which][i*eltsz + j] = m128;
44133 }
44134 }
44135
44136 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
44137 vperm = force_reg (V16QImode, vperm);
44138
44139 l = gen_reg_rtx (V16QImode);
44140 op = gen_lowpart (V16QImode, d->op0);
44141 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
44142
44143 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
44144 vperm = force_reg (V16QImode, vperm);
44145
44146 h = gen_reg_rtx (V16QImode);
44147 op = gen_lowpart (V16QImode, d->op1);
44148 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
44149
44150 op = d->target;
44151 if (d->vmode != V16QImode)
44152 op = gen_reg_rtx (V16QImode);
44153 emit_insn (gen_iorv16qi3 (op, l, h));
44154 if (op != d->target)
44155 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
44156
44157 return true;
44158 }
44159
44160 /* Implement arbitrary permutation of one V32QImode or V16HImode operand
44161 with two vpshufb insns, vpermq and vpor. We should have already failed
44162 all two or three instruction sequences. */
44163
44164 static bool
44165 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
44166 {
44167 rtx rperm[2][32], vperm, l, h, hp, op, m128;
44168 unsigned int i, nelt, eltsz;
44169
44170 if (!TARGET_AVX2
44171 || !d->one_operand_p
44172 || (d->vmode != V32QImode && d->vmode != V16HImode))
44173 return false;
44174
44175 if (d->testing_p)
44176 return true;
44177
44178 nelt = d->nelt;
44179 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
44180
44181   /* Generate two permutation masks.  If the required element is within
44182      the same lane, it is shuffled in.  If the required element is from the
44183      other lane, force a zero by setting bit 7 in the permutation mask.
44184      The other mask has a non-negative element wherever an element is
44185      requested from the other lane, but that element is also moved to the
44186      other lane, so that the result of vpshufb can have its two V2TImode
44187      halves swapped.  */
44188 m128 = GEN_INT (-128);
44189 for (i = 0; i < nelt; ++i)
44190 {
44191 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
44192 unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
44193
44194 for (j = 0; j < eltsz; ++j)
44195 {
44196 rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j);
44197 rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128;
44198 }
44199 }
44200
44201 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
44202 vperm = force_reg (V32QImode, vperm);
44203
44204 h = gen_reg_rtx (V32QImode);
44205 op = gen_lowpart (V32QImode, d->op0);
44206 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
44207
44208   /* Swap the 128-bit lanes of h into hp.  */
44209 hp = gen_reg_rtx (V4DImode);
44210 op = gen_lowpart (V4DImode, h);
44211 emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx,
44212 const1_rtx));
44213
44214 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
44215 vperm = force_reg (V32QImode, vperm);
44216
44217 l = gen_reg_rtx (V32QImode);
44218 op = gen_lowpart (V32QImode, d->op0);
44219 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
44220
44221 op = d->target;
44222 if (d->vmode != V32QImode)
44223 op = gen_reg_rtx (V32QImode);
44224 emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));
44225 if (op != d->target)
44226 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
44227
44228 return true;
44229 }
44230
44231 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
44232    and extract-odd permutations of two V32QImode or V16HImode operands
44233 with two vpshufb insns, vpor and vpermq. We should have already
44234 failed all two or three instruction sequences. */
44235
44236 static bool
44237 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
44238 {
44239 rtx rperm[2][32], vperm, l, h, ior, op, m128;
44240 unsigned int i, nelt, eltsz;
44241
44242 if (!TARGET_AVX2
44243 || d->one_operand_p
44244 || (d->vmode != V32QImode && d->vmode != V16HImode))
44245 return false;
44246
44247 for (i = 0; i < d->nelt; ++i)
44248 if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2))
44249 return false;
44250
44251 if (d->testing_p)
44252 return true;
44253
44254 nelt = d->nelt;
44255 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
44256
44257 /* Generate two permutation masks. In the first permutation mask
44258 the first quarter will contain indexes for the first half
44259 of the op0, the second quarter will contain bit 7 set, third quarter
44260 will contain indexes for the second half of the op0 and the
44261 last quarter bit 7 set. In the second permutation mask
44262 the first quarter will contain bit 7 set, the second quarter
44263 indexes for the first half of the op1, the third quarter bit 7 set
44264 and last quarter indexes for the second half of the op1.
44265 I.e. the first mask e.g. for V32QImode extract even will be:
44266 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
44267 (all values masked with 0xf except for -128) and second mask
44268 for extract even will be
44269 -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */
44270 m128 = GEN_INT (-128);
44271 for (i = 0; i < nelt; ++i)
44272 {
44273 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
44274 unsigned which = d->perm[i] >= nelt;
44275 unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0;
44276
44277 for (j = 0; j < eltsz; ++j)
44278 {
44279 rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j);
44280 rperm[1 - which][(i * eltsz + j) ^ xorv] = m128;
44281 }
44282 }
44283
44284 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
44285 vperm = force_reg (V32QImode, vperm);
44286
44287 l = gen_reg_rtx (V32QImode);
44288 op = gen_lowpart (V32QImode, d->op0);
44289 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
44290
44291 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
44292 vperm = force_reg (V32QImode, vperm);
44293
44294 h = gen_reg_rtx (V32QImode);
44295 op = gen_lowpart (V32QImode, d->op1);
44296 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
44297
44298 ior = gen_reg_rtx (V32QImode);
44299 emit_insn (gen_iorv32qi3 (ior, l, h));
44300
44301 /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation. */
44302 op = gen_reg_rtx (V4DImode);
44303 ior = gen_lowpart (V4DImode, ior);
44304 emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
44305 const1_rtx, GEN_INT (3)));
44306 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
44307
44308 return true;
44309 }
44310
44311 /* A subroutine of ix86_expand_vec_perm_const_1.  Implement extract-even
44312 and extract-odd permutations. */
44313
44314 static bool
44315 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
44316 {
44317 rtx t1, t2, t3, t4, t5;
44318
44319 switch (d->vmode)
44320 {
44321 case V4DFmode:
44322 if (d->testing_p)
44323 break;
44324 t1 = gen_reg_rtx (V4DFmode);
44325 t2 = gen_reg_rtx (V4DFmode);
44326
44327 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
44328 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
44329 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
44330
44331 /* Now an unpck[lh]pd will produce the result required. */
44332 if (odd)
44333 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
44334 else
44335 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
44336 emit_insn (t3);
44337 break;
44338
44339 case V8SFmode:
44340 {
44341 int mask = odd ? 0xdd : 0x88;
44342
44343 if (d->testing_p)
44344 break;
44345 t1 = gen_reg_rtx (V8SFmode);
44346 t2 = gen_reg_rtx (V8SFmode);
44347 t3 = gen_reg_rtx (V8SFmode);
44348
44349 /* Shuffle within the 128-bit lanes to produce:
44350 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
44351 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
44352 GEN_INT (mask)));
44353
44354 /* Shuffle the lanes around to produce:
44355 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
44356 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
44357 GEN_INT (0x3)));
44358
44359 /* Shuffle within the 128-bit lanes to produce:
44360 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
44361 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
44362
44363 /* Shuffle within the 128-bit lanes to produce:
44364 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
44365 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
44366
44367 /* Shuffle the lanes around to produce:
44368 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
44369 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
44370 GEN_INT (0x20)));
44371 }
44372 break;
44373
44374 case V2DFmode:
44375 case V4SFmode:
44376 case V2DImode:
44377 case V4SImode:
44378 /* These are always directly implementable by expand_vec_perm_1. */
44379 gcc_unreachable ();
44380
44381 case V8HImode:
44382 if (TARGET_SSSE3 && !TARGET_SLOW_PSHUFB)
44383 return expand_vec_perm_pshufb2 (d);
44384 else
44385 {
44386 if (d->testing_p)
44387 break;
44388 /* We need 2*log2(N)-1 operations to achieve odd/even
44389 with interleave. */
44390 t1 = gen_reg_rtx (V8HImode);
44391 t2 = gen_reg_rtx (V8HImode);
44392 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
44393 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
44394 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
44395 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
44396 if (odd)
44397 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
44398 else
44399 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
44400 emit_insn (t3);
44401 }
44402 break;
44403
44404 case V16QImode:
44405 if (TARGET_SSSE3 && !TARGET_SLOW_PSHUFB)
44406 return expand_vec_perm_pshufb2 (d);
44407 else
44408 {
44409 if (d->testing_p)
44410 break;
44411 t1 = gen_reg_rtx (V16QImode);
44412 t2 = gen_reg_rtx (V16QImode);
44413 t3 = gen_reg_rtx (V16QImode);
44414 emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
44415 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
44416 emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
44417 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
44418 emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
44419 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
44420 if (odd)
44421 t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
44422 else
44423 t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
44424 emit_insn (t3);
44425 }
44426 break;
44427
44428 case V16HImode:
44429 case V32QImode:
44430 return expand_vec_perm_vpshufb2_vpermq_even_odd (d);
44431
44432 case V4DImode:
44433 if (!TARGET_AVX2)
44434 {
44435 struct expand_vec_perm_d d_copy = *d;
44436 d_copy.vmode = V4DFmode;
44437 if (d->testing_p)
44438 d_copy.target = gen_lowpart (V4DFmode, d->target);
44439 else
44440 d_copy.target = gen_reg_rtx (V4DFmode);
44441 d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
44442 d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
44443 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
44444 {
44445 if (!d->testing_p)
44446 emit_move_insn (d->target,
44447 gen_lowpart (V4DImode, d_copy.target));
44448 return true;
44449 }
44450 return false;
44451 }
44452
44453 if (d->testing_p)
44454 break;
44455
44456 t1 = gen_reg_rtx (V4DImode);
44457 t2 = gen_reg_rtx (V4DImode);
44458
44459 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
44460 emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20)));
44461 emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31)));
44462
44463 /* Now an vpunpck[lh]qdq will produce the result required. */
44464 if (odd)
44465 t3 = gen_avx2_interleave_highv4di (d->target, t1, t2);
44466 else
44467 t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2);
44468 emit_insn (t3);
44469 break;
44470
44471 case V8SImode:
44472 if (!TARGET_AVX2)
44473 {
44474 struct expand_vec_perm_d d_copy = *d;
44475 d_copy.vmode = V8SFmode;
44476 if (d->testing_p)
44477 d_copy.target = gen_lowpart (V8SFmode, d->target);
44478 else
44479 d_copy.target = gen_reg_rtx (V8SFmode);
44480 d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
44481 d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
44482 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
44483 {
44484 if (!d->testing_p)
44485 emit_move_insn (d->target,
44486 gen_lowpart (V8SImode, d_copy.target));
44487 return true;
44488 }
44489 return false;
44490 }
44491
44492 if (d->testing_p)
44493 break;
44494
44495 t1 = gen_reg_rtx (V8SImode);
44496 t2 = gen_reg_rtx (V8SImode);
44497 t3 = gen_reg_rtx (V4DImode);
44498 t4 = gen_reg_rtx (V4DImode);
44499 t5 = gen_reg_rtx (V4DImode);
44500
44501 /* Shuffle the lanes around into
44502 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. */
44503 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, d->op0),
44504 gen_lowpart (V4DImode, d->op1),
44505 GEN_INT (0x20)));
44506 emit_insn (gen_avx2_permv2ti (t4, gen_lowpart (V4DImode, d->op0),
44507 gen_lowpart (V4DImode, d->op1),
44508 GEN_INT (0x31)));
44509
44510 /* Swap the 2nd and 3rd position in each lane into
44511 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
44512 emit_insn (gen_avx2_pshufdv3 (t1, gen_lowpart (V8SImode, t3),
44513 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
44514 emit_insn (gen_avx2_pshufdv3 (t2, gen_lowpart (V8SImode, t4),
44515 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
44516
44517 /* Now an vpunpck[lh]qdq will produce
44518 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }. */
44519 if (odd)
44520 t3 = gen_avx2_interleave_highv4di (t5, gen_lowpart (V4DImode, t1),
44521 gen_lowpart (V4DImode, t2));
44522 else
44523 t3 = gen_avx2_interleave_lowv4di (t5, gen_lowpart (V4DImode, t1),
44524 gen_lowpart (V4DImode, t2));
44525 emit_insn (t3);
44526 emit_move_insn (d->target, gen_lowpart (V8SImode, t5));
44527 break;
44528
44529 default:
44530 gcc_unreachable ();
44531 }
44532
44533 return true;
44534 }
44535
44536 /* A subroutine of ix86_expand_vec_perm_const_1.  Pattern match
44537 extract-even and extract-odd permutations. */
44538
44539 static bool
44540 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
44541 {
44542 unsigned i, odd, nelt = d->nelt;
44543
44544 odd = d->perm[0];
44545 if (odd != 0 && odd != 1)
44546 return false;
44547
44548 for (i = 1; i < nelt; ++i)
44549 if (d->perm[i] != 2 * i + odd)
44550 return false;
44551
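  /* At this point d->perm is { ODD, ODD + 2, ..., ODD + 2 * NELT - 2 },
     i.e. a plain extract-even (ODD == 0) or extract-odd (ODD == 1).  */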
44552 return expand_vec_perm_even_odd_1 (d, odd);
44553 }
44554
44555 /* A subroutine of ix86_expand_vec_perm_const_1.  Implement broadcast
44556 permutations. We assume that expand_vec_perm_1 has already failed. */
44557
44558 static bool
44559 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
44560 {
44561 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
44562 enum machine_mode vmode = d->vmode;
44563 unsigned char perm2[4];
44564 rtx op0 = d->op0, dest;
44565 bool ok;
44566
44567 switch (vmode)
44568 {
44569 case V4DFmode:
44570 case V8SFmode:
44571 /* These are special-cased in sse.md so that we can optionally
44572 use the vbroadcast instruction. They expand to two insns
44573 if the input happens to be in a register. */
44574 gcc_unreachable ();
44575
44576 case V2DFmode:
44577 case V2DImode:
44578 case V4SFmode:
44579 case V4SImode:
44580 /* These are always implementable using standard shuffle patterns. */
44581 gcc_unreachable ();
44582
44583 case V8HImode:
44584 case V16QImode:
44585 /* These can be implemented via interleave. We save one insn by
44586 stopping once we have promoted to V4SImode and then use pshufd. */
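      /* For instance, broadcasting byte 5 of a V16QImode vector takes a
	 punpcklbw, then a punpckhwd, and finally a pshufd broadcasting
	 dword 1.  */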
44587 if (d->testing_p)
44588 return true;
44589 do
44590 {
44591 rtx dest;
44592 rtx (*gen) (rtx, rtx, rtx)
44593 = vmode == V16QImode ? gen_vec_interleave_lowv16qi
44594 : gen_vec_interleave_lowv8hi;
44595
44596 if (elt >= nelt2)
44597 {
44598 gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
44599 : gen_vec_interleave_highv8hi;
44600 elt -= nelt2;
44601 }
44602 nelt2 /= 2;
44603
44604 dest = gen_reg_rtx (vmode);
44605 emit_insn (gen (dest, op0, op0));
44606 vmode = get_mode_wider_vector (vmode);
44607 op0 = gen_lowpart (vmode, dest);
44608 }
44609 while (vmode != V4SImode);
44610
44611 memset (perm2, elt, 4);
44612 dest = gen_reg_rtx (V4SImode);
44613 ok = expand_vselect (dest, op0, perm2, 4, d->testing_p);
44614 gcc_assert (ok);
44615 if (!d->testing_p)
44616 emit_move_insn (d->target, gen_lowpart (d->vmode, dest));
44617 return true;
44618
44619 case V32QImode:
44620 case V16HImode:
44621 case V8SImode:
44622 case V4DImode:
44623 /* For AVX2 broadcasts of the first element vpbroadcast* or
44624 vpermq should be used by expand_vec_perm_1. */
44625 gcc_assert (!TARGET_AVX2 || d->perm[0]);
44626 return false;
44627
44628 default:
44629 gcc_unreachable ();
44630 }
44631 }
44632
44633 /* A subroutine of ix86_expand_vec_perm_const_1.  Pattern match
44634 broadcast permutations. */
44635
44636 static bool
44637 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
44638 {
44639 unsigned i, elt, nelt = d->nelt;
44640
44641 if (!d->one_operand_p)
44642 return false;
44643
44644 elt = d->perm[0];
44645 for (i = 1; i < nelt; ++i)
44646 if (d->perm[i] != elt)
44647 return false;
44648
44649 return expand_vec_perm_broadcast_1 (d);
44650 }
44651
44652 /* Implement arbitrary permutation of two V32QImode or V16HImode operands
44653 with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed
44654 all the shorter instruction sequences. */
44655
44656 static bool
44657 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
44658 {
44659 rtx rperm[4][32], vperm, l[2], h[2], op, m128;
44660 unsigned int i, nelt, eltsz;
44661 bool used[4];
44662
44663 if (!TARGET_AVX2
44664 || d->one_operand_p
44665 || (d->vmode != V32QImode && d->vmode != V16HImode))
44666 return false;
44667
44668 if (d->testing_p)
44669 return true;
44670
44671 nelt = d->nelt;
44672 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
44673
44674   /* Generate 4 permutation masks.  If the required element is within
44675      the same lane, it is shuffled in.  If the required element is from the
44676      other lane, force a zero by setting bit 7 in the permutation mask.
44677      The other mask has a non-negative element wherever an element is
44678      requested from the other lane, but that element is also moved to the
44679      other lane, so that the result of vpshufb can have its two V2TImode
44680      halves swapped.  */
44681 m128 = GEN_INT (-128);
44682 for (i = 0; i < 32; ++i)
44683 {
44684 rperm[0][i] = m128;
44685 rperm[1][i] = m128;
44686 rperm[2][i] = m128;
44687 rperm[3][i] = m128;
44688 }
44689 used[0] = false;
44690 used[1] = false;
44691 used[2] = false;
44692 used[3] = false;
44693 for (i = 0; i < nelt; ++i)
44694 {
44695 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
44696 unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
44697 unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0);
44698
44699 for (j = 0; j < eltsz; ++j)
44700 rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j);
44701 used[which] = true;
44702 }
44703
44704 for (i = 0; i < 2; ++i)
44705 {
44706 if (!used[2 * i + 1])
44707 {
44708 h[i] = NULL_RTX;
44709 continue;
44710 }
44711 vperm = gen_rtx_CONST_VECTOR (V32QImode,
44712 gen_rtvec_v (32, rperm[2 * i + 1]));
44713 vperm = force_reg (V32QImode, vperm);
44714 h[i] = gen_reg_rtx (V32QImode);
44715 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
44716 emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm));
44717 }
44718
44719   /* Swap the 128-bit lanes of h[X].  */
44720 for (i = 0; i < 2; ++i)
44721 {
44722 if (h[i] == NULL_RTX)
44723 continue;
44724 op = gen_reg_rtx (V4DImode);
44725 emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]),
44726 const2_rtx, GEN_INT (3), const0_rtx,
44727 const1_rtx));
44728 h[i] = gen_lowpart (V32QImode, op);
44729 }
44730
44731 for (i = 0; i < 2; ++i)
44732 {
44733 if (!used[2 * i])
44734 {
44735 l[i] = NULL_RTX;
44736 continue;
44737 }
44738 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i]));
44739 vperm = force_reg (V32QImode, vperm);
44740 l[i] = gen_reg_rtx (V32QImode);
44741 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
44742 emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm));
44743 }
44744
44745 for (i = 0; i < 2; ++i)
44746 {
44747 if (h[i] && l[i])
44748 {
44749 op = gen_reg_rtx (V32QImode);
44750 emit_insn (gen_iorv32qi3 (op, l[i], h[i]));
44751 l[i] = op;
44752 }
44753 else if (h[i])
44754 l[i] = h[i];
44755 }
44756
44757 gcc_assert (l[0] && l[1]);
44758 op = d->target;
44759 if (d->vmode != V32QImode)
44760 op = gen_reg_rtx (V32QImode);
44761 emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
44762 if (op != d->target)
44763 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
44764 return true;
44765 }
44766
44767 /* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
44768 With all of the interface bits taken care of, perform the expansion
44769 in D and return true on success. */
44770
44771 static bool
44772 ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
44773 {
44774 /* Try a single instruction expansion. */
44775 if (expand_vec_perm_1 (d))
44776 return true;
44777
44778 /* Try sequences of two instructions. */
44779
44780 if (expand_vec_perm_pshuflw_pshufhw (d))
44781 return true;
44782
44783 if (expand_vec_perm_palignr (d))
44784 return true;
44785
44786 if (expand_vec_perm_interleave2 (d))
44787 return true;
44788
44789 if (expand_vec_perm_broadcast (d))
44790 return true;
44791
44792 if (expand_vec_perm_vpermq_perm_1 (d))
44793 return true;
44794
44795 if (expand_vec_perm_vperm2f128 (d))
44796 return true;
44797
44798 if (expand_vec_perm_pblendv (d))
44799 return true;
44800
44801 /* Try sequences of three instructions. */
44802
44803 if (expand_vec_perm_2vperm2f128_vshuf (d))
44804 return true;
44805
44806 if (expand_vec_perm_pshufb2 (d))
44807 return true;
44808
44809 if (expand_vec_perm_interleave3 (d))
44810 return true;
44811
44812 if (expand_vec_perm_vperm2f128_vblend (d))
44813 return true;
44814
44815 /* Try sequences of four instructions. */
44816
44817 if (expand_vec_perm_vpshufb2_vpermq (d))
44818 return true;
44819
44820 if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
44821 return true;
44822
44823 /* ??? Look for narrow permutations whose element orderings would
44824 allow the promotion to a wider mode. */
44825
44826 /* ??? Look for sequences of interleave or a wider permute that place
44827 the data into the correct lanes for a half-vector shuffle like
44828 pshuf[lh]w or vpermilps. */
44829
44830 /* ??? Look for sequences of interleave that produce the desired results.
44831 The combinatorics of punpck[lh] get pretty ugly... */
44832
44833 if (expand_vec_perm_even_odd (d))
44834 return true;
44835
44836 /* Even longer sequences. */
44837 if (expand_vec_perm_vpshufb4_vpermq2 (d))
44838 return true;
44839
44840 return false;
44841 }
44842
44843 /* If a permutation only uses one operand, make it clear. Returns true
44844 if the permutation references both operands. */
44845
44846 static bool
44847 canonicalize_perm (struct expand_vec_perm_d *d)
44848 {
44849 int i, which, nelt = d->nelt;
44850
44851 for (i = which = 0; i < nelt; ++i)
44852 which |= (d->perm[i] < nelt ? 1 : 2);
44853
44854 d->one_operand_p = true;
44855 switch (which)
44856 {
44857 default:
44858 gcc_unreachable();
44859
44860 case 3:
44861 if (!rtx_equal_p (d->op0, d->op1))
44862 {
44863 d->one_operand_p = false;
44864 break;
44865 }
44866 /* The elements of PERM do not suggest that only the first operand
44867 is used, but both operands are identical. Allow easier matching
44868 of the permutation by folding the permutation into the single
44869 input vector. */
44870 /* FALLTHRU */
44871
44872 case 2:
44873 for (i = 0; i < nelt; ++i)
44874 d->perm[i] &= nelt - 1;
44875 d->op0 = d->op1;
44876 break;
44877
44878 case 1:
44879 d->op1 = d->op0;
44880 break;
44881 }
44882
44883 return (which == 3);
44884 }
44885
44886 bool
44887 ix86_expand_vec_perm_const (rtx operands[4])
44888 {
44889 struct expand_vec_perm_d d;
44890 unsigned char perm[MAX_VECT_LEN];
44891 int i, nelt;
44892 bool two_args;
44893 rtx sel;
44894
44895 d.target = operands[0];
44896 d.op0 = operands[1];
44897 d.op1 = operands[2];
44898 sel = operands[3];
44899
44900 d.vmode = GET_MODE (d.target);
44901 gcc_assert (VECTOR_MODE_P (d.vmode));
44902 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
44903 d.testing_p = false;
44904
44905 gcc_assert (GET_CODE (sel) == CONST_VECTOR);
44906 gcc_assert (XVECLEN (sel, 0) == nelt);
44907 gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
44908
44909 for (i = 0; i < nelt; ++i)
44910 {
44911 rtx e = XVECEXP (sel, 0, i);
44912 int ei = INTVAL (e) & (2 * nelt - 1);
44913 d.perm[i] = ei;
44914 perm[i] = ei;
44915 }
44916
44917 two_args = canonicalize_perm (&d);
44918
44919 if (ix86_expand_vec_perm_const_1 (&d))
44920 return true;
44921
44922 /* If the selector says both arguments are needed, but the operands are the
44923 same, the above tried to expand with one_operand_p and flattened selector.
44924 If that didn't work, retry without one_operand_p; we succeeded with that
44925 during testing. */
44926 if (two_args && d.one_operand_p)
44927 {
44928 d.one_operand_p = false;
44929 memcpy (d.perm, perm, sizeof (perm));
44930 return ix86_expand_vec_perm_const_1 (&d);
44931 }
44932
44933 return false;
44934 }
44935
44936 /* Implement targetm.vectorize.vec_perm_const_ok. */
44937
44938 static bool
44939 ix86_vectorize_vec_perm_const_ok (enum machine_mode vmode,
44940 const unsigned char *sel)
44941 {
44942 struct expand_vec_perm_d d;
44943 unsigned int i, nelt, which;
44944 bool ret;
44945
44946 d.vmode = vmode;
44947 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
44948 d.testing_p = true;
44949
44950 /* Given sufficient ISA support we can just return true here
44951 for selected vector modes. */
44952 if (d.vmode == V16SImode || d.vmode == V16SFmode
44953 || d.vmode == V8DFmode || d.vmode == V8DImode)
44954 /* All implementable with a single vpermi2 insn. */
44955 return true;
44956 if (GET_MODE_SIZE (d.vmode) == 16)
44957 {
44958 /* All implementable with a single vpperm insn. */
44959 if (TARGET_XOP)
44960 return true;
44961 /* All implementable with 2 pshufb + 1 ior. */
44962 if (TARGET_SSSE3)
44963 return true;
44964 /* All implementable with shufpd or unpck[lh]pd. */
44965 if (d.nelt == 2)
44966 return true;
44967 }
44968
44969 /* Extract the values from the vector CST into the permutation
44970 array in D. */
44971 memcpy (d.perm, sel, nelt);
44972 for (i = which = 0; i < nelt; ++i)
44973 {
44974 unsigned char e = d.perm[i];
44975 gcc_assert (e < 2 * nelt);
44976 which |= (e < nelt ? 1 : 2);
44977 }
44978
44979 /* For all elements from second vector, fold the elements to first. */
44980 if (which == 2)
44981 for (i = 0; i < nelt; ++i)
44982 d.perm[i] -= nelt;
44983
44984 /* Check whether the mask can be applied to the vector type. */
44985 d.one_operand_p = (which != 3);
44986
44987 /* Implementable with shufps or pshufd. */
44988 if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode))
44989 return true;
44990
44991 /* Otherwise we have to go through the motions and see if we can
44992 figure out how to generate the requested permutation. */
44993 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
44994 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
44995 if (!d.one_operand_p)
44996 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
44997
44998 start_sequence ();
44999 ret = ix86_expand_vec_perm_const_1 (&d);
45000 end_sequence ();
45001
45002 return ret;
45003 }
45004
45005 void
45006 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
45007 {
45008 struct expand_vec_perm_d d;
45009 unsigned i, nelt;
45010
45011 d.target = targ;
45012 d.op0 = op0;
45013 d.op1 = op1;
45014 d.vmode = GET_MODE (targ);
45015 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
45016 d.one_operand_p = false;
45017 d.testing_p = false;
45018
45019 for (i = 0; i < nelt; ++i)
45020 d.perm[i] = i * 2 + odd;
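  /* I.e. { 0 2 4 ... } when ODD == 0, or { 1 3 5 ... } when ODD == 1.  */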
45021
45022 /* We'll either be able to implement the permutation directly... */
45023 if (expand_vec_perm_1 (&d))
45024 return;
45025
45026 /* ... or we use the special-case patterns. */
45027 expand_vec_perm_even_odd_1 (&d, odd);
45028 }
45029
45030 static void
45031 ix86_expand_vec_interleave (rtx targ, rtx op0, rtx op1, bool high_p)
45032 {
45033 struct expand_vec_perm_d d;
45034 unsigned i, nelt, base;
45035 bool ok;
45036
45037 d.target = targ;
45038 d.op0 = op0;
45039 d.op1 = op1;
45040 d.vmode = GET_MODE (targ);
45041 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
45042 d.one_operand_p = false;
45043 d.testing_p = false;
45044
45045 base = high_p ? nelt / 2 : 0;
45046 for (i = 0; i < nelt / 2; ++i)
45047 {
45048 d.perm[i * 2] = i + base;
45049 d.perm[i * 2 + 1] = i + base + nelt;
45050 }
45051
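  /* For instance, with V8HImode and HIGH_P the permutation built above is
     { 4 12 5 13 6 14 7 15 }, i.e. an interleave of the two high halves.  */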
45052 /* Note that for AVX this isn't one instruction. */
45053 ok = ix86_expand_vec_perm_const_1 (&d);
45054 gcc_assert (ok);
45055 }
45056
45057
45058 /* Expand a vector operation CODE for a V*QImode in terms of the
45059 same operation on V*HImode. */
45060
45061 void
45062 ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2)
45063 {
45064 enum machine_mode qimode = GET_MODE (dest);
45065 enum machine_mode himode;
45066 rtx (*gen_il) (rtx, rtx, rtx);
45067 rtx (*gen_ih) (rtx, rtx, rtx);
45068 rtx op1_l, op1_h, op2_l, op2_h, res_l, res_h;
45069 struct expand_vec_perm_d d;
45070 bool ok, full_interleave;
45071 bool uns_p = false;
45072 int i;
45073
45074 switch (qimode)
45075 {
45076 case V16QImode:
45077 himode = V8HImode;
45078 gen_il = gen_vec_interleave_lowv16qi;
45079 gen_ih = gen_vec_interleave_highv16qi;
45080 break;
45081 case V32QImode:
45082 himode = V16HImode;
45083 gen_il = gen_avx2_interleave_lowv32qi;
45084 gen_ih = gen_avx2_interleave_highv32qi;
45085 break;
45086 default:
45087 gcc_unreachable ();
45088 }
45089
45090 op2_l = op2_h = op2;
45091 switch (code)
45092 {
45093 case MULT:
45094 /* Unpack data such that we've got a source byte in each low byte of
45095 each word. We don't care what goes into the high byte of each word.
45096 	 Rather than trying to get zero in there, it is most convenient to let
45097 	 it be a copy of the low byte.  */
45098 op2_l = gen_reg_rtx (qimode);
45099 op2_h = gen_reg_rtx (qimode);
45100 emit_insn (gen_il (op2_l, op2, op2));
45101 emit_insn (gen_ih (op2_h, op2, op2));
45102 /* FALLTHRU */
45103
45104 op1_l = gen_reg_rtx (qimode);
45105 op1_h = gen_reg_rtx (qimode);
45106 emit_insn (gen_il (op1_l, op1, op1));
45107 emit_insn (gen_ih (op1_h, op1, op1));
45108 full_interleave = qimode == V16QImode;
45109 break;
45110
45111 case ASHIFT:
45112 case LSHIFTRT:
45113 uns_p = true;
45114 /* FALLTHRU */
45115 case ASHIFTRT:
45116 op1_l = gen_reg_rtx (himode);
45117 op1_h = gen_reg_rtx (himode);
45118 ix86_expand_sse_unpack (op1_l, op1, uns_p, false);
45119 ix86_expand_sse_unpack (op1_h, op1, uns_p, true);
45120 full_interleave = true;
45121 break;
45122 default:
45123 gcc_unreachable ();
45124 }
45125
45126 /* Perform the operation. */
45127 res_l = expand_simple_binop (himode, code, op1_l, op2_l, NULL_RTX,
45128 1, OPTAB_DIRECT);
45129 res_h = expand_simple_binop (himode, code, op1_h, op2_h, NULL_RTX,
45130 1, OPTAB_DIRECT);
45131 gcc_assert (res_l && res_h);
45132
45133 /* Merge the data back into the right place. */
45134 d.target = dest;
45135 d.op0 = gen_lowpart (qimode, res_l);
45136 d.op1 = gen_lowpart (qimode, res_h);
45137 d.vmode = qimode;
45138 d.nelt = GET_MODE_NUNITS (qimode);
45139 d.one_operand_p = false;
45140 d.testing_p = false;
45141
45142 if (full_interleave)
45143 {
45144       /* For SSE2, we used a full interleave, so the desired
45145 results are in the even elements. */
45146 for (i = 0; i < 32; ++i)
45147 d.perm[i] = i * 2;
45148 }
45149 else
45150 {
45151       /* For AVX, the interleave used above was not cross-lane.  So the
45152 	 extraction is of the even elements, but with the second and third
45153 	 quarters swapped.  Happily, that is even one insn shorter than plain even extraction.  */
45154 for (i = 0; i < 32; ++i)
45155 d.perm[i] = i * 2 + ((i & 24) == 8 ? 16 : (i & 24) == 16 ? -16 : 0);
45156 }
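  /* In the AVX case the permutation built above is therefore
     { 0 2 ... 14  32 34 ... 46  16 18 ... 30  48 50 ... 62 }.  */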
45157
45158 ok = ix86_expand_vec_perm_const_1 (&d);
45159 gcc_assert (ok);
45160
45161 set_unique_reg_note (get_last_insn (), REG_EQUAL,
45162 gen_rtx_fmt_ee (code, qimode, op1, op2));
45163 }
45164
45165 /* Helper function of ix86_expand_mul_widen_evenodd. Return true
45166 if op is CONST_VECTOR with all odd elements equal to their
45167 preceding element. */
45168
45169 static bool
45170 const_vector_equal_evenodd_p (rtx op)
45171 {
45172 enum machine_mode mode = GET_MODE (op);
45173 int i, nunits = GET_MODE_NUNITS (mode);
45174 if (GET_CODE (op) != CONST_VECTOR
45175 || nunits != CONST_VECTOR_NUNITS (op))
45176 return false;
45177 for (i = 0; i < nunits; i += 2)
45178 if (CONST_VECTOR_ELT (op, i) != CONST_VECTOR_ELT (op, i + 1))
45179 return false;
45180 return true;
45181 }
45182
45183 void
45184 ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
45185 bool uns_p, bool odd_p)
45186 {
45187 enum machine_mode mode = GET_MODE (op1);
45188 enum machine_mode wmode = GET_MODE (dest);
45189 rtx x;
45190 rtx orig_op1 = op1, orig_op2 = op2;
45191
45192 if (!nonimmediate_operand (op1, mode))
45193 op1 = force_reg (mode, op1);
45194 if (!nonimmediate_operand (op2, mode))
45195 op2 = force_reg (mode, op2);
45196
45197 /* We only play even/odd games with vectors of SImode. */
45198 gcc_assert (mode == V4SImode || mode == V8SImode || mode == V16SImode);
45199
45200 /* If we're looking for the odd results, shift those members down to
45201 the even slots. For some cpus this is faster than a PSHUFD. */
45202 if (odd_p)
45203 {
45204 /* For XOP use vpmacsdqh, but only for smult, as it is only
45205 signed. */
45206 if (TARGET_XOP && mode == V4SImode && !uns_p)
45207 {
45208 x = force_reg (wmode, CONST0_RTX (wmode));
45209 emit_insn (gen_xop_pmacsdqh (dest, op1, op2, x));
45210 return;
45211 }
45212
45213 x = GEN_INT (GET_MODE_UNIT_BITSIZE (mode));
45214 if (!const_vector_equal_evenodd_p (orig_op1))
45215 op1 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op1),
45216 x, NULL, 1, OPTAB_DIRECT);
45217 if (!const_vector_equal_evenodd_p (orig_op2))
45218 op2 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op2),
45219 x, NULL, 1, OPTAB_DIRECT);
45220 op1 = gen_lowpart (mode, op1);
45221 op2 = gen_lowpart (mode, op2);
45222 }
45223
45224 if (mode == V16SImode)
45225 {
45226 if (uns_p)
45227 x = gen_vec_widen_umult_even_v16si (dest, op1, op2);
45228 else
45229 x = gen_vec_widen_smult_even_v16si (dest, op1, op2);
45230 }
45231 else if (mode == V8SImode)
45232 {
45233 if (uns_p)
45234 x = gen_vec_widen_umult_even_v8si (dest, op1, op2);
45235 else
45236 x = gen_vec_widen_smult_even_v8si (dest, op1, op2);
45237 }
45238 else if (uns_p)
45239 x = gen_vec_widen_umult_even_v4si (dest, op1, op2);
45240 else if (TARGET_SSE4_1)
45241 x = gen_sse4_1_mulv2siv2di3 (dest, op1, op2);
45242 else
45243 {
45244 rtx s1, s2, t0, t1, t2;
45245
45246       /* The easiest way to implement this without PMULDQ is to go through
45247 	 the motions as if we were performing a full 64-bit multiply, with
45248 	 the exception that we need to do less shuffling of the elements.  */
45249
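      /* The identity used below (mod 2^64), with HI() denoting the 32-bit
	 sign-extension word (0 or -1) computed into S1 and S2:
	 A * B == LO(A) * LO(B) + ((HI(A) * LO(B) + HI(B) * LO(A)) << 32).  */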
45250 /* Compute the sign-extension, aka highparts, of the two operands. */
45251 s1 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
45252 op1, pc_rtx, pc_rtx);
45253 s2 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
45254 op2, pc_rtx, pc_rtx);
45255
45256 /* Multiply LO(A) * HI(B), and vice-versa. */
45257 t1 = gen_reg_rtx (wmode);
45258 t2 = gen_reg_rtx (wmode);
45259 emit_insn (gen_vec_widen_umult_even_v4si (t1, s1, op2));
45260 emit_insn (gen_vec_widen_umult_even_v4si (t2, s2, op1));
45261
45262 /* Multiply LO(A) * LO(B). */
45263 t0 = gen_reg_rtx (wmode);
45264 emit_insn (gen_vec_widen_umult_even_v4si (t0, op1, op2));
45265
45266 /* Combine and shift the highparts into place. */
45267 t1 = expand_binop (wmode, add_optab, t1, t2, t1, 1, OPTAB_DIRECT);
45268 t1 = expand_binop (wmode, ashl_optab, t1, GEN_INT (32), t1,
45269 1, OPTAB_DIRECT);
45270
45271 /* Combine high and low parts. */
45272 force_expand_binop (wmode, add_optab, t0, t1, dest, 1, OPTAB_DIRECT);
45273 return;
45274 }
45275 emit_insn (x);
45276 }
45277
45278 void
45279 ix86_expand_mul_widen_hilo (rtx dest, rtx op1, rtx op2,
45280 bool uns_p, bool high_p)
45281 {
45282 enum machine_mode wmode = GET_MODE (dest);
45283 enum machine_mode mode = GET_MODE (op1);
45284 rtx t1, t2, t3, t4, mask;
45285
45286 switch (mode)
45287 {
45288 case V4SImode:
45289 t1 = gen_reg_rtx (mode);
45290 t2 = gen_reg_rtx (mode);
45291 if (TARGET_XOP && !uns_p)
45292 {
45293 /* With XOP, we have pmacsdqh, aka mul_widen_odd. In this case,
45294 shuffle the elements once so that all elements are in the right
45295 place for immediate use: { A C B D }. */
45296 emit_insn (gen_sse2_pshufd_1 (t1, op1, const0_rtx, const2_rtx,
45297 const1_rtx, GEN_INT (3)));
45298 emit_insn (gen_sse2_pshufd_1 (t2, op2, const0_rtx, const2_rtx,
45299 const1_rtx, GEN_INT (3)));
45300 }
45301 else
45302 {
45303 /* Put the elements into place for the multiply. */
45304 ix86_expand_vec_interleave (t1, op1, op1, high_p);
45305 ix86_expand_vec_interleave (t2, op2, op2, high_p);
45306 high_p = false;
45307 }
45308 ix86_expand_mul_widen_evenodd (dest, t1, t2, uns_p, high_p);
45309 break;
45310
45311 case V8SImode:
45312 /* Shuffle the elements between the lanes. After this we
45313 have { A B E F | C D G H } for each operand. */
45314 t1 = gen_reg_rtx (V4DImode);
45315 t2 = gen_reg_rtx (V4DImode);
45316 emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, op1),
45317 const0_rtx, const2_rtx,
45318 const1_rtx, GEN_INT (3)));
45319 emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, op2),
45320 const0_rtx, const2_rtx,
45321 const1_rtx, GEN_INT (3)));
45322
45323 /* Shuffle the elements within the lanes. After this we
45324 have { A A B B | C C D D } or { E E F F | G G H H }. */
45325 t3 = gen_reg_rtx (V8SImode);
45326 t4 = gen_reg_rtx (V8SImode);
45327 mask = GEN_INT (high_p
45328 ? 2 + (2 << 2) + (3 << 4) + (3 << 6)
45329 : 0 + (0 << 2) + (1 << 4) + (1 << 6));
45330 emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1), mask));
45331 emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2), mask));
45332
45333 ix86_expand_mul_widen_evenodd (dest, t3, t4, uns_p, false);
45334 break;
45335
45336 case V8HImode:
45337 case V16HImode:
45338 t1 = expand_binop (mode, smul_optab, op1, op2, NULL_RTX,
45339 uns_p, OPTAB_DIRECT);
45340 t2 = expand_binop (mode,
45341 uns_p ? umul_highpart_optab : smul_highpart_optab,
45342 op1, op2, NULL_RTX, uns_p, OPTAB_DIRECT);
45343 gcc_assert (t1 && t2);
45344
45345 t3 = gen_reg_rtx (mode);
45346 ix86_expand_vec_interleave (t3, t1, t2, high_p);
45347 emit_move_insn (dest, gen_lowpart (wmode, t3));
45348 break;
45349
45350 case V16QImode:
45351 case V32QImode:
45352 t1 = gen_reg_rtx (wmode);
45353 t2 = gen_reg_rtx (wmode);
45354 ix86_expand_sse_unpack (t1, op1, uns_p, high_p);
45355 ix86_expand_sse_unpack (t2, op2, uns_p, high_p);
45356
45357 emit_insn (gen_rtx_SET (VOIDmode, dest, gen_rtx_MULT (wmode, t1, t2)));
45358 break;
45359
45360 default:
45361 gcc_unreachable ();
45362 }
45363 }
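
/* Illustrative scalar model of the V8HImode/V16HImode case above: one
   widened lane is just the low-part product interleaved with the
   high-part product.  Signed variant shown; not part of the compiler,
   assumes <stdint.h> types.  */
static int32_t
mul_widen_hi16_model (int16_t a, int16_t b)
{
  uint16_t lo = (uint16_t) (a * b);			/* PMULLW lane */
  uint16_t hi = (uint16_t) (((int32_t) a * b) >> 16);	/* PMULHW lane */
  return (int32_t) ((uint32_t) lo | ((uint32_t) hi << 16));
}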
45364
45365 void
45366 ix86_expand_sse2_mulv4si3 (rtx op0, rtx op1, rtx op2)
45367 {
45368 rtx res_1, res_2, res_3, res_4;
45369
45370 res_1 = gen_reg_rtx (V4SImode);
45371 res_2 = gen_reg_rtx (V4SImode);
45372 res_3 = gen_reg_rtx (V2DImode);
45373 res_4 = gen_reg_rtx (V2DImode);
45374 ix86_expand_mul_widen_evenodd (res_3, op1, op2, true, false);
45375 ix86_expand_mul_widen_evenodd (res_4, op1, op2, true, true);
45376
45377 /* Move the results in element 2 down to element 1; we don't care
45378 what goes in elements 2 and 3. Then we can merge the parts
45379 back together with an interleave.
45380
45381 Note that two other sequences were tried:
45382 (1) Use interleaves at the start instead of psrldq, which allows
45383 us to use a single shufps to merge things back at the end.
45384 (2) Use shufps here to combine the two vectors, then pshufd to
45385 put the elements in the correct order.
45386 In both cases the cost of the reformatting stall was too high
45387 and the overall sequence slower. */
45388
45389 emit_insn (gen_sse2_pshufd_1 (res_1, gen_lowpart (V4SImode, res_3),
45390 const0_rtx, const2_rtx,
45391 const0_rtx, const0_rtx));
45392 emit_insn (gen_sse2_pshufd_1 (res_2, gen_lowpart (V4SImode, res_4),
45393 const0_rtx, const2_rtx,
45394 const0_rtx, const0_rtx));
45395 res_1 = emit_insn (gen_vec_interleave_lowv4si (op0, res_1, res_2));
45396
45397 set_unique_reg_note (res_1, REG_EQUAL, gen_rtx_MULT (V4SImode, op1, op2));
45398 }
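
/* Net effect of the sequence above, modelled element-wise: each result
   lane is the low 32 bits of the widening product of the corresponding
   lanes, with the even and odd halves computed separately and merged
   back.  Purely illustrative; assumes <stdint.h> types.  */
static void
mulv4si_model (uint32_t dst[4], const uint32_t a[4], const uint32_t b[4])
{
  uint64_t even0 = (uint64_t) a[0] * b[0];	/* widen-even lanes 0 and 2 */
  uint64_t even2 = (uint64_t) a[2] * b[2];
  uint64_t odd1 = (uint64_t) a[1] * b[1];	/* widen-odd lanes 1 and 3 */
  uint64_t odd3 = (uint64_t) a[3] * b[3];

  /* Keep only the low halves and interleave them back into order.  */
  dst[0] = (uint32_t) even0;
  dst[1] = (uint32_t) odd1;
  dst[2] = (uint32_t) even2;
  dst[3] = (uint32_t) odd3;
}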
45399
45400 void
45401 ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
45402 {
45403 enum machine_mode mode = GET_MODE (op0);
45404 rtx t1, t2, t3, t4, t5, t6;
45405
45406 if (TARGET_XOP && mode == V2DImode)
45407 {
45408 /* op1: A,B,C,D, op2: E,F,G,H */
45409 op1 = gen_lowpart (V4SImode, op1);
45410 op2 = gen_lowpart (V4SImode, op2);
45411
45412 t1 = gen_reg_rtx (V4SImode);
45413 t2 = gen_reg_rtx (V4SImode);
45414 t3 = gen_reg_rtx (V2DImode);
45415 t4 = gen_reg_rtx (V2DImode);
45416
45417 /* t1: B,A,D,C */
45418 emit_insn (gen_sse2_pshufd_1 (t1, op1,
45419 GEN_INT (1),
45420 GEN_INT (0),
45421 GEN_INT (3),
45422 GEN_INT (2)));
45423
45424 /* t2: (B*E),(A*F),(D*G),(C*H) */
45425 emit_insn (gen_mulv4si3 (t2, t1, op2));
45426
45427 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
45428 emit_insn (gen_xop_phadddq (t3, t2));
45429
45430 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
45431 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
45432
45433 /* Multiply lower parts and add all */
45434 t5 = gen_reg_rtx (V2DImode);
45435 emit_insn (gen_vec_widen_umult_even_v4si (t5,
45436 gen_lowpart (V4SImode, op1),
45437 gen_lowpart (V4SImode, op2)));
45438 op0 = expand_binop (mode, add_optab, t5, t4, op0, 1, OPTAB_DIRECT);
45439
45440 }
45441 else
45442 {
45443 enum machine_mode nmode;
45444 rtx (*umul) (rtx, rtx, rtx);
45445
45446 if (mode == V2DImode)
45447 {
45448 umul = gen_vec_widen_umult_even_v4si;
45449 nmode = V4SImode;
45450 }
45451 else if (mode == V4DImode)
45452 {
45453 umul = gen_vec_widen_umult_even_v8si;
45454 nmode = V8SImode;
45455 }
45456 else if (mode == V8DImode)
45457 {
45458 umul = gen_vec_widen_umult_even_v16si;
45459 nmode = V16SImode;
45460 }
45461 else
45462 gcc_unreachable ();
45463
45464
45465 /* Multiply low parts. */
45466 t1 = gen_reg_rtx (mode);
45467 emit_insn (umul (t1, gen_lowpart (nmode, op1), gen_lowpart (nmode, op2)));
45468
45469 /* Shift input vectors right 32 bits so we can multiply high parts. */
45470 t6 = GEN_INT (32);
45471 t2 = expand_binop (mode, lshr_optab, op1, t6, NULL, 1, OPTAB_DIRECT);
45472 t3 = expand_binop (mode, lshr_optab, op2, t6, NULL, 1, OPTAB_DIRECT);
45473
45474 /* Multiply high parts by low parts. */
45475 t4 = gen_reg_rtx (mode);
45476 t5 = gen_reg_rtx (mode);
45477 emit_insn (umul (t4, gen_lowpart (nmode, t2), gen_lowpart (nmode, op2)));
45478 emit_insn (umul (t5, gen_lowpart (nmode, t3), gen_lowpart (nmode, op1)));
45479
45480 /* Combine and shift the highparts back. */
45481 t4 = expand_binop (mode, add_optab, t4, t5, t4, 1, OPTAB_DIRECT);
45482 t4 = expand_binop (mode, ashl_optab, t4, t6, t4, 1, OPTAB_DIRECT);
45483
45484 /* Combine high and low parts. */
45485 force_expand_binop (mode, add_optab, t1, t4, op0, 1, OPTAB_DIRECT);
45486 }
45487
45488 set_unique_reg_note (get_last_insn (), REG_EQUAL,
45489 gen_rtx_MULT (mode, op1, op2));
45490 }
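
/* Illustrative scalar model of one lane in the non-XOP path above: a
   full 64x64->64 multiply built from 32x32->64 unsigned even-multiplies.
   Not part of the compiler; assumes <stdint.h> types.  */
static uint64_t
mulvxdi_lane_model (uint64_t a, uint64_t b)
{
  uint64_t lo_a = a & 0xffffffffu, hi_a = a >> 32;
  uint64_t lo_b = b & 0xffffffffu, hi_b = b >> 32;
  uint64_t t1 = lo_a * lo_b;			/* PMULUDQ of the low halves */
  uint64_t t4 = hi_a * lo_b + hi_b * lo_a;	/* the two cross products */
  return t1 + (t4 << 32);			/* high bits of T4 drop out */
}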
45491
45492 /* Calculate integer abs() using only SSE2 instructions. */
45493
45494 void
45495 ix86_expand_sse2_abs (rtx target, rtx input)
45496 {
45497 enum machine_mode mode = GET_MODE (target);
45498 rtx tmp0, tmp1, x;
45499
45500 switch (mode)
45501 {
45502 /* For 32-bit signed integer X, the best way to calculate the absolute
45503 value of X is (((signed) X >> (W-1)) ^ X) - ((signed) X >> (W-1)). */
45504 case V4SImode:
45505 tmp0 = expand_simple_binop (mode, ASHIFTRT, input,
45506 GEN_INT (GET_MODE_BITSIZE
45507 (GET_MODE_INNER (mode)) - 1),
45508 NULL, 0, OPTAB_DIRECT);
45509 tmp1 = expand_simple_binop (mode, XOR, tmp0, input,
45510 NULL, 0, OPTAB_DIRECT);
45511 x = expand_simple_binop (mode, MINUS, tmp1, tmp0,
45512 target, 0, OPTAB_DIRECT);
45513 break;
45514
45515 /* For 16-bit signed integer X, the best way to calculate the absolute
45516 value of X is max (X, -X), as SSE2 provides the PMAXSW insn. */
45517 case V8HImode:
45518 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
45519
45520 x = expand_simple_binop (mode, SMAX, tmp0, input,
45521 target, 0, OPTAB_DIRECT);
45522 break;
45523
45524 /* For 8-bit signed integer X, the best way to calculate the absolute
45525 value of X is min ((unsigned char) X, (unsigned char) (-X)),
45526 as SSE2 provides the PMINUB insn. */
45527 case V16QImode:
45528 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
45529
45530 x = expand_simple_binop (V16QImode, UMIN, tmp0, input,
45531 target, 0, OPTAB_DIRECT);
45532 break;
45533
45534 default:
45535 gcc_unreachable ();
45536 }
45537
45538 if (x != target)
45539 emit_move_insn (target, x);
45540 }
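
/* Illustrative scalar models of the three idioms used above (not part of
   the compiler; assume <stdint.h> types).  The shift/xor/sub form is the
   V4SImode case; the max and min forms correspond to PMAXSW and PMINUB.  */
static int32_t
abs_shift_model (int32_t x)
{
  int32_t m = x >> 31;		/* arithmetic shift: 0 or -1 */
  return (x ^ m) - m;		/* for negative x: ~x + 1 == -x */
}

static int16_t
abs_smax_model (int16_t x)
{
  int16_t n = -x;
  return x > n ? x : n;		/* max (X, -X) */
}

static uint8_t
abs_umin_model (int8_t x)
{
  uint8_t u = (uint8_t) x, n = (uint8_t) -x;
  return u < n ? u : n;		/* min ((unsigned char) X, (unsigned char) -X) */
}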
45541
45542 /* Expand an insert into a vector register through pinsr insn.
45543 Return true if successful. */
45544
45545 bool
45546 ix86_expand_pinsr (rtx *operands)
45547 {
45548 rtx dst = operands[0];
45549 rtx src = operands[3];
45550
45551 unsigned int size = INTVAL (operands[1]);
45552 unsigned int pos = INTVAL (operands[2]);
45553
45554 if (GET_CODE (dst) == SUBREG)
45555 {
45556 pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
45557 dst = SUBREG_REG (dst);
45558 }
45559
45560 if (GET_CODE (src) == SUBREG)
45561 src = SUBREG_REG (src);
45562
45563 switch (GET_MODE (dst))
45564 {
45565 case V16QImode:
45566 case V8HImode:
45567 case V4SImode:
45568 case V2DImode:
45569 {
45570 enum machine_mode srcmode, dstmode;
45571 rtx (*pinsr)(rtx, rtx, rtx, rtx);
45572
45573 srcmode = mode_for_size (size, MODE_INT, 0);
45574
45575 switch (srcmode)
45576 {
45577 case QImode:
45578 if (!TARGET_SSE4_1)
45579 return false;
45580 dstmode = V16QImode;
45581 pinsr = gen_sse4_1_pinsrb;
45582 break;
45583
45584 case HImode:
45585 if (!TARGET_SSE2)
45586 return false;
45587 dstmode = V8HImode;
45588 pinsr = gen_sse2_pinsrw;
45589 break;
45590
45591 case SImode:
45592 if (!TARGET_SSE4_1)
45593 return false;
45594 dstmode = V4SImode;
45595 pinsr = gen_sse4_1_pinsrd;
45596 break;
45597
45598 case DImode:
45599 gcc_assert (TARGET_64BIT);
45600 if (!TARGET_SSE4_1)
45601 return false;
45602 dstmode = V2DImode;
45603 pinsr = gen_sse4_1_pinsrq;
45604 break;
45605
45606 default:
45607 return false;
45608 }
45609
45610 rtx d = dst;
45611 if (GET_MODE (dst) != dstmode)
45612 d = gen_reg_rtx (dstmode);
45613 src = gen_lowpart (srcmode, src);
45614
45615 pos /= size;
45616
45617 emit_insn (pinsr (d, gen_lowpart (dstmode, dst), src,
45618 GEN_INT (1 << pos)));
45619 if (d != dst)
45620 emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
45621 return true;
45622 }
45623
45624 default:
45625 return false;
45626 }
45627 }
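
/* Worked example for the expander above (hypothetical operands, for
   illustration only): inserting a 16-bit value at bit position 32 of a
   V8HImode destination gives size = 16 and pos = 32, so srcmode = HImode,
   dstmode = V8HImode, and pos /= size leaves element index 2; the
   GEN_INT (1 << pos) operand then encodes that element as a one-hot mask
   for the sse2_pinsrw pattern.  */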
45628 \f
45629 /* This function returns the calling-ABI-specific va_list type node,
45630 i.e. the va_list type appropriate for FNDECL. */
45631
45632 static tree
45633 ix86_fn_abi_va_list (tree fndecl)
45634 {
45635 if (!TARGET_64BIT)
45636 return va_list_type_node;
45637 gcc_assert (fndecl != NULL_TREE);
45638
45639 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
45640 return ms_va_list_type_node;
45641 else
45642 return sysv_va_list_type_node;
45643 }
45644
45645 /* Returns the canonical va_list type specified by TYPE. If there
45646 is no valid TYPE provided, it returns NULL_TREE. */
45647
45648 static tree
45649 ix86_canonical_va_list_type (tree type)
45650 {
45651 tree wtype, htype;
45652
45653 /* Resolve references and pointers to va_list type. */
45654 if (TREE_CODE (type) == MEM_REF)
45655 type = TREE_TYPE (type);
45656 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
45657 type = TREE_TYPE (type);
45658 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
45659 type = TREE_TYPE (type);
45660
45661 if (TARGET_64BIT && va_list_type_node != NULL_TREE)
45662 {
45663 wtype = va_list_type_node;
45664 gcc_assert (wtype != NULL_TREE);
45665 htype = type;
45666 if (TREE_CODE (wtype) == ARRAY_TYPE)
45667 {
45668 /* If va_list is an array type, the argument may have decayed
45669 to a pointer type, e.g. by being passed to another function.
45670 In that case, unwrap both types so that we can compare the
45671 underlying records. */
45672 if (TREE_CODE (htype) == ARRAY_TYPE
45673 || POINTER_TYPE_P (htype))
45674 {
45675 wtype = TREE_TYPE (wtype);
45676 htype = TREE_TYPE (htype);
45677 }
45678 }
45679 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
45680 return va_list_type_node;
45681 wtype = sysv_va_list_type_node;
45682 gcc_assert (wtype != NULL_TREE);
45683 htype = type;
45684 if (TREE_CODE (wtype) == ARRAY_TYPE)
45685 {
45686 /* If va_list is an array type, the argument may have decayed
45687 to a pointer type, e.g. by being passed to another function.
45688 In that case, unwrap both types so that we can compare the
45689 underlying records. */
45690 if (TREE_CODE (htype) == ARRAY_TYPE
45691 || POINTER_TYPE_P (htype))
45692 {
45693 wtype = TREE_TYPE (wtype);
45694 htype = TREE_TYPE (htype);
45695 }
45696 }
45697 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
45698 return sysv_va_list_type_node;
45699 wtype = ms_va_list_type_node;
45700 gcc_assert (wtype != NULL_TREE);
45701 htype = type;
45702 if (TREE_CODE (wtype) == ARRAY_TYPE)
45703 {
45704 /* If va_list is an array type, the argument may have decayed
45705 to a pointer type, e.g. by being passed to another function.
45706 In that case, unwrap both types so that we can compare the
45707 underlying records. */
45708 if (TREE_CODE (htype) == ARRAY_TYPE
45709 || POINTER_TYPE_P (htype))
45710 {
45711 wtype = TREE_TYPE (wtype);
45712 htype = TREE_TYPE (htype);
45713 }
45714 }
45715 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
45716 return ms_va_list_type_node;
45717 return NULL_TREE;
45718 }
45719 return std_canonical_va_list_type (type);
45720 }
45721
45722 /* Iterate through the target-specific builtin types for va_list.
45723 IDX denotes the iterator, *PTREE is set to the result type of
45724 the va_list builtin, and *PNAME to its internal type.
45725 Returns zero if there is no element for this index, otherwise
45726 IDX should be increased upon the next call.
45727 Note, do not iterate a base builtin's name like __builtin_va_list.
45728 Used from c_common_nodes_and_builtins. */
45729
45730 static int
45731 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
45732 {
45733 if (TARGET_64BIT)
45734 {
45735 switch (idx)
45736 {
45737 default:
45738 break;
45739
45740 case 0:
45741 *ptree = ms_va_list_type_node;
45742 *pname = "__builtin_ms_va_list";
45743 return 1;
45744
45745 case 1:
45746 *ptree = sysv_va_list_type_node;
45747 *pname = "__builtin_sysv_va_list";
45748 return 1;
45749 }
45750 }
45751
45752 return 0;
45753 }
45754
45755 #undef TARGET_SCHED_DISPATCH
45756 #define TARGET_SCHED_DISPATCH has_dispatch
45757 #undef TARGET_SCHED_DISPATCH_DO
45758 #define TARGET_SCHED_DISPATCH_DO do_dispatch
45759 #undef TARGET_SCHED_REASSOCIATION_WIDTH
45760 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
45761 #undef TARGET_SCHED_REORDER
45762 #define TARGET_SCHED_REORDER ix86_sched_reorder
45763 #undef TARGET_SCHED_ADJUST_PRIORITY
45764 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
45765 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
45766 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
45767 ix86_dependencies_evaluation_hook
45768
45769 /* The size of the dispatch window is the total number of bytes of
45770 object code allowed in a window. */
45771 #define DISPATCH_WINDOW_SIZE 16
45772
45773 /* Number of dispatch windows considered for scheduling. */
45774 #define MAX_DISPATCH_WINDOWS 3
45775
45776 /* Maximum number of instructions in a window. */
45777 #define MAX_INSN 4
45778
45779 /* Maximum number of immediate operands in a window. */
45780 #define MAX_IMM 4
45781
45782 /* Maximum number of immediate bits allowed in a window. */
45783 #define MAX_IMM_SIZE 128
45784
45785 /* Maximum number of 32 bit immediates allowed in a window. */
45786 #define MAX_IMM_32 4
45787
45788 /* Maximum number of 64 bit immediates allowed in a window. */
45789 #define MAX_IMM_64 2
45790
45791 /* Maximum total of loads or prefetches allowed in a window. */
45792 #define MAX_LOAD 2
45793
45794 /* Maximum total of stores allowed in a window. */
45795 #define MAX_STORE 1
45796
45797 #undef BIG
45798 #define BIG 100
45799
45800
45801 /* Dispatch groups. Instructions that affect the mix in a dispatch window. */
45802 enum dispatch_group {
45803 disp_no_group = 0,
45804 disp_load,
45805 disp_store,
45806 disp_load_store,
45807 disp_prefetch,
45808 disp_imm,
45809 disp_imm_32,
45810 disp_imm_64,
45811 disp_branch,
45812 disp_cmp,
45813 disp_jcc,
45814 disp_last
45815 };
45816
45817 /* Number of allowable groups in a dispatch window. It is an array
45818 indexed by dispatch_group enum. 100 is used as a big number,
45819 because the number of these kinds of operations has no
45820 effect in the dispatch window, but we need them for other reasons in
45821 the table. */
45822 static unsigned int num_allowable_groups[disp_last] = {
45823 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
45824 };
45825
45826 char group_name[disp_last + 1][16] = {
45827 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
45828 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
45829 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
45830 };
45831
45832 /* Instruction path. */
45833 enum insn_path {
45834 no_path = 0,
45835 path_single, /* Single micro op. */
45836 path_double, /* Double micro op. */
45837 path_multi, /* Instructions with more than 2 micro ops. */
45838 last_path
45839 };
45840
45841 /* sched_insn_info defines a window to the instructions scheduled in
45842 the basic block. It contains a pointer to the insn_info table and
45843 the instruction scheduled.
45844
45845 Windows are allocated for each basic block and are linked
45846 together. */
45847 typedef struct sched_insn_info_s {
45848 rtx insn;
45849 enum dispatch_group group;
45850 enum insn_path path;
45851 int byte_len;
45852 int imm_bytes;
45853 } sched_insn_info;
45854
45855 /* Linked list of dispatch windows. This is a doubly linked list of
45856 the dispatch windows of a basic block. It contains information about
45857 the number of uops in the window and the total number of
45858 instructions and of bytes in the object code for this dispatch
45859 window. */
45860 typedef struct dispatch_windows_s {
45861 int num_insn; /* Number of insn in the window. */
45862 int num_uops; /* Number of uops in the window. */
45863 int window_size; /* Number of bytes in the window. */
45864 int window_num; /* Window number, either 0 or 1. */
45865 int num_imm; /* Number of immediates in an insn. */
45866 int num_imm_32; /* Number of 32 bit immediates in an insn. */
45867 int num_imm_64; /* Number of 64 bit immediates in an insn. */
45868 int imm_size; /* Total immediates in the window. */
45869 int num_loads; /* Total memory loads in the window. */
45870 int num_stores; /* Total memory stores in the window. */
45871 int violation; /* Violation exists in window. */
45872 sched_insn_info *window; /* Pointer to the window. */
45873 struct dispatch_windows_s *next;
45874 struct dispatch_windows_s *prev;
45875 } dispatch_windows;
45876
45877 /* Immediate values used in an insn. */
45878 typedef struct imm_info_s
45879 {
45880 int imm;
45881 int imm32;
45882 int imm64;
45883 } imm_info;
45884
45885 static dispatch_windows *dispatch_window_list;
45886 static dispatch_windows *dispatch_window_list1;
45887
45888 /* Get dispatch group of insn. */
45889
45890 static enum dispatch_group
45891 get_mem_group (rtx_insn *insn)
45892 {
45893 enum attr_memory memory;
45894
45895 if (INSN_CODE (insn) < 0)
45896 return disp_no_group;
45897 memory = get_attr_memory (insn);
45898 if (memory == MEMORY_STORE)
45899 return disp_store;
45900
45901 if (memory == MEMORY_LOAD)
45902 return disp_load;
45903
45904 if (memory == MEMORY_BOTH)
45905 return disp_load_store;
45906
45907 return disp_no_group;
45908 }
45909
45910 /* Return true if insn is a compare instruction. */
45911
45912 static bool
45913 is_cmp (rtx_insn *insn)
45914 {
45915 enum attr_type type;
45916
45917 type = get_attr_type (insn);
45918 return (type == TYPE_TEST
45919 || type == TYPE_ICMP
45920 || type == TYPE_FCMP
45921 || GET_CODE (PATTERN (insn)) == COMPARE);
45922 }
45923
45924 /* Return true if a dispatch violation was encountered. */
45925
45926 static bool
45927 dispatch_violation (void)
45928 {
45929 if (dispatch_window_list->next)
45930 return dispatch_window_list->next->violation;
45931 return dispatch_window_list->violation;
45932 }
45933
45934 /* Return true if insn is a branch instruction. */
45935
45936 static bool
45937 is_branch (rtx insn)
45938 {
45939 return (CALL_P (insn) || JUMP_P (insn));
45940 }
45941
45942 /* Return true if insn is a prefetch instruction. */
45943
45944 static bool
45945 is_prefetch (rtx insn)
45946 {
45947 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
45948 }
45949
45950 /* This function initializes a dispatch window and the list container holding a
45951 pointer to the window. */
45952
45953 static void
45954 init_window (int window_num)
45955 {
45956 int i;
45957 dispatch_windows *new_list;
45958
45959 if (window_num == 0)
45960 new_list = dispatch_window_list;
45961 else
45962 new_list = dispatch_window_list1;
45963
45964 new_list->num_insn = 0;
45965 new_list->num_uops = 0;
45966 new_list->window_size = 0;
45967 new_list->next = NULL;
45968 new_list->prev = NULL;
45969 new_list->window_num = window_num;
45970 new_list->num_imm = 0;
45971 new_list->num_imm_32 = 0;
45972 new_list->num_imm_64 = 0;
45973 new_list->imm_size = 0;
45974 new_list->num_loads = 0;
45975 new_list->num_stores = 0;
45976 new_list->violation = false;
45977
45978 for (i = 0; i < MAX_INSN; i++)
45979 {
45980 new_list->window[i].insn = NULL;
45981 new_list->window[i].group = disp_no_group;
45982 new_list->window[i].path = no_path;
45983 new_list->window[i].byte_len = 0;
45984 new_list->window[i].imm_bytes = 0;
45985 }
45986 return;
45987 }
45988
45989 /* This function allocates and initializes a dispatch window and the
45990 list container holding a pointer to the window. */
45991
45992 static dispatch_windows *
45993 allocate_window (void)
45994 {
45995 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
45996 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
45997
45998 return new_list;
45999 }
46000
46001 /* This routine initializes the dispatch scheduling information. It
46002 initiates building dispatch scheduler tables and constructs the
46003 first dispatch window. */
46004
46005 static void
46006 init_dispatch_sched (void)
46007 {
46008 /* Allocate a dispatch list and a window. */
46009 dispatch_window_list = allocate_window ();
46010 dispatch_window_list1 = allocate_window ();
46011 init_window (0);
46012 init_window (1);
46013 }
46014
46015 /* This function returns true if a branch is detected. End of a basic block
46016 does not have to be a branch, but here we assume only branches end a
46017 window. */
46018
46019 static bool
46020 is_end_basic_block (enum dispatch_group group)
46021 {
46022 return group == disp_branch;
46023 }
46024
46025 /* This function is called when the end of a window processing is reached. */
46026
46027 static void
46028 process_end_window (void)
46029 {
46030 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
46031 if (dispatch_window_list->next)
46032 {
46033 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
46034 gcc_assert (dispatch_window_list->window_size
46035 + dispatch_window_list1->window_size <= 48);
46036 init_window (1);
46037 }
46038 init_window (0);
46039 }
46040
46041 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
46042 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
46043 for 48 bytes of instructions. Note that these windows are not dispatch
46044 windows whose sizes are DISPATCH_WINDOW_SIZE. */
46045
46046 static dispatch_windows *
46047 allocate_next_window (int window_num)
46048 {
46049 if (window_num == 0)
46050 {
46051 if (dispatch_window_list->next)
46052 init_window (1);
46053 init_window (0);
46054 return dispatch_window_list;
46055 }
46056
46057 dispatch_window_list->next = dispatch_window_list1;
46058 dispatch_window_list1->prev = dispatch_window_list;
46059
46060 return dispatch_window_list1;
46061 }
46062
46063 /* Increment the number of immediate operands of an instruction. */
46064
46065 static int
46066 find_constant_1 (rtx *in_rtx, imm_info *imm_values)
46067 {
46068 if (*in_rtx == 0)
46069 return 0;
46070
46071 switch (GET_CODE (*in_rtx))
46072 {
46073 case CONST:
46074 case SYMBOL_REF:
46075 case CONST_INT:
46076 (imm_values->imm)++;
46077 if (x86_64_immediate_operand (*in_rtx, SImode))
46078 (imm_values->imm32)++;
46079 else
46080 (imm_values->imm64)++;
46081 break;
46082
46083 case CONST_DOUBLE:
46084 (imm_values->imm)++;
46085 (imm_values->imm64)++;
46086 break;
46087
46088 case CODE_LABEL:
46089 if (LABEL_KIND (*in_rtx) == LABEL_NORMAL)
46090 {
46091 (imm_values->imm)++;
46092 (imm_values->imm32)++;
46093 }
46094 break;
46095
46096 default:
46097 break;
46098 }
46099
46100 return 0;
46101 }
46102
46103 /* Compute number of immediate operands of an instruction. */
46104
46105 static void
46106 find_constant (rtx in_rtx, imm_info *imm_values)
46107 {
46108 for_each_rtx (INSN_P (in_rtx) ? &PATTERN (in_rtx) : &in_rtx,
46109 (rtx_function) find_constant_1, (void *) imm_values);
46110 }
46111
46112 /* Return total size of immediate operands of an instruction along with number
46113 of corresponding immediate operands. It initializes its parameters to zero
46114 before calling FIND_CONSTANT.
46115 INSN is the input instruction. IMM is the total of immediates.
46116 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
46117 bit immediates. */
46118
46119 static int
46120 get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
46121 {
46122 imm_info imm_values = {0, 0, 0};
46123
46124 find_constant (insn, &imm_values);
46125 *imm = imm_values.imm;
46126 *imm32 = imm_values.imm32;
46127 *imm64 = imm_values.imm64;
46128 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
46129 }
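
/* Worked example (hypothetical insn, for illustration only): an insn with
   one immediate that satisfies x86_64_immediate_operand in SImode and one
   that does not yields *imm = 2, *imm32 = 1, *imm64 = 1 and a return
   value of 1 * 4 + 1 * 8 = 12 bytes.  */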
46130
46131 /* This function indicates whether an instruction has an immediate
46132 operand. */
46133
46134 static bool
46135 has_immediate (rtx insn)
46136 {
46137 int num_imm_operand;
46138 int num_imm32_operand;
46139 int num_imm64_operand;
46140
46141 if (insn)
46142 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
46143 &num_imm64_operand);
46144 return false;
46145 }
46146
46147 /* Return the dispatch path (single, double or multi) for an instruction. */
46148
46149 static enum insn_path
46150 get_insn_path (rtx_insn *insn)
46151 {
46152 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
46153
46154 if ((int)path == 0)
46155 return path_single;
46156
46157 if ((int)path == 1)
46158 return path_double;
46159
46160 return path_multi;
46161 }
46162
46163 /* Return insn dispatch group. */
46164
46165 static enum dispatch_group
46166 get_insn_group (rtx_insn *insn)
46167 {
46168 enum dispatch_group group = get_mem_group (insn);
46169 if (group)
46170 return group;
46171
46172 if (is_branch (insn))
46173 return disp_branch;
46174
46175 if (is_cmp (insn))
46176 return disp_cmp;
46177
46178 if (has_immediate (insn))
46179 return disp_imm;
46180
46181 if (is_prefetch (insn))
46182 return disp_prefetch;
46183
46184 return disp_no_group;
46185 }
46186
46187 /* Count number of GROUP restricted instructions in a dispatch
46188 window WINDOW_LIST. */
46189
46190 static int
46191 count_num_restricted (rtx_insn *insn, dispatch_windows *window_list)
46192 {
46193 enum dispatch_group group = get_insn_group (insn);
46194 int imm_size;
46195 int num_imm_operand;
46196 int num_imm32_operand;
46197 int num_imm64_operand;
46198
46199 if (group == disp_no_group)
46200 return 0;
46201
46202 if (group == disp_imm)
46203 {
46204 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
46205 &num_imm64_operand);
46206 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
46207 || num_imm_operand + window_list->num_imm > MAX_IMM
46208 || (num_imm32_operand > 0
46209 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
46210 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
46211 || (num_imm64_operand > 0
46212 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
46213 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
46214 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
46215 && num_imm64_operand > 0
46216 && ((window_list->num_imm_64 > 0
46217 && window_list->num_insn >= 2)
46218 || window_list->num_insn >= 3)))
46219 return BIG;
46220
46221 return 1;
46222 }
46223
46224 if ((group == disp_load_store
46225 && (window_list->num_loads >= MAX_LOAD
46226 || window_list->num_stores >= MAX_STORE))
46227 || ((group == disp_load
46228 || group == disp_prefetch)
46229 && window_list->num_loads >= MAX_LOAD)
46230 || (group == disp_store
46231 && window_list->num_stores >= MAX_STORE))
46232 return BIG;
46233
46234 return 1;
46235 }
46236
46237 /* This function returns true if insn satisfies dispatch rules on the
46238 last window scheduled. */
46239
46240 static bool
46241 fits_dispatch_window (rtx_insn *insn)
46242 {
46243 dispatch_windows *window_list = dispatch_window_list;
46244 dispatch_windows *window_list_next = dispatch_window_list->next;
46245 unsigned int num_restrict;
46246 enum dispatch_group group = get_insn_group (insn);
46247 enum insn_path path = get_insn_path (insn);
46248 int sum;
46249
46250 /* Make disp_cmp and disp_jcc get scheduled as late as possible. These
46251 instructions should be given the lowest priority in the
46252 scheduling process in the Haifa scheduler to make sure they will be
46253 scheduled in the same dispatch window as the instructions that reference them. */
46254 if (group == disp_jcc || group == disp_cmp)
46255 return false;
46256
46257 /* Check nonrestricted. */
46258 if (group == disp_no_group || group == disp_branch)
46259 return true;
46260
46261 /* Get last dispatch window. */
46262 if (window_list_next)
46263 window_list = window_list_next;
46264
46265 if (window_list->window_num == 1)
46266 {
46267 sum = window_list->prev->window_size + window_list->window_size;
46268
46269 if (sum == 32
46270 || (min_insn_size (insn) + sum) >= 48)
46271 /* Window 1 is full. Go for next window. */
46272 return true;
46273 }
46274
46275 num_restrict = count_num_restricted (insn, window_list);
46276
46277 if (num_restrict > num_allowable_groups[group])
46278 return false;
46279
46280 /* See if it fits in the first window. */
46281 if (window_list->window_num == 0)
46282 {
46283 /* The first window should have only single and double path
46284 uops. */
46285 if (path == path_double
46286 && (window_list->num_uops + 2) > MAX_INSN)
46287 return false;
46288 else if (path != path_single)
46289 return false;
46290 }
46291 return true;
46292 }
46293
46294 /* Add an instruction INSN with NUM_UOPS micro-operations to the
46295 dispatch window WINDOW_LIST. */
46296
46297 static void
46298 add_insn_window (rtx_insn *insn, dispatch_windows *window_list, int num_uops)
46299 {
46300 int byte_len = min_insn_size (insn);
46301 int num_insn = window_list->num_insn;
46302 int imm_size;
46303 sched_insn_info *window = window_list->window;
46304 enum dispatch_group group = get_insn_group (insn);
46305 enum insn_path path = get_insn_path (insn);
46306 int num_imm_operand;
46307 int num_imm32_operand;
46308 int num_imm64_operand;
46309
46310 if (!window_list->violation && group != disp_cmp
46311 && !fits_dispatch_window (insn))
46312 window_list->violation = true;
46313
46314 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
46315 &num_imm64_operand);
46316
46317 /* Initialize window with new instruction. */
46318 window[num_insn].insn = insn;
46319 window[num_insn].byte_len = byte_len;
46320 window[num_insn].group = group;
46321 window[num_insn].path = path;
46322 window[num_insn].imm_bytes = imm_size;
46323
46324 window_list->window_size += byte_len;
46325 window_list->num_insn = num_insn + 1;
46326 window_list->num_uops = window_list->num_uops + num_uops;
46327 window_list->imm_size += imm_size;
46328 window_list->num_imm += num_imm_operand;
46329 window_list->num_imm_32 += num_imm32_operand;
46330 window_list->num_imm_64 += num_imm64_operand;
46331
46332 if (group == disp_store)
46333 window_list->num_stores += 1;
46334 else if (group == disp_load
46335 || group == disp_prefetch)
46336 window_list->num_loads += 1;
46337 else if (group == disp_load_store)
46338 {
46339 window_list->num_stores += 1;
46340 window_list->num_loads += 1;
46341 }
46342 }
46343
46344 /* Adds a scheduled instruction, INSN, to the current dispatch window.
46345 If the total bytes of instructions or the number of instructions in
46346 the window exceeds the allowed maximum, it allocates a new window. */
46347
46348 static void
46349 add_to_dispatch_window (rtx_insn *insn)
46350 {
46351 int byte_len;
46352 dispatch_windows *window_list;
46353 dispatch_windows *next_list;
46354 dispatch_windows *window0_list;
46355 enum insn_path path;
46356 enum dispatch_group insn_group;
46357 bool insn_fits;
46358 int num_insn;
46359 int num_uops;
46360 int window_num;
46361 int insn_num_uops;
46362 int sum;
46363
46364 if (INSN_CODE (insn) < 0)
46365 return;
46366
46367 byte_len = min_insn_size (insn);
46368 window_list = dispatch_window_list;
46369 next_list = window_list->next;
46370 path = get_insn_path (insn);
46371 insn_group = get_insn_group (insn);
46372
46373 /* Get the last dispatch window. */
46374 if (next_list)
46375 window_list = dispatch_window_list->next;
46376
46377 if (path == path_single)
46378 insn_num_uops = 1;
46379 else if (path == path_double)
46380 insn_num_uops = 2;
46381 else
46382 insn_num_uops = (int) path;
46383
46384 /* If the current window is full, get a new window.
46385 Window number zero is full if MAX_INSN uops are scheduled in it.
46386 Window number one is full if window zero's bytes plus window
46387 one's bytes equal 32, if adding the bytes of the new instruction
46388 pushes the total to 48 or more, or if it already has MAX_INSN
46389 instructions in it. */
46390 num_insn = window_list->num_insn;
46391 num_uops = window_list->num_uops;
46392 window_num = window_list->window_num;
46393 insn_fits = fits_dispatch_window (insn);
46394
46395 if (num_insn >= MAX_INSN
46396 || num_uops + insn_num_uops > MAX_INSN
46397 || !(insn_fits))
46398 {
46399 window_num = ~window_num & 1;
46400 window_list = allocate_next_window (window_num);
46401 }
46402
46403 if (window_num == 0)
46404 {
46405 add_insn_window (insn, window_list, insn_num_uops);
46406 if (window_list->num_insn >= MAX_INSN
46407 && insn_group == disp_branch)
46408 {
46409 process_end_window ();
46410 return;
46411 }
46412 }
46413 else if (window_num == 1)
46414 {
46415 window0_list = window_list->prev;
46416 sum = window0_list->window_size + window_list->window_size;
46417 if (sum == 32
46418 || (byte_len + sum) >= 48)
46419 {
46420 process_end_window ();
46421 window_list = dispatch_window_list;
46422 }
46423
46424 add_insn_window (insn, window_list, insn_num_uops);
46425 }
46426 else
46427 gcc_unreachable ();
46428
46429 if (is_end_basic_block (insn_group))
46430 {
46431 /* End of basic block reached; do the end-basic-block processing. */
46432 process_end_window ();
46433 return;
46434 }
46435 }
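
/* Worked example of the accounting above (hypothetical byte counts, for
   illustration only): if window 0 holds 20 bytes and window 1 holds 24
   bytes, a 5-byte insn makes byte_len + sum = 49 >= 48, so
   process_end_window closes the pair and the insn starts a fresh window
   0.  If instead the new insn would only exceed the uop budget of the
   current window, the window number simply flips and
   allocate_next_window chains window 1 after window 0.  */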
46436
46437 /* Print the dispatch window, WINDOW_NUM, to FILE. */
46438
46439 DEBUG_FUNCTION static void
46440 debug_dispatch_window_file (FILE *file, int window_num)
46441 {
46442 dispatch_windows *list;
46443 int i;
46444
46445 if (window_num == 0)
46446 list = dispatch_window_list;
46447 else
46448 list = dispatch_window_list1;
46449
46450 fprintf (file, "Window #%d:\n", list->window_num);
46451 fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
46452 list->num_insn, list->num_uops, list->window_size);
46453 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
46454 list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
46455
46456 fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
46457 list->num_stores);
46458 fprintf (file, " insn info:\n");
46459
46460 for (i = 0; i < MAX_INSN; i++)
46461 {
46462 if (!list->window[i].insn)
46463 break;
46464 fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
46465 i, group_name[list->window[i].group],
46466 i, (void *)list->window[i].insn,
46467 i, list->window[i].path,
46468 i, list->window[i].byte_len,
46469 i, list->window[i].imm_bytes);
46470 }
46471 }
46472
46473 /* Print to stdout a dispatch window. */
46474
46475 DEBUG_FUNCTION void
46476 debug_dispatch_window (int window_num)
46477 {
46478 debug_dispatch_window_file (stdout, window_num);
46479 }
46480
46481 /* Print INSN dispatch information to FILE. */
46482
46483 DEBUG_FUNCTION static void
46484 debug_insn_dispatch_info_file (FILE *file, rtx_insn *insn)
46485 {
46486 int byte_len;
46487 enum insn_path path;
46488 enum dispatch_group group;
46489 int imm_size;
46490 int num_imm_operand;
46491 int num_imm32_operand;
46492 int num_imm64_operand;
46493
46494 if (INSN_CODE (insn) < 0)
46495 return;
46496
46497 byte_len = min_insn_size (insn);
46498 path = get_insn_path (insn);
46499 group = get_insn_group (insn);
46500 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
46501 &num_imm64_operand);
46502
46503 fprintf (file, " insn info:\n");
46504 fprintf (file, " group = %s, path = %d, byte_len = %d\n",
46505 group_name[group], path, byte_len);
46506 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
46507 num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
46508 }
46509
46510 /* Print to STDERR the status of the ready list with respect to
46511 dispatch windows. */
46512
46513 DEBUG_FUNCTION void
46514 debug_ready_dispatch (void)
46515 {
46516 int i;
46517 int no_ready = number_in_ready ();
46518
46519 fprintf (stdout, "Number of ready: %d\n", no_ready);
46520
46521 for (i = 0; i < no_ready; i++)
46522 debug_insn_dispatch_info_file (stdout, get_ready_element (i));
46523 }
46524
46525 /* This routine is the driver of the dispatch scheduler. */
46526
46527 static void
46528 do_dispatch (rtx_insn *insn, int mode)
46529 {
46530 if (mode == DISPATCH_INIT)
46531 init_dispatch_sched ();
46532 else if (mode == ADD_TO_DISPATCH_WINDOW)
46533 add_to_dispatch_window (insn);
46534 }
46535
46536 /* Return TRUE if Dispatch Scheduling is supported. */
46537
46538 static bool
46539 has_dispatch (rtx_insn *insn, int action)
46540 {
46541 if ((TARGET_BDVER1 || TARGET_BDVER2 || TARGET_BDVER3 || TARGET_BDVER4)
46542 && flag_dispatch_scheduler)
46543 switch (action)
46544 {
46545 default:
46546 return false;
46547
46548 case IS_DISPATCH_ON:
46549 return true;
46550 break;
46551
46552 case IS_CMP:
46553 return is_cmp (insn);
46554
46555 case DISPATCH_VIOLATION:
46556 return dispatch_violation ();
46557
46558 case FITS_DISPATCH_WINDOW:
46559 return fits_dispatch_window (insn);
46560 }
46561
46562 return false;
46563 }
46564
46565 /* Implementation of reassociation_width target hook used by
46566 reassoc phase to identify parallelism level in reassociated
46567 tree. The statement's tree_code is passed in OPC. The arguments'
46568 type is passed in MODE.
46569
46570 Currently parallel reassociation is enabled for Atom
46571 processors only and we set reassociation width to be 2
46572 because Atom may issue up to 2 instructions per cycle.
46573
46574 Return value should be fixed if parallel reassociation is
46575 enabled for other processors. */
46576
46577 static int
46578 ix86_reassociation_width (unsigned int, enum machine_mode mode)
46579 {
46580 int res = 1;
46581
46582 /* Vector part. */
46583 if (VECTOR_MODE_P (mode))
46584 {
46585 if (TARGET_VECTOR_PARALLEL_EXECUTION)
46586 return 2;
46587 else
46588 return 1;
46589 }
46590
46591 /* Scalar part. */
46592 if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL)
46593 res = 2;
46594 else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL)
46595 res = 2;
46596
46597 return res;
46598 }
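
/* What a reassociation width of 2 buys, sketched in source form (purely
   illustrative, not compiler code): the reassoc pass may rewrite a
   serial chain into two independent partial sums that can issue in
   parallel on a 2-wide unit.  */
static float
reassoc_width2_model (const float *a)
{
  /* ((a0 + a1) + (a2 + a3)) instead of (((a0 + a1) + a2) + a3).  */
  return (a[0] + a[1]) + (a[2] + a[3]);
}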
46599
46600 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
46601 place emms and femms instructions. */
46602
46603 static enum machine_mode
46604 ix86_preferred_simd_mode (enum machine_mode mode)
46605 {
46606 if (!TARGET_SSE)
46607 return word_mode;
46608
46609 switch (mode)
46610 {
46611 case QImode:
46612 return TARGET_AVX512BW ? V64QImode :
46613 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V32QImode : V16QImode;
46614 case HImode:
46615 return TARGET_AVX512BW ? V32HImode :
46616 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V16HImode : V8HImode;
46617 case SImode:
46618 return TARGET_AVX512F ? V16SImode :
46619 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode;
46620 case DImode:
46621 return TARGET_AVX512F ? V8DImode :
46622 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V4DImode : V2DImode;
46623
46624 case SFmode:
46625 if (TARGET_AVX512F)
46626 return V16SFmode;
46627 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
46628 return V8SFmode;
46629 else
46630 return V4SFmode;
46631
46632 case DFmode:
46633 if (!TARGET_VECTORIZE_DOUBLE)
46634 return word_mode;
46635 else if (TARGET_AVX512F)
46636 return V8DFmode;
46637 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
46638 return V4DFmode;
46639 else if (TARGET_SSE2)
46640 return V2DFmode;
46641 /* FALLTHRU */
46642
46643 default:
46644 return word_mode;
46645 }
46646 }
46647
46648 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
46649 vectors. If AVX512F is enabled then try vectorizing with 512bit,
46650 256bit and 128bit vectors. */
46651
46652 static unsigned int
46653 ix86_autovectorize_vector_sizes (void)
46654 {
46655 return TARGET_AVX512F ? 64 | 32 | 16 :
46656 (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
46657 }
46658
46659 \f
46660
46661 /* Return class of registers which could be used for pseudo of MODE
46662 and of class RCLASS for spilling instead of memory. Return NO_REGS
46663 if it is not possible or not profitable. */
46664 static reg_class_t
46665 ix86_spill_class (reg_class_t rclass, enum machine_mode mode)
46666 {
46667 if (TARGET_SSE && TARGET_GENERAL_REGS_SSE_SPILL && ! TARGET_MMX
46668 && (mode == SImode || (TARGET_64BIT && mode == DImode))
46669 && rclass != NO_REGS && INTEGER_CLASS_P (rclass))
46670 return ALL_SSE_REGS;
46671 return NO_REGS;
46672 }
46673
46674 /* Implement targetm.vectorize.init_cost. */
46675
46676 static void *
46677 ix86_init_cost (struct loop *)
46678 {
46679 unsigned *cost = XNEWVEC (unsigned, 3);
46680 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
46681 return cost;
46682 }
46683
46684 /* Implement targetm.vectorize.add_stmt_cost. */
46685
46686 static unsigned
46687 ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
46688 struct _stmt_vec_info *stmt_info, int misalign,
46689 enum vect_cost_model_location where)
46690 {
46691 unsigned *cost = (unsigned *) data;
46692 unsigned retval = 0;
46693
46694 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
46695 int stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
46696
46697 /* Statements in an inner loop relative to the loop being
46698 vectorized are weighted more heavily. The value here is
46699 arbitrary and could potentially be improved with analysis. */
46700 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
46701 count *= 50; /* FIXME. */
46702
46703 retval = (unsigned) (count * stmt_cost);
46704
46705 /* We need to multiply all vector stmt costs by 1.7 (estimated cost) for
46706 Silvermont, as it has an out-of-order integer pipeline and can execute
46707 2 scalar instructions per tick, but has an in-order SIMD pipeline. */
46708 if (TARGET_SILVERMONT || TARGET_INTEL)
46709 if (stmt_info && stmt_info->stmt)
46710 {
46711 tree lhs_op = gimple_get_lhs (stmt_info->stmt);
46712 if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE)
46713 retval = (retval * 17) / 10;
46714 }
46715
46716 cost[where] += retval;
46717
46718 return retval;
46719 }
46720
46721 /* Implement targetm.vectorize.finish_cost. */
46722
46723 static void
46724 ix86_finish_cost (void *data, unsigned *prologue_cost,
46725 unsigned *body_cost, unsigned *epilogue_cost)
46726 {
46727 unsigned *cost = (unsigned *) data;
46728 *prologue_cost = cost[vect_prologue];
46729 *body_cost = cost[vect_body];
46730 *epilogue_cost = cost[vect_epilogue];
46731 }
46732
46733 /* Implement targetm.vectorize.destroy_cost_data. */
46734
46735 static void
46736 ix86_destroy_cost_data (void *data)
46737 {
46738 free (data);
46739 }
46740
46741 /* Validate target specific memory model bits in VAL. */
46742
46743 static unsigned HOST_WIDE_INT
46744 ix86_memmodel_check (unsigned HOST_WIDE_INT val)
46745 {
46746 unsigned HOST_WIDE_INT model = val & MEMMODEL_MASK;
46747 bool strong;
46748
46749 if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
46750 |MEMMODEL_MASK)
46751 || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
46752 {
46753 warning (OPT_Winvalid_memory_model,
46754 "Unknown architecture specific memory model");
46755 return MEMMODEL_SEQ_CST;
46756 }
46757 strong = (model == MEMMODEL_ACQ_REL || model == MEMMODEL_SEQ_CST);
46758 if (val & IX86_HLE_ACQUIRE && !(model == MEMMODEL_ACQUIRE || strong))
46759 {
46760 warning (OPT_Winvalid_memory_model,
46761 "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
46762 return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
46763 }
46764 if (val & IX86_HLE_RELEASE && !(model == MEMMODEL_RELEASE || strong))
46765 {
46766 warning (OPT_Winvalid_memory_model,
46767 "HLE_RELEASE not used with RELEASE or stronger memory model");
46768 return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
46769 }
46770 return val;
46771 }
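
/* Example of the target-specific bits this hook validates: on x86 the
   macros __ATOMIC_HLE_ACQUIRE and __ATOMIC_HLE_RELEASE may be ORed into
   the standard __ATOMIC_* models, and mismatched combinations fall into
   the warnings above.  A sketch of a typical HLE-elided lock (the lock
   variable and function names are hypothetical):  */
static int hle_lock_sketch_var;

static void
hle_lock_sketch (void)
{
  while (__atomic_exchange_n (&hle_lock_sketch_var, 1,
			      __ATOMIC_ACQUIRE | __ATOMIC_HLE_ACQUIRE))
    ;						/* spin */
  /* ... critical section ... */
  __atomic_store_n (&hle_lock_sketch_var, 0,
		    __ATOMIC_RELEASE | __ATOMIC_HLE_RELEASE);
}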
46772
46773 /* Set CLONEI->vecsize_mangle, CLONEI->vecsize_int,
46774 CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
46775 CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted,
46776 or number of vecsize_mangle variants that should be emitted. */
46777
46778 static int
46779 ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
46780 struct cgraph_simd_clone *clonei,
46781 tree base_type, int num)
46782 {
46783 int ret = 1;
46784
46785 if (clonei->simdlen
46786 && (clonei->simdlen < 2
46787 || clonei->simdlen > 16
46788 || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
46789 {
46790 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
46791 "unsupported simdlen %d", clonei->simdlen);
46792 return 0;
46793 }
46794
46795 tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
46796 if (TREE_CODE (ret_type) != VOID_TYPE)
46797 switch (TYPE_MODE (ret_type))
46798 {
46799 case QImode:
46800 case HImode:
46801 case SImode:
46802 case DImode:
46803 case SFmode:
46804 case DFmode:
46805 /* case SCmode: */
46806 /* case DCmode: */
46807 break;
46808 default:
46809 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
46810 "unsupported return type %qT for simd\n", ret_type);
46811 return 0;
46812 }
46813
46814 tree t;
46815 int i;
46816
46817 for (t = DECL_ARGUMENTS (node->decl), i = 0; t; t = DECL_CHAIN (t), i++)
46818 /* FIXME: Shouldn't we allow such arguments if they are uniform? */
46819 switch (TYPE_MODE (TREE_TYPE (t)))
46820 {
46821 case QImode:
46822 case HImode:
46823 case SImode:
46824 case DImode:
46825 case SFmode:
46826 case DFmode:
46827 /* case SCmode: */
46828 /* case DCmode: */
46829 break;
46830 default:
46831 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
46832 "unsupported argument type %qT for simd\n", TREE_TYPE (t));
46833 return 0;
46834 }
46835
46836 if (clonei->cilk_elemental)
46837 {
46838 /* Parse the processor clause here. If not present, default to 'b'. */
46839 clonei->vecsize_mangle = 'b';
46840 }
46841 else if (!TREE_PUBLIC (node->decl))
46842 {
46843 /* If the function isn't exported, we can pick up just one ISA
46844 for the clones. */
46845 if (TARGET_AVX2)
46846 clonei->vecsize_mangle = 'd';
46847 else if (TARGET_AVX)
46848 clonei->vecsize_mangle = 'c';
46849 else
46850 clonei->vecsize_mangle = 'b';
46851 ret = 1;
46852 }
46853 else
46854 {
46855 clonei->vecsize_mangle = "bcd"[num];
46856 ret = 3;
46857 }
46858 switch (clonei->vecsize_mangle)
46859 {
46860 case 'b':
46861 clonei->vecsize_int = 128;
46862 clonei->vecsize_float = 128;
46863 break;
46864 case 'c':
46865 clonei->vecsize_int = 128;
46866 clonei->vecsize_float = 256;
46867 break;
46868 case 'd':
46869 clonei->vecsize_int = 256;
46870 clonei->vecsize_float = 256;
46871 break;
46872 }
46873 if (clonei->simdlen == 0)
46874 {
46875 if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
46876 clonei->simdlen = clonei->vecsize_int;
46877 else
46878 clonei->simdlen = clonei->vecsize_float;
46879 clonei->simdlen /= GET_MODE_BITSIZE (TYPE_MODE (base_type));
46880 if (clonei->simdlen > 16)
46881 clonei->simdlen = 16;
46882 }
46883 return ret;
46884 }
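
/* Worked example of the simdlen computation above (hypothetical
   declaration, for illustration only): for an exported

     #pragma omp declare simd
     int f (int x);

   all three clones 'b', 'c' and 'd' are emitted; for the 'd' (AVX2)
   variant vecsize_int is 256, so
   simdlen = 256 / GET_MODE_BITSIZE (SImode) = 8.  */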
46885
46886 /* Add target attribute to SIMD clone NODE if needed. */
46887
46888 static void
46889 ix86_simd_clone_adjust (struct cgraph_node *node)
46890 {
46891 const char *str = NULL;
46892 gcc_assert (node->decl == cfun->decl);
46893 switch (node->simdclone->vecsize_mangle)
46894 {
46895 case 'b':
46896 if (!TARGET_SSE2)
46897 str = "sse2";
46898 break;
46899 case 'c':
46900 if (!TARGET_AVX)
46901 str = "avx";
46902 break;
46903 case 'd':
46904 if (!TARGET_AVX2)
46905 str = "avx2";
46906 break;
46907 default:
46908 gcc_unreachable ();
46909 }
46910 if (str == NULL)
46911 return;
46912 push_cfun (NULL);
46913 tree args = build_tree_list (NULL_TREE, build_string (strlen (str), str));
46914 bool ok = ix86_valid_target_attribute_p (node->decl, NULL, args, 0);
46915 gcc_assert (ok);
46916 pop_cfun ();
46917 ix86_previous_fndecl = NULL_TREE;
46918 ix86_set_current_function (node->decl);
46919 }
46920
46921 /* If SIMD clone NODE can't be used in a vectorized loop
46922 in current function, return -1, otherwise return a badness of using it
46923 (0 if it is most desirable from vecsize_mangle point of view, 1
46924 slightly less desirable, etc.). */
46925
46926 static int
46927 ix86_simd_clone_usable (struct cgraph_node *node)
46928 {
46929 switch (node->simdclone->vecsize_mangle)
46930 {
46931 case 'b':
46932 if (!TARGET_SSE2)
46933 return -1;
46934 if (!TARGET_AVX)
46935 return 0;
46936 return TARGET_AVX2 ? 2 : 1;
46937 case 'c':
46938 if (!TARGET_AVX)
46939 return -1;
46940 return TARGET_AVX2 ? 1 : 0;
46941 break;
46942 case 'd':
46943 if (!TARGET_AVX2)
46944 return -1;
46945 return 0;
46946 default:
46947 gcc_unreachable ();
46948 }
46949 }
46950
46951 /* This function counts the number of memory references.
46952 The value determines the unrolling factor for the
46953 bdver3 and bdver4 architectures. */
46954
46955 static int
46956 ix86_loop_memcount (rtx *x, unsigned *mem_count)
46957 {
46958 if (*x != NULL_RTX && MEM_P (*x))
46959 {
46960 enum machine_mode mode;
46961 unsigned int n_words;
46962
46963 mode = GET_MODE (*x);
46964 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
46965
46966 if (n_words > 4)
46967 (*mem_count)+=2;
46968 else
46969 (*mem_count)+=1;
46970 }
46971 return 0;
46972 }
46973
46974 /* This function adjusts the unroll factor based on
46975 the hardware capabilities. For example, bdver3 has
46976 a loop buffer which makes unrolling of smaller
46977 loops less important. This function decides the
46978 unroll factor using the number of memory references
46979 (the value 32 is used) as a heuristic. */
46980
46981 static unsigned
46982 ix86_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
46983 {
46984 basic_block *bbs;
46985 rtx_insn *insn;
46986 unsigned i;
46987 unsigned mem_count = 0;
46988
46989 if (!TARGET_ADJUST_UNROLL)
46990 return nunroll;
46991
46992 /* Count the number of memory references within the loop body. */
46993 bbs = get_loop_body (loop);
46994 for (i = 0; i < loop->num_nodes; i++)
46995 {
46996 for (insn = BB_HEAD (bbs[i]); insn != BB_END (bbs[i]); insn = NEXT_INSN (insn))
46997 if (NONDEBUG_INSN_P (insn))
46998 for_each_rtx_in_insn (&insn, (rtx_function) ix86_loop_memcount,
46999 &mem_count);
47000 }
47001 free (bbs);
47002
47003 if (mem_count && mem_count <= 32)
47004 return 32 / mem_count;
47005
47006 return nunroll;
47007 }
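
/* Worked example of the heuristic above (hypothetical loop, for
   illustration only): a loop body with 5 counted memory references gets
   an unroll factor of 32 / 5 = 6, while a body with more than 32
   references keeps the caller's NUNROLL unchanged.  */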
47008
47009
47010 /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
47011
47012 static bool
47013 ix86_float_exceptions_rounding_supported_p (void)
47014 {
47015 /* For x87 floating point with standard excess precision handling,
47016 there is no adddf3 pattern (since x87 floating point only has
47017 XFmode operations) so the default hook implementation gets this
47018 wrong. */
47019 return TARGET_80387 || TARGET_SSE_MATH;
47020 }
47021
47022 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
47023
47024 static void
47025 ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
47026 {
47027 if (!TARGET_80387 && !TARGET_SSE_MATH)
47028 return;
47029 tree exceptions_var = create_tmp_var (integer_type_node, NULL);
47030 if (TARGET_80387)
47031 {
47032 tree fenv_index_type = build_index_type (size_int (6));
47033 tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
47034 tree fenv_var = create_tmp_var (fenv_type, NULL);
47035 mark_addressable (fenv_var);
47036 tree fenv_ptr = build_pointer_type (fenv_type);
47037 tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
47038 fenv_addr = fold_convert (ptr_type_node, fenv_addr);
47039 tree fnstenv = ix86_builtins[IX86_BUILTIN_FNSTENV];
47040 tree fldenv = ix86_builtins[IX86_BUILTIN_FLDENV];
47041 tree fnstsw = ix86_builtins[IX86_BUILTIN_FNSTSW];
47042 tree fnclex = ix86_builtins[IX86_BUILTIN_FNCLEX];
47043 tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
47044 tree hold_fnclex = build_call_expr (fnclex, 0);
47045 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_fnstenv,
47046 hold_fnclex);
47047 *clear = build_call_expr (fnclex, 0);
47048 tree sw_var = create_tmp_var (short_unsigned_type_node, NULL);
47049 tree fnstsw_call = build_call_expr (fnstsw, 0);
47050 tree sw_mod = build2 (MODIFY_EXPR, short_unsigned_type_node,
47051 sw_var, fnstsw_call);
47052 tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
47053 tree update_mod = build2 (MODIFY_EXPR, integer_type_node,
47054 exceptions_var, exceptions_x87);
47055 *update = build2 (COMPOUND_EXPR, integer_type_node,
47056 sw_mod, update_mod);
47057 tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
47058 *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
47059 }
47060 if (TARGET_SSE_MATH)
47061 {
47062 tree mxcsr_orig_var = create_tmp_var (unsigned_type_node, NULL);
47063 tree mxcsr_mod_var = create_tmp_var (unsigned_type_node, NULL);
47064 tree stmxcsr = ix86_builtins[IX86_BUILTIN_STMXCSR];
47065 tree ldmxcsr = ix86_builtins[IX86_BUILTIN_LDMXCSR];
47066 tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
47067 tree hold_assign_orig = build2 (MODIFY_EXPR, unsigned_type_node,
47068 mxcsr_orig_var, stmxcsr_hold_call);
47069 tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
47070 mxcsr_orig_var,
47071 build_int_cst (unsigned_type_node, 0x1f80));
47072 hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
47073 build_int_cst (unsigned_type_node, 0xffffffc0));
47074 tree hold_assign_mod = build2 (MODIFY_EXPR, unsigned_type_node,
47075 mxcsr_mod_var, hold_mod_val);
47076 tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
47077 tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
47078 hold_assign_orig, hold_assign_mod);
47079 hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
47080 ldmxcsr_hold_call);
47081 if (*hold)
47082 *hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
47083 else
47084 *hold = hold_all;
47085 tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
47086 if (*clear)
47087 *clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
47088 ldmxcsr_clear_call);
47089 else
47090 *clear = ldmxcsr_clear_call;
47091 tree stxmcsr_update_call = build_call_expr (stmxcsr, 0);
47092 tree exceptions_sse = fold_convert (integer_type_node,
47093 stxmcsr_update_call);
47094 if (*update)
47095 {
47096 tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
47097 exceptions_var, exceptions_sse);
47098 tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
47099 exceptions_var, exceptions_mod);
47100 *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
47101 exceptions_assign);
47102 }
47103 else
47104 *update = build2 (MODIFY_EXPR, integer_type_node,
47105 exceptions_var, exceptions_sse);
47106 tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
47107 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
47108 ldmxcsr_update_call);
47109 }
47110 tree atomic_feraiseexcept
47111 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
47112 tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
47113 1, exceptions_var);
47114 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
47115 atomic_feraiseexcept_call);
47116 }
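
/* Runtime model of the SSE half of the hold/clear/update triple built
   above, written with the <xmmintrin.h> intrinsics.  This is a sketch of
   the intent only, not code the compiler uses; the function name is
   hypothetical.  */
static void
fenv_sse_model (void)
{
  unsigned int mxcsr_orig = _mm_getcsr ();
  /* hold: mask all exceptions (bits 7..12) and clear the sticky status
     flags (bits 0..5), i.e. the (x | 0x1f80) & 0xffffffc0 above.  */
  unsigned int mxcsr_mod = (mxcsr_orig | 0x1f80) & 0xffffffc0;
  _mm_setcsr (mxcsr_mod);

  /* ... protected code runs here; "clear" reloads mxcsr_mod ... */

  /* update: collect any newly raised flags, then restore the original
     environment; the flags would be handed to __atomic_feraiseexcept.  */
  unsigned int exceptions = _mm_getcsr () & 0x3f;
  _mm_setcsr (mxcsr_orig);
  (void) exceptions;
}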
47117
47118 /* Initialize the GCC target structure. */
47119 #undef TARGET_RETURN_IN_MEMORY
47120 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
47121
47122 #undef TARGET_LEGITIMIZE_ADDRESS
47123 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
47124
47125 #undef TARGET_ATTRIBUTE_TABLE
47126 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
47127 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
47128 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
47129 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
47130 # undef TARGET_MERGE_DECL_ATTRIBUTES
47131 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
47132 #endif
47133
47134 #undef TARGET_COMP_TYPE_ATTRIBUTES
47135 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
47136
47137 #undef TARGET_INIT_BUILTINS
47138 #define TARGET_INIT_BUILTINS ix86_init_builtins
47139 #undef TARGET_BUILTIN_DECL
47140 #define TARGET_BUILTIN_DECL ix86_builtin_decl
47141 #undef TARGET_EXPAND_BUILTIN
47142 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
47143
47144 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
47145 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
47146 ix86_builtin_vectorized_function
47147
47148 #undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
47149 #define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load
47150
47151 #undef TARGET_VECTORIZE_BUILTIN_TM_STORE
47152 #define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store
47153
47154 #undef TARGET_VECTORIZE_BUILTIN_GATHER
47155 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
47156
47157 #undef TARGET_BUILTIN_RECIPROCAL
47158 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
47159
47160 #undef TARGET_ASM_FUNCTION_EPILOGUE
47161 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
47162
47163 #undef TARGET_ENCODE_SECTION_INFO
47164 #ifndef SUBTARGET_ENCODE_SECTION_INFO
47165 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
47166 #else
47167 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
47168 #endif
47169
47170 #undef TARGET_ASM_OPEN_PAREN
47171 #define TARGET_ASM_OPEN_PAREN ""
47172 #undef TARGET_ASM_CLOSE_PAREN
47173 #define TARGET_ASM_CLOSE_PAREN ""
47174
47175 #undef TARGET_ASM_BYTE_OP
47176 #define TARGET_ASM_BYTE_OP ASM_BYTE
47177
47178 #undef TARGET_ASM_ALIGNED_HI_OP
47179 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
47180 #undef TARGET_ASM_ALIGNED_SI_OP
47181 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
47182 #ifdef ASM_QUAD
47183 #undef TARGET_ASM_ALIGNED_DI_OP
47184 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
47185 #endif
47186
47187 #undef TARGET_PROFILE_BEFORE_PROLOGUE
47188 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
47189
47190 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
47191 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
47192
47193 #undef TARGET_ASM_UNALIGNED_HI_OP
47194 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
47195 #undef TARGET_ASM_UNALIGNED_SI_OP
47196 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
47197 #undef TARGET_ASM_UNALIGNED_DI_OP
47198 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
47199
47200 #undef TARGET_PRINT_OPERAND
47201 #define TARGET_PRINT_OPERAND ix86_print_operand
47202 #undef TARGET_PRINT_OPERAND_ADDRESS
47203 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
47204 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
47205 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
47206 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
47207 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
47208
47209 #undef TARGET_SCHED_INIT_GLOBAL
47210 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
47211 #undef TARGET_SCHED_ADJUST_COST
47212 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
47213 #undef TARGET_SCHED_ISSUE_RATE
47214 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
47215 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
47216 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
47217 ia32_multipass_dfa_lookahead
47218 #undef TARGET_SCHED_MACRO_FUSION_P
47219 #define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
47220 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
47221 #define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
47222
47223 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
47224 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
47225
47226 #undef TARGET_MEMMODEL_CHECK
47227 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
47228
47229 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
47230 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv
47231
47232 #ifdef HAVE_AS_TLS
47233 #undef TARGET_HAVE_TLS
47234 #define TARGET_HAVE_TLS true
47235 #endif
47236 #undef TARGET_CANNOT_FORCE_CONST_MEM
47237 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
47238 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
47239 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
47240
47241 #undef TARGET_DELEGITIMIZE_ADDRESS
47242 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
47243
47244 #undef TARGET_MS_BITFIELD_LAYOUT_P
47245 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
47246
47247 #if TARGET_MACHO
47248 #undef TARGET_BINDS_LOCAL_P
47249 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
47250 #endif
47251 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
47252 #undef TARGET_BINDS_LOCAL_P
47253 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
47254 #endif
47255
47256 #undef TARGET_ASM_OUTPUT_MI_THUNK
47257 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
47258 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
47259 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
47260
47261 #undef TARGET_ASM_FILE_START
47262 #define TARGET_ASM_FILE_START x86_file_start
47263
47264 #undef TARGET_OPTION_OVERRIDE
47265 #define TARGET_OPTION_OVERRIDE ix86_option_override
47266
47267 #undef TARGET_REGISTER_MOVE_COST
47268 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
47269 #undef TARGET_MEMORY_MOVE_COST
47270 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
47271 #undef TARGET_RTX_COSTS
47272 #define TARGET_RTX_COSTS ix86_rtx_costs
47273 #undef TARGET_ADDRESS_COST
47274 #define TARGET_ADDRESS_COST ix86_address_cost
47275
47276 #undef TARGET_FIXED_CONDITION_CODE_REGS
47277 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
47278 #undef TARGET_CC_MODES_COMPATIBLE
47279 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
47280
47281 #undef TARGET_MACHINE_DEPENDENT_REORG
47282 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
47283
47284 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
47285 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
47286
47287 #undef TARGET_BUILD_BUILTIN_VA_LIST
47288 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
47289
47290 #undef TARGET_FOLD_BUILTIN
47291 #define TARGET_FOLD_BUILTIN ix86_fold_builtin
47292
47293 #undef TARGET_COMPARE_VERSION_PRIORITY
47294 #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
47295
47296 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
47297 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
47298 ix86_generate_version_dispatcher_body
47299
47300 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
47301 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
47302 ix86_get_function_versions_dispatcher
47303
47304 #undef TARGET_ENUM_VA_LIST_P
47305 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
47306
47307 #undef TARGET_FN_ABI_VA_LIST
47308 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
47309
47310 #undef TARGET_CANONICAL_VA_LIST_TYPE
47311 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
47312
47313 #undef TARGET_EXPAND_BUILTIN_VA_START
47314 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
47315
47316 #undef TARGET_MD_ASM_CLOBBERS
47317 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
47318
47319 #undef TARGET_PROMOTE_PROTOTYPES
47320 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
47321 #undef TARGET_SETUP_INCOMING_VARARGS
47322 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
47323 #undef TARGET_MUST_PASS_IN_STACK
47324 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
47325 #undef TARGET_FUNCTION_ARG_ADVANCE
47326 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
47327 #undef TARGET_FUNCTION_ARG
47328 #define TARGET_FUNCTION_ARG ix86_function_arg
47329 #undef TARGET_FUNCTION_ARG_BOUNDARY
47330 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
47331 #undef TARGET_PASS_BY_REFERENCE
47332 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
47333 #undef TARGET_INTERNAL_ARG_POINTER
47334 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
47335 #undef TARGET_UPDATE_STACK_BOUNDARY
47336 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
47337 #undef TARGET_GET_DRAP_RTX
47338 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
47339 #undef TARGET_STRICT_ARGUMENT_NAMING
47340 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
47341 #undef TARGET_STATIC_CHAIN
47342 #define TARGET_STATIC_CHAIN ix86_static_chain
47343 #undef TARGET_TRAMPOLINE_INIT
47344 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
47345 #undef TARGET_RETURN_POPS_ARGS
47346 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
47347
47348 #undef TARGET_LEGITIMATE_COMBINED_INSN
47349 #define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn
47350
47351 #undef TARGET_ASAN_SHADOW_OFFSET
47352 #define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset
47353
47354 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
47355 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
47356
47357 #undef TARGET_SCALAR_MODE_SUPPORTED_P
47358 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
47359
47360 #undef TARGET_VECTOR_MODE_SUPPORTED_P
47361 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
47362
47363 #undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
47364 #define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
47365 ix86_libgcc_floating_mode_supported_p
47366
47367 #undef TARGET_C_MODE_FOR_SUFFIX
47368 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
47369
47370 #ifdef HAVE_AS_TLS
47371 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
47372 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
47373 #endif
47374
47375 #ifdef SUBTARGET_INSERT_ATTRIBUTES
47376 #undef TARGET_INSERT_ATTRIBUTES
47377 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
47378 #endif
47379
47380 #undef TARGET_MANGLE_TYPE
47381 #define TARGET_MANGLE_TYPE ix86_mangle_type
47382
47383 #if !TARGET_MACHO
47384 #undef TARGET_STACK_PROTECT_FAIL
47385 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
47386 #endif
47387
47388 #undef TARGET_FUNCTION_VALUE
47389 #define TARGET_FUNCTION_VALUE ix86_function_value
47390
47391 #undef TARGET_FUNCTION_VALUE_REGNO_P
47392 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
47393
47394 #undef TARGET_PROMOTE_FUNCTION_MODE
47395 #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
47396
47397 #undef TARGET_MEMBER_TYPE_FORCES_BLK
47398 #define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk
47399
47400 #undef TARGET_INSTANTIATE_DECLS
47401 #define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls
47402
47403 #undef TARGET_SECONDARY_RELOAD
47404 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
47405
47406 #undef TARGET_CLASS_MAX_NREGS
47407 #define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
47408
47409 #undef TARGET_PREFERRED_RELOAD_CLASS
47410 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
47411 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
47412 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
47413 #undef TARGET_CLASS_LIKELY_SPILLED_P
47414 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
47415
47416 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
47417 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
47418 ix86_builtin_vectorization_cost
47419 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
47420 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
47421 ix86_vectorize_vec_perm_const_ok
47422 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
47423 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
47424 ix86_preferred_simd_mode
47425 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
47426 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
47427 ix86_autovectorize_vector_sizes
47428 #undef TARGET_VECTORIZE_INIT_COST
47429 #define TARGET_VECTORIZE_INIT_COST ix86_init_cost
47430 #undef TARGET_VECTORIZE_ADD_STMT_COST
47431 #define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
47432 #undef TARGET_VECTORIZE_FINISH_COST
47433 #define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
47434 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
47435 #define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data
47436
47437 #undef TARGET_SET_CURRENT_FUNCTION
47438 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
47439
47440 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
47441 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
47442
47443 #undef TARGET_OPTION_SAVE
47444 #define TARGET_OPTION_SAVE ix86_function_specific_save
47445
47446 #undef TARGET_OPTION_RESTORE
47447 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
47448
47449 #undef TARGET_OPTION_PRINT
47450 #define TARGET_OPTION_PRINT ix86_function_specific_print
47451
47452 #undef TARGET_OPTION_FUNCTION_VERSIONS
47453 #define TARGET_OPTION_FUNCTION_VERSIONS ix86_function_versions
47454
47455 #undef TARGET_CAN_INLINE_P
47456 #define TARGET_CAN_INLINE_P ix86_can_inline_p
47457
47458 #undef TARGET_EXPAND_TO_RTL_HOOK
47459 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
47460
47461 #undef TARGET_LEGITIMATE_ADDRESS_P
47462 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
47463
47464 #undef TARGET_LRA_P
47465 #define TARGET_LRA_P hook_bool_void_true
47466
47467 #undef TARGET_REGISTER_PRIORITY
47468 #define TARGET_REGISTER_PRIORITY ix86_register_priority
47469
47470 #undef TARGET_REGISTER_USAGE_LEVELING_P
47471 #define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true
47472
47473 #undef TARGET_LEGITIMATE_CONSTANT_P
47474 #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
47475
47476 #undef TARGET_FRAME_POINTER_REQUIRED
47477 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
47478
47479 #undef TARGET_CAN_ELIMINATE
47480 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
47481
47482 #undef TARGET_EXTRA_LIVE_ON_ENTRY
47483 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
47484
47485 #undef TARGET_ASM_CODE_END
47486 #define TARGET_ASM_CODE_END ix86_code_end
47487
47488 #undef TARGET_CONDITIONAL_REGISTER_USAGE
47489 #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
47490
47491 #if TARGET_MACHO
47492 #undef TARGET_INIT_LIBFUNCS
47493 #define TARGET_INIT_LIBFUNCS darwin_rename_builtins
47494 #endif
47495
47496 #undef TARGET_LOOP_UNROLL_ADJUST
47497 #define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust
47498
47499 #undef TARGET_SPILL_CLASS
47500 #define TARGET_SPILL_CLASS ix86_spill_class
47501
47502 #undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
47503 #define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
47504 ix86_simd_clone_compute_vecsize_and_simdlen
47505
47506 #undef TARGET_SIMD_CLONE_ADJUST
47507 #define TARGET_SIMD_CLONE_ADJUST \
47508 ix86_simd_clone_adjust
47509
47510 #undef TARGET_SIMD_CLONE_USABLE
47511 #define TARGET_SIMD_CLONE_USABLE \
47512 ix86_simd_clone_usable
47513
47514 #undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
47515 #define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
47516 ix86_float_exceptions_rounding_supported_p
47517
47518 #undef TARGET_MODE_EMIT
47519 #define TARGET_MODE_EMIT ix86_emit_mode_set
47520
47521 #undef TARGET_MODE_NEEDED
47522 #define TARGET_MODE_NEEDED ix86_mode_needed
47523
47524 #undef TARGET_MODE_AFTER
47525 #define TARGET_MODE_AFTER ix86_mode_after
47526
47527 #undef TARGET_MODE_ENTRY
47528 #define TARGET_MODE_ENTRY ix86_mode_entry
47529
47530 #undef TARGET_MODE_EXIT
47531 #define TARGET_MODE_EXIT ix86_mode_exit
47532
47533 #undef TARGET_MODE_PRIORITY
47534 #define TARGET_MODE_PRIORITY ix86_mode_priority
47535
47536 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
47537 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
47538
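/* TARGET_INITIALIZER, defined in target-def.h, expands to an aggregate
   initializer built from the TARGET_* macros above, yielding the target
   hook vector for the i386 port.  */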
47539 struct gcc_target targetm = TARGET_INITIALIZER;
47540 \f
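/* Pull in the garbage-collector root tables generated by gengtype for
   this file.  */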
47541 #include "gt-i386.h"